\documentclass[10pt]{article}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{xspace}
\makeatletter
% \cramped: compact list layout.  Invoke immediately after
% \begin{itemize} (see the usage at the hard-set definition below) to
% remove inter-item spacing and reclaim the label indentation so item
% text lines up with the surrounding paragraph.  It manipulates LaTeX's
% internal list parameters, hence the \makeatletter/\makeatother guard.
\newcommand{\cramped} % "Cramped" list style.
{\parskip\@outerparskip\@topsep\parskip % put after \begin{itemize}
\@topsepadd2pt\itemsep0pt % 2pt above the list, no space between items
\settowidth{\labelwidth}{\@itemlabel} % label box exactly as wide as the label
\advance\leftmargin-\labelsep % pull the margin back by the label gap...
\advance\leftmargin-\labelwidth % ...and by the label width itself
\advance\@totalleftmargin-\leftmargin % keep the cumulative indent consistent
\advance\linewidth\leftmargin % widen the text lines by what was reclaimed
\parshape1\@totalleftmargin\linewidth} % apply the adjusted paragraph shape
\makeatother
% Raw theorem-like counters/environments.  Corollary shares Theorem's
% counter; Proposition shares Definition's counter.
\newtheorem{_defi}{Definition}
\newtheorem{_thm}{Theorem}
\newtheorem{_lem}{Lemma}
\newtheorem{_cor}[_thm]{Corollary}
\newtheorem{_prop}[_defi]{Proposition}
% Proof environments: bold "Proof:" (or "Proof of <#1>:") header, ended
% with a right-aligned halmos box.
\newenvironment{proof}{\vspace{.3\baselineskip}\noindent{\bf
Proof:}}{{\hfill $\Box$\newline}\vspace{.3\baselineskip}}
\newenvironment{proof-name}[1]{\vspace{.3\baselineskip}\noindent{\bf
Proof of #1:}}{{\hfill $\Box$\newline}\vspace{.3\baselineskip}}
% User-facing wrappers that auto-attach a \label (def:/thm:/lem:/cor:/
% prop: prefix + #1, matching \defref etc. below).  The *-name variants
% additionally pass a display name (#2) to the theorem head.
\newenvironment{defi}[1]{\begin{_defi} \label{def:#1}}{{\hfill $\Box$}\end{_defi}}
\newenvironment{defi-name}[2]{\begin{_defi}[#2] \label{def:#1}}{{\hfill $\Box$} \end{_defi}}
\newenvironment{thm}[1]{\begin{_thm} \label{thm:#1}}{\end{_thm}}
\newenvironment{thm-name}[2]{\begin{_thm}[#2] \label{thm:#1}} {\end{_thm}}
\newenvironment{lem}[1]{\begin{_lem} \label{lem:#1}}{\end{_lem}}
\newenvironment{lem-name}[2]{\begin{_lem}[#2] \label{lem:#1}}{\end{_lem}}
\newenvironment{cor}[1]{\begin{_cor} \label{cor:#1}}{\end{_cor}}
\newenvironment{prop}[1]{\begin{_prop} \label{prop:#1}}{\end{_prop}}
\newenvironment{prop-name}[2]{\begin{_prop}[#2] \label{prop:#1}}{\end{_prop}}
% Cross-reference helpers: \defref{foo} -> "Definition~\ref{def:foo}",
% etc.  The non-breaking space (~) prevents a line break between the
% word and the number.
\newcommand{\defref}[1]{Definition~\ref{def:#1}}
\newcommand{\thmref}[1]{Theorem~\ref{thm:#1}}
\newcommand{\lemref}[1]{Lemma~\ref{lem:#1}}
\newcommand{\corref}[1]{Corollary~\ref{cor:#1}}
\newcommand{\propref}[1]{Proposition~\ref{prop:#1}}
\renewcommand{\eqref}[1]{(\ref{eq:#1})} % parenthesized equation reference
% \mysection starts a section and labels it sec:<title> in one step.
\newcommand{\mysection}[1]{\section{#1} \label{sec:#1}}
\newcommand{\secref}[1]{Section~\ref{sec:#1}}
\newcommand{\apxref}[1]{Appendix~\ref{sec:#1}}
\newcommand{\curly}[1]{\mathcal{#1}} % calligraphic letter shorthand
% \mangler: dagger-marked margin note, for review annotations.
\newcommand{\mangler}[1]{$^\dagger$ \marginpar{$\dagger$ #1}}
% Inline-sized fractions.
\newcommand{\half}{{\textstyle\frac{1}{2}}}
\newcommand{\third}{{\textstyle\frac{1}{3}}}
\newcommand{\quarter}{{\textstyle\frac{1}{4}}}
\newcommand{\mult}{\!\cdot\!} % tightly-spaced multiplication dot
\newcommand{\textfrac}[2]{\smash{\textstyle{\frac{#1}{#2}}}} % text-style fraction without extra height
\newcommand{\Oh}{O} % big-O
\newcommand{\oh}{o} % little-o
% "i.e." with a normal inter-word space after the period (\@ resets the
% space factor so TeX does not insert an end-of-sentence space) and
% \xspace to supply the following space unless punctuation follows --
% consistent with the \ed/\hd/... text macros below.
\newcommand{\ie}{i.e.\@\xspace}
\newcommand{\E}{\text{E}} % expectation
\newcommand{\A}{\curly{A}} % the branching program under discussion
\renewcommand{\H}{H_{\curly{A}}} % hard set for \A (deliberately overrides the \H accent)
% Typewriter identifiers used in definitions.
\newcommand{\state}{\mathtt{state}}
\newcommand{\set}{\mathtt{set}}
\newcommand{\interval}{\mathtt{intervals}}
\newcommand{\indices}{\mathtt{indices}}
\newcommand{\eq}{\mathtt{eq}}
\newcommand{\x}{\mathbf{x}}
\newcommand{\y}{\mathbf{y}}
\renewcommand{\time}{k(n)\!\cdot\! n} % the running-time bound (overrides TeX's \time register)
\newcommand{\relation}{\Delta} % the binary relation
\newcommand{\decision}{D_\Delta} % its n-ary independence (decision) problem
% Distinguished inputs/parameters of the lower-bound argument.
\newcommand{\fail}{x_{\text{fail}}}
\newcommand{\kfailone}{k_{1}}
\newcommand{\kfailtwo}{k_{2}}
\newcommand{\Fail}{X_{\text{fail}}}
\newcommand{\hard}{x_{\text{hard}}}
\newcommand{\YES}{\text{``YES''}} % accepting answer of a branching program
\newcommand{\NO}{\text{``NO''}} % rejecting answer
% Sparseness/fullness notation (defined formally in the text).
\newcommand{\sparse}{\zeta(n)\!-\!\text{sparse}}
\newcommand{\full}{\lambda(n)\!-\!\text{full}}
\newcommand{\fullinv}{\smash{\textstyle{\frac{1}{\lambda(n)}}}}
\newcommand{\sparseinv}{\smash{\textstyle{\frac{1}{\zeta(n)}}}}
% Problem names; \xspace supplies a trailing space unless punctuation follows.
\newcommand{\ed}{element distinctness\xspace}
\newcommand{\hd}{Hamming distance\xspace}
\newcommand{\equality}{equality\xspace}
\newcommand{\sort}{sorting\xspace}
\newcommand{\uh}{universal hashing\xspace}
\begin{document}
\title{On Ajtai's Lower Bound Technique for $R$-way Branching Programs
and the Hamming Distance Problem} \author{Jakob
Pagter\thanks{E-mail: \texttt{pagter@daimi.au.dk}. Partially supported
by the IST Programme of the EU under contract number
IST-1999-14186 (ALCOM-FT). Part of this work was done while the
author was visiting University of Toronto.}}
\date{{\bf BRICS}\thanks{%
\protect\begin{tabular}[t]{@{}l@{}} \textbf{B}asic
\textbf{R}esearch in \textbf{C}omputer
\textbf{S}cience,\protect\\ Centre of the Danish National
Research Foundation.
\protect\end{tabular}}\\
Department of Computer Science\\
University of Aarhus\\
Denmark}
\maketitle
\begin{abstract}
Miklos Ajtai [\emph{Determinism versus Non-Determinism for Linear
Time RAMs with Memory Restrictions}, 31st Symposium on Theory of
Computation (STOC), 1999] has proved that any $R$-way branching
program deciding the so-called \hd problem (given $n$ elements
decide whether any two of them have ``small'' Hamming distance) in
time $\Oh(n)$ must use space $\Omega(n\lg n)$.
We extend the proof to obtain a time-space trade-off for time
between $n$ and $\alpha n\textfrac{\lg n}{\lg\lg n}$, for some
suitable $0<\alpha<1$. In particular we prove that if space is
\smash{$\Oh(n^{1-\epsilon})$}, then time is $\Omega(n\textfrac{\lg
n}{\lg\lg n})$.
\end{abstract}
\section{Introduction}
\label{sec:introduction}
In~\cite{Ajtai99} Miklos Ajtai proved that any $R$-way branching
program that decides \ed using sub-linear space must use super-linear
time. The technical details of the proof are complicated. However,
the basic ideas underlying the proof are not, and this was exploited
by Ajtai to prove an even stronger bound for the so called \hd problem
(a generalization of \ed) using much simpler arguments. An
interesting aspect of these bounds is that they are valid in the well
known RAM model with word size $\lg R$, and thus bounds in this model
are valid for RAMs independent of the specific instruction set.
The \ed \emph{theorem} is the more interesting of Ajtai's two
theorems, as \ed is an interesting and very well-studied
problem~\cite{Borodin87,Karchmer86,Yao94}. On the other hand, the \hd
\emph{proof} is arguably the more interesting of the two proofs, as it
gives the best possible bound in the model using a clear and
elegant proof.
The purpose of this paper is to study the latter proof from three
angles: 1) To what extent does the \hd proof generalize, \ie for what
intervals of time and space do we have a lower bound in the form of a
time-space trade-off? 2) How strong is the proof, e.g. can the simpler
of the two proofs actually be used to prove something for \ed? 3) Can
the \hd proof be used to give non-trivial lower bounds in the Boolean
model?
The answer to the two latter questions seems to be negative, but we
can generalize the proof to achieve the following:
\begin{thm-name}{vanilla}{General theorem, vanilla version}
Any $R$-way branching program deciding \hd using time
$\Oh(k(n)\!\cdot\! n)$, $k(n)<\alpha\textfrac{\lg n}{\lg\lg
n}$\footnote{Following~\cite{Knuth98} we use ``$\lg$'' to denote
the logarithm in base 2.} for a suitable constant $0<\alpha<1$
must use space
\[
S(n)>\beta\frac{n\lg n}{k(n)^{10k(n)}},
\]
for some constant $\beta>0$.
\end{thm-name}
The full version of this theorem (to be found in \secref{result}) will
imply all three answers. For now, let us just observe that
\thmref{vanilla} implies that any $R$-way branching program, and thus
\emph{any} RAM with word size $\lg R$, solving the Hamming Distance
problem using $\Oh(n^{1-\epsilon})$ bits of space must use time
$\Omega(n\textfrac{\lg n}{\lg\lg n})$. In terms of time this is the
strongest result that can be achieved using our results. To our
knowledge this is the \emph{quantitatively} best known lower bound for
a decision problem in the $R$-way branching program model. Similarly,
using time $\Oh(n\textfrac{\lg n}{\lg\lg n})$ implies using space
$\Omega(n^{1-\epsilon})$ for some $0<\epsilon<1$.
Using the combinatorics of Ajtai in conjunction with the technique of
Beame et al.~\cite{Beame98a}, a theorem similar to Ajtai's \hd theorem
can be proved. In fact this gives rise to a $\Omega(n\lg\lg n)$ bound
on time for space in $\Oh(n^{1-\epsilon})$. This connection was
observed by Beame et al.~\cite{Beame00a} (independently of this work).
It is not surprising then that we can also prove \thmref{vanilla} by
combining the techniques of Beame et al.~\cite{Beame00a} with our
generalized version of Ajtai's combinatorics
(see~\cite{Pagter01}). Since the initial version of our paper
(available as~\cite{Pagter00}), Beame et al.~\cite{Beame03} has
provided an $\Omega(n\lg n)$ bound on time for space in
$\Oh(n^{1-\epsilon})$ for the \hd problem.
Our main contribution is the generalization of Ajtai's technical
lemmata. Using the generalized version of Ajtai's combinatorics we are
able to prove lower bounds for time up to $\Omega(n\textfrac{\lg
n}{\lg\lg n})$, improving the $\Omega(n\lg\lg n)$ bound of Beame et
al., and the $\Omega(n\textfrac{\lg\lg n}{\lg\lg\lg n})$ bound, which
follows by the immediate generalization of Ajtai's work (where
constants are systematically replaced by functions of $n$ without
modifying the proof as such). We observe that the proof also works for
randomized branching programs with one-sided error.
\section{Previous work}
\label{sec:previous work}
Proving lower bounds in general models of computation is notoriously
hard. Early approaches restricted the computational model, for example
to comparison based models, but natural models (such as the RAM) do
not obey such restrictions. Another approach has been to restrict the
resources available, e.g. limiting the amount of space which we allow
a given algorithm to use. This gives rise to time-space trade-offs,
for which several breakthroughs have been achieved recently
\cite{Ajtai99b,Ajtai99,Beame98a}.
Branching programs have been very useful for this line of study. In
particular much insight has been gained using the $R$-way branching
program model of Borodin and Cook~\cite{Borodin82}. Results include
general lower bounds for \sort \cite{Beame91,Borodin82} and \uh
\cite{Mansour93}. The problems for which these techniques work are
characterized by having a large output domain, which is essential for
the employed proofs. It was only recently that non-trivial lower
bounds for a decision problem were obtained in this model
\cite{Ajtai99,Beame98a}.
When proving upper bounds for problems like \hd, \ed, \sort and \uh,
one popular model has been the \emph{word RAM}, or RAM, (see
e.g.~\cite{Hagerup98}). Much debate has taken place over which
instruction sets to allow, giving rise to some confusion in the
literature. However, one of the very nice properties of $R$-way
branching programs is that they are strictly more powerful than
\emph{any} RAM with word size $\lg R$ (regardless of instruction
set). This means that the bounds obtained in this model will hold for
any RAM no matter what specific instruction set we are working with.
The first non-trivial bound for a decision problem in the $R$-way
branching program model was given by Beame et al.~\cite{Beame98a}, who
exhibited a problem for which they could prove a lower bound of
$\Omega(n\lg\lg n)$ bound on time, when restricting space to being
$\Oh(n)$. As mentioned earlier, this bound also applies to the \hd
problem.
Independently of our work Beame et al.~\cite{Beame00a} generalized the
\ed proof of Ajtai, and obtained a lower bound of
$\Omega(n\smash{\sqrt{\lg n/\lg\lg n}})$ for Element Distinctness in
the $R$-way model. Our results differ from this result in two aspects:
1) we focus on the ``easy'' version of Ajtai's original proof---the
\hd proof, and 2) quantitatively our bounds are better (albeit for \hd
and not \ed). Since the initial version of our paper Beame et
al.~\cite{Beame03} (a revision of \cite{Beame00a}) has provided an
$\Omega(n\lg n)$ bound on time for space in $\Oh(n^{1-\epsilon})$ for
the \hd problem, improving on our bounds.
Many of the results achieved in the $R$-way branching program model
were stepping stones or ``side-effects'' in the quest for a
non-trivial (i.e. $T\in\omega(n)$ or even just $T>n$) lower bound for a decision
problem in a Boolean model, which for a great many people is the holy
grail of computational complexity. The first non-trivial bound for a
decision problem in the Boolean branching program model was given by
Beame et al.~\cite{Beame98a}, who exhibited a problem for which they
could prove a lower bound of $1.0178\,n$ (sic) on time for space
restricted to being sub-linear. Ajtai~\cite{Ajtai99b} extended his
own work for \ed in the $R$-way model to give a non-trivial lower
bound in the Boolean branching program model. The aforementioned work
of Beame et al.~\cite{Beame00a} generalizes \cite{Ajtai99b} (as well
as \cite{Ajtai99}) to obtain lower bounds for time up to
$\Omega(n\smash{\sqrt{\lg n/\lg\lg n}})$ in the Boolean branching
program model.
Despite the exciting developments in the Boolean model, our focus
remains the $R$-way model. We only treat the Boolean model via the
fact that bounds in the Boolean branching program model can be derived
from bounds obtained in the $R$-way model---at a cost.
\begin{prop}{r2bool}
Suppose we can prove a lower bound of $f(n)$ for some problem in the
$R$-way branching program model, then this translates into a
$f(n)/\lg R$ lower bound for the same problem in the Boolean branching
program model. Likewise for space.
\end{prop}
\begin{proof}
In the Boolean model we measure the size of the input, $n$, in bits.
In the $R$-way model $n$ refers to the number of words \emph{each}
consisting of $\lg R$ bits. Thus, if we translate inputs of size $n$
from the $R$-way model, they will become inputs of size $n\lg R$
bits in the Boolean model.
\end{proof}
So if we have that $R\in n^{\Oh(1)}$, we need $\omega(n\lg n)$ bounds
in the $R$-way model to infer $\omega(n)$ bounds in the Boolean model.
None of the results mentioned for decision problems in the $R$-way
branching program model---our results included---meet this
requirement.
The results of Beame et al.~\cite{Beame00a,Beame03} holds for
randomized branching programs with two-sided error. These are the
first non-trivial lower bounds for randomized branching programs in
both the $R$-way and the Boolean branching program model.
\section{Model of computation}
\label{sec:model}
We employ the $R$-way branching program model, which is at least as
strong as a RAM with word size $\lceil \lg R\rceil$ and any
instruction set. We will only give an informal presentation of the
model. For a detailed account see \cite{Borodin82} or \cite{Savage98}.
Boolean branching programs are the special case of $R=2$,
corresponding to looking at one bit at a time.
An $R$-way branching program is a directed acyclic graph. It has one
node with in degree $0$ which is called the \emph{start node}. Every
node with out degree $0$ is called a \emph{terminal node}, and is
labelled either $\YES$ or $\NO$, depending on whether the branching
program accepts or rejects the given input. Every node which is not a
terminal node is called a \emph{computation node} and is labelled with
some index $i$ (referring to the $i$'th word of the input). A
computation node has exactly $R$ outgoing edges, each with a unique
label from $1,\ldots ,R$. The input to the branching program consists
of $n$ elements from some universe of size $R$ (which may be a
function of $n$), and computation proceeds as follows: given input
$x\in\{1,\ldots,R\}^n$, we start in the start node where we read the
word indexed by the label of this node; if the value in this word is
$r$, then we follow the outgoing edge with label $r$. We continue this
procedure for the computation nodes we encounter until we end up in
some terminal node which tells us the result of the computation.
As measures of complexity we define time $T$ to be the height of our
branching program, and space $S$ to be $\lceil\lg
\#\text{nodes}\rceil$.
For the rest of this paper we will assume all branching programs to be
levelled, which means that we can partition the nodes into $T$
disjoint sets $V_1,V_2,\ldots V_T$, such that all edges originating
from $V_i$ go to $V_{i+1}$. For asymptotic purposes we may assume this
without loss of generality. For proofs and definition see e.g. Borodin
et al.~\cite{Borodin81}.
In this model of computation all ``internal'' computation is free, we
only ``pay'' for reading the input.
\section{The $n$-ary independence problem for a binary relation}
\label{sec:problem}
We now describe a particular class of decision problems that are the
focus of this paper. This class was introduced by Ajtai. Let
$\relation \subseteq U \times U$ be a binary relation on universe
$U$. Say that an input $x=\langle x(1), x(2), \ldots, x(n)\rangle$ is
$\relation$-independent if there is no $i\neq j$ such that
$\relation(x(i),x(j))$. The $n$-ary independence problem for
$\relation$ is the decision problem $\decision$ on $U^n$ defined by
$\decision(x)=\YES$ if and only if $x$ is $\relation$-independent.
For example, in the case that $\relation$ is the equality relation
$EQ=\{(x,x)|x\in U\}$, the $n$-ary independence problem for $EQ$ is
equivalent to \ed on $U^n$.
As a consequence of the fact mentioned above that all internal
computation is free, we have the following easy proposition.
\begin{prop}{upper}
We can decide the $n$-ary independence problem, $\decision$, for any
binary relation, $\relation$, on $U$ in time $T$ and space $S$ such
that
\[
T\!\cdot\! S\in\Oh(n^2\lg R),
\]
for time between $n$ and $n^2$.
\end{prop}
\begin{proof}
First observe that in time $n$ we may decide $\relation$ using $R^n$
nodes or $n\lg R$ bits of space, using an $R$-way decision
tree---\ie reading all the inputs once, remembering them, and then
exploit the free internal computation.
This easily generalizes by splitting the input into $b(n)$ blocks
each containing $n/b(n)$ words. Now what we will do is to decide
$\relation$ for each of the $\Oh(b(n)^2)$ pairs of blocks; each time
we read a block we also decide $\relation$ for every pair of
elements in that block. For each pair we may do the computation in
time $2n/b(n)$ using $2(n/b(n))\lg R$ bits of space; deciding
$\relation$ internally on the blocks is free as we remember the
entire block. In total we use time $\Oh(nb(n))$ and space
$\Oh((n/b(n))\lg R)$, yielding the desired trade-off.
\end{proof}
For time $n$ this construction of course works for \emph{any} problem,
which shows that for time $\Theta(n)$ one cannot give space-bounds
better than $\Omega(n\lg n)$ (when $R=2^{c\lg n}$), \ie we cannot hope
for a better general bound for \emph{any} such independence problem in
this model.
In this paper we focus on the case that $U=\{0,1\}^{c\lg n}$ (where
$c$ will be a suitable fixed natural number) with $|U|=n^c$. Recall
that our $n$-ary independence problems for binary relations are
defined on $x=\langle x(1), x(2), \ldots, x(n)\rangle\in U^n$; we
emphasize the difference in notation between $x_i\in U^n$ (an entire
input consisting of $n$ elements from $U$) and $x(i)\in U$ (the $i$'th
element from $U$ of the input $x$).
We will be concerned with the independence problem for another
relation: the \hd relation. For $0<\gamma<\half$ we define the
parameterized \hd relation $HD_\gamma\subset \{0,1\}^{c\lg
n}\!\times\!\{0,1\}^{c\lg n}$ which relates $a$ and $b$ if and only if
they differ in at most $\gamma c \lg n$ positions. To alleviate
notation $HD$ will refer to $HD_{1/4}$ and \hd, or $D_{HD}$, will
refer to the $n$-ary independence problem for this relation with
$\gamma = 1/4$. \equality is the problem of deciding whether the input
contains two identical words. It should be clear that \hd is a natural
generalization of \equality\footnote{When Ajtai says \ed in
\cite{Ajtai99}, he actually means the dual problem \equality.}.
A relation $\relation$ is said to be $\full$ iff for any pair of
subsets of $U$ of size bigger than $\lambda(n)|U|$ there exists a pair
of elements, one from each of the two subsets, satisfying the
relation. Formally, $\relation$ is $\full$ if the following holds
\begin{equation}
\label{eq:full}
\forall A,B\subseteq U:\: |A|,|B|>\lambda(n)|U|\quad\Rightarrow\quad
\exists a\in A,\, b\in B:\: \relation(a,b)
\end{equation}
It should be clear that \equality is $\half-\text{full}$, as any two
subsets of $U$ containing more than half of $U$ will have a non-empty
intersection. In \cite{Ajtai99} it is proved that for all
$0<\gamma<\half$, $HD_\gamma$, and in particular $HD$, is
$2^{-\delta\lg n}-\text{full}$ on $U=\{0,1\}^{c\lg n}$ where $c$ and
$\delta$ are natural numbers suitably chosen (independent of $n$, but
depending on $\gamma$) such that $c>\delta$. This means that if we
have two subsets of $U$ of size bigger than $2^{(c-\delta)\lg n}$, we
are guaranteed to have two elements with low Hamming distance.
A relation $\relation$ is said to be $\sparse$ iff the number of
inputs of length $n$ for which the $n$-ary independence problem
$\decision(x)=\YES$ is less than $\zeta(n)|U|^n$ (a $\zeta(n)$
fraction of all possible inputs). Intuitively this means that the
$n$-ary independence problem for the binary relation, $\relation$, has
many $\NO$ instances. In \cite{Ajtai99} it is proved that for all
$0<\gamma<\half$ $HD_\gamma$ is $\half-\text{sparse}$ for a fixed
$c\in\mathbf{N}$ (recall that $|U|=n^c$) and $n$ sufficiently large.
This means that for at least half of all inputs to \hd the answer is
$\NO$. Ajtai~\cite{Ajtai99} also proves that \equality is
$c_=-\text{sparse}$ for a suitable fixed $c_=>0$.
\section{The result}
\label{sec:result}
We are now ready to state the result.
\begin{thm-name}{full}{General theorem, full version}
Let $\relation$ be a $\full$ and (non-trivial) $\sparse$ relation on
$U\!\times\! U$ with $|U|=R$. Consider an $R$-way branching program
deciding $\decision$, in time $T(n)=\time$ and space $S(n)$. If,
\begin{equation}
\label{eq:time}
72\, k(n)\lg k(n)< \lg \fullinv,
\end{equation}
and
\begin{equation}
\label{eq:sparseness}
\lg \sparseinv < \frac{n\lg \fullinv}{k(n)^{5k(n)}}.
\end{equation}
Then,
\begin{equation}
\label{eq:space}
S(n)\geq \frac{n\lg \fullinv}{k(n)^{4k(n)}}.
\end{equation}
\end{thm-name}
Let us discuss this theorem. First of all, many of the constants in the
above statement may possibly be improved, albeit not significantly.
What kind of fullness is required to achieve a non-trivial result?
From Ajtai's paper it might seem as if the \hd proof works only for
what we might call \emph{polynomial fullness}, i.e.,
$1/{|U|^{\Oh(1)}}$-fullness, whereas the \ed (or rather \equality)
proof handles \emph{constant fullness}. If $k(n)\in\Oh(1)$, we see
from \eqref{time} that constant fullness, specifically
$\lambda(n)<2^{-144}$, actually does give rise to a non-trivial lower
bound. However, it does not seem to be the case that the parameters of
the proof can be improved enough to achieve anything for
$\half-\text{fullness}$, i.e., we cannot prove anything for \ed, but
we can get closer than what Ajtai's original statement suggests.
If $\lambda(n)|U|<1$ the relation in question would be trivial to
decide (as two subsets of size $1$ would then be enough to ensure
satisfaction of the fullness property), hence we can assume that
$\lambda(n)|U|\geq 1$. Combining this with \eqref{time} yields,
\[
\lg |U| \geq \lg \frac{1}{\lambda(n)} > 72\, k(n)\lg k(n).
\]
Recalling \propref{r2bool}, we see that the best result we can achieve
in the Boolean model is,
\[
\frac{T(n)}{\lg |U|} < \frac{\time}{72\, k(n)\lg k(n)} \in \oh(n).
\]
In conclusion, no matter how we might choose the parameters, we can get
no non-trivial implications for the Boolean model.
As stated previously, on $U=\{0,1\}^{c\lg n}$ \hd is
$n^{-\delta}-\text{full}$ and $\half-\text{sparse}$, giving
\thmref{vanilla} as corollary to \thmref{full}. Interesting special
cases include Ajtai's original theorem: time in $\Oh(n)$ implies space
in $\Omega(n\lg n)$, and space $\Oh(n^{1-\epsilon})$ implies time
$\Omega(n\textfrac{\lg n}{\lg\lg n})$.
\section{Proof of \thmref{full}}
\label{sec:proof}
We would like to stress that almost all the arguments closely follow
those of Ajtai~\cite{Ajtai99}. We give the proof in full detail for
two reasons: 1) in the generalized version many constants, say $c$,
are replaced by functions, $c(n)$, and it is imperative to give the
full proof to see exactly where the proof holds and where it breaks
down; 2) it makes it clear exactly where we deviate from the original
proof.
The proof presented here deviates from that of Ajtai in two aspects.
When counting we use tighter estimates where it is possible, this has
little effect on Ajtai's original proof, but significantly extends the
interval of time for which the generalized proof works---an immediate
generalization will work for time up to roughly $n\textfrac{\lg\lg
n}{\lg\lg\lg n}$, whereas our proof works up to
$\Theta(n\textfrac{\lg n}{\lg\lg n})$. Another deviation is our proof
of \lemref{many} (corresponding to Lemmas 7 and 8 in \cite{Ajtai99}).
Besides these technical differences, changes have also been made for
reasons of presentation.
The overall intuition behind Ajtai's proof is as follows. We use the
time and space restrictions to construct a large set of inputs that
are all rejected by the algorithm. The structure and size of this set
will be such that we can utilize the fullness property of our problem
to ensure that at least one input, which we will call $\fail$, in the
above set must be accepted. This of course gives a contradiction on
the assumed time and space restrictions of the algorithm.
Assume we have an $R$-way branching program, $\A$, deciding
$\decision$ in time $\time$ and space $S(n)$ satisfying \eqref{time}
and \eqref{sparseness} but
\emph{not} \eqref{space}. We will show that then there exists an input
$x$ such that $\A(x)=\NO$ but $\decision(x)=\YES$, contradicting our
assumption; that is, assuming \eqref{time} and \eqref{sparseness} we
may conclude \eqref{space}.
Define $\state(x,t)$ as the state of $\A$ (one of $2^{S(n)}$) after
time step $t$ on input $x$. For a time interval $I$, define
$\state(x,I)$ to be $\state(x,t_I)$, where $t_I$ is the \emph{last}
element of $I$ that is, given $x$ and a time interval $I$ we get the
state of the program when leaving $I$. An arbitrary set of times $T$
may be written as a disjoint union of maximal intervals $I_j$, i.e. $T
= \cup_j I_j$. If we assert that $\state(x,T)=\state(y,T)$ we mean
that $\forall j:\: \state(x,I_j) = \state(y,I_j)$.
We will construct $\fail$ from another input $x$ with some desirable
properties. The first property is that $\decision(x) = \NO$ and hence
$\A(x) = \NO$, as $\A$ is assumed to decide $\decision$ correctly.
Suppose that we have two disjoint subsets of indices
$W_1,W_2\subset\{1,\ldots,n\}$ and two disjoint subsets of times
$T_1,T_2\subset\{1,\ldots,\time\}$. Suppose now that on input $x$ the
indices of $W_i$ ($i=1,2$) are not read outside $T_i$; let
\[
\begin{array}{rl}
S^x_i \subseteq & \{x_i|(\text{ obtained by modifying } x \text{ on }
W_i \text{ only})\\
& \quad \quad \quad \quad \quad \quad \quad \quad \quad \wedge \quad \state(x,T_i) = \state(x_i,T_i)\} ,
\end{array}
\]
i.e. $x$ and $x_i$ are identical outside $W_i$ and each time we leave
$T_i$ on both $x$ and $x_i$ we are in the same state. Hence $\A(x) =
\A(x_i)$ for all $x_i\in S^x_i$ as the only differences between the
two inputs are in $W_i$ and hence ``forgotten'' when we leave $T_i$,
the only place where $W_i$ is read.
Based on $x$ we can thus construct $x_1$ and $x_2$ such that $\A(x) =
\A(x_1) = \A(x_2) = \NO$. As $W_1$ and $W_2$ are disjoint we may make
these two different changes to $x$ simultaneously, obtaining the input
$\fail$. We would like to ensure that $\A(x) = \A(\fail) = \NO$, but
currently this might not be the case. The reason is that in the set of
time intervals, say $T_1$, $\A$ is in principle able to look outside
$W_1$ and hence $\A$ might look at $W_2$. This is not a problem as
long as $x$ on $W_2$ is unchanged, but when making the two changes
simultaneously we no longer have any guarantee that the states are
fixed. The way to eliminate this problem is to enforce that the
indices of $W_j$ are not read in $T_i$ on $x_i$, removing the
possibility that $\A$ detects the change in $W_2$ when in $T_1$ and
vice versa.
We can now construct $\fail$ such that $\A(\fail) = \A(x) = \NO$ by
making changes to $x$ on $W_1$ and $W_2$. The plan is of course to
choose $\fail$ such that $\decision(\fail) = \YES$, giving the desired
contradiction. One way to achieve this is to have so many choices for
each of $x_1$ and $x_2$ that we can put the fullness property into
play. If $S\subset U^n$, let $S(l)$ be $S$ projected onto the $l$'th
dimension (or index).
\begin{prop}{dimension}
Let $W\subseteq\{1,\ldots,n\}$ and let $S\subset U^n$ be a set of
inputs that are all identical outside $W$. If
$|S|>(\lambda(n)|U|)^{|W|}$ then for at least one $l\in W$ it will
be the case that $|S(l)|>\lambda(n)|U|$, i.e. some $x(l)$ (over
$x$'s in $S$) must take on more than $\lambda(n)|U|$ values from
$U$.
\end{prop}
\begin{proof}
Suppose that $\forall l\in W$ it is the case that $|S(l)|\leq
\lambda(n)|U|$ then clearly $|S|\leq (\lambda(n)|U|)^{|W|}$.
\end{proof}
If we have many different inputs on $W_i$ to choose from, there will
be an index for which we have many values to choose from, allowing us
to exploit the fullness property.
Based on the above idea, we define a \emph{$\mu(n)-$hard set},
$\H$, for a branching program $\A$ deciding $\decision$ in time
$\time$.
\sloppy
\begin{defi-name}{hard set}{Hard Set}
A set of inputs $\H\subset U^n$ is called a $\mu(n)-$hard set for a
branching program $\A$ deciding $\decision$ in time $k(n)\!\cdot\! n$ and
space $S(n)$ if we have
\begin{itemize} \cramped
\item $W_1,W_2\subset \{1,\ldots,n\}$ with $W_1\cap W_2 = \emptyset$
and $|W_i|\geq \mu(n)\!\cdot\! n$,
\item $T_1,T_2\subset \{1,\ldots,\time\}$ with $T_1\cap T_2 =
\emptyset$,
\end{itemize}
such that $\forall x\in\H:$
\begin{itemize} \cramped
\item $\decision(x) = \NO$.
\item The indices of $W_i$ are only read in $T_i$ (on $x$).
\item $\state(x,I_{ij})$ is fixed for all the intervals comprising
$T_i$---i.e. every time we leave $T_i$, $\A$ will behave
identically for all $x\in\H$.
\end{itemize}
\end{defi-name}
\fussy
Further, if $\H$ is a hard set then for $x\in\H$ we define an input $y$
to be \emph{$\H$-similar to $x$} if there exists inputs $x_1,x_2\in\H$
such that $x$ and $x_1$ differ only on indices in $W_1$ and $x$ and
$x_2$ differ only on indices in $W_2$, and $y$ is obtained from $x$ by
changing the position indexed by $W_1$ to match $x_1$ and those
indexed by $W_2$ to match $x_2$. We then have the following:
\begin{prop}{similar}
Let $\H$ be a hard set for a branching program $\A$ deciding
$\decision$. For $x\in\H$, if $y$ is $\H$-similar to $x$ then $\A(x) =
\A(y) = \NO$.
\end{prop}
\begin{proof}
Assume now that $\A$ is able to distinguish $x$ and $y$. This means
that these two inputs lead to different final states for the
branching program. As $x$ and $y$ are identical outside the indices
of $W_1$ and $W_2$, then there must exist $i$ and $j$ such that
$\state(x,I_{ij}) \neq \state(y,I_{ij})$, i.e., some interval
$I_j\subseteq T_i$.
Assume, without loss of generality, that $i=1$. So, for some
$I_{1j}\subseteq T_1$ $\state(x,I_{1j}) \neq \state(y,I_{1j})$. Let
$I_{1j}$ be the first such interval during the computation of $\A$,
hence $x$ and $y$ follow the same computation path until
$I_{1j}$. By construction, also $x_1$ and $x_2$ follow this same
path as $x$. Inside $I_{1j}$ we only read indices in $W_1$ and end
up in a different state than $\state(x,I_{1j})$ following a
different path from $x$. However, we follow the exact same path as
on $x_1$ as $y$ is identical to $x_1$ on $W_1$, so we have that
$\state(x,I_{1j}) \neq \state(x_1,I_{1j})$ contradicting that $x$
and $x_1$ are in the same hard set.
\end{proof}
The proof of \thmref{full} splits naturally in three parts. In
\lemref{many} we show that if we have a large hard set, we may obtain
$x_1$ and $x_2$ in ``many'' ways (relative to the fullness property).
Then in \lemref{large} we show that given the time and space
restrictions there exists a ``large'' hard set (relative to the
sparseness property). Finally these two lemmata are combined.
\begin{lem}{many}
Let $\relation$ be a $\full$ relation on $U$, $\A$ be a branching
program deciding $\decision$ in time $\time$ and space $S(n)$, and
let $\H$ be a $\mu(n)$-hard set for $\A$ so that,
\[
|\H| > 4 \lambda(n)^{\mu(n)\cdot n} |U|^n,
\]
then there exists some $x\in\H$ for which there exists $\fail$ which
is $\H$-similar to $x$, i.e. with $\A(\fail)=\NO$, but with
$\decision(\fail) = \YES$.
\end{lem}
\begin{proof}
Define $x_{|W}$, where $W\subseteq\{1,\ldots,n\}$, to be $x$ where
all values at positions in $W$ are overwritten with some standard symbol,
say $\bot\not\in U$. Suppose we have some set of inputs $\H\subseteq
U^n$ and some set of indices $W\subseteq\{1,\ldots,n\}$; define
\begin{equation}
\label{eq:S}
S^x = \{ y\in \H | x_{|W} = {y}_{|W} \},
\end{equation}
i.e. the set of elements in $\H$ which are identical to $x$ outside
$W$. We claim the following
\begin{equation}
\label{eq:partly identical inputs}
\#\left\{x\in\H \,:\, |S^x|\leq \quarter \frac{|\H|}{|U|^{n-|W|}}\right\}
< \quarter|\H|.
\end{equation}
Given $W$, define a partition of $\H$ according to $S^x$, i.e. $x$
and $y$ are in the same class if and only if $S^x = S^y$ (thus $y\in
S^x$, and $x\in S^y$). Clearly there are no more than $|U|^{n-|W|}$
classes, as we have at most this many ways of choosing a $y$ which is
different from $x$ outside $W$. The number of inputs in classes of
size at most $\quarter |\H| / (|U|^{n-|W|})$ is at most this number
(the maximum size of these classes) times $|U|^{n-|W|}$ (the maximum
total number of classes), implying \eqref{partly identical inputs}.
Based on a hard set $\H$, let $S^x_i$ be defined as in \eqref{S}
based on $\H$ and $W_i$. Clearly \eqref{partly identical inputs}
holds for $S^x_1$ and $S^x_2$, so these sets are relatively large
for most inputs in $\H$. We would like an $x$ for which both $S^x_1$
and $S^x_2$ are large, but in principle the $x$'s that give large
$S^x_1$ might not be the same as those that give large $S^x_2$ (and
vice versa). According to \eqref{partly identical inputs}, $|S^x_i|
\geq \quarter |\H| / (|U|^{n-|W_i|})$ for $\textfrac{3}{4}$ of the
elements in $\H$, hence for $\quarter$ of these elements both
$S^x_1$ and $S^x_2$ are no smaller than $\quarter |\H| /
(|U|^{n-|W_i|})$; certainly for any $\H$, $W_1$ and $W_2$, this gives
us an input $x'$ such that both $S^{x'}_1$ and $S^{x'}_2$ have this
size.
Consider this particular input $x'$. Each pair of $x_1\in S^{x'}_1$
and $x_2\in S^{x'}_2$ gives rise to a different $y$ $\H$-similar to
$x'$. Since $|W_i|\geq\mu(n)\!\cdot\! n$ we have that $\quarter |\H|
/ (|U|^{n-|W_i|})>(\lambda(n)|U|)^{|W_i|}$, yielding that $|S^{x'}_i|
> (\lambda(n)|U|)^{|W_i|}$. Hence, by \propref{dimension} there must
exist a pair of indices $\kfailone\in W_1$ and $\kfailtwo\in W_2$
taking on more than $\lambda(n)|U|$ different values for all the
inputs in $S^{x'}_1$ and $S^{x'}_2$ respectively. By the fullness
property this implies that there exist $x'_1\in S^{x'}_1$ and
$x'_2\in S^{x'}_2$ so that
$\relation(x'_1(\kfailone),x'_2(\kfailtwo))$. Define $\fail$ to be
the input $\H$-similar to $x'$ obtained from $x'$ by modifying $x'$
on $W_1$ and $W_2$ using $x'_1$ and $x'_2$ respectively. Thus
$\decision(\fail)=\YES$. By \propref{similar} $\A(\fail) = \NO$ as
$\fail$ is $\H$-similar to $x'$.
\end{proof}
\begin{lem}{large}
Let $\relation$ be a $\sparse$ relation on $U$, $\A$ be a branching
program deciding $\decision$ in time $\time$ and space $S(n)$, and
let $\mu(n) = k(n)^{-4k(n)}$. Then $\A$ has a $\mu(n)$-hard set of size
\[
|\H|\geq \frac{\zeta(n)|U|^n}{\binom{n}{\mu(n)\cdot n}^2\
(3k(n))^{8k(n)}\ 2^{4k(n)S(n)}}.
\]
\end{lem}
\begin{proof}
We start by constructing a partitioning $P_x$ of the indices as
follows. Split time into $9k^2(n)$ intervals of length\footnote{The
interval length is parameterized in \cite{Ajtai99}, however the
present choice leaves little room for improvement.} $n/(9k(n))$.
Two indices $i$ and $j$ are in the same class of $P_x$ if and only
if they are read in exactly the same intervals on input $x$. $P'_x$
is $P_x$ restricted to classes of $P_x$ whose members are queried in
at most $2k(n)$ time intervals on $x$. Finally $\Gamma_x$ is $P'_x$
restricted to classes whose size is at least $n/(4|P'_x|)$.
For $W \in \Gamma_x$, define $\interval(x,W)$ to be the intervals in
which $W$ is queried on input $x$. By construction, $\interval(x,W)$
contains at most $2k(n)$ intervals for all $W\in\Gamma_x$.
Our restricted partitioning $\Gamma_x$ has the following properties,
\begin{equation}
\label{eq:size}
\forall W\in\Gamma_x:\: |W| \geq \mu(n)\cdot n,
\end{equation}
\begin{equation}
\label{eq:disjoint}
\forall x: \exists W_1,W_2\in\Gamma_x:\:
\interval(x,W_1)\cap\interval(x,W_2) = \emptyset.
\end{equation}
It is a fact that no more than $n/2$ elements can be queried in more
than $2k(n)$ intervals, since the branching program has length at
most $\time$; hence $P'_x$ covers at least $n/2$ elements. Classes
of $P'_x$ with size no greater than $n/(4|P'_x|)$ can cover at most
$n/4$ indices (as we have at most $|P'_x|$ such classes). Thus
$\Gamma_x$ covers at least $\textfrac{n}{2} - \textfrac{n}{4} =
\textfrac{n}{4}$ indices.
Each class of $P'_x$ is uniquely identified by the corresponding
set of at most $2k(n)$ intervals in which the indices of the class
are read. Hence we may bound the number of classes in $P'_x$ by the
number of ways to choose up to $2k(n)$ intervals from $9k^2(n)$,
\begin{eqnarray*}
|P'_x| & \leq & \sum_{i=0}^{2k(n)}\binom{9k^2(n)}{i}\\
& = & 2^{9k^2(n)H(\textfrac{2}{9k(n)})-\half\lg 9k^2(n)+\Oh(1)}\\
& \leq & k(n)^{3k(n)},
\end{eqnarray*}
according to \cite[p. 492]{GKP94}\footnote{These estimates are not
correct for $k(n)\in\Oh(1)$, in which case we can just use Ajtai's
original estimate. Also, using
\smash{$\binom{n}{k}<(\textfrac{n}{e})^k$} gives a bound that is
almost as good, but we use the above estimate to emphasize that
significantly better estimates are not possible.}, $H(m)$ is the
entropy function $m\lg \textfrac{1}{m}+(1-m)\lg \textfrac{1}{1-m}$.
By the lower bound on the size of the classes in $\Gamma_x$ we have
proved \eqref{size}.
\sloppy
We will now prove \eqref{disjoint}; in fact we will prove the
stronger statement that
\begin{equation}
\label{eq:stronger}
\forall W_1\in\Gamma_x\,\exists W_2\in\Gamma_x:\:
\interval(x,W_1)\cap\interval(x,W_2) = \emptyset.
\end{equation}
Since $\interval(x,W_1)$ has at most $2k(n)$ intervals, and each
such interval contains at most $n/9k(n)$ indices, the total number
of indices queried within those intervals is at most $2n/9$. As
there are $n/4$ indices covered by $\Gamma_x$, we can choose an
index $j$ covered by $\Gamma_x$ that is not queried within
$\interval(x,W_1)$, and so the class $W_2\in\Gamma_x$ containing $j$
satisfies \eqref{stronger}. This in turn implies \eqref{disjoint}.
\fussy
To conclude the proof we will use $\Gamma_x$ to construct a hard
set. For each $x\in U^n$ with $\decision(x)=\NO$ let $W_1$ and $W_2$
be the set of indices whose existence is promised in
\eqref{disjoint}, and let $T_i = \interval(x,W_i)$. Define $\H^x$ to
be the set of $y\in U^n$ that satisfy,
\begin{itemize}\cramped
\item $\decision(x)=\NO$.
\item $T_i = \interval(y,W_i)$ ($= \interval(x,W_i)$), hence $W_i$
is a class of both $\Gamma_x$ and $\Gamma_y$.
\item $\state(x,T_i) = \state(y,T_i)$.
\end{itemize}
Define $F_i$ to be a function that given $x$ and $T_i$ lists
$\state(x,I_{ij})$ where $T_i = \bigcup_j I_{ij}$;
$F_i$ gives an ordered list of the states of $\A$ each time we
leave $T_i$.
The above is a hard set as \eqref{size} means that $|W_i|\geq
\mu(n)\!\cdot\! n$ as required, and the indices of $W_i$ are
certainly not read in $T_j$, in fact they are \emph{only} read in
$T_i$.
\sloppy
Each hard set $\H^x$ is uniquely determined by the six-tuple
$\langle W_1,W_2,T_1,T_2,F_1,F_2\rangle$. If we can choose this
six-tuple in at most $h(n)$ ways, there must be an $x$ such that
$|\H^x| \geq \textfrac{\zeta(n)|U|^n}{h(n)}$ as there are at least
$\zeta(n)|U|^n$ inputs $x$ with $\decision(x)=\NO$.
\fussy
Observe that $W_i$ need not be bigger than $\mu(n)\!\cdot\! n$; if
we have more indices than this, just take the first $\mu(n)\cdot n$.
This may collapse a number of classes into one, meaning that we may
choose each class $W_i$ in $\smash{\binom{n}{\mu(n)\cdot n}}$ ways.
This restriction on the size of $W_i$ significantly increases the
size of the interval in which we can obtain a bound.
As $T_i$ consists of at most $2k(n)$ out of $9k^2(n)$ intervals, the
number of ways to choose $T_i$ is bounded by $(9k^2(n))^{2k(n)} =
(3k(n))^{4k(n)}$ (actually a better estimate should be possible, but
it will not improve the result significantly). Finally, as $T_i$
consists of at most $2k(n)$ intervals we fix the state of our
computation in at most this many places, each with $2^{S(n)}$
choices, meaning that $F_i$ can be chosen in at most
$(2^{S(n)})^{2k(n)}$ ways. In total we get that
\begin{eqnarray*}
h(n) & \leq & \binom{n}{\mu(n)\!\cdot\!
n}^2\cdot \left((3k(n))^{4k(n)}\right)^2\cdot
\left((2^{S(n)})^{2k(n)}\right)^2\\
& = & \binom{n}{\mu(n)\cdot n}^2\ (3k(n))^{8k(n)}\
2^{4k(n)S(n)}.
\end{eqnarray*}
\end{proof}
\begin{proof-name}{\thmref{full}}
Let $\relation$ be a $\full$ and $\sparse$ relation on $U$. Assume
that $\A$ is an $R$-way branching program deciding $\decision$ on
$U^n$ in time $\time$ and space $S(n)$, such that \eqref{time} and
\eqref{sparseness} but not \eqref{space} holds. Based on these
assumptions our aim is to arrive at a contradiction, thus proving
the theorem. Specifically we will show that there exists an input
$\fail$ such that $\decision(\fail) = \YES$, but the branching
program answers $\A(\fail) = \NO$.
Combining \lemref{many} and \lemref{large} we see that we can find
$\fail$ if
\[
\frac{\zeta(n)|U|^n}{\binom{n}{\mu(n)\cdot n}^2\
(3k(n))^{8k(n)}\ 2^{4k(n)S(n)}} > 4 \lambda(n)^{\mu(n)\cdot n}
|U|^n.
\]
Using Stirling's approximation for $n!$ (see e.g.~\cite[p.
115]{Knuth97}) we get that
\[
\lg \binom{n}{\mu(n)n} < 3 \mu(n) n \lg
\textstyle{\frac{1}{\mu(n)}}
\]
for $n$ sufficiently large. It is thus sufficient that
\[
2 + \lg \sparseinv + 6 \mu(n) n\lg\textstyle{\frac{1}{\mu(n)}} + 8
k(n) \lg 3k(n) + 4k(n)S(n) < \mu(n) n \lg \fullinv.
\] \sloppy
As $S(n)\geq\lg n$ (necessarily), the term $8k(n)\lg 3k(n)$ is
dominated by $8k(n)S(n)$. Likewise the constant $2$
is dominated by $k(n)S(n)$. Hence, the above is satisfied if \fussy
\[
\lg \sparseinv + 6 \mu(n)n\lg\textfrac{1}{\mu(n)} + 13k(n)S(n) <
\mu(n)n\lg \fullinv.
\]
This certainly holds if each of the three terms on the left hand
side is less than a third of the term on the right hand side. We
have the desired $\fail$ if
\[
6\mu(n)n\lg\textfrac{1}{\mu(n)} < \third \mu(n)n\lg \fullinv,
\]
\[
\lg \sparseinv < \third \mu(n)n\lg \fullinv,
\]
\[
13k(n)S(n) < \third \mu(n)n\lg \fullinv.
\]
These are implied by \eqref{time}, \eqref{sparseness} and
$\neg$\eqref{space}, since $\mu(n)=k(n)^{-4k(n)}$. The last
equation---implied by $\neg$\eqref{space}---holds as $k(n)\geq 1$,
because we must always use time $n$.
\end{proof-name}
\section{Randomization}
\label{sec:randomization}
\begin{defi-name}{rand}{Randomized $R$-way branching program}
A \emph{randomized $R$-way branching program} using $r$ random bits,
consists of a collection of $2^r$ deterministic $R$-way branching
programs. Each execution of the randomized branching program starts
by uniformly at random choosing one of the $2^r$ deterministic
programs which is then executed.
We say that a randomized $R$-way branching program $\A$ deciding a
problem $D$ has constant 1-sided error if for $0\leq\epsilon<\half$
the following holds\footnote{Note that the standard
definition for 1-sided error (e.g. the complexity class RP) allows
for error on the accepting answer $D(x)=\YES$, whereas our
definition allows error on the rejecting answer (corresponding to
CoRP).}
\begin{itemize}\cramped
\item If $D(x)=\YES$ then our randomized branching program $\A$ answers
correctly on input $x$.
\item If $D(x)=\NO$ then $\Pr[\A(x)=\NO]>1\!-\!\epsilon$.
\end{itemize}
\end{defi-name}
\begin{cor}{rand}
The statement of \thmref{full} holds for randomized $R$-way
branching programs with constant 1-sided error if we modify the
constants slightly.
\end{cor}
\begin{proof}
By a standard averaging argument, one of the $2^r$ deterministic
branching programs must correctly answer $\NO$ for at least a
$1\!-\!\epsilon$ fraction of the inputs with answer $\NO$. Apply
\thmref{full} to this \emph{deterministic} branching program
computing $\decision$. Hence we only reduce the size of our hard set
with a factor $\epsilon$.
\end{proof}
\section{Acknowledgement}
I would like to thank Faith Fich. I would also like to thank the
anonymous referees for many helpful comments. In particular one
referee has provided detailed suggestions on how to improve
presentation as well as advice on how to make some of the proofs
simpler and more precise.
\bibliographystyle{amsalpha}
\bibliography{./cj05-01}
\end{document}