
\documentclass[10pt]{article}

\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{xspace}

\makeatletter
\newcommand{\cramped}                           % "Cramped" list style.
  {\parskip\@outerparskip\@topsep\parskip       % put after \begin{itemize}
  \@topsepadd2pt\itemsep0pt                     %
  \settowidth{\labelwidth}{\@itemlabel}         %
  \advance\leftmargin-\labelsep                 %
  \advance\leftmargin-\labelwidth               %
  \advance\@totalleftmargin-\leftmargin         %
  \advance\linewidth\leftmargin                 %
  \parshape1\@totalleftmargin\linewidth}        %
\makeatother

\newtheorem{_defi}{Definition}
\newtheorem{_thm}{Theorem}
\newtheorem{_lem}{Lemma}
\newtheorem{_cor}[_thm]{Corollary}
\newtheorem{_prop}[_defi]{Proposition}

\newenvironment{proof}{\vspace{.3\baselineskip}\noindent{\bf
    Proof:}}{{\hfill $\Box$\newline}\vspace{.3\baselineskip}}
\newenvironment{proof-name}[1]{\vspace{.3\baselineskip}\noindent{\bf
    Proof of #1:}}{{\hfill $\Box$\newline}\vspace{.3\baselineskip}}

\newenvironment{defi}[1]{\begin{_defi} \label{def:#1}}{{\hfill $\Box$}\end{_defi}}
\newenvironment{defi-name}[2]{\begin{_defi}[#2] \label{def:#1}}{{\hfill $\Box$} \end{_defi}}
\newenvironment{thm}[1]{\begin{_thm} \label{thm:#1}}{\end{_thm}}
\newenvironment{thm-name}[2]{\begin{_thm}[#2] \label{thm:#1}} {\end{_thm}}
\newenvironment{lem}[1]{\begin{_lem} \label{lem:#1}}{\end{_lem}}
\newenvironment{lem-name}[2]{\begin{_lem}[#2] \label{lem:#1}}{\end{_lem}}
\newenvironment{cor}[1]{\begin{_cor} \label{cor:#1}}{\end{_cor}}
\newenvironment{prop}[1]{\begin{_prop} \label{prop:#1}}{\end{_prop}}
\newenvironment{prop-name}[2]{\begin{_prop}[#2] \label{prop:#1}}{\end{_prop}}

\newcommand{\defref}[1]{Definition \ref{def:#1}}
\newcommand{\thmref}[1]{Theorem \ref{thm:#1}}
\newcommand{\lemref}[1]{Lemma \ref{lem:#1}}
\newcommand{\corref}[1]{Corollary \ref{cor:#1}}
\newcommand{\propref}[1]{Proposition \ref{prop:#1}}
\renewcommand{\eqref}[1]{(\ref{eq:#1})}
\newcommand{\mysection}[1]{\section{#1} \label{sec:#1}}
\newcommand{\secref}[1]{Section \ref{sec:#1}}
\newcommand{\apxref}[1]{Appendix \ref{sec:#1}}
\newcommand{\curly}[1]{\mathcal{#1}}
\newcommand{\mangler}[1]{$^\dagger$ \marginpar{$\dagger$ #1}}
\newcommand{\half}{{\textstyle\frac{1}{2}}}
\newcommand{\third}{{\textstyle\frac{1}{3}}}
\newcommand{\quarter}{{\textstyle\frac{1}{4}}}
\newcommand{\mult}{\!\cdot\!}
\newcommand{\textfrac}[2]{\smash{\textstyle{\frac{#1}{#2}}}}
\newcommand{\Oh}{O}
\newcommand{\oh}{o}
\newcommand{\ie}{i.e. }
\newcommand{\E}{\text{E}}
\newcommand{\A}{\curly{A}}
\renewcommand{\H}{H_{\curly{A}}}
\newcommand{\state}{\mathtt{state}}
\newcommand{\set}{\mathtt{set}}
\newcommand{\interval}{\mathtt{intervals}}
\newcommand{\indices}{\mathtt{indices}}
\newcommand{\eq}{\mathtt{eq}}
\newcommand{\x}{\mathbf{x}}
\newcommand{\y}{\mathbf{y}}
\renewcommand{\time}{k(n)\!\cdot\! n}
\newcommand{\relation}{\Delta}
\newcommand{\decision}{D_\Delta}
\newcommand{\fail}{x_{\text{fail}}}
\newcommand{\kfailone}{k_{1}}
\newcommand{\kfailtwo}{k_{2}}
\newcommand{\Fail}{X_{\text{fail}}}
\newcommand{\hard}{x_{\text{hard}}}
\newcommand{\YES}{\text{``YES''}}
\newcommand{\NO}{\text{``NO''}}
\newcommand{\sparse}{\zeta(n)\!-\!\text{sparse}}
\newcommand{\full}{\lambda(n)\!-\!\text{full}}
\newcommand{\fullinv}{\smash{\textstyle{\frac{1}{\lambda(n)}}}}
\newcommand{\sparseinv}{\smash{\textstyle{\frac{1}{\zeta(n)}}}}
\newcommand{\ed}{element distinctness\xspace}
\newcommand{\hd}{Hamming distance\xspace}
\newcommand{\equality}{equality\xspace}
\newcommand{\sort}{sorting\xspace}
\newcommand{\uh}{universal hashing\xspace}


\begin{document}

\title{On Ajtai's Lower Bound Technique for $R$-way Branching Programs
  and the Hamming Distance Problem} \author{Jakob
  Pagter\thanks{E-mail: \texttt{pagter@daimi.au.dk}. Partially supported
    by the IST Programme of the EU under contract number
    IST-1999-14186 (ALCOM-FT). Part of this work was done while the
    author was visiting University of Toronto.}}
\date{{\bf BRICS}\thanks{%
    \protect\begin{tabular}[t]{@{}l@{}} \textbf{B}asic
      \textbf{R}esearch in \textbf{C}omputer
      \textbf{S}cience,\protect\\ Centre of the Danish National
      Research Foundation.
      \protect\end{tabular}}\\
  Department of Computer Science\\
  University of Aarhus\\
  Denmark}

\maketitle

\begin{abstract}
  Miklos Ajtai [\emph{Determinism versus Non-Determinism for Linear
    Time RAMs with Memory Restrictions}, 31st Symposium on Theory of
  Computation (STOC), 1999] has proved that any $R$-way branching
  program deciding the so-called \hd problem (given $n$ elements
  decide whether any two of them have ``small'' Hamming distance): in
  time $\Oh(n)$ must use space $\Omega(n\lg n)$.
  
  We extend the proof to obtain a time-space trade-off for time
  between $n$ and $\alpha n\textfrac{\lg n}{\lg\lg n}$, for some
  suitable $0<\alpha<1$. In particular we prove that if space is
  \smash{$\Oh(n^{1-\epsilon})$}, then time is $\Omega(n\textfrac{\lg
    n}{\lg\lg n})$.
\end{abstract}


\section{Introduction}
\label{sec:introduction}


In~\cite{Ajtai99} Miklos Ajtai proved that any $R$-way branching
program that decides \ed using sub-linear space must use super-linear
time. The technical details of the proof are complicated.  However,
the basic ideas underlying the proof are not, and this was exploited
by Ajtai to prove an even stronger bound for the so called \hd problem
(a generalization of \ed) using much simpler arguments.  An
interesting aspect of these bounds is that they are valid in the well
known RAM model with word size $\lg R$, and thus bounds in this model
are valid for RAMs independent on the specific instruction set.

The \ed \emph{theorem} is the more interesting of Ajtai's two
theorems, as \ed is an interesting and very well-studied
problem~\cite{Borodin87,Karchmer86,Yao94}. On the other hand, the \hd
\emph{proof} is arguably the more interesting of the two proofs, as it
is gives the best possible bound in the model using a clear and
elegant proof.

The purpose of this paper is to study the latter proof from three
angles: 1) To what extent does the \hd proof generalize, \ie for what
intervals of time and space do we have a lower bound in the form of a
time-space trade-off? 2) How strong is the proof, e.g. can the simpler
of the two proofs actually be used to prove something for \ed? 3) Can
the \hd proof be used to give non-trivial lower bounds in the Boolean
model?

The answer to the two latter questions seems to be negative, but we
can generalize the proof to achieve the following:

\begin{thm-name}{vanilla}{General theorem, vanilla version}
  Any $R$-way branching program deciding \hd using time
  $\Oh(k(n)\!\cdot\! n)$ , $k(n)<\alpha\textfrac{\lg n}{\lg\lg
    n}$\footnote{Following ~\cite{Knuth98} we use ``$\lg$'' to denote
    the logarithm in base 2.} for a suitable constant $0<\alpha<1$
  must use space
  \[
  S(n)>\beta\frac{n\lg n}{k(n)^{10k(n)}},
  \]
  for some constant $\beta>0$.
\end{thm-name}

The full version of this theorem (to be found in \secref{result}) will
imply all three answers. For now, let us just observe that
\thmref{vanilla} implies that any $R$-way branching program, and thus
\emph{any} RAM with word size $\lg R$, solving the Hamming Distance
problem using $\Oh(n^{1-\epsilon})$ bits of space must use time
$\Omega(n\textfrac{\lg n}{\lg\lg n})$. In terms of time this is the
strongest result that can be achieved using our results.  To our
knowledge this is the \emph{quantitatively} best known lower bound for
a decision problem in the $R$-way branching program model. Similarly,
using time $\Oh(n\textfrac{\lg n}{\lg\lg n})$) implies using space
$\Omega(n^{1-\epsilon})$ for some $0<\epsilon<1$.

Using the combinatorics of Ajtai in conjunction with the technique of
Beame et al.~\cite{Beame98a}, a theorem similar to Ajtai's \hd theorem
can be proved. In fact this gives rise to a $\Omega(n\lg\lg n)$ bound
on time for space in $\Oh(n^{1-\epsilon})$. This connection was
observed by Beame et al.~\cite{Beame00a} (independently on this work).
It is not surprising then that we can also prove \thmref{vanilla} by
combining the techniques of Beame et al.~\cite{Beame00a} with our
generalized version of Ajtai's combinatorics
(see~\cite{Pagter01}). Since the initial version of our paper
(available as~\cite{Pagter00}), Beame et al.~\cite{Beame03} has
provided an $\Omega(n\lg n)$ bound on time for space in
$\Oh(n^{1-\epsilon})$ for the \hd problem.

Our main contribution is the generalization of Ajtai's technical
lemmata. Using the generalized version of Ajtai's combinatorics we are
able to prove lower bounds for time up to $\Omega(n\textfrac{\lg
n}{\lg\lg n})$, improving the $\Omega(n\lg\lg n)$ bound of Beame et
al., and the $\Omega(n\textfrac{\lg\lg n}{\lg\lg\lg n})$ bound, which
follows by the immediate generalization of Ajtai's work (where
constants are systematically replaced by functions of $n$ without
modifying the proof as such). We observe that the proof also works for
randomized branching programs with one-sided error.


\section{Previous work}
\label{sec:previous work}


Proving lower bounds in general models of computation is notoriously
hard. Early approaches restricted the computational model, for example
to comparison based models, but natural models (such as the RAM) do
not obey such restrictions. Another approach has been to restrict the
resources available, e.g. limiting the amount of space which we allow
a given algorithm to use. This gives rise to time-space trade-offs,
for which several breakthroughs have been achieved recently
\cite{Ajtai99b,Ajtai99,Beame98a}.

Branching programs have been very useful for this line of study. In
particular much insight has been gained using the $R$-way branching
program model of Borodin and Cook~\cite{Borodin82}. Results include
general lower bounds for \sort \cite{Beame91,Borodin82} and \uh
\cite{Mansour93}. The problems for which these techniques work are
characterized by having a large output domain, which is essential for
the employed proofs. It was only recently that non-trivial lower
bounds for a decision problem were obtained in this model
\cite{Ajtai99,Beame98a}.

When proving upper bounds for problems like \hd, \ed, \sort and \uh,
one popular model has been the \emph{word RAM}, or RAM, (see
e.g.~\cite{Hagerup98}). Much debate has taken place over which
instruction sets to allow, giving rise to some confusion in the
literature. However, one of the very nice properties of $R$-way
branching programs is that they are strictly more powerful than
\emph{any} RAM with word size $\lg R$ (regardless of instruction
set). This means that the bounds obtained in this model will hold for
any RAM no matter what specific instruction set we are working with.

The first non-trivial bound for a decision problem in the $R$-way
branching program model was given by Beame et al.~\cite{Beame98a}, who
exhibited a problem for which they could prove a lower bound of
$\Omega(n\lg\lg n)$ bound on time, when restricting space to being
$\Oh(n)$. As mentioned earlier, this bound also applies to the \hd
problem.

Independently of our work Beame et al.~\cite{Beame00a} generalized the
\ed proof of Ajtai, and obtained a lower bound of
$\Omega(n\smash{\sqrt{\lg n/\lg\lg n}})$ for Element Distinctness in
the $R$-way model. Our results differ from this result in two aspects:
1) we focus on the ``easy'' version of Ajtai's original proof---the
\hd proof, and 2) quantitatively our bounds are better (albeit for \hd
and not \ed). Since the initial version of our paper Beame et
al.~\cite{Beame03} (a revision of \cite{Beame00a}) has provided an
$\Omega(n\lg n)$ bound on time for space in $\Oh(n^{1-(\epsilon})$ for
the \hd problem, improving on our bounds.

Many of the results achieved in the $R$-way branching program model
were stepping stones or ``side-effects'' in the quest for a
non-trivial (i.e.  $T\in\omega(n)$ or even just $T>n$) for a decision
problem in a Boolean model, which for a great many people is the holy
grail of computational complexity. The first non-trivial bound for a
decision problem in the Boolean branching program model was given by
Beame et al.~\cite{Beame98a}, who exhibited a problem for which they
could prove a lower bound of $1.0178\,n$ (sic) on time for space
restricted to being sub-linear.  Ajtai~\cite{Ajtai99b} extended his
own work for \ed in the $R$-way model to give a non-trivial lower
bound in the Boolean branching program model.  The aforementioned work
of Beame et al.~\cite{Beame00a} generalizes \cite{Ajtai99b} (as well
as \cite{Ajtai99}) to obtain lower bounds for time up to
$\Omega(n\smash{\sqrt{\lg n/\lg\lg n}})$ in the Boolean branching
program model. 

Despite the exciting developments in the Boolean model, our focus
remains the $R$-way model. We only treat the Boolean model via the
fact that bounds in the Boolean branching program model can be derived
from bounds obtained in the $R$-way model---at a cost.

\begin{prop}{r2bool}
  Suppose we can prove a lower bound of $f(n)$ for some problem in the
  $R$-way branching program model, then this translates into a
  $f(n)/\lg R$ lower for the same problem in the Boolean branching
  program model. Likewise for space.
\end{prop}

\begin{proof}
  In the Boolean model we measure the size of the input, $n$, in bits.
  In the $R$-way model $n$ refers to the number of words \emph{each}
  consisting of $\lg R$ bits. Thus, if we translate inputs of size $n$
  from the $R$-way model, they will become inputs of size $n\lg R$
  bits in the Boolean model.
\end{proof}

So if we have that $R\in n^{\Oh(1)}$, we need $\omega(n\lg n)$ bounds
in the $R$-way model to infer $\omega(n)$ bounds in the Boolean model.
None of the results mentioned for decision problems in the $R$-way
branching program model---our results included---meet this
requirement.

The results of Beame et al.~\cite{Beame00a,Beame03} holds for
randomized branching programs with two-sided error. These are the
first non-trivial lower bounds for randomized branching programs in
both the $R$-way and the Boolean branching program model.


\section{Model of computation}
\label{sec:model}


We employ the $R$-way branching program model, which is at least as
strong as a RAM with word size $\lceil \lg R\rceil$ and any
instruction set. We will only give an informal presentation of the
model. For a detailed account see \cite{Borodin82} or \cite{Savage98}.
Boolean branching programs are the special case of $R=2$,
corresponding to looking at one bit at a time.

An $R$-way branching program is a directed acyclic graph. It has one
node with in degree $0$ which is called the \emph{start node}. Every
node with out degree $0$ is called a \emph{terminal node}, and is
labelled either $\YES$ or $\NO$, depending on whether the branching
program accepts or rejects the given input. Every node which is not a
terminal node is called a \emph{computation node} and is labelled with
some index $i$ (referring to the $i$'th word of the input). A
computation node has exactly $R$ outgoing edges, each with a unique
label from $1,\ldots ,R$. The input to the branching program consists
of $n$ elements from some universe of size $R$ (which may be a
function of $n$), and computation proceeds as follows: given input
$x\in\{1,\ldots,R\}^n$, we start in the start node where we read the
word indexed by the label of this node; if the value in this word is
$r$, then we follow the outgoing edge with label $r$. We continue this
procedure for the computation nodes we encounter until we end up in
some terminal node which tells us the result of the computation.

As measures of complexity we define time $T$ to be the height of our
branching program, and space $S$ to be $\lceil\lg
\#\text{nodes}\rceil$.

For the rest of this paper we will assume all branching programs to be
levelled, which means that we can partition the nodes into $T$
disjoint sets $V_1,V_2,\ldots V_T$, such that all edges originating
from $V_i$ go to $V_{i+1}$. For asymptotic purposes we may assume this
without loss of generality. For proofs and definition see e.g. Borodin
et al.~\cite{Borodin81}.

In this model of computation all ``internal'' computation is free, we
only ``pay'' for reading the input. 


\section{The $n$-ary independence problem for a binary relation}
\label{sec:problem}

We now describe a particular class of decision problems that are the
focus of this paper. This class was introduced by Ajtai. Let
$\relation \subseteq U \times U$ be a binary relation on universe
$U$. Say that an input $x=\langle x(1), x(2), \ldots, x(n)\rangle$ is
$\relation$-independent if there is no $i\neq j$ such that
$\relation(x(i),x(j))$. The $n$-ary independence problem for
$\relation$ is the decision problem $\decision$ on $U^n$ defined by
$\decision(x)=\YES$ if and only if $x$ is $\relation$-independent.

For example, in the case that $\relation$ is the equality relation
$EQ=\{(x,x)|x\in U\}$, the $n$-ary independence problem for $EQ$ is
equivalent to \ed on $U^n$.

As a consequence of the fact mentioned above that all internal
computation is free, we have the following easy proposition.

\begin{prop}{upper}
  We can decide the $n$-ary independence problem, $\decision$, for any
  binary relation, $\relation$, on $U$ in time $T$ and space $S$ such
  that
  \[
  T\!\cdot\! S\in\Oh(n^2\lg R),
  \]
  for time between $n$ and $n^2$.
\end{prop}

\begin{proof}
  First observe that in time $n$ we may decide $\relation$ using $R^n$
  nodes or $n\lg R$ bits of space, using an $R$-way decision
  tree---\ie reading all the inputs once, remembering them, and then
  exploit the free internal computation.
  
  This easily generalizes by splitting the input into $b(n)$ blocks
  each containing $n/b(n)$ words. Now what we will do is do decide
  $\relation$ for each of the $\Oh(b(n)^2)$ pairs of blocks; each time
  we read a block we also decide $\relation$ for every pair of
  elements in that block. For each pair we may do the computation in
  time $2n/b(n)$ using $2(n/b(n))\lg R$ bits of space; deciding
  $\relation$ internally on the blocks is free as we remember the
  entire block. In total we use time $\Oh(nb(n))$ and space
  $\Oh((n/b(n))\lg R)$, yielding the desired trade-off.
\end{proof}

For time $n$ this construction of course works for \emph{any} problem,
which shows that for time $\Theta(n)$ one cannot give space-bounds
better than $\Omega(n\lg n)$ (when $R=2^{c\lg n}$), \ie we cannot hope
for a better general bound for \emph{any} such independence problem in
this model.

In this paper we focus on the case that $U=\{0,1\}^{c\lg n}$ (where
$c$ will be a suitable fixed natural number) with $|U|=n^c$. Recall
that our $n$-ary independence problems for binary relations are
defined on $x=\langle x(1), x(2), \ldots, x(n)\rangle\in U^n$; we
emphasize the difference in notation between $x_i\in U^n$ (an entire
input consisting of $n$ elements from $U$) and $x(i)\in U$ (the $i$'th
element from $U$ of the input $x$).

We will be concerned with the independence problem for another
relation: the \hd relation. For $0<\gamma<\half$ we define the
parameterized \hd relation $HD_\gamma(a,b)\subset \{0,1\}^{c\lg
n}\!\times\!\{0,1\}^{c\lg n}$ which relates $a$ and $b$ if and only if
they differ in at most $\gamma c \lg n$ positions. To alleviate
notation $HD$ will refer to $HD_{1/4}$ and \hd, or $D_{HD}$, will
refer to the $n$-ary independence problem for this relation with
$\gamma = 1/4$. \equality is the problem of deciding whether the input
contains to identical words. It should be clear that \hd is a natural
generalization of \equality\footnote{When Ajtai says \ed in
\cite{Ajtai99}, he actually means the dual problem \equality.}.

A relation $\relation$ is said to be $\full$ iff for any pair of
subsets of $U$ of size bigger than $\lambda(n)|U|$ there exists a pair
of elements, one from each of the two subsets, satisfying the
relation.  Formally, $\relation$ is $\full$ if the following holds
\begin{equation}
  \label{eq:full}
  \forall A,B\subseteq U:\: |A|,|B|>\lambda(n)|U|\quad\Rightarrow\quad
  \exists a\in A,\, b\in B:\: \relation(a,b)
\end{equation}
It should be clear that \equality is $\half-\text{full}$, as any two
subsets of $U$ containing more than half of $U$ will have a non-empty
intersection. In \cite{Ajtai99} it is proved that for all
$0<\gamma<\half$, $HD_\gamma$, and in particular $HD$, is
$2^{-\delta\lg n}-\text{full}$ on $U=\{0,1\}^{c\lg n}$ where $c$ and
$\delta$ are natural numbers suitably chosen (independent on $n$, but
depending on $\gamma$) such that $c>\delta$. This means that if we
have two subsets of $U$ of size bigger than $2^{(c-\delta)\lg n}$, we
are guaranteed to have two elements with low Hamming distance.

A relation $\relation$ is said to be $\sparse$ iff the number of
inputs of length $n$ for which the $n$-ary independence problem
$\decision(x)=\YES$ is less than $\zeta(n)|U|^n$ (a $\zeta(n)$
fraction of all possible inputs).  Intuitively this means that the
$n$-ary independence problem for the binary relation, $\relation$, has
many $\NO$ instances.  In \cite{Ajtai99} it is proved that for all
$0<\gamma<\half$ $HD_\gamma$ is $\half-\text{sparse}$ for a fixed
$c\in\mathbf{N}$ (recall that $|U|=n^c$) and $n$ sufficiently large.
This means that for at least half of all input to \hd the answer is
$\NO$.  Ajtai~\cite{Ajtai99} also proves that \equality is
$c_=-\text{sparse}$ for a suitable fixed $c_=>0$.


\section{The result}
\label{sec:result}


We are now ready to state the result.

\begin{thm-name}{full}{General theorem, full version}
  Let $\relation$ be a $\full$ and (non-trivial) $\sparse$ relation on
  $U\!\times\! U$ with $|U|=R$. Consider an $R$-way branching program
  deciding $\decision$, in time $T(n)=\time$ and space $S(n)$. If,
  \begin{equation}
    \label{eq:time}
   72\, k(n)\lg k(n)< \lg \fullinv, 
  \end{equation}
  and
  \begin{equation}
    \label{eq:sparseness}
    \lg \sparseinv < \frac{n\lg \fullinv}{k(n)^{5k(n)}}.
  \end{equation}
  Then,
  \begin{equation}
    \label{eq:space}
    S(n)\geq \frac{n\lg \fullinv}{k(n)^{4k(n)}}.
  \end{equation}
\end{thm-name}

Let us discuss this theorem. First of all, many of the constants in the
above statement may possibly be improved, albeit not significantly.

What kind of fullness is required to achieve a non-trivial result?
>From Ajtai's paper it might seem as if the \hd proof works only for
what we might call \emph{polynomial fullness}, i.e.,
$1/{|U|^{\Oh(1)}}$-fullness, whereas the \ed (or rather \equality)
proof handles \emph{constant fullness}. If $k(n)\in\Oh(1)$, we see
from \eqref{time} that constant fullness, specifically
$\lambda(n)<2^{-144}$, actually does give rise to a non-trivial lower
bound. However, it does not seem to be the case that the parameters of
the proof can be improved enough to achieve anything for
$\half-\text{fullness}$, i.e., we cannot prove anything for \ed, but
we can get closer than what Ajtai's original statement suggests.

If $\lambda(n)|U|<1$ the relation in question would be trivial to
decide (as two subsets of size $1$ would then be enough to ensure
satisfaction of the fullness property), hence we can assume that
$\lambda(n)|U|\geq 1$. Combining this with \eqref{time} yields,
\[
\lg |U| \geq \lg \frac{1}{\lambda(n)} > 72\, k(n)\lg k(n).
\]
Recalling \propref{r2bool}, we see that the best result we can achieve
in the Boolean model is,
\[
\frac{T(n)}{\lg |U|} < \frac{\time}{126\, k(n)\lg k(n)} \in \oh(n).
\]
In conclusion, no matter how we might chose the parameters, we can get
no non-trivial implications for the Boolean model.

As stated previously, on $U=\{0,1\}^{c\lg n}$ \hd is
$n^\delta-\text{full}$ and $\half-\text{sparse}$, giving
\thmref{vanilla} as corollary to \thmref{full}. Interesting special
cases include Ajtai's original theorem: time in $\Oh(n)$ implies space
in $\Omega(n\lg n)$, and space $\Oh(n^{1-\epsilon})$ implies time
$\Omega(n\textfrac{\lg n}{\lg\lg n})$.


\section{Proof of \thmref{full}}
\label{sec:proof}


We would like to stress that almost all the arguments closely follow
those of Ajtai~\cite{Ajtai99}. We give the proof in full detail for
two reasons: 1) in the generalized version many constants, say $c$,
are replaced by functions, $c(n)$, and it is imperative to give the
full proof to see exactly where the proof holds and where it breaks
down; 2) it makes it clear exactly where we deviate from the original
proof.

The proof presented here deviates from that of Ajtai in two aspects.
When counting we use tighter estimates where it is possible, this has
little effect on Ajtai's original proof, but significantly extends the
interval of time for which the generalized proof works---an immediate
generalization will work for time up to roughly $n\textfrac{\lg\lg
  n}{\lg\lg\lg n}$, whereas our proof works up to
$\Theta(n\textfrac{\lg n}{\lg\lg n})$.  Another deviation is our proof
of \lemref{many} (corresponding to Lemmas 7 and 8 in \cite{Ajtai99}).
Besides these technical differences, changes have also been made for
reasons of presentation.

The overall intuition behind Ajtai's proof is as follows. We use the
time and space restrictions to construct a large set of inputs that
are all rejected by the algorithm. The structure and size of this set
will be such that we can utilize the fullness property of our problem
to ensure that at least one input, which we will call $\fail$, in the
above set must be accepted. This of course gives a contradiction on
the assumed time and space restrictions of the algorithm.

Assume we have an $R$-way branching program, $\A$, deciding
$\decision$ in time $\time$ and space $S(n)$ satisfying \eqref{time}
and \eqref{sparseness} but
\emph{not} \eqref{space}. We will show that then there exists an input
$x$ such that $\A(x)=\NO$ but $\decision(x)=\YES$, contradicting our
assumption that is, assuming \eqref{time} and \eqref{sparseness} we
may conclude \eqref{space}.

Define $\state(x,t)$ as the state of $\A$ (one of $2^{S(n)}$) after
time step $t$ on input $x$. For a time interval $I$, define
$\state(x,I)$ to be $\state(x,t_I)$, where $t_I$ is the \emph{last}
element of $I$ that is, given $x$ and a time interval $I$ we get the
state of the program when leaving $I$. An arbitrary set of times $T$
may be written as a disjoint union of maximal intervals $I_j$, i.e. $T
= \cup_j I_j$. If we assert that $\state(x,T)=\state(y,T)$ we mean
that $\forall j:\: \state(x,I_j) = \state(y,I_j)$.

We will construct $\fail$ from another input $x$ with some desirable
properties. The first property is that $\decision(x) = \NO$ and hence
$\A(x) = \NO$, as $\A$ is assumed to decide $\decision$ correctly.
Suppose that we have two disjoint subsets of indices
$W_1,W_2\subset\{1,\ldots,n\}$ and two disjoint subsets of times
$T_1,T_2\subset\{1,\ldots,\time\}$. Suppose now that on input $x$ the
indices of $W_i$ ($i=1,2$) are not read outside $T_i$; let
\[
\begin{array}{rl}
S^x_i \subseteq & \{x_i|(\text{ obtained by modifying } x \text{ on }
W_i \text{ only})\\ 
 & \quad \quad \quad \quad \quad \quad \quad \quad \quad \wedge \quad \state(x,T_i) = \state(x_i,T_i)\} ,
\end{array}
\]
i.e. $x$ and $x_i$ are identical outside $W_i$ and each time we leave
$T_i$ on both $x$ and $x_i$ we are in the same state. Hence $\A(x) =
\A(x_i)$ for all $x_i\in S^x_i$ as the only differences between the
two inputs are in $W_i$ and hence ``forgotten'' when we leave $T_i$,
the only place where $W_i$ is read. 

Based on $x$ we can thus construct $x_1$ and $x_2$ such that $\A(x) =
\A(x_1) = \A(x_2) = \NO$. As $W_1$ and $W_2$ are disjoint we may make
these two different changes to $x$ simultaneously, obtaining the input
$\fail$. We would like to ensure that $\A(x) = \A(\fail) = \NO$, but
currently this might not be the case. The reason is that in the set of
time intervals, say $T_1$, $\A$ is in principle able to look outside
$W_1$ and hence $\A$ might look at $W_2$. This is not a problem as
long as $x$ on $W_2$ is unchanged, but when making the two changes
simultaneously we no longer have any guarantee that the states are
fixed. The way to eliminate this problem is to enforce that the
indices of $W_j$ are not read in $T_i$ on $x_i$, removing the
possibility that $\A$ detects the change in $W_2$ when in $T_1$ and
vice versa.

We can now construct $\fail$ such that $\A(\fail) = \A(x) = \NO$ by
making changes to $x$ on $W_1$ and $W_2$. The plan is of course to
choose $\fail$ such that $\decision(\fail) = \YES$, giving the desired
contradiction. One way to achieve this is to have so many choices for
each of $x_1$ and $x_2$ that we can put the fullness property into
play. If $S\subset U^n$, let $S(l)$ be $S$ projected onto the $l$'th
dimension (or index).

\begin{prop}{dimension}
  Let $W\subseteq\{1,\ldots,n\}$ and let $S\subset U^n$ be a set of
  inputs that are all identical outside $W$. If
  $|S|>(\lambda(n)|U|)^{|W|}$ then for at least one $l\in W$ it will
  be the case that $|S(l)|>\lambda(n)|U|$, i.e. some $x(l)$ (over
  $x$'s in $S$) must take on more than $\lambda(n)|U|$ values from
  $U$.
\end{prop}

\begin{proof}
  Suppose that $\forall l\in W$ it is the case that $|S(l)|\leq
  \lambda(n)|U|$ then clearly $|S|\leq (\lambda(n)|U|)^{|W|}$.
\end{proof}

If we have many different inputs on $W_i$ to choose from, there will
be an index for which we have many values to choose from, allowing us
to exploit the fullness property.

Based on the above idea, we define a \emph{$\mu(n)-$hard set},
$\H$, for a branching program $\A$ deciding $\decision$ in time
$\time$.

\sloppy

\begin{defi-name}{hard set}{Hard Set}
  A set of inputs $\H\subset U^n$ is called a $\mu(n)-$hard set for a
  branching program $\A$ deciding $\decision$ in time $k(n)\!\cdot\! n$ and
  space $S(n)$ if we have
  \begin{itemize} \cramped
    \item $W_1,W_2\subset \{1,\ldots,n\}$ with $W_1\cap W_2 = \emptyset$
      and $|W_i|\geq \mu(n)\!\cdot\! n$,
    \item $T_1,T_2\subset \{1,\ldots,\time\}$ with $T_1\cap T_2 =
      \emptyset$,
  \end{itemize} 

  such that $\forall x\in\H:$

  \begin{itemize} \cramped
    \item $\decision(x) = \NO$.
    \item The indices of $W_i$ are only read in $T_i$ (on $x$).
    \item $\state(x,I_{ij})$ is fixed for all the intervals comprising
      $T_i$---i.e. every time we leave $T_i$, $\A$ will behave
      identically for all $x\in\H$.
  \end{itemize}
\end{defi-name}
  
\fussy

Further, if $H$ is a hard set then for $x\in\H$ we define an input $y$
to be \emph{$\H$-similar to $x$} if there exists inputs $x_1,x_2\in\H$
such that $x$ and $x_1$ differ only on indices in $W_1$ and $x$ and
$x_2$ differ only on indices in $W_2$, and $y$ is obtained from $x$ by
changing the position indexed by $W_1$ to match $x_1$ and those
indexed by $W_2$ to match $x_2$. We then have the following:

\begin{prop}{similar}
Let $\H$ be a hard set for a branching program $\A$ deciding
$\decision$. For $x\in\H$, if $y$ is $\H$-similar to $x$ then $\A(x) =
\A(y) = \NO$.
\end{prop}

\begin{proof}
  Assume now that $\A$ is able to distinguish $x$ and $y$. This means
  that these two inputs lead to different final states for the
  branching program. As $x$ and $y$ are identical outside the indices
  of $W_1$ and $W_2$, then there must exist $i$ and $j$ such that
  $\state(x,I_{ij}) \neq \state(y,I_{ij})$, i.e., some interval
  $I_j\subseteq T_i$.

  Assume, without loss of generality, that $i=1$. So, for some
  $I_{1j}\subseteq T_1$ $\state(x,I_{1j}) \neq \state(y,I_{1j})$. Let
  $I_{1j}$ be the first such interval during the computation of $\A$,
  hence $x$ and $y$ follow the same computation path until
  $I_{1j}$. By construction, also $x_1$ and $x_2$ follow this same
  path as $x$. Inside $I_{1j}$ we only read indices in $W_1$ and end
  up in a different state than $\state(x,I_{1j})$ following a
  different path from $x$. However, we follow the exact same path as
  on $x_1$ as $y$ is identical to $x_1$ on $W_1$, so we have that
  $\state(x,I_{1j}) \neq \state(x_1,I_{1j})$ contradicting that $x$
  and $x_1$ are in the same hard set.
\end{proof}

The proof of \thmref{full} splits naturally in three parts. In
\lemref{many} we show that if we have a large hard set, we may obtain
$x_1$ and $x_2$ in ``many'' ways (relative to the fullness property).
Then in \lemref{large} we show that given the time and space
restrictions there exists a ``large'' hard set (relative to the
sparseness property). Finally these two lemmata are combined.

\begin{lem}{many}
  Let $\relation$ be a $\full$ relation on U, $\A$ be a branching
  program deciding $\decision$ in time $\time$ and space $S(n)$, and
  let $\H$ be a $\mu(n)-$hard set for $\A$ so that,
  \[
  |\H| > 4 \lambda(n)^{\mu(n)\cdot n} |U|^n,
  \]
  then there exists some $x\in\H$ for which there exist $\fail$ which
  is $\H$-similar to $x$, i.e. with $\A(\fail)=\NO$, but with
  $\decision(\fail) = \YES$.
\end{lem}

\begin{proof}
  Define $x_{|W}$, where $W\subseteq\{1,\ldots,n\}$, to be $x$ where
  all values at positions in $W$ are overwritten with some standard symbol,
  say $\bot\not\in U$.  Suppose we have some set of inputs $\H\subseteq
  U^n$ and some set of indices $W\subseteq\{1,\ldots,n\}$; define
  \begin{equation}
    \label{eq:S}
    S^x = \{ y\in \H | x_{|W} = {y}_{|W} \},
  \end{equation}
  i.e. the set of elements in $\H$ which are identical to $x$ outside
  $W$. We claim the following
  \begin{equation}
    \label{eq:partly identical inputs}
      \#x\in\H\text{ with } |S^x|\leq \quarter \frac{|\H|}{|U|^{n-|W|}}
  \text{ is less than }\quarter|\H|.
  \end{equation}
  Given $W$, define a partition of $\H$ according to $S^x$, i.e. $x$
  and $y$ are in the same class if and only if $S^x = S^y$ (thus $y\in
  S^x$, and $x\in S^y$). Clearly there are no more than $|U|^{n-|W|}$
  classes, as we have at most this many ways of choosing a $y$ which is
  different from $x$ outside $W$. The number of inputs in classes of
  size at most $\quarter |\H| / (|U|^{n-|W|})$ is at most this number
  (the maximum size of these classes) times $|U|^{n-|W|}$ (the maximum
  total number of classes), implying \eqref{partly identical inputs}.
  
  Based on a hard set $\H$, let $S^x_i$ be defined as in \eqref{S}
  based on $\H$ and $W_i$. Clearly \eqref{partly identical inputs}
  holds for $S^x_1$ and $S^x_2$, so these sets are relatively large
  for most inputs in $\H$. We would like an $x$ for which both $S^x_1$
  and $S^x_2$ are large, but in principle the $x$'s that give large
  $S^x_1$ might not be the same as those that give large $S^x_2$ (and
  vice versa). According to \eqref{partly identical inputs}, $|S^x_i|
  \geq \quarter |\H| / (|U|^{n-|W|})$ for $\textfrac{3}{4}$ of the
  elements in $\H$, hence for $\quarter$ of these elements both
  $S^x_1$ and $S^x_2$ are no smaller than $\quarter |\H| /
  (|U|^{n-|W|})$; certainly for any $\H$, $W_1$ and $W_2$, this gives
  us an input $x'$ such that both $S^{x'}_1$ and $S^{x'}_2$ have this
  size.

  Consider this particular input $x'$. Each pair of $x_1\in S^{x'}_1$
  and $x_2\in S^{x'}_2$ gives rise to a different $y$ $\H$-similar to
  $x'$. Since $|W_i|\geq\mu(n)\!\cdot\! n$ we have that $\quarter |\H|
  / (|U|^{n-|W|})>(\lambda(n)|U|)^{|W_i|}$, yielding that $|S^{x'}_i|
  > (\lambda(n)|U|)^{|W_i|}$. Hence, by \propref{dimension} there must
  exists a pair of indices $\kfailone\in W_1$ and $\kfailtwo\in W_2$
  taking on more than $\lambda(n)|U|$ different values for all the
  inputs in $S^{x'}_1$ and $S^{x'}_2$ respectively. By the fullness
  property this implies that there exists $x'_1\in S^{x'}_1$ and
  $x'_2\in S^{x'}_2$ so that
  $\relation(x'_1(\kfailone),x'_2(\kfailtwo))$. Define $\fail$ to be
  the input $\H$-similar to $x'$ obtained from $x'$ by modifying $x'$
  on $W_1$ and $W_2$ using $x'_1$ and $x'_2$ respectively. Thus
  $\decision(\fail)=\YES$. By \propref{similar} $\A(\fail) = \NO$ as
  $\fail$ is $\H$-similar to $x'$.
\end{proof}


\begin{lem}{large}
  Let $\relation$ be a $\sparse$ relation on U, $\A$ be a branching
  program deciding $\decision$ in time $\time$ and space $S(n)$, and
  let $\mu(n) = k(n)^{-4k(n)}$. $\A$ has a $\mu(n)-$hard set of size
  \[
  |\H|\geq \frac{\zeta(n)|U|^n}{\binom{n}{\mu(n)\cdot n}^2\ 
    (3k(n))^{8k(n)}\  2^{4k(n)S(n)}}.
  \]
\end{lem}

\begin{proof}
  We start by constructing a partitioning $P_x$ of the indices as
  follows.  Split time into $9k^2(n)$ intervals of length\footnote{The
    interval length is parameterized in \cite{Ajtai99}, however the
    present choice leaves little room for improvement.} $n/(9k(n))$.
  Two indices $i$ and $j$ are in the same class of $P_x$ if and only
  if they are read in exactly the same intervals on input $x$. $P'_x$
  is $P_x$ restricted to classes of $P_x$ whose members are queried in
  at most $2k(n)$ time intervals on $x$. Finally $\Gamma_x$ is $P'_x$
  restricted to classes whose size is at least $n/(4|P'_x|)$.
  
  For $w \in \Gamma_x$, define $\interval(x,W)$ to be the intervals in
  which $W$ is queried on input $x$. By construction, $\interval(x,W)$
  contains at most $2k(n)$ intervals for all $W\in\Gamma_x$.

  Our restricted partitioning $\Gamma_x$ has the following properties,
  \begin{equation}
    \label{eq:size}
    \forall W\in\Gamma_x:\: |W| \geq \mu(n)\cdot n,
  \end{equation}
  \begin{equation}
    \label{eq:disjoint}
    \forall x: \exists W_1,W_2\in\Gamma_x:\:
    \interval(x,W_1)\cap\interval(x,W_2) = \emptyset.
  \end{equation}
  It is a fact that no more than $n/2$ elements can be queried in more
  than $2k(n)$ intervals, since the branching program has length at
  most $\time$; hence $P'_x$ covers at least $n/2$ elements. Classes
  of $P'_x$ with size no greater than $n/(4|P'_x|)$ can cover at most
  $n/4$ indices (as we have most $|P'_x|$ such classes). Thus
  $\Gamma_x$ covers at least $\textfrac{n}{2} - \textfrac{n}{4} =
  \textfrac{n}{4}$ indices.
  
  Each class of $P'_x$ is uniquely identified by the corresponding
  set of at most $2k(n)$ intervals in which the indices of the class
  are read. Hence we may bound the number of classes in $P'_x$ by the
  number of ways to choose up to $2k(n)$ intervals from $9k^2(n)$,
  \begin{eqnarray*}
    |P'_x| & \leq & \sum_{i=0}^{2k(n)}\binom{9k^2(n)}{i}\\
           & = & 2^{9k^2(n)H(\textfrac{2}{9k(n)})-\half\lg 9k^2(n)+\Oh(1)}\\
           & \leq & k(n)^{3k(n)},
  \end{eqnarray*}
  according to \cite[p. 492]{GKP94}\footnote{These estimates are not
    correct for $k(n)\in\Oh(1)$, in which case we can just use Ajtai's
    original estimate. Also, using
    \smash{$\binom{n}{k}<(\textfrac{n}{e})^k$} gives a bound that is
    almost as good, but we use the above estimate to emphasize that
    significantly better estimates are not possible.}, $H(m)$ is the
  entropy function $m\lg \textfrac{1}{m}+(1-m)\lg \textfrac{1}{1-m}$.
  By the lower bound on the size of the classes in $\Gamma_x$ we have
  proved \eqref{size}.
  
  \sloppy

  We will now prove \eqref{disjoint}; in fact we will prove the
  stronger statement that
  \begin{equation}
    \label{eq:stronger}
    \forall W_1\in\Gamma_x\,\exists W_2\in\Gamma_x:\:
    \interval(x,W_1)\cap\interval(x,W_2) = \emptyset.
  \end{equation}
  Since $\interval(x,W_1)$ has at most $2k(n)$ intervals, and each
  such interval contains at most $n/9k(n)$ indices, the total number
  of indices queried within those intervals is at most $2n/9$. As
  there are $n/4$ indices covered by $\Gamma_x$, we can choose an
  index $j$ covered by $\Gamma_x$ that is not queried within
  $\interval(x,W_1)$, and so the class $W_2\in\Gamma_x$ containing $j$
  satisfies \eqref{stronger}. This in turn imply \eqref{disjoint}.
  
  \fussy

  To conclude the proof we will use $\Gamma_x$ to construct a hard
  set. For each $x\in U^n$ with $\decision(x)=\NO$ let $W_1$ and $W_2$
  be the set of indices whose existence is promised in
  \eqref{disjoint}, and let $T_i = \interval(x,W_i)$. Define $\H^x$ to
  be the set of $y\in U^n$ that satisfy,
  \begin{itemize}\cramped
  \item $\decision(x)=\NO$.
  \item $T_i = \interval(y,W_i)$ ($= \interval(x,W_i)$), hence $W_i$
    is a class of both $\Gamma_x$ and $\Gamma_y$.
  \item $\state(x,T_i) = \state(y,T_i)$. 
  \end{itemize}
  Define $F_i$ to be a function that given $x$ and $T_i$ lists
  $\state(x,I_{ij})$ where $T_i$ is comprised of $\cup_j I_{ij}$;
  $F_i$ gives an ordered lists of the states of $\A$ each time we
  leave $T_i$.
  
  The above is a hard set as \eqref{size} means that $|W_i|\geq
  \mu(n)\!\cdot\! n$ as required, and the indices of $W_i$ are
  certainly not read in $T_j$, in fact they are \emph{only} read in
  $T_i$.
 
  \sloppy
 
  Each hard set $\H^x$ is uniquely determined by the six-tuple
  $\langle W_1,W_2,T_1,T_2,F_1,F_2\rangle$. If we can choose this
  six-tuple in at most $h(n)$ ways, there must be an $x$ such that
  $|\H^x| \geq \textfrac{\zeta(n)|U|^n}{h(n)}$ as there are at least
  $\zeta(n)|U|^n$ inputs $x$ with $\decision(x)=\NO$.
  
  \fussy
  
  Observe that $W_i$ need not be bigger than $\mu(n)\!\cdot\! n$; if
  we have more indices than this, just take the first $\mu(n)\cdot n$.
  This may collapse a number of classes into one, meaning that we may
  choose each class $W_i$ in $\smash{\binom{n}{\mu(n)\cdot n}}$ ways.
  This restriction on the size of $W_i$ significantly increases the
  size of the interval in which we can obtain a bound.  
  
  As $T_i$ consists of at most $2k(n)$ out of $9k(n)$ intervals, the
  number of ways to choose $T_i$ is bounded by $(9k(n))^{2k(n)} =
  (3k(n))^{4k(n)}$ (actually a better estimate should be possible, but
  it will not improve the result significantly).  Finally, as $T_i$
  consists of at most $2k(n)$ intervals we fix the state of our
  computation in at most this many places, each with $2^{S(n)}$
  choices, meaning that $F_i$ can be chosen in at most
  $(2^{S(n)})^{2k(n)}$ ways.  In total we get that
  \begin{eqnarray*}
    h(n) & \leq & \binom{n}{\mu(n)\!\cdot\!
    n}^2\cdot \left((3k(n))^{4k(n)}\right)^2\cdot
    \left((2^{S(n)})^{2k(n)}\right)^2\\  
    & = & (\binom{n}{\mu(n)\cdot n}^2\  (3k(n))^{8k(n)}\ 
    2^{4k(n)S(n)}.
  \end{eqnarray*}
\end{proof}


\begin{proof-name}{\thmref{full}}
  Let $\relation$ be a $\full$ and $\sparse$ relation on $U$. Assume
  that $\A$ is an $R$-way branching program deciding $\decision$ on
  $U^n$ in time $\time$ and space $S(n)$, such that \eqref{time} and
  \eqref{sparseness} but not \eqref{space} holds. Based on these
  assumptions our aim is to arrive at a contradiction, thus proving
  the theorem. Specifically we will show that there exists an input
  $\fail$ such that $\decision(\fail) = \YES$, but the branching
  program answers $\A(\fail) = \NO$.
  
  Combining \lemref{many} and \lemref{large} we see that we can find
  $\fail$ if
  \[
  \frac{\zeta(n)|U|^n}{\binom{n}{\mu(n)\cdot n}^2\ 
    (3k(n))^{8k(n)}\  2^{4k(n)S(n)}} > 4 \lambda(n)^{\mu(n)\cdot n}
  |U|^n.
  \]
  Using Stirling's approximation for $n!$ (see eg.~\cite[p.
  115]{Knuth97}) we get that
  \[
  \lg \binom{n}{\mu(n)n} < 3 \mu(n) n \lg
  \textstyle{\frac{1}{\mu(n)}}
  \]
  for $n$ sufficiently large, it is sufficient that
  \[
  2 + \lg \sparseinv + 6 \mu(n) n\lg\textstyle{\frac{1}{\mu(n)}} + 8
  k(n) \lg 3k(n) + 4k(n)S(n) < \mu(n) n \lg \fullinv.
  \] \sloppy
  If $k(n)<n$ (which it will be) then $8 k(n) \lg 3k(n) + 4k(n)S(n) <
  12k(n)S(n)$ as $S(n)>\lg n$ (necessarily). Likewise the constant $2$
  is dominated by $k(n)S(n)$. Hence, the above is satisfied if \fussy
  \[
  \lg \sparseinv + 6 \mu(n)n\lg\textfrac{1}{\mu(n)} + 13k(n)S(n) <
  \mu(n)n\lg \fullinv.
  \]
  This certainly holds if each of the three terms on the left hand
  side is less than a third of the term on the right hand side. We
  have the desired $\fail$ if
  \[
  6\mu(n)n\lg\textfrac{1}{\mu(n)} < \third \mu(n)n\lg \fullinv,
  \]
  \[
  \lg \sparseinv < \third \mu(n)n\lg \fullinv,
  \]
  \[
  13k(n)S(n) < \third \mu(n)n\lg \fullinv.
  \]
  Which is implied by \eqref{time}, \eqref{sparseness} and
  $\neg$\eqref{space}, since $\mu(n)=k(n)^{-4k(n)}$. The last
  equation---implied by $\neg$\eqref{space}---holds as $k(n)\geq 1$,
  because we must always use time $n$.
\end{proof-name}


\section{Randomization}
\label{sec:randomization}


\begin{defi-name}{rand}{Randomized $R$-way branching program}  
  A \emph{randomized $R$-way branching program} using $r$ random bits,
  consists of a collection of $2^r$ deterministic $R$-way branching
  programs. Each execution of the randomized branching program starts
  by uniformly at random choosing one of the $2^r$ deterministic
  programs which is then executed.
  
  We say that a randomized $R$-way branching program $\A$ deciding a
  problem $D$ has constant 1-sided error if for $0\leq\epsilon<\half$
  the following holds\footnote{Note that that the the standard
  definition for 1-sided error (e.g. the complexity class RP) allows
  for error on the accepting answer $D(x)=\YES$, whereas our
  definition allows error on the rejecting answer (corresponding to
  CoRP).}
  \begin{itemize}\cramped
  \item If $D(x)=\YES1$ then our randomized branching program $A$ answers
    correctly on input $x$.
  \item If $D(x)=\NO$ then $\Pr[\A(x)=\NO]>1\!-\!\epsilon$.
  \end{itemize}
\end{defi-name}

\begin{cor}{rand}
  The statement of \thmref{full} holds for randomized $R$-way
  branching programs with constant 1-sided error if we modify the
  constants slightly.
\end{cor}

\begin{proof}
  By a standard averaging argument, one of the $2^r$ deterministic
  branching programs must correctly answer $\NO$ for at least a
  $1\!-\!\epsilon$ fraction of the inputs with answer $\NO$. Apply
  \thmref{full} to this \emph{deterministic} branching program
  computing $\decision$. Hence we only reduce the size of our hard set
  with a factor $\epsilon$.
\end{proof}


\section{Acknowledgement}

I would like to thank Faith Fich. I would also like to thank the
anonymous referees for many helpful comments. In particular one
referee has provided detailed suggestions on how to improve
presentation as well as advice on how to make some of the proofs
simpler and more precise.


\bibliographystyle{amsalpha} 
\bibliography{./cj05-01}

\end{document}


