% The Chicago Journal of Theoretical Computer Science, Volume 1999, Article 4
% Published by MIT Press, Cambridge, Massachusetts USA
% Copyright 1999 by Massachusetts Institute of Technology
%
\newtoks\rstyle
\rstyle={cjlook}
%
\documentclass[v1.1,published]{cjstruct}
%
% Local style resources for this article.
%
\usestyleresource{amstext}
\usepackage{amssymb}
\usepackage{amsmath}
%
% Local macro definitions for this article.
%
\catcode`\@=11
\ifx\nopagebreakbeginpar\undeftex
  \newcommand{\nopagebreakbeginpar}{\@beginparpenalty10000}
\fi
\ifx\nopagebreakendpar\undeftex
  \newcommand{\nopagebreakendpar}{\@endparpenalty10000}
\fi
\ifx\nopagebreakinteritem\undeftex
  \newcommand{\nopagebreakinteritem}{\@itempenalty10000}
\fi
\catcode`\@=12
%
\newcommand{\prooffontshape}{\fontshape{n}\selectfont}


\newcommand{\card}[1]{\ensuremath{\left\| #1 \right\|}}
\newcommand{\Prob}[2]{\ensuremath{\text{\rm Prob}_{#1} \! \left[ #2 \right]}}
\newcommand{\pair}[1]{\langle #1 \rangle}
\newcommand{\set}[2]{\mbox{\(\{ #1 \; | \; #2 \}\)}}
\newcommand{\CompStyle}{\rm}
\newcommand{\ComplexityClass}[1]{\ensuremath{\text{\CompStyle #1}}}

% \newcommand{\ComplexityClass{RP}}{\ComplexityClass{RP}}
\newcommand{\AM}{\ComplexityClass{AM}}
\newcommand{\NE}{\ComplexityClass{NE}}
\newcommand{\RE}{\ComplexityClass{RE}}
\newcommand{\PH}{\ComplexityClass{PH}}
\newcommand{\FP}{\ComplexityClass{FP}}
\newcommand{\TALLY}{\ComplexityClass{TALLY}}
\newcommand{\pdom}{\ensuremath{\preceq}}
\newcommand{\pdequiv}{\ensuremath{\equiv}}
\newcommand{\co}[1]{\text{\textnormal{co-}#1}}

\newcommand{\generator}{instance generator}
\newcommand{\Rmu}[1][]{\ensuremath{(R_{#1},\mu_{#1})}}
\newcommand{\Cite}[1]{{\rm \cite{#1}}}
\newcommand{\Hash}[1][m,k]{\ensuremath{{\cal H}_{#1}}}
\newcommand{\hv}[1]{\textnormal{\textsf{#1}}}
\newcommand{\ds}{\displaystyle}
\newcommand{\must}{\ensuremath{\mu_{\text{st}}}}

\newcommand{\DCM}{\ensuremath{\text{\textnormal{DCM}}}}
\newcommand{\muDCM}{\ensuremath{\mu_{\DCM}}}
\newcommand{\RDCM}{\ensuremath{(R_{\DCM},\muDCM)}}
\newcommand{\GF}{\ensuremath{\text{\textnormal{GF}}}}
\newcommand{\muGF}{\ensuremath{\mu_{\GF}}}
\newcommand{\RGF}{\ensuremath{(R_{\GF},\muGF)}}


\def\registered{\({}^{\ooalign%
    {\hfil\raise.07ex\hbox{{\tiny\textup{R}}}%
    \hfil\crcr\scriptsize\mathhexbox20D\cr}}\)}
%
% end of local macro definitions for this article
%
\begin{document}
%
\begin{articleinfo}
%
\publisher{The MIT Press}
%
\publishercomment{%
  {\Large
    \begin{center}
      Volume 1999, Article 4\\
      \textit{25 March 1999}
    \end{center}
  }
  \par\noindent
  ISSN 1073--0486\@. MIT Press Journals, Five Cambridge Center, Cambridge, MA
  02142-1493 USA; (617)253-2889;
  \emph{journals-orders@mit.edu, journals-info@mit.edu}\@.
  Published one article at a time in \LaTeX\ source form on the
  Internet\@. Pagination varies from copy to copy\@. For more
  information and other articles see:
  \begin{itemize}
    \item \emph{http://mitpress.mit.edu/CJTCS/}
    \item \emph{http://www.cs.uchicago.edu/publications/cjtcs/}
    \item \emph{ftp://mitpress.mit.edu/pub/CJTCS}
    \item \emph{ftp://cs.uchicago.edu/pub/publications/cjtcs}
  \end{itemize}
  \sentence
  \par\noindent
  The \emph{Chicago Journal of Theoretical Computer Science} is
  abstracted or indexed in \emph{Research Alert,\registered
  SciSearch,\registered Current Contents\registered/Engineering
  Computing \& Technology}, and \emph{CompuMath Citation Index\@.\registered}
}
%
\copyrightstmt{%
  \copyright 1999 The Massachusetts Institute of Technology\@.
  Subscribers are licensed to use journal articles in a variety of
  ways, limited only as required to insure fair attribution to authors
  and the journal, and to prohibit use in a competing commercial
  product\@. See the journal's World Wide Web site for further
  details\@. Address inquiries to the Subsidiary Rights Manager, MIT
  Press Journals; (617)253-2864;
  \emph{journals-rights@mit.edu}\@.\pagebreak[2]
}
%
\journal{Chicago\nolinebreak[1] Journal of Theoretical Computer\nolinebreak[1] Science}
\journalcomment{%
  The \emph{Chicago Journal of Theoretical Computer Science}
  is a peer-reviewed scholarly journal in theoretical computer
  science\@. The journal is committed to providing a forum for
  significant results on theoretical aspects of all topics in computer
  science\@.
  \par
  \begin{trivlist}
    \item[\emph{Editor-in-Chief:}] Janos Simon
    \item[\emph{Consulting Editors:}]
      Joseph Halpern, Stuart A.\ Kurtz, Raimund Seidel
    \item[\emph{Editors:}]
      \begin{tabular}[t]{lll}
        Martin Abadi & Greg Frederickson & John Mitchell \\
        Pankaj Agarwal & Andrew Goldberg & Ketan Mulmuley \\
        Eric Allender & Georg Gottlob & Gil Neiger \\
        Tetsuo Asano & Vassos Hadzilacos & David Peleg \\
        Laszl\'o Babai & Juris Hartmanis & Andrew Pitts \\
        Eric Bach & Maurice Herlihy & James Royer \\
        Stephen Brookes & Ted Herman & Alan Selman \\
        Jin-Yi Cai & Stephen Homer & Nir Shavit \\
        Anne Condon & Neil Immerman & Eva Tardos \\
        Cynthia Dwork & Howard Karloff & Sam Toueg \\ 
        David Eppstein & Philip Klein & Moshe Vardi \\
        Ronald Fagin & Phokion Kolaitis & Jennifer Welch \\
        Lance Fortnow & Stephen Mahaney & Pierre Wolper \\
        Steven Fortune & Michael Merritt
      \end{tabular}
    \vspace{1ex}
    \item[\emph{Managing Editor:}] Michael J.\ O'Donnell
    \vspace{1ex}
    \item[\emph{Electronic Mail:}] \emph{chicago-journal@cs.uchicago.edu}
  \end{trivlist}
\sentence
}
%
\bannerfile{cjtcs-banner.tex}
%
\volume{1999}
%
\articleid{4}
%
\selfcitation{
     @article{cj99-04,
       author={Christoph Karg and Johannes K\"obler and Rainer Schuler},
       title={The Complexity of Generating Test Instances},
       journal={Chicago Journal of Theoretical Computer Science},
       volume={1999},
       number={4},
       publisher={MIT Press},
       month={March},
       year={1999}
     }
}
%
\begin{retrievalinfo}
\end{retrievalinfo}
%
\title{The Complexity of Generating Test Instances\footnote{
 An extended abstract of the paper was presented at the Fourteenth
 Symposium on Theoretical Aspects of Computer Science (STACS),
 Springer-Verlag, \emph{Lecture Notes in Computer Science} 1200,
 pp. 375--386, 1997\@.}
}
\shorttitle{Complexity of Generating Test Instances}
%
\editorcomment{%
\samepage%
This paper is a contribution to a special issue on the 1996
Dagstuhl workshop on "Structure and Complexity" by Eric Allender\@.
}
%
\begin{authorinfo}
%
% Correction to error in cjstruct.sty version 1.1.
%
\def\fixversion{1.1}
\ifx\cjversion\fixversion
  \renewcommand{\printname}[1]{
    \global\authorstring={#1}
    \global\authorstrue
    \ifinfo\item Printed name: #1\fi
  }
\fi
%
% End of correction.
%
   \printname{Christoph Karg}
   \collname{}{Karg, Christoph}
   \begin{institutioninfo}
       \instname{Universit\"at Ulm}
       \department{Abteilung  Theoretische Informatik}
       \address{}{}{}{89069 Ulm}{Germany}
   \end{institutioninfo}
   \email{?@?}
\end{authorinfo}
%
\begin{authorinfo}
   \printname{Johannes K\"obler}
   \collname{}{K\"obler, Johannes}
   \begin{institutioninfo}
       \instname{Universit\"at Ulm}
       \department{Abteilung  Theoretische Informatik}
       \address{}{}{}{89069 Ulm}{Germany}
   \end{institutioninfo}
   \email{?@?}
\end{authorinfo}
%
\begin{authorinfo}
   \printname{Rainer Schuler}
   \collname{}{Schuler, Rainer}
   \begin{institutioninfo}
       \instname{Universit\"at Ulm}
       \department{Abteilung  Theoretische Informatik}
       \address{}{}{}{89069 Ulm}{Germany}
   \end{institutioninfo}
   \email{schuler@informatik.uni-ulm.de}
\end{authorinfo}
%
\shortauthors{Karg, K\"obler, and Schuler}
%
\support{
Christoph Karg's work was supported by the Deutsche
Forschungsgemeinschaft, Research Grant Sch{\"o} 302/4-2\@.
}
%
\begin{editinfo}
  \submitted{00}{00}{1998}\reported{00}{00}{l998}\published{25}{03}{1999}
\end{editinfo}
%
\end{articleinfo}
%
\begin{articletext}


\begin{abstract}
\apar{Abstract-1}
  Recently, Watanabe proposed a framework for testing the correctness
  and average-case performance of algorithms that purport to solve
  a given \ComplexityClass{NP} search problem efficiently on average with respect
  to some distribution on the instances\@.  The idea is to randomly
  generate certified instances under some distribution that resembles
  the input distribution\@.  Watanabe showed that unless \(\RE=\NE\),
  test instances cannot be generated for some \ComplexityClass{NP} search problems\@. We
  further discuss Watanabe's approach and show, as an upper bound, that
  test instances can be generated for every \ComplexityClass{NP} search problem with
  non-adaptive queries to an \ComplexityClass{NP} oracle\@.
%
\apar{Abstract-2}
  We also introduce Las Vegas and Monte Carlo types of test instance
  generators and show that these generators can be used to find out
  (with high confidence) whether an algorithm is correct and efficient
  on average\@.  It is shown that Monte Carlo generators can be easily
  constructed for all \(\ComplexityClass{RP}\) search problems and that Las Vegas
  generators exist for all \(\ComplexityClass{ZPP}\) search problems as well as for
  other problems such as prime factorization\@.  On the other hand, we prove that Monte
  Carlo generators can only exist for problems in \(\ComplexityClass{NP} \cap \co\AM\)\@.
\end{abstract}
%

\asection{1}{Introduction}
%
\apar{1-1}
The class \ComplexityClass{NP} is one of the most intensively studied classes in
computational complexity theory, for it contains a large variety of
problems that are of practical importance\@. There are two types of
problems that we are interested in, namely decision problems and
search problems\@. Solving a decision problem means to decide whether a
given instance has a solution, and solving a search problem means to
actually find a solution\@. Although, at first glance, search problems
seem more difficult to solve than decision problems, in many cases,
they are equivalent under polynomial-time Turing reductions\@. For
example, since any \ComplexityClass{NP} search problem is polynomial-time Turing
reducible to any \ComplexityClass{NP}-complete (decision) problem, it follows that
\ComplexityClass{NP}-complete problems have \emph{self-computable witnesses} (in the
sense of \cite{cj99-04-04,cj99-04-12}), meaning that solutions can be efficiently
computed by asking oracle queries to the corresponding decision
problem\@.
%
\apar{1-2}
Since polynomial-time algorithms do not exist for \ComplexityClass{NP}-complete
problems unless \(\ComplexityClass{P}=\ComplexityClass{NP}\), it seems reasonable to look for algorithms
that are \emph{efficient in the average case} \cite{cj99-04-25} (see
\cite{cj99-04-16} or \cite{cj99-04-34} for a survey)\@.  In practical
applications, instances occur with certain probabilities\@. To model
this process, we use probabilistic Turing machines that output
instances in time polynomial in the length of the generated instances
\cite{cj99-04-03}\@.
%
\apar{1-3}
Besides designing efficient algorithms it is also important to find
ways of testing the correctness (and performance) of a given
algorithm\@. Ideally, one would like to give a proof that the algorithm
is correct and efficient\@.  However, in some cases, a mathematical
proof may be hard to obtain, or the proof may be long and complex\@.
Thus, in practical terms, it is reasonable to test algorithms by
feeding them with carefully chosen instances for which the solutions
are already known\@.  An obvious approach is to use a probabilistic
polynomial-time bounded algorithm that generates instances of the
problem together with some solution\@. To be useful, these test
instances should be generated according to some distribution that
resembles the frequencies with which the instances occur in practice\@.
%
\apar{1-4}
In this paper our objective is to verify
the ``global correctness'' of a given
algorithm \(A\) in the sense that, on a
randomly selected input (according to
the underlying distribution on the
instances), the output of \(A\) is correct
with high probability\@.  This should be
seen in contrast to the program checking
approach in \cite{cj99-04-10}, where the
outcome of the algorithm on a single
instance has to be verified\@.  Watanabe
\cite{cj99-04-35} introduced a framework for
globally checking the correctness and
average-case behavior of algorithms\@.
Intuitively, a generator for an \ComplexityClass{NP}
search problem only needs to solve the
problem on instances that are (randomly)
\emph{generated by the generator itself},
whereas an algorithm has to solve the
problem on instances that are (randomly)
\emph{generated ``from outside\@.''}  Thus,
for some \ComplexityClass{NP} search problems it might
be easier to find a generator than to
find an algorithm that solves it\@.  For
example, it is not hard to construct a
test instance generator for \emph{prime
factorization} under the standard
distribution (which is uniform on each
input length)\@. On the other hand, no
efficient on-average algorithm is known
that on input of a binary number \(n\),
either outputs a factor of \(n\) in case
\(n\) is composite, or outputs ``prime''
otherwise\@. In fact, this is the basis
for the practical security of, for example, the
RSA cryptosystem\@. Also, for certain
random self-reducible problems
\cite{cj99-04-30,cj99-04-01} it is possible to
generate instances (along with some
solution) according to the distribution
induced by the self-reduction\@. Examples
are the \emph{quadratic residue} and the
\emph{discrete logarithm} problems\@.
%
\apar{1-5}
A test instance generator, as introduced
by Watanabe, is a probabilistic
algorithm \(G\) that generates instances
along with a solution; in particular,
\(G\) only produces \emph{positive
  instances}\@.  Moreover, on input \(0^n\),
\(G\) is required to output each positive
instance of length \(n\) with sufficiently
large probability (compared with the
\emph{conditional} distribution on the
set of all positive instances of length
\(n\))\@. As shown by Watanabe, there exist
\ComplexityClass{NP} search problems for which test
instances cannot be generated unless
\(\RE=\NE\)\@.  In fact, since for certain
distributions the generator has to
amplify the probability of the positive
instances by an exponential factor, we
can show that this is even true for very
simple search problems\@.
%
\apar{1-6}
In our model, a generator may produce
positive as well as negative instances\@.
As in Watanabe's approach, we require
that the generator \(G\) output a witness
along with every positively classified
instance, i.e., \(G\) never misclassifies
a negative instance\@.  Moreover, each
instance \(x\) has to be generated with a
probability that is polynomially related
to the probability \(\mu(x)\) with which
\(x\) occurs in practice\@. Thus, \(G\) is not
required to amplify the probability of
the positive instances\@.  Furthermore,
this condition implies that any
algorithm is efficient on the generated
instances if and only if it is efficient
under the given distribution \(\mu\)\@.
%
\apar{1-7}
We define two types of test instance
generators in this paper, namely, a
Monte Carlo generator and a Las Vegas
generator\@.  While a Monte Carlo
generator is allowed to misclassify a
(positive) instance with small
probability, a Las Vegas generator has
to classify all generated instances
correctly\@.  We show how these generators
can be used to decide efficiently (and
with high confidence) whether a given
algorithm \(A\) for an \ComplexityClass{NP} search
problem is correct with respect to
\(\mu\)\@.  Further, we describe how to
construct Monte Carlo generators for all
\(\ComplexityClass{RP}\) search problems as well as Las
Vegas generators for all \(\ComplexityClass{ZPP}\) search
problems\@.
%
\apar{1-8}
In general, whereas logarithmically many
\emph{adaptive} queries are useless, a
polynomial number of \emph{non-adaptive}
queries to an \ComplexityClass{NP} set turn out to be
sufficient to generate test instances
for any \ComplexityClass{NP} search problem\@.  This
implies, in particular, that under the
assumption \(\ComplexityClass{NP}=\ComplexityClass{RP}\), any distributional
\ComplexityClass{NP} search problem has a test instance
generator\@.
%
\apar{1-9}
Further, by an application of the
universal hashing technique, we show
that the domain of any search problem
for which a Monte Carlo generator exists
(under the standard probability
distribution) necessarily belongs to
{\co\AM}\@.  Similarly, Las Vegas
generators can only exist for \ComplexityClass{NP}
search problems whose domain belongs to
\(\ComplexityClass{NP}\cap\co{\ComplexityClass{NP}}\)\@. 
As a consequence, Monte
Carlo or Las Vegas generators do not
exist for search problems with an
\ComplexityClass{NP}-complete domain, unless the
polynomial-time hierarchy collapses\@.
%
\apar{1-10}
Finally, we define a reducibility between \ComplexityClass{NP} search problems that,
similar to the reducibility introduced by Levin \cite{cj99-04-25,cj99-04-07},
preserves the probabilities of the instances\@.  Via this reducibility
we can obtain a test instance generator for a given \ComplexityClass{NP} search
problem provided that we already have a test instance generator for
some \ComplexityClass{NP} search problem to which the given one reduces\@.
%

\asection{2}{Preliminaries}
%
\apar{2-1}
In this paper we use the standard notations and definitions of
computational complexity theory (see, for example, \cite{cj99-04-06})\@. An
introduction to the theory of computational average-case complexity
can be found in \cite{cj99-04-16,cj99-04-03,cj99-04-34}\@. Given below are some of the
basic definitions we will use\@.
%
\apar{2-2}
All languages considered here are over the alphabet \(\Sigma=\{0,1\}\)\@.
The length of a string \(x\in\Sigma^*\) is denoted by \(|x|\)\@. For a set
\(A\) of strings, let \(A^{=n}=A\cap\Sigma^n\) and \(A^{\leq
  n}=\bigcup_{k=0}^n A^{=k}\)\@. We denote the cardinality of a finite
set \(A\) by \(\card{A}\), and we write \(A(x)=1\), if \(x\in A\), and \(A(x)=0\)
otherwise\@. The \hv{pairing function} \(\pair{\cdot,\cdot} : \Sigma^*
\times \Sigma^* \to \Sigma^*\) is defined as \(\pair{x,y}=d(x)01y\),
where \(d(x_1 \dots x_{n})=x_1 x_1 \dots x_{n} x_{n}\)\@.  Note that
\(\pair{x,y}\) is computable and invertible in polynomial time, and for
all \(x,y \in \Sigma^*\), \(|\pair{x,y}| = 2|x|+|y|+2\)\@.  \smallskip
%
\apar{2-3}
\noindent \textbf{Probability functions and distributions\@.} 
Let \(\mu\) be a probability distribution on \(\Sigma^*\)\@. Associated with
\(\mu\) are a \hv{probability function} that we also denote by \(\mu\) and
a \hv{distribution function}, denoted by \(\mu^*\)\@. More formally, \(\mu\)
and \(\mu^*\) are functions from \(\Sigma^*\) into the interval \([0,1]\)
such that \(\sum_x\mu(x)=1\) and \(\mu^*(x)=\sum_{y\leq x}\mu(y)\), where,
as usual, \(\leq\) denotes the lexicographic ordering on \(\Sigma^*\)\@. For
any set \(X \subseteq \Sigma^*\) let \(\mu(X)= \sum_{x \in X} \mu(x)\)\@.
We refer to the distribution with the probability function \(\must(x) =
1/(|x| (|x|+1) 2^{|x|})\) as the \hv{standard distribution}\@.
%
\apar{2-4}
Let \(\mu,\nu:\Sigma^*\to [0,1]\) be functions\@. We say that \(\mu\) is
\hv{polynomially dominated} by \(\nu\) (in symbols: \(\mu \pdom \nu\))
{\cite{cj99-04-25}} if, for some polynomial \(p\) and all \(x\), \(\mu(x) \leq
p(|x|) \cdot\nu(x)\)\@. In case \(\mu \pdom \nu\) and \(\nu \pdom \mu\), the
distributions \(\mu\) and \(\nu\) are called \hv{equivalent} (in symbols:
\(\mu \pdequiv \nu\)) {\cite{cj99-04-32}}\@.
A function \(f\) from \(\Sigma^*\) to the
non-negative integers is \hv{polynomial on
  \(\mu\)-average} \cite{cj99-04-25} if there
exists a constant \(\epsilon>0\) such that
\[
\sum_{x\neq\lambda}\frac{f^{\epsilon}(x)}{|x|}\cdot\mu(x)\;\;<\;\;\infty.
\]
The set of functions that are polynomial on \(\mu\)-average is closed
under addition and multiplication\@. Furthermore, if \(\mu\) is dominated
by \(\nu\), then every function that is polynomial on \(\nu\)-average is
polynomial on \(\mu\)-average {\cite{cj99-04-25,cj99-04-16}}\@.
%
\apar{2-5}
A distribution is called
\hv{p-computable} if its distribution
function \(\mu^*\) is polynomial-time
computable in the sense of Ko and
Friedman \cite{cj99-04-21}, i.e., there exists
a polynomial time bounded deterministic
Turing transducer \(M\) such that, for all
\(x\) and all \(k\), it holds that
\(|M(x,1^k)-\mu^*(x)|\leq 2^{-k}\)\@.  A
distribution is called \hv{samplable}
\cite{cj99-04-03} if there exists a
probabilistic Turing machine \(M\) that on
input \(\lambda\) outputs \(x\) with
probability \(\mu(x)\)\@.  If, in addition,
\(M\) is polynomial-time bounded in the
length of the output, then \(\mu\) is
called \hv{p-samplable}\@.  It is easy to
see that the standard distribution
\(\must\) is \allowbreak {p-computable}\@.  Furthermore,
every p-computable distribution is
dominated by some p-samplable
distribution {\cite[Theorem~7]{cj99-04-03}}\@.
%
\apar{2-6}
\noindent
\textbf{Distributional NP search
  problems\@.} An \ComplexityClass{NP} search problem is
specified by a binary polynomial-time
decidable \hv{witness relation} \(R\) and
a polynomial \(q_R\) such that \(R(x,w)\)
implies \(|w| = q_R(|x|)\)\@.  Any \(w\) for
which \(R(x,w)\) holds is called a
\hv{solution} (witness) for \(x\)\@. An
instance \(x\) is called a \hv{positive
  instance} in case it has a solution,
and a \hv{negative instance}, otherwise\@.
We assume without loss of generality
that \(\lambda\) is never a solution\@.  A
witness relation \(R\) also specifies an
\(\ComplexityClass{NP}\) decision problem, namely
\(D_R=\set{x\in\Sigma^*}{\exists w:
  R(x,w)}\)\@.  \(D_R\) is also called the
\hv{domain} of \(R\)\@. In case
\(D_R=\Sigma^*\), \(R\) is called a
\hv{total} \ComplexityClass{NP} search problem\@.
%
\apar{2-7}
A \hv{distributional NP search problem} is a pair {\Rmu} consisting of
a witness relation \(R\) and a p-samplable probability function \(\mu\)\@.
A (deterministic) algorithm \(A\) efficiently solves {\Rmu} if the
running time of \(A\) is polynomial on \(\mu\)-average and \(A\) computes a
solution for all inputs \(x\) in the domain of \(R\), whereas it outputs
\(\lambda\) on all other inputs\@.

\asection{3}{Generating Positive Test Instances}
%
\apar{3-1}
As pointed out in the introduction, the aim of test instance
generation is to verify the quality (correctness and performance) of a
given algorithm\@.  In this section we will discuss Watanabe's approach
to the test instance generation problem\@.  Specifically, Watanabe
requires the following properties of a test instance generator \(G\) for
a distributional \ComplexityClass{NP} search problem {\Rmu}: \(G\) is a probabilistic
polyno\-mial-time algorithm, which, on input of \(0^n\), either outputs
\(\bot\) (``not successful'') or generates some instance \(x\) of length
\(n\) together with a solution \(w\) for \(x\)\@.  Moreover, \(G\) has to output
each positive instance with non-negligible probability compared with
the conditional probability function \(\rho_n\) on the positive
instances of length \(n\)\@. In particular, \(G\) is not allowed to output
any negative instance, and hence, it is more appropriate to speak of
\(G\) as a generator for the \emph{promised} \(\ComplexityClass{NP}\) search problem
restricted to the positive instances\@.
%
\begin{alabsubtext}{Definition}{1}{Test Instance Generator \Cite{cj99-04-35}} \label{tig-def}
  Let {\Rmu} be a distributional \ComplexityClass{NP} search problem and let \(D\) be
  the domain of \(R\)\@. A probabilistic polynomial-time Turing machine
  \(G\) is called a \hv{test instance generator} for {\Rmu} if there is
  a polynomial \(q\) such that for all \(n\),
%
  \begin{enumerate}
  \item on every path, \(G(0^n)\) outputs either \(\bot\) or a pair
    \(\pair{x,w}\) such that \(R(x,w)\) and \(x \in \Sigma^n\), and
  \item if \(\mu(D^{=n})>0\), then for every \(x\in D^{=n}\), \(G(0^n)\)
    outputs with probability at least \(\rho_n(x)/q(n)\) a pair of the
    form \(\pair{x,w}\), \(w\in\Sigma^*\)\@.  Here, \( \rho_n(x) =
      {\mu(x)}/{\mu(D^{=n})}\) 
%if \(x \in \)
    is the probability of \(x\) under the
    condition that \(x\) belongs to
    \(D^{=n}\)\@.
  \end{enumerate}
%
\end{alabsubtext}
%
\apar{3-2}
As shown by Watanabe, generators of this type can be used to detect,
for a given input length \(n\) (in expected time polynomial in \(n\) and
\(1/\varepsilon\)), that an algorithm makes an error, provided that errors
occur with probability \(\varepsilon\) (with respect to \(\rho_n\))\@.
Furthermore, they can be used to find out that an algorithm is not
efficient (with respect to \(\rho_n\)) \cite{cj99-04-35}\@. More precisely, if
\(G\) is a test instance generator for a search problem \((R,\mu)\), then
the quality of a given algorithm \(A\) with respect to \(\rho_n\) can be
verified as follows\@.
%
\begin{enumerate}
\item If \(A\) makes an error on strings of length \(n\) with probability
  at least \(\varepsilon\), then with probability at least
  \(\varepsilon\), \(A\) makes at least one error on a polynomial-size
  sample \(S\) generated by \(G\), i.e., the test instances in \(S\) are
  obtained by running \(G(0^n)\) a polynomial number of times\@.
\item If \(A\) needs more than \(n^k\) steps on \(\Sigma^n\) with
  probability at least \(\varepsilon\), then with probability at least
  \(\varepsilon\), \(A\) needs more than \(n^k\) steps on some input from a
  polynomial-size sample \(S\) generated by \(G\)\@.
\end{enumerate}
%
We note that, since in Definition 1 the probability that
\(G(0^n)\) outputs \(\pair{x,w}\) is not bounded from above, an algorithm
\(A\) may take exponential time on average under the distribution
induced by the generator even if \(A\) is polynomial on \(\rho\)-average\@.
In fact, it may happen that \(G\) produces with high probability
instances that have probability zero with respect to \(\rho\)\@.
%
\apar{3-3}
The following result, due to Watanabe, shows that
it is unlikely that there exists a test instance
generator for every distributional \ComplexityClass{NP} search
problem\@.
%

\begin{alabsubtext}{Theorem}{1}{\cite{cj99-04-35}}\label{wat-thm}
  If every \ComplexityClass{NP} search problem has a test
  instance generator under the standard
  distribution, then \(\RE=\NE\)\@.
\end{alabsubtext}
%
In fact, the proof shows that randomly
computing instance/witness pairs for all \ComplexityClass{NP} search problems with a tally
domain is impossible, unless \(\RE=\NE\)\@.
Similarly, it can be seen that there are
total \ComplexityClass{NP} search problems (i.e., with
domain \(\Sigma^*\)) that do not have test
instance generators under the standard
distribution, unless
\(\NE\cap\co{\NE}\subseteq\RE\)\@.  This
shows that generating test instances for
an \ComplexityClass{NP} search problem for which
decision is easy might nevertheless be
hard\@.
%
\apar{3-4}
We now show that generating test instances
in Watanabe's model may be impossible even
for trivial search problems since, for some
distributions, it is necessary to amplify
the probability of the positive instances
of fixed length by an exponential factor\@.
%
\begin{alabsubtext}{Theorem}{2}{} \label{sparse-thm}
  Let \(R=\set{\pair{x,1}}{x \in \Sigma^*-\{1\}^*}\)\@. If for every
  p-samplable distribution \(\mu\) there exists a test instance
  generator for \((R,\mu)\), then \(\RE=\NE\)\@.
\end{alabsubtext}
%

\begin{alabsubtext}{Proof of Theorem}{2}{}
\prooffontshape
\apar{Proof of Theorem 2-1}
  Since
  \(\RE=\NE\) if and only if \(\ComplexityClass{NP} \cap \TALLY \subseteq \ComplexityClass{RP}\)
  \cite{cj99-04-13}, it suffices to show that the assumption implies that
  every tally \ComplexityClass{NP} set \(T\) is contained in \ComplexityClass{RP}\@.  Let \(R_T\) be a
  binary relation and \(p\) be a polynomial such that for all \(n\),
  \(0^n\in T\) if and only if there exists a string \(w\in\Sigma^{p(n)}\)
  with \(R_T(0^n,w)\)\@.
%
\apar{Proof of Theorem 2-2}
  Now consider the p-samplable distribution \(\mu\) generated by the
  following probabilistic algorithm:
  \begin{quote}
    First, guess a positive integer \(n\) with
    probability proportional to \(n^{-2}\)\@.  Then,
    uniformly guess a string \(w \in
    \Sigma^{p(n)}\) and output \(w\) in case
    \(R_T(0^n,w)\) holds; else, output \(1^{p(n)}\)\@.
  \end{quote}
  Then any test instance generator \(G\) for \((R,\mu)\) has the property
  that, if \(0^n\in T\), then either \(R_T(0^n,1^{p(n)})\) holds or
  \(G(0^{p(n)})\) outputs with probability \(1/n^{O(1)}\) a pair
  \(\pair{w,1}\) for which \(R_T(0^n,w)\) holds\@. Note that, in the latter
  case, \(\rho_{p(n)}\{w\in\Sigma^{p(n)}\mid R_T(0^n,w)\}=1\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Theorem 2}}
\end{alabsubtext}
%
\apar{3-5}
An interesting question is whether the converse of
Theorem~1 (or of Theorem~2) is also true\@.
As shown below, non-adaptive queries to an \ComplexityClass{NP} set are sufficient to
generate test instances for any distributional \ComplexityClass{NP} search problem\@.
This implies in particular that under the assumption \(\ComplexityClass{NP}=\ComplexityClass{RP}\), any
distributional \ComplexityClass{NP} search problem has a test instance generator\@.
%
\apar{3-6}
As shown in the following proposition, \(O(\log n)\) many queries to any
oracle are not helpful for a generator in Watanabe's model\@.
%
\begin{alabsubtext}{Proposition}{1}{}
  If, for a distributional \ComplexityClass{NP} search problem
  {\Rmu}, test instances can be generated with
  \(O(\log n)\) queries to some oracle, then there
  exists a test instance generator for {\Rmu}\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{1}{}
\prooffontshape
  Let \(M\) be a probabilistic oracle Turing machine that generates test
  instances by asking \(O(\log n)\) queries to some oracle\@. Let \(M'\)
  simulate \(M\) where each oracle query is answered by a fair coin
  toss\@. If \(M\) outputs a pair \(\pair{x,w}\), then \(M'\) outputs
  \(\pair{x,w}\) only if \(R(x,w)\) holds, and outputs \(\bot\) otherwise\@.
  Then the probability that all oracle answers are guessed correctly
  is \(1/n^{O(1)}\), implying that \(M'\) still fulfills the conditions of
  Definition~1\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 1}}
\end{alabsubtext}
%
\apar{3-7}
The upper bound on the complexity of generating test instances that we
will give in Theorem~3 below is based on the
universal hashing technique \cite{cj99-04-14,cj99-04-29,cj99-04-31}\@.  Let \Hash\ be the
class of all linear hash functions from \(\Sigma^m\) to \(\Sigma^k\)\@. The
following lemma is straightforward ({cf.} 
% \cite[page 450]{cj99-04-26})\@.
\cite{cj99-04-26})\@.
%
\begin{alabsubtext}{Lemma}{1}{} \label{hash-lemma}
  Let \(S\) be a subset of \(\Sigma^m\) of cardinality \(\card{S}=s\) such
  that \(2^{k}\leq3s\leq 2^{k+1}\) and let \(x\in S\)\@.  Then, for a
  uniformly chosen hash function \(h\) from \(\Hash\), it holds with
  probability at least \(2/(9s)\) that \(x\) is the only element in \(S\)
  with \(h(x)=0^k\)\@. Hence, with probability at least \(2/9\), there exists
  a unique \(x\in S\) with \(h(x)=0^k\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Lemma}{1}{}
\prooffontshape
  Since \(\Hash\) is a universal class of hash
  functions, it follows for a uniformly chosen
  \(h\) that \(\Prob{}{h(x)=0^k}=2^{-k}\)
  and that
%
  \begin{eqnarray*}
    \lefteqn{\Prob{}{h(x)=0^k\wedge \exists y\in
        S-\{x\}: h(y)=0^k}}\\ &\leq&\sum_{y\in
      S-\{x\}} \Prob{}{h(x)=0^k\wedge
      h(y)=0^k}\;<\; s2^{-2k}.
  \end{eqnarray*}
%
  Consequently, the probability that \(x\) is the only pre-image of
  \(0^k\) in \(S\) can be calculated as follows\@.
  \[
  \begin{array}{lcl}
    \lefteqn{\Prob{}{h(x)=0^k\wedge \forall y\in
        S-\{x\}: h(y)\not=0^k}}\\ 
    &=&\Prob{}{h(x)=0^k}- \Prob{}{h(x)=0^k\wedge
      \exists y\in S-\{x\}: h(y)=0^k}\\ 
    &>&2^{-k}(1-s2^{-k})\\ 
    &\geq&2/(9s),\text{ since }s2^{-k}(1-s2^{-k})\geq2/9\text{ for }1/3\leq s2^{-k}\leq2/3.
  \end{array}\vspace*{-4ex} \]
{\nopagebreakbeginpar\markendlst{Proof of Lemma 1}}
\end{alabsubtext}
%
\begin{alabsubtext}{Theorem}{3}{}\label{upper-bound-thm}
  For every distributional \ComplexityClass{NP} search problem {\Rmu}, test instances
  can be generated with parallel queries to some set in \ComplexityClass{NP}\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Theorem}{3}{}
\prooffontshape
%
  Let \(D\) be the domain of \(R\) and let \(q\) be a polynomial such that
  \(|w|=q(|x|)\) for all solutions \(w\) for \(x\)\@. Let \(M\) be a
  probabilistic Turing machine witnessing that \(\mu\) is p-samplable
  and let \(p\) be a polynomial time bound for \(M\), i.e., the time used
  by \(M(\lambda)\) to output a string \(x\) is bounded by \(p(|x|)\)\@. For
  any string \(r\in\Sigma^{p(n)}\), let \(M_r(\lambda)\) denote the output
  of \(M\) (if any) when using sequence \(r\) as random source\@. The test
  instance generator \(G\) for \((R_D,\mu)\) is defined as follows\@.
%
  \begin{quote}
    On input \(0^n\), \(G\) first guesses two integers
    \(k\in\{1,\dots,p(n)+1\}\), \(l\in\{1,\dots,q(n)+1\}\) and two linear
    hash functions \(h_1\in\Hash[p(n),k]\), \(h_2\in\Hash[q(n),l]\)\@. By
    asking in parallel the queries \(y_i=\pair{0^n,i,h_1,h_2}\), \(1\leq
    i\leq m=2p(n)+q(n)+2\), to the oracle \(A\), \(G\) obtains a sequence
    \(s=A(y_1)\dots A(y_{m})\) of oracle answer bits\@.  Then \(G\)
    determines strings \(r\in\Sigma^{p(n)}\), \(w\in\Sigma^{q(n)}\), and
    \(x\in\Sigma^*\) such that \(\pair{r,w}=s\) and \(x=M_r(\lambda)\)\@.
    Finally, if \(|x|=n\) and \(R(x,w)\) hold, then \(G\) outputs the pair
    \(\pair{x,w}\); otherwise, \(G\) outputs the symbol \(\bot\)\@.
  \end{quote}
%
  For every \(x\), let \(S_x=\{r\in\Sigma^{p(|x|)}\mid M_r(\lambda)=x\}\)
  denote the set of all random sequences \(r\) using which \(M\) outputs
  \(x\), and let \(S_n=\bigcup_{x\in D^{=n}}S_x\)\@. For every \(r\in S_n\)
  let \(\alpha_r\) denote the probability that \(G(0^n)\) guesses integers
  \(k,l\) and linear hash functions \(h_1,h_2\) such that \(r\) is the only
  string in \(S_n\) with \(h_1(r)=0^k\) and there exists a unique solution
  \(w\) for \(M_r(\lambda)\) with \(h_2(w)=0^l\)\@. Then it follows by
  Lemma~1 that
  \[
  \alpha_r\geq\frac{1}{(p(n)+1)(q(n)+1)}\cdot
  \frac{2}{9\cdot\card{S_n}}\cdot \frac{2}{9}.
  \]
%
  Now consider the following \ComplexityClass{NP} oracle set \(A\) defined as
%
  \begin{eqnarray*}
    \pair{0^n,i,h_1,h_2}\in A&\Leftrightarrow& \exists\pair{r,w}: h_1(r)=0^k,\,r\in S_n,\,h_2(w)=0^l,\\ 
                             &               & R(M_r(\lambda),w),\mbox{ and the \protect\(i\protect\)th bit of \protect\(\pair{r,w}\protect\) is 1}.
  \end{eqnarray*}
%
  Then it follows that, for every \(x\in D^{=n}\), \(G(0^n)\) outputs a
  pair of the form \(\pair{x,w}\), \(w\in\Sigma^*\), with probability at
  least
  \[
  \sum_{r\in S_x} \alpha_r \,\geq\,
  \frac{4\card{S_x}}{81(p(n)+1)(q(n)+1)\card{S_n}} \,=\,
  \frac{4}{81(p(n)+1)(q(n)+1)}\cdot\rho_n(x).\vspace*{-4ex}
  \]
{\nopagebreakbeginpar\markendlst{Proof of Theorem 3}}
\end{alabsubtext}
%
We end this section by observing that the above theorem can easily be
extended to the Monte Carlo type of generators that we will introduce
in the next section\@.
%

\asection{4}{Test Instance Generation Without Amplification}
%
\apar{4-1}
In this section we propose a slightly
different model of generating test
instances\@. As explained above, our aim
is to verify the quality of a given
algorithm but to avoid the need for
amplifying the probability of the
positive instances by a possibly
exponential factor\@.  This goal is achieved by
allowing the generator not only to
output pairs \(\pair{x,w}\), where \(x\) is
a positive instance and \(w\) is a
solution for \(x\), but also to output 
pairs of the form \(\pair{x,\lambda}\),
where \(x\) might be negative as well\@.
(Recall our assumption that \(\lambda\) is
not a solution for any instance\@.)  In
the definition of a Monte Carlo
generator, we also allow that positive
instances \(x\) be generated (with small
probability) in the form
\(\pair{x,\lambda}\), whereas this is
totally forbidden for a Las Vegas
generator\@. Before we proceed to the
formal definition of these generators,
let us explain why it might be easier
to generate test instances for a
distributional \ComplexityClass{NP} search problem than
to solve the problem itself\@.
%
\apar{4-2}
Intuitively, a generator for a
distributional \ComplexityClass{NP} search problem can
benefit from the advantage that it may
intertwine the process of randomly
generating an instance with the process
of constructing a suitable solution\@. In
contrast, an algorithm has to solve the
problem on instances that are randomly
generated ``from outside\@.''  Thus, there
might exist distributional \ComplexityClass{NP} search
problems for which it is easier to
generate instance/witness pairs than to
solve the problem\@.  In fact, this
phenomenon is exploited in the design of
many cryptographic protocols, such as in 
generating public keys together with
some (secret) trapdoor information\@.  For
example, the security of the RSA
cryptosystem is based on the practical
hardness of the factorization problem,
i.e., no efficient-on-average algorithm
is known (under the standard
distribution) that on input of \(n\) outputs
a factor of \(n\) in case \(n\) is
composite, and outputs ``prime''
otherwise\@. On the other hand, it is
computationally easy to randomly
generate composite numbers together with
some factor\@. Indeed, it is not hard to
construct a Las Vegas generator for the
corresponding search problem (see Proposition~4)\@.
%
\apar{4-3}
Next we define the notion of an {instance} generator for a
distributional \ComplexityClass{NP} search problem {\Rmu}\@. An {instance} generator
\(G\) efficiently generates instances in accordance with the
distribution \(\mu\)\@.  Furthermore, it provides, along with each
generated instance \(x\), either some solution \(w\) (implying that \(x\)
belongs to the domain of \(R\)) or the empty string \(\lambda\) (implying
absolutely nothing on the membership status of \(x\))\@. In fact, an
{instance} generator may only output pairs of the form
\(\pair{x,\lambda}\)\@. In order to force \(G\) to generate ``good'' test
instances, we will later impose an additional bound on the probability
that \(G\) outputs a pair \(\pair{x,\lambda}\) when \(x\) is positive\@.
%
\begin{alabsubtext}{Definition}{2}{\sf{\generator}} \label{generator-def}
  Let {\Rmu} be a distributional \ComplexityClass{NP} search
  problem\@. A probabilistic Turing machine \(G\) (with output but with no input) is
  called an \hv{\generator} for \(R\) under \(\mu\)
  if
  \begin{enumerate}
  \item on every halting computation, \(G\)
    outputs a pair \(\pair{x,w}\) with the
    property that either \(R(x,w)\) holds or
    \(w = \lambda\),
  \item the time needed by \(G\) to output a pair
    of the form \(\pair{x,w}\), \(w\in\Sigma^*\), is
    polynomially bounded in \(|x|\), and
  \item \(\mu_G\) is equivalent to \(\mu\), where
    \(\mu_G(x)\) is the probability that \(G\)
    generates instance \(x\), {i.e.}, \(\mu_G(x) =
    \sum_{w\in\Sigma^*} {\mu}_G(x,w)\), where \({\mu}_G(x,w)\)
    is the probability that \(G\) outputs the pair
    \(\pair{x,w}\)\@.
  \end{enumerate}
\end{alabsubtext}
%
Note that if \(G\) were required to stop on all paths, then only finitely
many instances could be generated\@. Actually, we allow that
\(\mu_G(\Sigma^*)<1\), i.e., \(G\) might run forever with \emph{positive}
probability\@.  Also, if \(\mu(\Sigma^n)=1/n^{O(1)}\), then \(G\) can easily
be modified to produce instances of a given length according to the
conditional probability of that length\@. (This is similar to Watanabe's model,
but note that the generator might still output negative instances\@.)
%
\apar{4-4}
Suppose that \(G\) is an {\generator} for a distributional \ComplexityClass{NP} search
problem {\Rmu} and let \(A\) be an algorithm\@. Since the distributions
\(\mu\) and \(\mu_G\) are equivalent, the running time of \(A\) is
polynomial on \(\mu\)-average if and only if it is polynomial on
\(\mu_G\)-average \cite{cj99-04-25}\@.
%
\apar{4-5}
We distinguish between two types of generators, depending on the
quality of the generated test instances\@.  A Monte Carlo generator is
allowed to err with small probability, whereas a Las Vegas generator
only generates test instances that are correctly classified\@. 
%
\begin{alabsubtext}{Definition}{3}{\sf{Monte Carlo generator}} \label{mctig-def}
  Let \(G\) be an {\generator} for a distributional
  \ComplexityClass{NP} search problem {\Rmu} with domain \(D\)\@.
  \begin{enumerate}
  \item \(G\) is called a \hv{Monte Carlo
      generator} for \(R\) under \(\mu\) if for
    every polynomial \(q\),
    \[
    \mu_G(x,\lambda)\leq \frac{\mu_G(x)}{q(|x|)}
    \]
    holds for all but finitely many instances
    \(x\) in \(D\)\@.
  \item \(G\) is called a \hv{Las Vegas generator}
    for \(R\) under \(\mu\) if \(G\) never outputs a
    pair \(\pair{x,\lambda}\) with \(x\in D\)\@.
  \end{enumerate}
\end{alabsubtext}
%
\apar{4-6}
We note that if \(\mu(D^{=n})=1/n^{O(1)}\), then any Monte Carlo
generator for {\Rmu} can be transformed into a test instance generator
for {\Rmu} according to Watanabe's model\@.
%
\apar{4-7}
We next show how a Monte Carlo generator can be used to find out
efficiently and with high confidence whether a given algorithm \(A\) for
a distributional \ComplexityClass{NP} search problem \(\Rmu\) is correct with respect
to the underlying probability distribution \(\mu\)\@. Without loss of
generality, we can assume that \(A\) makes only errors on instances in
the domain \(D\) of \(R\)\@. More formally, for any subset \(D'\) of \(D\), we
call the probability \(\mu\{x\in D'\mid A(x)=\lambda\}\) that \(x\in
D'\) and \(A\) does not find a witness for \(x\) the \hv{error rate} of \(A\)
on \(D'\)\@. We say that a pair \(\pair{x,w}\) \hv{convicts} \(A\) on \(D'\) if
\(R(x,w)\) and \(x \in D'\) hold, but \(A(x)=\lambda\)\@.
%
\begin{alabsubtext}{Theorem}{4}{}
  Let \(G\) be a Monte Carlo generator for a distributional \ComplexityClass{NP} search
  problem {\Rmu} and let \(D\) be the domain of \(R\)\@.  Then there exists
  a polynomial-time probabilistic transducer \(T\) such that the
  following statements hold for any search algorithm \(A\)\@.
  \begin{itemize}
  \item If the error rate of \(A\) on \(D^{\leq l}\) is at least \(1/m\),
    then for all \(n\), \(T\) on input \(\pair{1^n,1^m,1^l}\) produces with
    probability at least \(1-2^{-n}\) a sequence of pairs, at least one
    of which convicts \(A\) on \(D^{\leq l}\)\@.
  \item If additionally, \(A\) is polynomial on \(\mu\)-average then for
    some polynomial \(t\), the sequence contains with probability at
    least \(1-2^{-n}\) a pair \(\pair{x,w}\) that convicts \(A\) on \(D^{\leq
      l}\) and for which \(A(x)\) stops after at most \(t(l,m)\) steps\@.
  \end{itemize}
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Theorem}{4}{}
\prooffontshape
\apar{Proof of Theorem 4-1}
  Let \(G\) be a Monte Carlo generator for {\Rmu} and let \(q\) be a
  polynomial such that \(|w|=q(|x|)\) for all solutions \(w\) of \(x\)\@. By
  Definition~2(3), there is a polynomial \(p\) such
  that for all instances \(x\),
  \begin{align*}
    &\mu(x)/p(|x|) \leq \mu_G(x)\leq p(|x|)\cdot
    \mu(x).
  \end{align*}
  Since Definition~3(1) guarantees that for all but
  finitely many \(x\in D\), \(\mu_G(x,\lambda)\leq\mu_G(x)/2\), we can
  assume that, for all instances \(x \in D\),
  \begin{align*}
    &\sum_{w\in\Sigma^{q(|x|)}} \mu_G(x,w) \geq \mu_G(x)/2 \geq
    \mu(x)/(2p(|x|)).
  \end{align*}
  Since the error rate of \(A\) on \(D^{\leq l}\) is at least \(1/m\), it
  follows that \(G\) outputs with probability at least \(1/(2mp(l))\) a
  pair \(\pair{x,w}\) with \(x \in D^{\leq l}\) and \(A(x)=\lambda\)\@.
  Therefore, during \(2mnp(l)\) independent computations, \(G\) will
  output with probability at least \(1-(1-1/(2mp(l)))^{2mnp(l)}>
  1-2^{-n}\) some pair \(\pair{x,w}\) with \(x\in D^{\leq l}\) and
  \(A(x)=\lambda\)\@. Note that since the running time of \(G\) is
  polynomially bounded in the length of the output, \(T\) can suspend
  the simulation of \(G\) after a polynomial number (in \(l\)) of steps\@.
%
\apar{Proof of Theorem 4-2}
  Note that since \(\mu\) and \(\mu_G\) are equivalent, the assumption
  that \(A\) is polynomial on \(\mu\)-average implies that \(A\) is
  polynomial on \(\mu_G\)-average, i.e., there exist constants \(k,c>0\)
  such that
  \(\sum_{x\neq\lambda}\frac{\text{time}_A(x)^{1/k}}{|x|}\cdot\mu(x)<c\),
  where \(\text{time}_A(x)\) denotes the running time of \(A\) on input
  \(x\)\@.  Hence, it follows that
  \[
    \mu_G\{x\in D^{\leq l}\mid A(x) \text{ runs for more than \(t(l,m)\)
    steps}\}\leq1/(4mp(l)),
  \]
  where \(t\) is the polynomial \(t(l,m)=(2cmlp(l))^k\)\@. This implies that
  \(G\) outputs with probability at least \(1/(4mp(l))\) a pair
  \(\pair{x,w}\) that convicts \(A\) on \(D^{\leq l}\) and for which \(A(x)\)
  stops after at most \(t(l,m)\) steps\@.
{\nopagebreakbeginpar\markendlst{Proof of Theorem 4}}
\end{alabsubtext}
%
\apar{4-8}
It is interesting to note that the assumption in the above theorem can
be weakened for certain distributions \(\mu\)\@. In fact, if \(\mu\) has the
property that for some polynomial \(q\) and all \(n\),
\begin{quote}
  \(\sum_{x:|x|>q(n)} \mu(x)\leq 1/n\),
\end{quote}
and if the error rate of \(A\) on \(D\) (instead of \(D^{\leq l}\) as in
the above theorem) is at least \(1/m\), then it follows that \(\mu\{x\in
D^{\leq q(2m)}\mid A(x)=\lambda\} \geq1/(2m)\)\@.  Hence, letting
\(T'(1^n,1^{m}) = T(1^n,1^{2m},1^{q(2m)})\), the above theorem implies
that with probability at least \(1-2^{-n}\), \(T'(1^n,1^{m})\) produces an
instance \(x\in D^{\leq q(2m)}\) with \(A(x)=\lambda\)\@.  \smallskip
%
\apar{4-9}
Next we consider distributional {\ComplexityClass{RP}} search problems {\Rmu} (meaning
that the binary relation \(R\) witnesses that its domain \(D_R\) belongs
to \(\ComplexityClass{RP}\), i.e., each \(x\in D_R\) has at least \(2^{q_R(|x|)-1}\) many
solutions)\@. As shown below, instance/witness pairs for {\Rmu} can be
easily generated by sampling a string \(x\) according to \(\mu\) and
randomly generating a witness for \(x\)\@.
%
\begin{alabsubtext}{Proposition}{2}{} \label{rp-mc-gen}
  For every distributional\/ {\ComplexityClass{RP}} search problem {\Rmu} there exists
  a Monte Carlo generator\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{2}{}
\prooffontshape
  Let \(D\) be the domain of \(R\) and let \(q\) be a polynomial such that
  \(|w|=q(|x|)\) for all solutions \(w\) of \(x\)\@. Let \(M\) be a
  probabilistic Turing machine witnessing that \(\mu\) is p-samplable\@.
  Consider the following {instance} generator \(G\) for \((R,\mu)\)\@.
  \begin{quote}
    Simulate \(M\)\@. In case \(M\) outputs an instance \(x\), \(|x|=n\), guess
    randomly and independently a sequence \(w_1,\dots,w_n\) of strings
    of length \(q(n)\) and determine the lexicographically largest
    string \(w\) in the set \(\{\lambda\}\cup\{w_i\mid 1\leq i\leq n\) and
    \(R(x,w_i)\) holds\(\}\)\@.  Output the pair \(\pair{x,w}\)\@.
  \end{quote}
  It is easy to verify that \(G\) is a Monte Carlo test instance
  generator for \((R,\mu)\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 2}}
\end{alabsubtext}
%
\apar{4-10}
As shown in the next theorem, it is unlikely that every \(\ComplexityClass{NP}\) search
problem has a Monte Carlo generator under the standard probability
distribution\@. In fact, Monte Carlo generators can only exist if the
domain of the search problem belongs to {\co\AM}\@. As a consequence,
Monte Carlo generators do not exist for \(\ComplexityClass{NP}\) search problems with an
\ComplexityClass{NP}-complete domain unless the polynomial-time hierarchy collapses to
the second level \cite{cj99-04-09,cj99-04-28}\@.  For definitions of the class
{\AM} and of Arthur-Merlin games we refer the reader to \cite{cj99-04-11} or
to a textbook, such as \cite{cj99-04-05} or \cite{cj99-04-22}\@.
%
\begin{alabsubtext}{Theorem}{5}{} \label{mcg-am-thm}
  If there exists a Monte Carlo generator for an \(\ComplexityClass{NP}\) search problem
  \(R\) under the standard distribution, then the domain of \(R\) is
  contained in \(\co\AM\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Theorem}{5}{}
\prooffontshape
\apar{Proof of Theorem 5-1}
  Let \(G\) be a Monte Carlo generator for \((R,\must)\) and let \(p\) be a
  polynomial bounding the running time of \(G\)\@. From Definition
  2(3), we can conclude that, for some polynomial \(s\),
  \[
  \frac{1}{s(|x|) 2^{|x|}} \leq \mu_G(x) \leq
  \frac{s(|x|)}{2^{|x|}}.
  \]
  From Definition 3(1), we know that, for any polynomial
  \(q\),
  \[
  \mu_G(x,\lambda)\leq\frac{\mu_G(x)}{q(|x|)}
  \]
  holds for all but finitely many \(x\) in the domain \(D\) of \(R\)\@. Now
  let \(S_x\) be the set of strings \(r\in\Sigma^{p(n)}\) such that \(G\)
  outputs the pair \(\pair{x,\lambda}\) when using \(r\) as random source\@.
  Let \(q(n)\) be the polynomial \(2^5s(n)^2\)\@. Then, for all but finitely
  many \(x\), the following implications are true (where \(n=|x|\))\@.
\[
\begin{array}{c@{\;\;}c@{\;\;}c@{\;\;}c@{\;\;}l@{\;\;}c@{\;\;}l}
        \ds x \in D &\ds\Rightarrow&\ds \mu_G(x,\lambda)\leq\frac{s(n)}{2^nq(n)}
    &\ds\Rightarrow&\ds \card{S_x} \leq\frac{s(n)2^{p(n)}}{2^nq(n)}
    &\ds\Rightarrow&\ds \card{S_x} \leq \frac{2^{p(n)-n-5}}{s(n)},\\[4mm] 
        \ds x\not\in D &\ds\Rightarrow&\ds \mu_G(x,\lambda)\geq\frac{1}{s(n)2^n}
    &\ds\Rightarrow&\ds \card{S_x} \geq\frac{2^{p(n)}}{s(n)2^n}
    &\ds\Rightarrow&\ds \card{S_x} \geq \frac{2^{p(n)-n}}{s(n)}.\\[4mm]
\end{array}
\]
%
  Therefore, letting \(k(n)=p(n)-n-\lceil\log_2 s(n)\rceil-2\), it follows,
  for all but finitely many \(x\in D\), that \(\card{S_x}\leq2^{k(n)-2}\) 
  and for all but finitely many \(x\not\in D\), that
  \(\card{S_x}\geq2^{k(n)+2}\)\@. 
%
\apar{Proof of Theorem 5-2}
  Now consider for a fixed instance \(x\) (for which the above
  implications are true) the random variable \(Z\) that for a uniformly
  chosen \(h\) in \(\Hash[p(n),k(n)]\) gives the number of
  strings \(r\in S_x\) for which \(h(r)=0^{k(n)}\)\@.  Then the expectation
  of \(Z\) is \(E(Z)=2^{-k(n)}\card{S_x}\) and the variance is
  \(V(Z)=2^{-k(n)}(1-2^{-k(n)})\card{S_x}<E(Z)\) \cite{cj99-04-31}\@.  Thus, by
  using Chebyshev's inequality it follows that
  {\footnotesize
  \begin{eqnarray*}
    x \in D &\Rightarrow& \Prob{}{Z\geq1} \leq E(Z)\leq 1/4, \\[2mm]
    x \not\in D &\Rightarrow& \Prob{}{Z=0} \leq \Prob{}{|Z-E(Z)| \geq E(Z)} \leq V(Z)/E(Z)^2<1/E(Z)\leq1/4.
  \end{eqnarray*}
  }
  The Arthur-Merlin protocol for \(\overline D\)
  proceeds as follows: On input \(x\), \(|x|=n\),
  Arthur randomly chooses a hash function \(h \in
  \Hash[p(n),k(n)]\) and asks Merlin to show him
  a string \(r\in S_x\) with \(h(r)=0^{k(n)}\)\@.
  Since Merlin succeeds with probability at
  least \(3/4\), if \(x \not\in D\), and with
  probability at most \(1/4\), otherwise, it
  follows that \(\overline{D}\in\AM\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Theorem 5}}
\end{alabsubtext}
%
\apar{4-11}
It is interesting to note that the above
proof can easily be extended to any \(\ComplexityClass{NP}\)
search problem \(\Rmu\), where \(\mu\) is
positive and \(1/\mu(x)\) can be {\em
  polynomially approximated} by an \(\FP\)
function \(f(x)\), i.e., for some constant
\(c\) and all \(x\), \(f(x)\leq 1/\mu(x)\leq
f(x)\cdot |x|^c\)\@.
%
\apar{4-12}
By combining Theorem~5 with the result that \(\ComplexityClass{NP}\) is
not contained in \(\co\AM\) unless the polynomial-time hierarchy
collapses to the second level \cite{cj99-04-09,cj99-04-28}, we obtain the
following corollary\@.
%
\begin{alabsubtext}{Corollary}{1}{} \label{mcg-am-cor}
  If there exists a Monte Carlo generator for some distributional
  \ComplexityClass{NP} search problem \((R,\must)\) whose domain is \ComplexityClass{NP}-complete,
  then \(\PH=\Sigma^P_2\)\@.
\end{alabsubtext}
%
\apar{4-13}
We end this section by considering the Las Vegas type of generators\@.
Define a \ComplexityClass{ZPP}\ search problem to be a {\ComplexityClass{RP}} search problem whose
domain is in {\ComplexityClass{ZPP}}\@. Then a Las Vegas generator can be constructed for
any \ComplexityClass{ZPP}\ search problem in a similar way as in the proof of
Proposition 2\@.
%
\begin{alabsubtext}{Proposition}{3}{}
  For every distributional {\ComplexityClass{ZPP}} search problem there exists a Las
  Vegas generator\@.
\end{alabsubtext}
%
\apar{4-14}
It is also interesting to observe that
some search problems that are not known
to be efficiently solvable have a Las
Vegas generator\@.  As an example, we
consider the prime factorization problem
under the standard distribution \(\must\)\@.
We assume that numbers are given in
binary, i.e., the string \(x\) is used to
describe the number (denoted by \(n_x\))
with the binary representation \(1x\)\@.
More precisely, let \(R_{\text{prime}}\)
denote the witness relation defined as
\[
R_{\text{prime}}(x,w)=
  \begin{cases} 1, & n_w\text{ is a prime factor
      of }n_x\text{ and }n_w<n_x,\\ 0, & \text{otherwise.}
  \end{cases}
\]
%
\begin{alabsubtext}{Proposition}{4}{}
  There exists a Las Vegas generator for the prime factorization
  problem \((R_{\text{prime}}, \must)\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{4}{}
\prooffontshape
\apar{Proof of Proposition 4-1}
  Recall that the domain of
  \(R_{\text{prime}}\) belongs to \(\ComplexityClass{ZPP}\)
  {\cite{cj99-04-02}}, i.e., there is a
  probabilistic polynomial-time
  algorithm \(A\) that on input of \(x\) either
  accepts (implying that \(n_x\) is prime)
  or rejects (implying that \(n_x\) is
  composite) or outputs ``?'' (with
  probability at most \(1/2\))\@.
  Furthermore, since the standard
  distribution \(\must\) is equivalent to
  some p-samplable distribution, there
  is a probabilistic Turing machine \(M\)
  that on input \(\lambda\) outputs \(x\)
  with probability \(\mu_M(x)\), where
  \(\must(x)/4\leq\mu_M(x)\leq4\must(x)\)
  (cf.~{\cite[Theorem~7]{cj99-04-03}})\@.
%
\apar{Proof of Proposition 4-2}
  Now consider the following generator \(G\):
  \begin{quote}
    With probability \(1/2\), execute step (1) or step (2)\@.
%
\apar{}
    (1) Choose randomly under \(\mu_M\) a string \(x\) in \(\Sigma^*\)\@. If
    \(A(x)\) accepts, then output \(\pair{x,\lambda}\)\@. Otherwise, loop
    forever\@.
%
\apar{}
    (2) Choose randomly and independently under \(\mu_M\) two strings
    \(y,w\in\Sigma^*\)\@. If \(A(w)\) accepts and if \(y\not=\lambda\), then
    output \(\pair{x,w}\), where \(1x\) is the binary representation of
    \(n_y\cdot n_w\)\@. Otherwise, loop forever\@.
  \end{quote}
  It is easy to see that \(G\) fulfills conditions (1) and (2) of
  Definition 2 as well as condition (2) of
  Definition 3\@. Furthermore, it is not hard to verify
  that the distribution \(\mu_G\) induced by \(G\) is equivalent to
  \(\must\) (i.e., \(G\) also fulfills Definition 2(3)),
  implying that \(G\) is a Las Vegas generator for \((R_{\text{prime}},
  \must)\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 4}}
\end{alabsubtext}
%
\apar{4-15}
On the other hand, Las Vegas generators can only exist for \ComplexityClass{NP}
search problems whose domain belongs to 
\(\ComplexityClass{NP}\cap\co{\ComplexityClass{NP}}\)\@.
%
\begin{alabsubtext}{Proposition}{5}{}
  If there exists a Las Vegas generator for an \(\ComplexityClass{NP}\) search problem
  \(R\) under the standard distribution \(\must\), then the domain of \(R\)
  is contained in \(\co{\ComplexityClass{NP}}\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{5}{}
\prooffontshape
  Let \(D\) be the domain of a search problem \(R\) and let \(G\) be a Las
  Vegas generator for \((R,\must)\) whose running time is bounded by
  some polynomial \(s\)\@. We show that \(\overline{D} \in \ComplexityClass{NP}\)\@.  Consider
  the following \ComplexityClass{NP} machine \(M\):
  \begin{quote}
    On input \(x\), simulate \(G\) for at most \(s(|\pair{x,\lambda}|)\)
    steps and accept if \(G\) outputs the pair \(\pair{x,\lambda}\)\@.
  \end{quote}
  Since \(\must(x)>0\) for all \(x\) and since
  \(G\) is a Las Vegas generator for
  \((R,\must)\), \(M\) accepts \(x\) if and only
  if \(x\in\overline{D}\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 5}}
\end{alabsubtext}
%
\begin{alabsubtext}{Corollary}{2}{}
  If there exists a Las Vegas generator
  for some distributional \ComplexityClass{NP} search
  problem \((R,\must)\) whose domain is
  \ComplexityClass{NP}-complete, then 
  \(\ComplexityClass{NP} =\co{\ComplexityClass{NP}}\)\@.
\end{alabsubtext}
%
\apar{4-16}
As a final remark, it is not hard to see that, for every set \(L\) that
has self-computable witnesses (in the sense of \cite{cj99-04-04,cj99-04-12}),
there is a Las Vegas generator using \(L\) as an oracle\@.  For example,
since graph isomorphism (GI) and graph automorphism (GA) have
self-computable witnesses and non-adaptively self-computable
witnesses, respectively 
(cf. \cite{cj99-04-27} and \cite[Lemmas~5.2 and~5.3]{cj99-04-24}), 
there exist Las Vegas generators for the
corresponding search problems that ask adaptive (non-adaptive) queries
to GI (GA, respectively)\@.
%

\asection{5}{Hard Problems for Test Instance Generation}
%
\apar{5-1}
In this section we consider the question
of how the test instance generation
problem for one \ComplexityClass{NP} search problem
\((S,\nu)\) can be reduced to the test
instance generation problem for another
\ComplexityClass{NP} search problem \((R,\mu)\)\@.  Inspired
by \cite{cj99-04-35}, we provide a way to
transform a Monte Carlo or Las Vegas
generator for \((S,\nu)\) to a generator for
any problem \((R,\mu)\) that reduces to
\((S,\nu)\) via the following kind of
reduction\@.
%
\begin{alabsubtext}{Definition}{4}{}\label{genred}
  Let \((R,\mu)\) and \((S,\nu)\) be distributional \ComplexityClass{NP} search problems\@.
  Let \(D_R\) (\(D_S\)) be the domain of \(R\) (\(S\), respectively)\@. Then \((R,\mu)\) is
  \hv{generator reducible} to \((S,\nu)\) if there exist functions \(f, g
  \in \FP\) and polynomials \(p,q\) such that:
  \begin{enumerate}
  \item For all \(y\) and \(r\), if \(f(y,r) \not= \bot\), then \(f(y,r) \in
    D_R\) if and only if \(y \in D_S\)\@.
  \item For all \(y\), \(v\), and \(r\), if \(f(y,r) \not= \bot\) and \(S(y,v)\),
    then \(R(f(y,r),g(y,v,r))\)\@.
  \item \(f\) is honest, {i.e.}, for all \(y,r\), if \(f(y,r) \not= \bot\)
    then \(p(|f(y,r)|)\geq |y|\)\@.
  \item \(\mu\) is equivalent to \(\phi\), where \(\phi\) is the
    distribution induced by \(\nu\), \(q\), and \(f\), {i.e.},
    \(\phi(x)=\sum_{y,r}\nu(y)2^{-|r|}\), where the sum ranges over all
    strings \(y,r\) such that \(|r|=q(|y|)\) and \(f(y,r)=x\)\@.
  \end{enumerate}
\end{alabsubtext}
%
Two distributional \ComplexityClass{NP} search problems
are called \hv{generator equivalent} if
they are generator reducible to each
other\@.
%
\apar{5-2}
In the sequel, we assume without loss of generality that \(f(y,r r') =
f(y,r)\) for all \(y,r,r'\) such that \(|r|=q(|y|)\), and that
\(g(y,\lambda,r) = \lambda\) for all \(y,r\)\@.
%
\begin{alabsubtext}{Proposition}{6}{}
  If \((R,\mu)\) is generator reducible to
  \((S,\nu)\) and if there exists a Monte
  Carlo (Las Vegas) generator for
  \((S,\nu)\), then there exists a Monte
  Carlo (Las Vegas, respectively) generator for
  \((R,\mu)\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{6}{}
\prooffontshape
\apar{Proof of Proposition 6-1}
  Assume that \((R,\mu)\) reduces to
  \((S,\nu)\) via the functions \(f\), \(g\) and
  the polynomials \(p, q\)\@.  Let \(G\) be a
  generator for \((S,\nu)\)\@. Consider the
  following algorithm \(G'\)\@.
  \begin{quote}
    Simulate \(G\)\@. If \(G\) produces a pair
    \(\pair{y,v}\), then randomly guess a
    string \(r \in \{0,1\}^{q(|y|)}\)\@. If
    \(f(y,r) \neq \bot\), then output the
    pair \(\pair{f(y,r), g(y,v,r)}\);
    otherwise, loop forever\@.
  \end{quote}
  We claim that \(G'\) is an {instance} generator for \((R,\mu)\) (of the
  same type as \(G\))\@. By Definition~4 we know that if \(G'\)
  outputs \(\pair{x,w}\), then either \(R(x,w)\) or \(w=\lambda\)\@. Let \(C(x)
  = \set{(y,r)}{f(y,r)=x, |r|=q(|y|)}\) and let \(t\) and \(t'\) be
  polynomials such that \(\mu_G(x)\geq \nu(x)/t(|x|)\) and \(\phi(x)
  \geq\mu(x)/t'(|x|)\), where \(\phi\) is the distribution induced by
  \(f\), \(q\), and \(\nu\)\@.  Then we have, for all \(x\),
  \begin{eqnarray*}
    \mu_{G'}(x) %
    &=   & \sum_{(y,r) \in C(x)} \mu_G(y) \, 2^{-|r|} \\ %
    &\geq& \sum_{(y,r) \in C(x)} \frac{1}{t(|y|)} \nu(y) \, 2^{-|r|} \\ %
    &\geq& \frac{1}{t(p(|x|))} \underbrace{\sum_{(y,r) \in C(x)} \nu(y) \,
      2^{-|r|}}_{\phi(x)} \\ %
    &\geq& \frac{1}{t(p(|x|)) \, t'(|x|)} \mu(x).
  \end{eqnarray*}
  Thus, \(\mu \pdom \mu_{G'}\), and since
  \(\mu_{G'} \pdom \mu\) can be derived in a
  similar way, \(\mu_{G'} \pdequiv \mu\)
  follows\@.  Furthermore, since \(f\) is
  honest, the time needed by \(G'\) to
  output a pair \(\pair{x,w}\) is
  polynomially bounded in \(|x|\)\@.
%
\apar{Proof of Proposition 6-2}
  Assume now that \(G\) is a Monte Carlo
  generator for \((S,\nu)\)\@. We have to show
  that for any polynomial \(t\) and almost
  all \(x\in D_R\), \(\mu_{G'}(x) \geq t(|x|)
  \cdot \mu_{G'}(x,\lambda)\)\@. Let \(s\) be a
  polynomial such that \(|f(y,r)|\leq
  s(|y|)\) for all \(y\) and \(r\),
  \(|r|=q(|y|)\)\@. Since \(G\) is a Monte Carlo
  generator, it holds for almost all \(y\in
  D_S\) that \(\mu_G(y) \geq t(s(|y|)) \cdot
  \mu_G(y,\lambda)\)\@. Then, for all
  sufficiently large \(x \in D_R\) it
  follows that
  \begin{align*}
    \mu_{G'}(x) %
    &= \sum_{(y,r) \in C(x)} \mu_G(y) \, 2^{-|r|} \\ %
    &\geq \sum_{(y,r) \in C(x)} t(s(|y|)) \, \mu_G(y,\lambda) \,
    2^{-|r|} \\ % 
    &\geq t(|x|) \sum_{(y,r) \in C(x)} \mu_G(y,\lambda) \, 2^{-|r|} \\ %
    &= t(|x|) \cdot \mu_{G'}(x,\lambda).
  \end{align*}
  Thus, it follows that \(G'\) is a Monte
  Carlo generator for \(\Rmu\)\@.
%
\apar{Proof of Proposition 6-3}
  In the case that \(G\) is a Las Vegas
  generator for \((S,\nu)\), it follows
  immediately from the properties of \(f\)
  and \(g\) that \(G'\) is a Las Vegas
  generator for \(\Rmu\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 6}}
\end{alabsubtext}
%
Another important property of the generator reducibility is transitivity\@.
%
\begin{alabsubtext}{Proposition}{7}{}
  The generator reducibility is transitive\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{7}{}
\prooffontshape
  Let \((R_i,\mu_i)\), \(i=1,2,3\), be
  distributional \ComplexityClass{NP} search problems\@.
  Let \(D_i\) be the domain of \(R_i\),
  \(i=1,2,3\)\@. Assume that \((R_i,\mu_i)\) is
  generator reducible to
  \((R_{i+1},\mu_{i+1})\) via the functions
  \(f_i, g_i \in \FP\) and the polynomials
  \(p_i, q_i\), \(i=1,2\)\@. Consider the
  functions \(f\) and \(g\) defined as
  \begin{align*}
    f(y,r_1,r_2) &= 
    \begin{cases}
      f_1(f_2(y,r_2),r_1), & f_2(y,r_2)
      \neq \bot,\\ %
      \bot, & \text{otherwise\@.}
    \end{cases} \\ %
    \intertext{and} 
    g(y,v,r_1,r_2) &=
    \begin{cases}
      g_1(f_2(y,r_2),g_2(y,v,r_2),r_1), & R_3(y,v), f(y,r_1,r_2) \neq \bot, \\ %  
      \lambda, & \text{otherwise}.
    \end{cases}
  \end{align*}
  Since \(f_1\) and \(f_2\) satisfy the
  conditions of Definition 4,
  it follows that \(f\) and \(g\) fulfill the
  following conditions for all \(y,r_1,r_2\)
  with \(f(y,r_1,r_2) \neq \bot\)\@.
  \begin{enumerate}
  \item \(\displaystyle y \in D_3
    \Leftrightarrow f_2(y,r_2) \in D_2
    \Leftrightarrow
    \underbrace{f_1(f_2(y,r_2),r_1)}_{f(y,r_1,r_2)}
    \in D_1\),
  \item \(\begin{array}[t]{lll} R_3(y,v)
      &\Rightarrow& R_2(f_2(y,r_2),
      g_2(y,v,r_2))\\[1mm] &\Rightarrow&
      R_1(\underbrace{f_1(f_2(y,r_2),r_1)}_{f(y,r_2
        r_1)}, \underbrace{g_1(f_2(y,r_2),
        g_2(y,v,r_2), r_1)}_{g(y,v,r_2
        r_1)}),
    \end{array}\)
  \item \(\displaystyle |y| \leq
    p_2(p_1(|f_1(f_2(y,r_2),r_1)|)) \leq
    p_2(|f_2(y,r_2)|)\), and
  \item \(\displaystyle \mu_1 \pdequiv
    \phi(x)\), where \(\phi\) is the
    distribution induced by \(f\), \(q\), and
    \(\mu_3\)\@.  Indeed, since \(|f_2(y,r_2)|\)
    is polynomially bounded in \(|y|\),
    there exists a polynomial \(q\) such
    that \(q_2(|y|) + q_1(|f_2(y,r_2)|)
    \leq q(|y|)\) for all \(y,r_2\) with
    \(f_2(y,r_2)\neq\bot\)\@. For a given \(x\),
    consider the following sets:
    \begin{align*}
      A(x) &=
      \set{(y,r_1,r_2)}{f(y,r_1,r_2)=x,
        |r_2|=q_2(|y|),
      |r_1|=q(|y|)-q_2(|y|)}, \\ %
    B(x) &=
    \set{(y,r_1,r_2)}{f(y,r_1,r_2)=x,
      |r_2|=q_2(|y|),
      |r_1|=q_1(|f_2(y,r_2)|)}, \\ %
    C_1(x) &= \set{(z,r_1)}{f_1(z,r_1)=x,
      |r_1|=q_1(|z|)}, \\ %
    C_2(z) &= \set{(y,r_2)}{f_2(y,r_2)=z,
      |r_2|=q_2(|y|)}.
    \end{align*} 
    For \(i=1, 2\), let \(\phi_i\) be the
    distribution induced by \(f_i\), \(q_i\), and
    \(\mu_{i+1}\)\@. Since \(\m