% The Chicago Journal of Theoretical Computer Science, Volume 1999, Article 4
% Published by MIT Press, Cambridge, Massachusetts USA
% Copyright 1999 by Massachusetts Institute of Technology
%
\newtoks\rstyle
\rstyle={cjlook}
%
\documentclass[v1.1,published]{cjstruct}
%
% Local style resources for this article.
%
\usestyleresource{amstext}
\usepackage{amssymb}
\usepackage{amsmath}
%
% Local macro definitions for this article.
%
\catcode`\@=11
\ifx\nopagebreakbeginpar\undeftex
\newcommand{\nopagebreakbeginpar}{\@beginparpenalty10000}
\fi
\ifx\nopagebreakendpar\undeftex
\newcommand{\nopagebreakendpar}{\@endparpenalty10000}
\fi
\ifx\nopagebreakinteritem\undeftex
\newcommand{\nopagebreakinteritem}{\@itempenalty10000}
\fi
\catcode`\@=12
%
\newcommand{\prooffontshape}{\fontshape{n}\selectfont}
\newcommand{\card}[1]{\ensuremath{\left\| #1 \right\|}}
\newcommand{\Prob}[2]{\ensuremath{\text{\rm Prob}_{#1} \! \left[ #2 \right]}}
\newcommand{\pair}[1]{\langle #1 \rangle}
\newcommand{\set}[2]{\mbox{\(\{ #1 \; | \; #2 \}\)}}
\newcommand{\CompStyle}{\rm}
\newcommand{\ComplexityClass}[1]{\ensuremath{\text{\CompStyle #1}}}
% \newcommand{\ComplexityClass{RP}}{\ComplexityClass{RP}}
\newcommand{\AM}{\ComplexityClass{AM}}
\newcommand{\NE}{\ComplexityClass{NE}}
\newcommand{\RE}{\ComplexityClass{RE}}
\newcommand{\PH}{\ComplexityClass{PH}}
\newcommand{\FP}{\ComplexityClass{FP}}
\newcommand{\TALLY}{\ComplexityClass{TALLY}}
\newcommand{\pdom}{\ensuremath{\preceq}}
\newcommand{\pdequiv}{\ensuremath{\equiv}}
\newcommand{\co}[1]{\text{\textnormal{co-}#1}}
\newcommand{\generator}{instance generator}
\newcommand{\Rmu}[1][]{\ensuremath{(R_{#1},\mu_{#1})}}
\newcommand{\Cite}[1]{{\rm \cite{#1}}}
\newcommand{\Hash}[1][m,k]{\ensuremath{{\cal H}_{#1}}}
\newcommand{\hv}[1]{\textnormal{\textsf{#1}}}
\newcommand{\ds}{\displaystyle}
\newcommand{\must}{\ensuremath{\mu_{\text{st}}}}
\newcommand{\DCM}{\ensuremath{\text{\textnormal{DCM}}}}
\newcommand{\muDCM}{\ensuremath{\mu_{\DCM}}}
\newcommand{\RDCM}{\ensuremath{(R_{\DCM},\muDCM)}}
\newcommand{\GF}{\ensuremath{\text{\textnormal{GF}}}}
\newcommand{\muGF}{\ensuremath{\mu_{\GF}}}
\newcommand{\RGF}{\ensuremath{(R_{\GF},\muGF)}}
\def\registered{\({}^{\ooalign%
{\hfil\raise.07ex\hbox{{\tiny\textup{R}}}%
\hfil\crcr\scriptsize\mathhexbox20D\cr}}\)}
%
% end of local macro definitions for this article
%
\begin{document}
%
\begin{articleinfo}
%
\publisher{The MIT Press}
%
\publishercomment{%
{\Large
\begin{center}
Volume 1999, Article 4\\
\textit{25 March 1999}
\end{center}
}
\par\noindent
ISSN 1073--0486\@. MIT Press Journals, Five Cambridge Center, Cambridge, MA
02142-1493 USA; (617)253-2889;
\emph{journals-orders@mit.edu, journals-info@mit.edu}\@.
Published one article at a time in \LaTeX\ source form on the
Internet\@. Pagination varies from copy to copy\@. For more
information and other articles see:
\begin{itemize}
\item \emph{http://mitpress.mit.edu/CJTCS/}
\item \emph{http://www.cs.uchicago.edu/publications/cjtcs/}
\item \emph{ftp://mitpress.mit.edu/pub/CJTCS}
\item \emph{ftp://cs.uchicago.edu/pub/publications/cjtcs}
\end{itemize}
\sentence
\par\noindent
The \emph{Chicago Journal of Theoretical Computer Science} is
abstracted or indexed in \emph{Research Alert,\registered
SciSearch,\registered Current Contents\registered/Engineering
Computing \& Technology}, and \emph{CompuMath Citation Index\@.\registered}
}
%
\copyrightstmt{%
\copyright 1999 The Massachusetts Institute of Technology\@.
Subscribers are licensed to use journal articles in a variety of
ways, limited only as required to insure fair attribution to authors
and the journal, and to prohibit use in a competing commercial
product\@. See the journal's World Wide Web site for further
details\@. Address inquiries to the Subsidiary Rights Manager, MIT
Press Journals; (617)253-2864;
\emph{journals-rights@mit.edu}\@.\pagebreak[2]
}
%
\journal{Chicago\nolinebreak[1] Journal of Theoretical Computer\nolinebreak[1] Science}
\journalcomment{%
The \emph{Chicago Journal of Theoretical Computer Science}
is a peer-reviewed scholarly journal in theoretical computer
science\@. The journal is committed to providing a forum for
significant results on theoretical aspects of all topics in computer
science\@.
\par
\begin{trivlist}
\item[\emph{Editor-in-Chief:}] Janos Simon
\item[\emph{Consulting Editors:}]
Joseph Halpern, Stuart A.\ Kurtz, Raimund Seidel
\item[\emph{Editors:}]
\begin{tabular}[t]{lll}
Martin Abadi & Greg Frederickson & John Mitchell \\
Pankaj Agarwal & Andrew Goldberg & Ketan Mulmuley \\
Eric Allender & Georg Gottlob & Gil Neiger \\
Tetsuo Asano & Vassos Hadzilacos & David Peleg \\
Laszl\'o Babai & Juris Hartmanis & Andrew Pitts \\
Eric Bach & Maurice Herlihy & James Royer \\
Stephen Brookes & Ted Herman & Alan Selman \\
Jin-Yi Cai & Stephen Homer & Nir Shavit \\
Anne Condon & Neil Immerman & Eva Tardos \\
Cynthia Dwork & Howard Karloff & Sam Toueg \\
David Eppstein & Philip Klein & Moshe Vardi \\
Ronald Fagin & Phokion Kolaitis & Jennifer Welch \\
Lance Fortnow & Stephen Mahaney & Pierre Wolper \\
Steven Fortune & Michael Merritt
\end{tabular}
\vspace{1ex}
\item[\emph{Managing Editor:}] Michael J.\ O'Donnell
\vspace{1ex}
\item[\emph{Electronic Mail:}] \emph{chicago-journal@cs.uchicago.edu}
\end{trivlist}
\sentence
}
%
\bannerfile{cjtcs-banner.tex}
%
\volume{1999}
%
\articleid{4}
%
\selfcitation{
@article{cj99-04,
author={Christoph Karg and Johannes K\"obler and Rainer Schuler},
title={The Complexity of Generating Test Instances},
journal={Chicago Journal of Theoretical Computer Science},
volume={1999},
number={4},
publisher={MIT Press},
month={March},
year={1999}
}
}
%
\begin{retrievalinfo}
\end{retrievalinfo}
%
\title{The Complexity of Generating Test Instances\footnote{
An extended abstract of the paper was presented at the Fourteenth
Symposium on Theoretical Aspects of Computer Science (STACS),
Springer-Verlag, \emph{Lecture Notes in Computer Science} 1200,
pp. 375--386, 1997\@.}
}
\shorttitle{Complexity of Generating Test Instances}
%
\editorcomment{%
\samepage%
This paper is a contribution to a special issue on the 1996
Dagstuhl workshop on "Structure and Complexity" by Eric Allender\@.
}
%
\begin{authorinfo}
%
% Correction to error in cjstruct.sty version 1.1.
%
\def\fixversion{1.1}
\ifx\cjversion\fixversion
\renewcommand{\printname}[1]{
\global\authorstring={#1}
\global\authorstrue
\ifinfo\item Printed name: #1\fi
}
\fi
%
% End of correction.
%
\printname{Christoph Karg}
\collname{}{Karg, Christoph}
\begin{institutioninfo}
\instname{Universit\"at Ulm}
\department{Abteilung Theoretische Informatik}
\address{}{}{}{89069 Ulm}{Germany}
\end{institutioninfo}
\email{?@?}
\end{authorinfo}
%
\begin{authorinfo}
\printname{Johannes K\"obler}
\collname{}{K\"obler, Johannes}
\begin{institutioninfo}
\instname{Universit\"at Ulm}
\department{Abteilung Theoretische Informatik}
\address{}{}{}{89069 Ulm}{Germany}
\end{institutioninfo}
\email{?@?}
\end{authorinfo}
%
\begin{authorinfo}
\printname{Rainer Schuler}
\collname{}{Schuler, Rainer}
\begin{institutioninfo}
\instname{Universit\"at Ulm}
\department{Abteilung Theoretische Informatik}
\address{}{}{}{89069 Ulm}{Germany}
\end{institutioninfo}
\email{schuler@informatik.uni-ulm.de}
\end{authorinfo}
%
\shortauthors{Karg, K\"obler, and Schuler}
%
\support{
Christoph Karg's work was supported by the Deutsche
Forschungsgemeinschaft, Research Grant Sch{\"o} 302/4-2\@.
}
%
\begin{editinfo}
\submitted{00}{00}{1998}\reported{00}{00}{l998}\published{25}{03}{1999}
\end{editinfo}
%
\end{articleinfo}
%
\begin{articletext}
\begin{abstract}
\apar{Abstract-1}
Recently, Watanabe proposed a framework for testing the correctness
and average-case performance of algorithms that purport to solve
a given \ComplexityClass{NP} search problem efficiently on average with respect
to some distribution on the instances\@. The idea is to randomly
generate certified instances under some distribution that resembles
the input distribution\@. Watanabe showed that unless \(\RE=\NE\),
test instances cannot be generated for some \ComplexityClass{NP} search problems\@. We
further discuss Watanabe's approach and show, as an upper bound, that
test instances can be generated for every \ComplexityClass{NP} search problem with
non-adaptive queries to an \ComplexityClass{NP} oracle\@.
%
\apar{Abstract-2}
We also introduce Las Vegas and Monte Carlo types of test instance
generators and show that these generators can be used to find out
(with high confidence) whether an algorithm is correct and efficient
on average\@. It is shown that Monte Carlo generators can be easily
constructed for all \(\ComplexityClass{RP}\) search problems and that Las Vegas
generators exist for all \(\ComplexityClass{ZPP}\) search problems as well as for
other problems such as prime factorization\@. On the other hand, we prove that Monte
Carlo generators can only exist for problems in \(\ComplexityClass{NP} \cap \co\AM\)\@.
\end{abstract}
%
\asection{1}{Introduction}
%
\apar{1-1}
The class \ComplexityClass{NP} is one of the most intensively studied classes in
computational complexity theory, for it contains a large variety of
problems that are of practical importance\@. There are two types of
problems that we are interested in, namely decision problems and
search problems\@. Solving a decision problem means to decide whether a
given instance has a solution, and solving a search problem means to
actually find a solution\@. Although, at first glance, search problems
seem more difficult to solve than decision problems, in many cases,
they are equivalent under polynomial-time Turing reductions\@. For
example, since any \ComplexityClass{NP} search problem is polynomial-time Turing
reducible to any \ComplexityClass{NP}-complete (decision) problem, it follows that
\ComplexityClass{NP}-complete problems have \emph{self-computable witnesses} (in the
sense of \cite{cj99-04-04,cj99-04-12}), meaning that solutions can be efficiently
computed by asking oracle queries to the corresponding decision
problem\@.
%
\apar{1-2}
Since polynomial-time algorithms do not exist for \ComplexityClass{NP}-complete
problems unless \(\ComplexityClass{P}=\ComplexityClass{NP}\), it seems reasonable to look for algorithms
that are \emph{efficient in the average case} \cite{cj99-04-25} (see
\cite{cj99-04-16} or \cite{cj99-04-34} for a survey)\@. In practical
applications, instances occur with certain probabilities\@. To model
this process, we use probabilistic Turing machines that output
instances in time polynomial in the length of the generated instances
\cite{cj99-04-03}\@.
%
\apar{1-3}
Besides designing efficient algorithms it is also important to find
ways of testing the correctness (and performance) of a given
algorithm\@. Ideally, one would like to give a proof that the algorithm
is correct and efficient\@. However, in some cases, a mathematical
proof may be hard to obtain, or the proof may be long and complex\@.
Thus, in practical terms, it is reasonable to test algorithms by
feeding them with carefully chosen instances for which the solutions
are already known\@. An obvious approach is to use a probabilistic
polynomial-time bounded algorithm that generates instances of the
problem together with some solution\@. To be useful, these test
instances should be generated according to some distribution that
resembles the frequencies with which the instances occur in practice\@.
%
\apar{1-4}
In this paper our objective is to verify
the ``global correctness'' of a given
algorithm \(A\) in the sense that, on a
randomly selected input (according to
the underlying distribution on the
instances), the output of \(A\) is correct
with high probability\@. This should be
seen in contrast to the program checking
approach in \cite{cj99-04-10}, where the
outcome of the algorithm on a single
instance has to be verified\@. Watanabe
\cite{cj99-04-35} introduced a framework for
globally checking the correctness and
average-case behavior of algorithms\@.
Intuitively, a generator for an \ComplexityClass{NP}
search problem only needs to solve the
problem on instances that are (randomly)
\emph{generated by the generator itself},
whereas an algorithm has to solve the
problem on instances that are (randomly)
\emph{generated ``from outside\@.''} Thus,
for some \ComplexityClass{NP} search problems it might
be easier to find a generator than to
find an algorithm that solves it\@. For
example, it is not hard to construct a
test instance generator for \emph{prime
factorization} under the standard
distribution (which is uniform on each
input length)\@. On the other hand, no
efficient on-average algorithm is known
that on input of a binary number \(n\),
either outputs a factor of \(n\) in case
\(n\) is composite, or outputs ``prime''
otherwise\@. In fact, this is the basis
for the practical security of, for example, the
RSA cryptosystem\@. Also, for certain
random self-reducible problems
\cite{cj99-04-30,cj99-04-01} it is possible to
generate instances (along with some
solution) according to the distribution
induced by the self-reduction\@. Examples
are the \emph{quadratic residue} and the
\emph{discrete logarithm} problems\@.
%
\apar{1-5}
A test instance generator, as introduced
by Watanabe, is a probabilistic
algorithm \(G\) that generates instances
along with a solution; in particular,
\(G\) only produces \emph{positive
instances}\@. Moreover, on input \(0^n\),
\(G\) is required to output each positive
instance of length \(n\) with sufficiently
large probability (compared with the
\emph{conditional} distribution on the
set of all positive instances of length
\(n\))\@. As shown by Watanabe, there exist
\ComplexityClass{NP} search problems for which test
instances cannot be generated unless
\(\RE=\NE\)\@. In fact, since for certain
distributions the generator has to
amplify the probability of the positive
instances by an exponential factor, we
can show that this is even true for very
simple search problems\@.
%
\apar{1-6}
In our model, a generator may produce
positive as well as negative instances\@.
As in Watanabe's approach, we require
that the generator \(G\) output a witness
along with every positively classified
instance, i.e., \(G\) never misclassifies
a negative instance\@. Moreover, each
instance \(x\) has to be generated with a
probability that is polynomially related
to the probability \(\mu(x)\) with which
\(x\) occurs in practice\@. Thus, \(G\) is not
required to amplify the probability of
the positive instances\@. Furthermore,
this condition implies that any
algorithm is efficient on the generated
instances if and only if it is efficient
under the given distribution \(\mu\)\@.
%
\apar{1-7}
We define two types of test instance
generators in this paper, namely, a
Monte Carlo generator and a Las Vegas
generator\@. While a Monte Carlo
generator is allowed to misclassify a
(positive) instance with small
probability, a Las Vegas generator has
to classify all generated instances
correctly\@. We show how these generators
can be used to decide efficiently (and
with high confidence) whether a given
algorithm \(A\) for an \ComplexityClass{NP} search
problem is correct with respect to
\(\mu\)\@. Further, we describe how to
construct Monte Carlo generators for all
\(\ComplexityClass{RP}\) search problems as well as Las
Vegas generators for all \(\ComplexityClass{ZPP}\) search
problems\@.
%
\apar{1-8}
In general, whereas logarithmically many
\emph{adaptive} queries are useless, a
polynomial number of \emph{non-adaptive}
queries to an \ComplexityClass{NP} set turn out to be
sufficient to generate test instances
for any \ComplexityClass{NP} search problem\@. This
implies, in particular, that under the
assumption \(\ComplexityClass{NP}=\ComplexityClass{RP}\), any distributional
\ComplexityClass{NP} search problem has a test instance
generator\@.
%
\apar{1-9}
Further, by an application of the
universal hashing technique, we show
that the domain of any search problem
for which a Monte Carlo generator exists
(under the standard probability
distribution) necessarily belongs to
{\co\AM}\@. Similarly, Las Vegas
generators can only exist for \ComplexityClass{NP}
search problems whose domain belongs to
\(\ComplexityClass{NP}\cap\co{\ComplexityClass{NP}}\)\@.
As a consequence, Monte
Carlo or Las Vegas generators do not
exist for search problems with an
\ComplexityClass{NP}-complete domain, unless the
polynomial-time hierarchy collapses\@.
%
\apar{1-10}
Finally, we define a reducibility between \ComplexityClass{NP} search problems that,
similar to the reducibility introduced by Levin \cite{cj99-04-25,cj99-04-07},
preserves the probabilities of the instances\@. Via this reducibility
we can obtain a test instance generator for a given \ComplexityClass{NP} search
problem provided that we already have a test instance generator for
some \ComplexityClass{NP} search problem to which the given one reduces\@.
%
\asection{2}{Preliminaries}
%
\apar{2-1}
In this paper we use the standard notations and definitions of
computational complexity theory (see, for example, \cite{cj99-04-06})\@. An
introduction to the theory of computational average-case complexity
can be found in \cite{cj99-04-16,cj99-04-03,cj99-04-34}\@. Given below are some of the
basic definitions we will use\@.
%
\apar{2-2}
All languages considered here are over the alphabet \(\Sigma=\{0,1\}\)\@.
The length of a string \(x\in\Sigma^*\) is denoted by \(|x|\)\@. For a set
\(A\) of strings, let \(A^{=n}=A\cap\Sigma^n\) and \(A^{\leq
n}=\bigcup_{k=0}^n A^{=k}\)\@. We denote the cardinality of a finite
set \(A\) by \(\card{A}\), and we write \(A(x)=1\), if \(x\in A\), and \(A(x)=0\)
otherwise\@. The \hv{pairing function} \(\pair{\cdot,\cdot} : \Sigma^*
\times \Sigma^* \to \Sigma^*\) is defined as \(\pair{x,y}=d(x)01y\),
where \(d(x_1 \dots x_{n})=x_1 x_1 \dots x_{n} x_{n}\)\@. Note that
\(\pair{x,y}\) is computable and invertible in polynomial time, and for
all \(x,y \in \Sigma^*\), \(|\pair{x,y}| = 2|x|+|y|+2\)\@. \smallskip
%
\apar{2-3}
\noindent \textbf{Probability functions and distributions\@.}
Let \(\mu\) be a probability distribution on \(\Sigma^*\)\@. Associated with
\(\mu\) are a \hv{probability function} that we also denote by \(\mu\) and
a \hv{distribution function}, denoted by \(\mu^*\)\@. More formally, \(\mu\)
and \(\mu^*\) are functions from \(\Sigma^*\) into the interval \([0,1]\)
such that \(\sum_x\mu(x)=1\) and \(\mu^*(x)=\sum_{y\leq x}\mu(y)\), where,
as usual, \(\leq\) denotes the lexicographic ordering on \(\Sigma^*\)\@. For
any set \(X \subseteq \Sigma^*\) let \(\mu(X)= \sum_{x \in X} \mu(x)\)\@.
We refer to the distribution with the probability function \(\must(x) =
1/(|x| (|x|+1) 2^{|x|})\) as the \hv{standard distribution}\@.
%
\apar{2-4}
Let \(\mu,\nu:\Sigma^*\to [0,1]\) be functions\@. We say that \(\mu\) is
\hv{polynomially dominated} by \(\nu\) (in symbols: \(\mu \pdom \nu\))
{\cite{cj99-04-25}} if, for some polynomial \(p\) and all \(x\), \(\mu(x) \leq
p(|x|) \cdot\nu(x)\)\@. In case \(\mu \pdom \nu\) and \(\nu \pdom \mu\), the
distributions \(\mu\) and \(\nu\) are called \hv{equivalent} (in symbols:
\(\mu \pdequiv \nu\)) {\cite{cj99-04-32}}\@.
A function \(f\) from \(\Sigma^*\) to the
non-negative integers is \hv{polynomial on
\(\mu\)-average} \cite{cj99-04-25} if there
exists a constant \(\epsilon>0\) such that
\[
\sum_{x\neq\lambda}\frac{f^{\epsilon}(x)}{|x|}\cdot\mu(x)\;\;<\;\;\infty.
\]
The set of functions that are polynomial on \(\mu\)-average is closed
under addition and multiplication\@. Furthermore, if \(\mu\) is dominated
by \(\nu\), then every function that is polynomial on \(\nu\)-average is
polynomial on \(\mu\)-average {\cite{cj99-04-25,cj99-04-16}}\@.
%
\apar{2-5}
A distribution is called
\hv{p-computable} if its distribution
function \(\mu^*\) is polynomial-time
computable in the sense of Ko and
Friedman \cite{cj99-04-21}, i.e., there exists
a polynomial time bounded deterministic
Turing transducer \(M\) such that, for all
\(x\) and all \(k\), it holds that
\(|M(x,1^k)-\mu^*(x)|\leq 2^{-k}\)\@. A
distribution is called \hv{samplable}
\cite{cj99-04-03} if there exists a
probabilistic Turing machine \(M\) that on
input \(\lambda\) outputs \(x\) with
probability \(\mu(x)\)\@. If, in addition,
\(M\) is polynomial-time bounded in the
length of the output, then \(\mu\) is
called \hv{p-samplable}\@. It is easy to
see that the standard distribution
\(\must\) is \allowbreak {p-computable}\@. Furthermore,
every p-computable distribution is
dominated by some p-samplable
distribution {\cite[Theorem~7]{cj99-04-03}}\@.
%
\apar{2-6}
\noindent
\textbf{Distributional NP search
problems\@.} An \ComplexityClass{NP} search problem is
specified by a binary polynomial-time
decidable \hv{witness relation} \(R\) and
a polynomial \(q_R\) such that \(R(x,w)\)
implies \(|w| = q_R(|x|)\)\@. Any \(w\) for
which \(R(x,w)\) holds is called a
\hv{solution} (witness) for \(x\)\@. An
instance \(x\) is called a \hv{positive
instance} in case it has a solution,
and a \hv{negative instance}, otherwise\@.
We assume without loss of generality
that \(\lambda\) is never a solution\@. A
witness relation \(R\) also specifies an
\(\ComplexityClass{NP}\) decision problem, namely
\(D_R=\set{x\in\Sigma^*}{\exists w:
R(x,w)}\)\@. \(D_R\) is also called the
\hv{domain} of \(R\)\@. In case
\(D_R=\Sigma^*\), \(R\) is called a
\hv{total} \ComplexityClass{NP} search problem\@.
%
\apar{2-7}
A \hv{distributional NP search problem} is a pair {\Rmu} consisting of
a witness relation \(R\) and a p-samplable probability function \(\mu\)\@.
A (deterministic) algorithm \(A\) efficiently solves {\Rmu} if the
running time of \(A\) is polynomial on \(\mu\)-average and \(A\) computes a
solution for all inputs \(x\) in the domain of \(R\), whereas it outputs
\(\lambda\) on all other inputs\@.
\asection{3}{Generating Positive Test Instances}
%
\apar{3-1}
As pointed out in the introduction, the aim of test instance
generation is to verify the quality (correctness and performance) of a
given algorithm\@. In this section we will discuss Watanabe's approach
to the test instance generation problem\@. Specifically, Watanabe
requires the following properties of a test instance generator \(G\) for
a distributional \ComplexityClass{NP} search problem {\Rmu}: \(G\) is a probabilistic
polyno\-mial-time algorithm, which, on input of \(0^n\), either outputs
\(\bot\) (``not successful'') or generates some instance \(x\) of length
\(n\) together with a solution \(w\) for \(x\)\@. Moreover, \(G\) has to output
each positive instance with non-negligible probability compared with
the conditional probability function \(\rho_n\) on the positive
instances of length \(n\)\@. In particular, \(G\) is not allowed to output
any negative instance, and hence, it is more appropriate to speak of
\(G\) as a generator for the \emph{promised} \(\ComplexityClass{NP}\) search problem
restricted to the positive instances\@.
%
\begin{alabsubtext}{Definition}{1}{Test Instance Generator \Cite{cj99-04-35}} \label{tig-def}
Let {\Rmu} be a distributional \ComplexityClass{NP} search problem and let \(D\) be
the domain of \(R\)\@. A probabilistic polynomial-time Turing machine
\(G\) is called a \hv{test instance generator} for {\Rmu} if there is
a polynomial \(q\) such that for all \(n\),
%
\begin{enumerate}
\item on every path, \(G(0^n)\) outputs either \(\bot\) or a pair
\(\pair{x,w}\) such that \(R(x,w)\) and \(x \in \Sigma^n\), and
\item if \(\mu(D^{=n})>0\), then for every \(x\in D^{=n}\), \(G(0^n)\)
outputs with probability at least \(\rho_n(x)/q(n)\) a pair of the
form \(\pair{x,w}\), \(w\in\Sigma^*\)\@. Here, \( \rho_n(x) =
{\mu(x)}/{\mu(D^{=n})}\)
%if \(x \in \)
is the probability of \(x\) under the
condition that \(x\) belongs to
\(D^{=n}\)\@.
\end{enumerate}
%
\end{alabsubtext}
%
\apar{3-2}
As shown by Watanabe, generators of this type can be used to detect,
for a given input length \(n\) (in expected time polynomial in \(n\) and
\(1/\varepsilon\)), that an algorithm makes an error, provided that errors
occur with probability \(\varepsilon\) (with respect to \(\rho_n\))\@.
Furthermore, they can be used to find out that an algorithm is not
efficient (with respect to \(\rho_n\)) \cite{cj99-04-35}\@. More precisely, if
\(G\) is a test instance generator for a search problem \((R,\mu)\), then
the quality of a given algorithm \(A\) with respect to \(\rho_n\) can be
verified as follows\@.
%
\begin{enumerate}
\item If \(A\) makes an error on strings of length \(n\) with probability
at least \(\varepsilon\), then with probability at least
\(\varepsilon\), \(A\) makes at least one error on a polynomial-size
sample \(S\) generated by \(G\), i.e., the test instances in \(S\) are
obtained by running \(G(0^n)\) a polynomial number of times\@.
\item If \(A\) needs more than \(n^k\) steps on \(\Sigma^n\) with
probability at least \(\varepsilon\), then with probability at least
\(\varepsilon\), \(A\) needs more than \(n^k\) steps on some input from a
polynomial-size sample \(S\) generated by \(G\)\@.
\end{enumerate}
%
We note that, since in Definition 1 the probability that
\(G(0^n)\) outputs \(\pair{x,w}\) is not bounded from above, an algorithm
\(A\) may take exponential time on average under the distribution
induced by the generator even if \(A\) is polynomial on \(\rho\)-average\@.
In fact, it may happen that \(G\) produces with high probability
instances that have probability zero with respect to \(\rho\)\@.
%
\apar{3-3}
The following result, due to Watanabe, shows that
it is unlikely that there exists a test instance
generator for every distributional \ComplexityClass{NP} search
problem\@.
%
\begin{alabsubtext}{Theorem}{1}{\cite{cj99-04-35}}\label{wat-thm}
If every \ComplexityClass{NP} search problem has a test
instance generator under the standard
distribution, then \(\RE=\NE\)\@.
\end{alabsubtext}
%
In fact, the proof shows that randomly
computing instance/witness pairs for all \ComplexityClass{NP} search problems with a tally
domain is impossible, unless \(\RE=\NE\)\@.
Similarly, it can be seen that there are
total \ComplexityClass{NP} search problems (i.e., with
domain \(\Sigma^*\)) that do not have test
instance generators under the standard
distribution, unless
\(\NE\cap\co{\NE}\subseteq\RE\)\@. This
shows that generating test instances for
an \ComplexityClass{NP} search problem for which
decision is easy might nevertheless be
hard\@.
%
\apar{3-4}
We now show that generating test instances
in Watanabe's model may be impossible even
for trivial search problems since, for some
distributions, it is necessary to amplify
the probability of the positive instances
of fixed length by an exponential factor\@.
%
\begin{alabsubtext}{Theorem}{2}{} \label{sparse-thm}
Let \(R=\set{\pair{x,1}}{x \in \Sigma^*-\{1\}^*}\)\@. If for every
p-samplable distribution \(\mu\) there exists a test instance
generator for \((R,\mu)\), then \(\RE=\NE\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Theorem}{2}{}
\prooffontshape
\apar{Proof of Theorem 2-1}
Since
\(\RE=\NE\) if and only if \(\ComplexityClass{NP} \cap \TALLY \subseteq \ComplexityClass{RP}\)
\cite{cj99-04-13}, it suffices to show that the assumption implies that
every tally \ComplexityClass{NP} set \(T\) is contained in \ComplexityClass{RP}\@. Let \(R_T\) be a
binary relation and \(p\) be a polynomial such that for all \(n\),
\(0^n\in T\) if and only if there exists a string \(w\in\Sigma^{p(n)}\)
with \(R_T(0^n,w)\)\@.
%
\apar{Proof of Theorem 2-2}
Now consider the p-samplable distribution \(\mu\) generated by the
following probabilistic algorithm:
\begin{quote}
First, guess a positive integer \(n\) with
probability proportional to \(n^{-2}\)\@. Then,
uniformly guess a string \(w \in
\Sigma^{p(n)}\) and output \(w\) in case
\(R_T(0^n,w)\) holds; else, output \(1^{p(n)}\)\@.
\end{quote}
Then any test instance generator \(G\) for \((R,\mu)\) has the property
that, if \(0^n\in T\), then either \(R_T(0^n,1^{p(n)})\) holds or
\(G(0^{p(n)})\) outputs with probability \(1/n^{O(1)}\) a pair
\(\pair{w,1}\) for which \(R_T(0^n,w)\) holds\@. Note that, in the latter
case, \(\rho_{p(n)}\{w\in\Sigma^{p(n)}\mid R_T(0^n,w)\}=1\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Theorem 2}}
\end{alabsubtext}
%
\apar{3-5}
An interesting question is whether the converse of
Theorem~1 (or of Theorem~2) is also true\@.
As shown below, non-adaptive queries to an \ComplexityClass{NP} set are sufficient to
generate test instances for any distributional \ComplexityClass{NP} search problem\@.
This implies in particular that under the assumption \(\ComplexityClass{NP}=\ComplexityClass{RP}\), any
distributional \ComplexityClass{NP} search problem has a test instance generator\@.
%
\apar{3-6}
As shown in the following proposition, \(O(\log n)\) many queries to any
oracle are not helpful for a generator in Watanabe's model\@.
%
\begin{alabsubtext}{Proposition}{1}{}
If, for a distributional \ComplexityClass{NP} search problem
{\Rmu}, test instances can be generated with
\(O(\log n)\) queries to some oracle, then there
exists a test instance generator for {\Rmu}\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{1}{}
\prooffontshape
Let \(M\) be a probabilistic oracle Turing machine that generates test
instances by asking \(O(\log n)\) queries to some oracle\@. Let \(M'\)
simulate \(M\) where each oracle query is answered by a fair coin
toss\@. If \(M\) outputs a pair \(\pair{x,w}\), then \(M'\) outputs
\(\pair{x,w}\) only if \(R(x,w)\) holds, and outputs \(\bot\) otherwise\@.
Then the probability that all oracle answers are guessed correctly
is \(1/n^{O(1)}\), implying that \(M'\) still fulfills the conditions of
Definition~1\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 1}}
\end{alabsubtext}
%
\apar{3-7}
The upper bound on the complexity of generating test instances that we
will give in Theorem~3 below is based on the
universal hashing technique \cite{cj99-04-14,cj99-04-29,cj99-04-31}\@. Let \Hash\ be the
class of all linear hash functions from \(\Sigma^m\) to \(\Sigma^k\)\@. The
following lemma is straightforward ({cf.}
% \cite[page 450]{cj99-04-26})\@.
\cite{cj99-04-26})\@.
%
\begin{alabsubtext}{Lemma}{1}{} \label{hash-lemma}
Let \(S\) be a subset of \(\Sigma^m\) of cardinality \(\card{S}=s\) such
that \(2^{k}\leq3s\leq 2^{k+1}\) and let \(x\in S\)\@. Then, for a
uniformly chosen hash function \(h\) from \(\Hash\), it holds with
probability at least \(2/(9s)\) that \(x\) is the only element in \(S\)
with \(h(x)=0^k\)\@. Hence, with probability at least \(2/9\), there exists
a unique \(x\in S\) with \(h(x)=0^k\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Lemma}{1}{}
\prooffontshape
Since \(\Hash\) is a universal class of hash
functions, it follows for a uniformly chosen
\(h\) that \(\Prob{}{h(x)=0^k}=2^{-k}\)
and that
%
\begin{eqnarray*}
\lefteqn{\Prob{}{h(x)=0^k\wedge \exists y\in
S-\{x\}: h(y)=0^k}}\\ &\leq&\sum_{y\in
S-\{x\}} \Prob{}{h(x)=0^k\wedge
h(y)=0^k}\;<\; s2^{-2k}.
\end{eqnarray*}
%
Consequently, the probability that \(x\) is the only pre-image of
\(0^k\) in \(S\) can be calculated as follows\@.
\[
\begin{array}{lcl}
\lefteqn{\Prob{}{h(x)=0^k\wedge \forall y\in
S-\{x\}: h(y)\not=0^k}}\\
&=&\Prob{}{h(x)=0^k}- \Prob{}{h(x)=0^k\wedge
\exists y\in S-\{x\}: h(y)=0^k}\\
&>&2^{-k}(1-s2^{-k})\\
&\geq&2/(9s),\text{ since }s2^{-k}(1-s2^{-k})\geq2/9\text{ for }1/3\leq s2^{-k}\leq2/3.
\end{array}\vspace*{-4ex} \]
{\nopagebreakbeginpar\markendlst{Proof of Lemma 1}}
\end{alabsubtext}
%
\begin{alabsubtext}{Theorem}{3}{}\label{upper-bound-thm}
For every distributional \ComplexityClass{NP} search problem {\Rmu}, test instances
can be generated with parallel queries to some set in \ComplexityClass{NP}\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Theorem}{3}{}
\prooffontshape
%
Let \(D\) be the domain of \(R\) and let \(q\) be a polynomial such that
\(|w|=q(|x|)\) for all solutions \(w\) for \(x\)\@. Let \(M\) be a
probabilistic Turing machine witnessing that \(\mu\) is p-samplable
and let \(p\) be a polynomial time bound for \(M\), i.e., the time used
by \(M(\lambda)\) to output a string \(x\) is bounded by \(p(|x|)\)\@. For
any string \(r\in\Sigma^{p(n)}\), let \(M_r(\lambda)\) denote the output
of \(M\) (if any) when using sequence \(r\) as random source\@. The test
instance generator \(G\) for \((R_D,\mu)\) is defined as follows\@.
%
\begin{quote}
On input \(0^n\), \(G\) first guesses two integers
\(k\in\{1,\dots,p(n)+1\}\), \(l\in\{1,\dots,q(n)+1\}\) and two linear
hash functions \(h_1\in\Hash[p(n),k]\), \(h_2\in\Hash[q(n),l]\)\@. By
asking in parallel the queries \(y_i=\pair{0^n,i,h_1,h_2}\), \(1\leq
i\leq m=2p(n)+q(n)+2\), to the oracle \(A\), \(G\) obtains a sequence
\(s=A(y_1)\dots A(y_{m})\) of oracle answer bits\@. Then \(G\)
determines strings \(r\in\Sigma^{p(n)}\), \(w\in\Sigma^{q(n)}\), and
\(x\in\Sigma^*\) such that \(\pair{r,w}=s\) and \(x=M_r(\lambda)\)\@.
Finally, if \(|x|=n\) and \(R(x,w)\) hold, then \(G\) outputs the pair
\(\pair{x,w}\); otherwise, \(G\) outputs the symbol \(\bot\)\@.
\end{quote}
%
For every \(x\), let \(S_x=\{r\in\Sigma^{p(|x|)}\mid M_r(\lambda)=x\}\)
denote the set of all random sequences \(r\) using which \(M\) outputs
\(x\), and let \(S_n=\bigcup_{x\in D^{=n}}S_x\)\@. For every \(r\in S_n\)
let \(\alpha_r\) denote the probability that \(G(0^n)\) guesses integers
\(k,l\) and linear hash functions \(h_1,h_2\) such that \(r\) is the only
string in \(S_n\) with \(h_1(r)=0^k\) and there exists a unique solution
\(w\) for \(M_r(\lambda)\) with \(h_2(w)=0^l\)\@. Then it follows by
Lemma~1 that
\[
\alpha_r\geq\frac{1}{(p(n)+1)(q(n)+1)}\cdot
\frac{2}{9\cdot\card{S_n}}\cdot \frac{2}{9}.
\]
%
Now consider the following \ComplexityClass{NP} oracle set \(A\) defined as
%
\begin{eqnarray*}
\pair{0^n,i,h_1,h_2}\in A&\Leftrightarrow& \exists\pair{r,w}: h_1(r)=0^k,\,r\in S_n,\,h_2(w)=0^l,\\
& & R(M_r(\lambda),w),\mbox{ and the \protect\(i\protect\)th bit of \protect\(\pair{r,w}\protect\) is 1}.
\end{eqnarray*}
%
Then it follows that, for every \(x\in D^{=n}\), \(G(0^n)\) outputs a
pair of the form \(\pair{x,w}\), \(w\in\Sigma^*\), with probability at
least
\[
\sum_{r\in S_x} \alpha_r \,\geq\,
\frac{4\card{S_x}}{81(p(n)+1)(q(n)+1)\card{S_n}} \,=\,
\frac{4}{81(p(n)+1)(q(n)+1)}\cdot\rho_n(x).\vspace*{-4ex}
\]
{\nopagebreakbeginpar\markendlst{Proof of Theorem 3}}
\end{alabsubtext}
%
We end this section by observing that the above theorem can easily be
extended to the Monte Carlo type of generators that we will introduce
in the next section\@.
%
\asection{4}{Test Instance Generation Without Amplification}
%
\apar{4-1}
In this section we propose a slightly
different model of generating test
instances\@. As explained above, our aim
is to verify the quality of a given
algorithm but to avoid the need for
amplifying the probability of the
positive instances by a possibly
exponential factor\@. This goal is achieved by
allowing the generator not only to
output pairs \(\pair{x,w}\), where \(x\) is
a positive instance and \(w\) is a
solution for \(x\), but also to output
pairs of the form \(\pair{x,\lambda}\),
where \(x\) might be negative as well\@.
(Recall our assumption that \(\lambda\) is
not a solution for any instance\@.) In
the definition of a Monte Carlo
generator, we also allow that positive
instances \(x\) be generated (with small
probability) in the form
\(\pair{x,\lambda}\), whereas this is
totally forbidden for a Las Vegas
generator\@. Before we proceed to the
formal definition of these generators,
let us explain why it might be easier
to generate test instances for a
distributional \ComplexityClass{NP} search problem than
to solve the problem itself\@.
%
\apar{4-2}
Intuitively, a generator for a
distributional \ComplexityClass{NP} search problem can
benefit from the advantage that it may
intertwine the process of randomly
generating an instance with the process
of constructing a suitable solution\@. In
contrast, an algorithm has to solve the
problem on instances that are randomly
generated ``from outside\@.'' Thus, there
might exist distributional \ComplexityClass{NP} search
problems for which it is easier to
generate instance/witness pairs than to
solve the problem\@. In fact, this
phenomenon is exploited in the design of
many cryptographic protocols, such as in
generating public keys together with
some (secret) trapdoor information\@. For
example, the security of the RSA
cryptosystem is based on the practical
hardness of the factorization problem,
i.e., no efficient-on-average algorithm
is known (under the standard
distribution) that on input of \(n\) outputs
a factor of \(n\) in case \(n\) is
composite, and outputs ``prime''
otherwise\@. On the other hand, it is
computationally easy to randomly
generate composite numbers together with
some factor\@. Indeed, it is not hard to
construct a Las Vegas generator for the
corresponding search problem (see Proposition~4)\@.
%
\apar{4-3}
Next we define the notion of an {instance} generator for a
distributional \ComplexityClass{NP} search problem {\Rmu}\@. An {instance} generator
\(G\) efficiently generates instances in accordance with the
distribution \(\mu\)\@. Furthermore, it provides, along with each
generated instance \(x\), either some solution \(w\) (implying that \(x\)
belongs to the domain of \(R\)) or the empty string \(\lambda\) (implying
absolutely nothing on the membership status of \(x\))\@. In fact, an
{instance} generator may only output pairs of the form
\(\pair{x,\lambda}\)\@. In order to force \(G\) to generate ``good'' test
instances, we will later impose an additional bound on the probability
that \(G\) outputs a pair \(\pair{x,\lambda}\) when \(x\) is positive\@.
%
\begin{alabsubtext}{Definition}{2}{\sf{\generator}} \label{generator-def}
Let {\Rmu} be a distributional \ComplexityClass{NP} search
problem\@. A probabilistic Turing machine \(G\) (with output but with no input) is
called an \hv{\generator} for \(R\) under \(\mu\)
if
\begin{enumerate}
\item on every halting computation, \(G\)
outputs a pair \(\pair{x,w}\) with the
property that either \(R(x,w)\) holds or
\(w = \lambda\),
\item the time needed by \(G\) to output a pair
of the form \(\pair{x,w}\), \(w\in\Sigma^*\), is
polynomially bounded in \(|x|\), and
\item \(\mu_G\) is equivalent to \(\mu\), where
\(\mu_G(x)\) is the probability that \(G\)
generates instance \(x\), {i.e.}, \(\mu_G(x) =
\sum_{w\in\Sigma^*} {\mu}_G(x,w)\), where \({\mu}_G(x,w)\)
is the probability that \(G\) outputs the pair
\(\pair{x,w}\)\@.
\end{enumerate}
\end{alabsubtext}
%
Note that if \(G\) were required to stop on all paths, then only finitely
many instances could be generated\@. Actually, we allow that
\(\mu_G(\Sigma^*)<1\), i.e., \(G\) might run forever with \emph{positive}
probability\@. Also, if \(\mu(\Sigma^n)=1/n^{O(1)}\), then \(G\) can easily
be modified to produce instances of a given length according to the
conditional probability of that length\@. (This is similar to Watanabe's model,
but note that the generator might still output negative instances\@.)
%
\apar{4-4}
Suppose that \(G\) is an {\generator} for a distributional \ComplexityClass{NP} search
problem {\Rmu} and let \(A\) be an algorithm\@. Since the distributions
\(\mu\) and \(\mu_G\) are equivalent, the running time of \(A\) is
polynomial on \(\mu\)-average if and only if it is polynomial on
\(\mu_G\)-average \cite{cj99-04-25}\@.
%
\apar{4-5}
We distinguish between two types of generators, depending on the
quality of the generated test instances\@. A Monte Carlo generator is
allowed to err with small probability, whereas a Las Vegas generator
only generates test instances that are correctly classified\@.
%
\begin{alabsubtext}{Definition}{3}{\sf{Monte Carlo generator}} \label{mctig-def}
Let \(G\) be an {\generator} for a distributional
\ComplexityClass{NP} search problem {\Rmu} with domain \(D\)\@.
\begin{enumerate}
\item \(G\) is called a \hv{Monte Carlo
generator} for \(R\) under \(\mu\) if for
every polynomial \(q\),
\[
\mu_G(x,\lambda)\leq \frac{\mu_G(x)}{q(|x|)}
\]
holds for all but finitely many instances
\(x\) in \(D\)\@.
\item \(G\) is called a \hv{Las Vegas generator}
for \(R\) under \(\mu\) if \(G\) never outputs a
pair \(\pair{x,\lambda}\) with \(x\in D\)\@.
\end{enumerate}
\end{alabsubtext}
%
\apar{4-6}
We note that if \(\mu(D^{=n})=1/n^{O(1)}\), then any Monte Carlo
generator for {\Rmu} can be transformed into a test instance generator
for {\Rmu} according to Watanabe's model\@.
%
\apar{4-7}
We next show how a Monte Carlo generator can be used to find out
efficiently and with high confidence whether a given algorithm \(A\) for
a distributional \ComplexityClass{NP} search problem \(\Rmu\) is correct with respect
to the underlying probability distribution \(\mu\)\@. Without loss of
generality, we can assume that \(A\) makes only errors on instances in
the domain \(D\) of \(R\)\@. More formally, for any subset \(D'\) of \(D\), we
call the probability \(\mu\{x\in D'\mid A(x)=\lambda\}\) that \(x\in
D'\) and \(A\) does not find a witness for \(x\) the \hv{error rate} of \(A\)
on \(D'\)\@. We say that a pair \(\pair{x,w}\) \hv{convicts} \(A\) on \(D'\) if
\(R(x,w)\) and \(x \in D'\) hold, but \(A(x)=\lambda\)\@.
%
\begin{alabsubtext}{Theorem}{4}{}
Let \(G\) be a Monte Carlo generator for a distributional \ComplexityClass{NP} search
problem {\Rmu} and let \(D\) be the domain of \(R\)\@. Then there exists
a polynomial-time probabilistic transducer \(T\) such that the
following statements hold for any search algorithm \(A\)\@.
\begin{itemize}
\item If the error rate of \(A\) on \(D^{\leq l}\) is at least \(1/m\),
then for all \(n\), \(T\) on input \(\pair{1^n,1^m,1^l}\) produces with
probability at least \(1-2^{-n}\) a sequence of pairs, at least one
of which convicts \(A\) on \(D^{\leq l}\)\@.
\item If additionally, \(A\) is polynomial on \(\mu\)-average then for
some polynomial \(t\), the sequence contains with probability at
least \(1-2^{-n}\) a pair \(\pair{x,w}\) that convicts \(A\) on \(D^{\leq
l}\) and for which \(A(x)\) stops after at most \(t(l,m)\) steps\@.
\end{itemize}
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Theorem}{4}{}
\prooffontshape
\apar{Proof of Theorem 4-1}
Let \(G\) be a Monte Carlo generator for {\Rmu} and let \(q\) be a
polynomial such that \(|w|=q(|x|)\) for all solutions \(w\) of \(x\)\@. By
Definition~2(3), there is a polynomial \(p\) such
that for all instances \(x\),
\begin{align*}
&\mu(x)/p(|x|) \leq \mu_G(x)\leq p(|x|)\cdot
\mu(x).
\end{align*}
Since Definition~3(1) guarantees that for all but
finitely many \(x\in D\), \(\mu_G(x,\lambda)\leq\mu_G(x)/2\), we can
assume that, for all instances \(x \in D\),
\begin{align*}
&\sum_{w\in\Sigma^{q(|x|)}} \mu_G(x,w) \geq \mu_G(x)/2 \geq
\mu(x)/(2p(|x|)).
\end{align*}
Since the error rate of \(A\) on \(D^{\leq l}\) is at least \(1/m\), it
follows that \(G\) outputs with probability at least \(1/(2mp(l))\) a
pair \(\pair{x,w}\) with \(x \in D^{\leq l}\) and \(A(x)=\lambda\)\@.
Therefore, during \(2mnp(l)\) independent computations, \(G\) will
output with probability at least \(1-(1-1/(2mp(l)))^{2mnp(l)}>
1-2^{-n}\) some pair \(\pair{x,w}\) with \(x\in D^{\leq l}\) and
\(A(x)=\lambda\)\@. Note that since the running time of \(G\) is
polynomially bounded in the length of the output, \(T\) can suspend
the simulation of \(G\) after a polynomial number (in \(l\)) of steps\@.
%
\apar{Proof of Theorem 4-2}
Note that since \(\mu\) and \(\mu_G\) are equivalent, the assumption
that \(A\) is polynomial on \(\mu\)-average implies that \(A\) is
polynomial on \(\mu_G\)-average, i.e., there exist constants \(k,c>0\)
such that
\(\sum_{x\neq\lambda}\frac{\text{time}_A(x)^{1/k}}{|x|}\cdot\mu(x)q(n)} \mu(x)\leq 1/n\),
\end{quote}
and if the error rate of \(A\) on \(D\) (instead of \(D^{\leq l}\) as in
the above theorem) is at least \(1/m\), then it follows that \(\mu\{x\in
D^{\leq q(2m)}\mid A(x)=\lambda\} \geq1/(2m)\)\@. Hence, letting
\(T'(1^n,1^{m}) = T(1^n,1^{2m},1^{q(2m)})\), the above theorem implies
that with probability at least \(1-2^{-n}\), \(T'(1^n,1^{m})\) produces an
instance \(x\in D^{\leq q(2m)}\) with \(A(x)=\lambda\)\@. \smallskip
%
\apar{4-9}
Next we consider distributional {\ComplexityClass{RP}} search problems {\Rmu} (meaning
that the binary relation \(R\) witnesses that its domain \(D_R\) belongs
to \(\ComplexityClass{RP}\), i.e., each \(x\in D_R\) has at least \(2^{q_R(|x|)-1}\) many
solutions)\@. As shown below, instance/witness pairs for {\Rmu} can be
easily generated by sampling a string \(x\) according to \(\mu\) and
randomly generating a witness for \(x\)\@.
%
\begin{alabsubtext}{Proposition}{2}{} \label{rp-mc-gen}
For every distributional\/ {\ComplexityClass{RP}} search problem {\Rmu} there exists
a Monte Carlo generator\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{2}{}
\prooffontshape
Let \(D\) be the domain of \(R\) and let \(q\) be a polynomial such that
\(|w|=q(|x|)\) for all solutions \(w\) of \(x\)\@. Let \(M\) be a
probabilistic Turing machine witnessing that \(\mu\) is p-samplable\@.
Consider the following {instance} generator \(G\) for \((R,\mu)\)\@.
\begin{quote}
Simulate \(M\)\@. In case \(M\) outputs an instance \(x\), \(|x|=n\), guess
randomly and independently a sequence \(w_1,\dots,w_n\) of strings
of length \(q(n)\) and determine the lexicographically largest
string \(w\) in the set \(\{\lambda\}\cup\{w_i\mid 1\leq i\leq n\) and
\(R(x,w_i)\) holds\(\}\)\@. Output the pair \(\pair{x,w}\)\@.
\end{quote}
It is easy to verify that \(G\) is a Monte Carlo test instance
generator for \((R,\mu)\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 2}}
\end{alabsubtext}
%
\apar{4-10}
As shown in the next theorem, it is unlikely that every \(\ComplexityClass{NP}\) search
problem has a Monte Carlo generator under the standard probability
distribution\@. In fact, Monte Carlo generators can only exist if the
domain of the search problem belongs to {\co\AM}\@. As a consequence,
Monte Carlo generators do not exist for \(\ComplexityClass{NP}\) search problems with an
\ComplexityClass{NP}-complete domain unless the polynomial-time hierarchy collapses to
the second level \cite{cj99-04-09,cj99-04-28}\@. For definitions of the class
{\AM} and of Arthur-Merlin games we refer the reader to \cite{cj99-04-11} or
to a textbook, such as \cite{cj99-04-05} or \cite{cj99-04-22}\@.
%
\begin{alabsubtext}{Theorem}{5}{} \label{mcg-am-thm}
If there exists a Monte Carlo generator for an \(\ComplexityClass{NP}\) search problem
\(R\) under the standard distribution, then the domain of \(R\) is
contained in \(\co\AM\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Theorem}{5}{}
\prooffontshape
\apar{Proof of Theorem 5-1}
Let \(G\) be a Monte Carlo generator for \((R,\must)\) and let \(p\) be a
polynomial bounding the running time of \(G\)\@. From Definition
2(3), we can conclude that, for some polynomial \(s\),
\[
\frac{1}{s(|x|) 2^{|x|}} \leq \mu_G(x) \leq
\frac{s(|x|)}{2^{|x|}}.
\]
From Definition 3(1), we know that, for any polynomial
\(q\),
\[
\mu_G(x,\lambda)\leq\frac{\mu_G(x)}{q(|x|)}
\]
holds for all but finitely many \(x\) in the domain \(D\) of \(R\)\@. Now
let \(S_x\) be the set of strings \(r\in\Sigma^{p(n)}\) such that \(G\)
outputs the pair \(\pair{x,\lambda}\) when using \(r\) as random source\@.
Let \(q(n)\) be the polynomial \(2^5s(n)^2\)\@. Then, for all but finitely
many \(x\), the following implications are true (where \(n=|x|\))\@.
\[
\begin{array}{c@{\;\;}c@{\;\;}c@{\;\;}c@{\;\;}l@{\;\;}c@{\;\;}l}
\ds x \in D &\ds\Rightarrow&\ds \mu_G(x,\lambda)\leq\frac{s(n)}{2^nq(n)}
&\ds\Rightarrow&\ds \card{S_x} \leq\frac{s(n)2^{p(n)}}{2^nq(n)}
&\ds\Rightarrow&\ds \card{S_x} \leq \frac{2^{p(n)-n-5}}{s(n)},\\[4mm]
\ds x\not\in D &\ds\Rightarrow&\ds \mu_G(x,\lambda)\geq\frac{1}{s(n)2^n}
&\ds\Rightarrow&\ds \card{S_x} \geq\frac{2^{p(n)}}{s(n)2^n}
&\ds\Rightarrow&\ds \card{S_x} \geq \frac{2^{p(n)-n}}{s(n)}.\\[4mm]
\end{array}
\]
%
Therefore, letting \(k(n)=p(n)-n-\lceil\log_2 s(n)\rceil-2\), it follows,
for all but finitely many \(x\in D\), that \(\card{S_x}\leq2^{k(n)-2}\)
and for all but finitely many \(x\not\in D\), that
\(\card{S_x}\geq2^{k(n)+2}\)\@.
%
\apar{Proof of Theorem 5-2}
Now consider for a fixed instance \(x\) (for which the above
implications are true) the random variable \(Z\) that for a uniformly
chosen \(h\) in \(\Hash[p(n),k(n)]\) gives the number of
strings \(r\in S_x\) for which \(h(r)=0^{k(n)}\)\@. Then the expectation
of \(Z\) is \(E(Z)=2^{-k(n)}\card{S_x}\) and the variance is
\(V(Z)=2^{-k(n)}(1-2^{-k(n)})\card{S_x}0\) for all \(x\) and since
\(G\) is a Las Vegas generator for
\((R,\must)\), \(M\) accepts \(x\) if and only
if \(x\in\overline{D}\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 5}}
\end{alabsubtext}
%
\begin{alabsubtext}{Corollary}{2}{}
If there exists a Las Vegas generator
for some distributional \ComplexityClass{NP} search
problem \((R,\must)\) whose domain is
\ComplexityClass{NP}-complete, then
\(\ComplexityClass{NP} =\co{\ComplexityClass{NP}}\)\@.
\end{alabsubtext}
%
\apar{4-16}
As a final remark, it is not hard to see that, for every set \(L\) that
has self-computable witnesses (in the sense of \cite{cj99-04-04,cj99-04-12}),
there is a Las Vegas generator using \(L\) as an oracle\@. For example,
since graph isomorphism (GI) and graph automorphism (GA) have
self-computable witnesses and non-adaptively self-computable
witnesses, respectively
(cf. \cite{cj99-04-27} and \cite[Lemmas~5.2 and~5.3]{cj99-04-24}),
there exist Las Vegas generators for the
corresponding search problems that ask adaptive (non-adaptive) queries
to GI (GA, respectively)\@.
%
\asection{5}{Hard Problems for Test Instance Generation}
%
\apar{5-1}
In this section we consider the question
of how the test instance generation
problem for one \ComplexityClass{NP} search problem
\((S,\nu)\) can be reduced to the test
instance generation problem for another
\ComplexityClass{NP} search problem \((R,\mu)\)\@. Inspired
by \cite{cj99-04-35}, we provide a way to
transform a Monte Carlo or Las Vegas
generator for \((S,\nu)\) to a generator for
any problem \((R,\mu)\) that reduces to
\((S,\nu)\) via the following kind of
reduction\@.
%
\begin{alabsubtext}{Definition}{4}{}\label{genred}
Let \((R,\mu)\) and \((S,\nu)\) be distributional \ComplexityClass{NP} search problems\@.
Let \(D_R\) (\(D_S\)) be the domain of \(R\) (\(S\), respectively)\@. Then \((R,\mu)\) is
\hv{generator reducible} to \((S,\nu)\) if there exist functions \(f, g
\in \FP\) and polynomials \(p,q\) such that:
\begin{enumerate}
\item For all \(y\) and \(r\), if \(f(y,r) \not= \bot\), then \(f(y,r) \in
D_R\) if and only if \(y \in D_S\)\@.
\item For all \(y\), \(v\), and \(r\), if \(f(y,r) \not= \bot\) and \(S(y,v)\),
then \(R(f(y,r),g(y,v,r))\)\@.
\item \(f\) is honest, {i.e.}, for all \(y,r\), if \(f(y,r) \not= \bot\)
then \(p(|f(y,r)|)\geq |y|\)\@.
\item \(\mu\) is equivalent to \(\phi\), where \(\phi\) is the
distribution induced by \(\nu\), \(q\), and \(f\), {i.e.},
\(\phi(x)=\sum_{y,r}\nu(y)2^{-|r|}\), where the sum ranges over all
strings \(y,r\) such that \(|r|=q(|y|)\) and \(f(y,r)=x\)\@.
\end{enumerate}
\end{alabsubtext}
%
Two distributional \ComplexityClass{NP} search problems
are called \hv{generator equivalent} if
they are generator reducible to each
other\@.
%
\apar{5-2}
In the sequel, we assume without loss of generality that \(f(y,r r') =
f(y,r)\) for all \(y,r,r'\) such that \(|r|=q(|y|)\), and that
\(g(y,\lambda,r) = \lambda\) for all \(y,r\)\@.
%
\begin{alabsubtext}{Proposition}{6}{}
If \((R,\mu)\) is generator reducible to
\((S,\nu)\) and if there exists a Monte
Carlo (Las Vegas) generator for
\((S,\nu)\), then there exists a Monte
Carlo (Las Vegas, respectively) generator for
\((R,\mu)\)\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{6}{}
\prooffontshape
\apar{Proof of Proposition 6-1}
Assume that \((R,\mu)\) reduces to
\((S,\nu)\) via the functions \(f\), \(g\) and
the polynomials \(p, q\)\@. Let \(G\) be a
generator for \((S,\nu)\)\@. Consider the
following algorithm \(G'\)\@.
\begin{quote}
Simulate \(G\)\@. If \(G\) produces a pair
\(\pair{y,v}\), then randomly guess a
string \(r \in \{0,1\}^{q(|y|)}\)\@. If
\(f(y,r) \neq \bot\), then output the
pair \(\pair{f(y,r), g(y,v,r)}\);
otherwise, loop forever\@.
\end{quote}
We claim that \(G'\) is an {instance} generator for \((R,\mu)\) (of the
same type as \(G\))\@. By Definition~4 we know that if \(G'\)
outputs \(\pair{x,w}\), then either \(R(x,w)\) or \(w=\lambda\)\@. Let \(C(x)
= \set{(y,r)}{f(y,r)=x, |r|=q(|y|)}\) and let \(t\) and \(t'\) be
polynomials such that \(\mu_G(x)\geq \nu(x)/t(|x|)\) and \(\phi(x)
\geq\mu(x)/t'(|x|)\), where \(\phi\) is the distribution induced by
\(f\), \(q\), and \(\nu\)\@. Then we have, for all \(x\),
\begin{eqnarray*}
\mu_{G'}(x) %
&= & \sum_{(y,r) \in C(x)} \mu_G(y) \, 2^{-|r|} \\ %
&\geq& \sum_{(y,r) \in C(x)} \frac{1}{t(|y|)} \nu(y) \, 2^{-|r|} \\ %
&\geq& \frac{1}{t(p(|x|))} \underbrace{\sum_{(y,r) \in C(x)} \nu(y) \,
2^{-|r|}}_{\phi(x)} \\ %
&\geq& \frac{1}{t(p(|x|)) \, t'(|x|)} \mu(x).
\end{eqnarray*}
Thus, \(\mu \pdom \mu_{G'}\), and since
\(\mu_{G'} \pdom \mu\) can be derived in a
similar way, \(\mu_{G'} \pdequiv \mu\)
follows\@. Furthermore, since \(f\) is
honest, the time needed by \(G'\) to
output a pair \(\pair{x,w}\) is
polynomially bounded in \(|x|\)\@.
%
\apar{Proof of Proposition 6-2}
Assume now that \(G\) is a Monte Carlo
generator for \((S,\nu)\)\@. We have to show
that for any polynomial \(t\) and almost
all \(x\in D_R\), \(\mu_{G'}(x) \geq t(|x|)
\cdot \mu_{G'}(x,\lambda)\)\@. Let \(s\) be a
polynomial such that \(|f(y,r)|\leq
s(|y|)\) for all \(y\) and \(r\),
\(|r|=q(|y|)\)\@. Since \(G\) is a Monte Carlo
generator, it holds for almost all \(y\in
D_S\) that \(\mu_G(y) \geq t(s(|y|)) \cdot
\mu_G(y,\lambda)\)\@. Then, for all
sufficiently large \(x \in D_R\) it
follows that
\begin{align*}
\mu_{G'}(x) %
&= \sum_{(y,r) \in C(x)} \mu_G(y) \, 2^{-|r|} \\ %
&\geq \sum_{(y,r) \in C(x)} t(s(|y|)) \, \mu_G(y,\lambda) \,
2^{-|r|} \\ %
&\geq t(|x|) \sum_{(y,r) \in C(x)} \mu_G(y,\lambda) \, 2^{-|r|} \\ %
&= t(|x|) \cdot \mu_{G'}(x,\lambda).
\end{align*}
Thus, it follows that \(G'\) is a Monte
Carlo generator for \(\Rmu\)\@.
%
\apar{Proof of Proposition 6-3}
In the case that \(G\) is a Las Vegas
generator for \((S,\nu)\), it follows
immediately from the properties of \(f\)
and \(g\) that \(G'\) is a Las Vegas
generator for \(\Rmu\)\@.
{\nopagebreakbeginpar\markendlst{Proof of Proposition 6}}
\end{alabsubtext}
%
Another important property of the generator reducibility is transitivity\@.
%
\begin{alabsubtext}{Proposition}{7}{}
The generator reducibility is transitive\@.
\end{alabsubtext}
%
\begin{alabsubtext}{Proof of Proposition}{7}{}
\prooffontshape
Let \((R_i,\mu_i)\), \(i=1,2,3\), be
distributional \ComplexityClass{NP} search problems\@.
Let \(D_i\) be the domain of \(R_i\),
\(i=1,2,3\)\@. Assume that \((R_i,\mu_i)\) is
generator reducible to
\((R_{i+1},\mu_{i+1})\) via the functions
\(f_i, g_i \in \FP\) and the polynomials
\(p_i, q_i\), \(i=1,2\)\@. Consider the
functions \(f\) and \(g\) defined as
\begin{align*}
f(y,r_1,r_2) &=
\begin{cases}
f_1(f_2(y,r_2),r_1), & f_2(y,r_2)
\neq \bot,\\ %
\bot, & \text{otherwise\@.}
\end{cases} \\ %
\intertext{and}
g(y,v,r_1,r_2) &=
\begin{cases}
g_1(f_2(y,r_2),g_2(y,v,r_2),r_1), & R_3(y,v), f(y,r_1,r_2) \neq \bot, \\ %
\lambda, & \text{otherwise}.
\end{cases}
\end{align*}
Since \(f_1\) and \(f_2\) satisfy the
conditions of Definition 4,
it follows that \(f\) and \(g\) fulfill the
following conditions for all \(y,r_1,r_2\)
with \(f(y,r_1,r_2) \neq \bot\)\@.
\begin{enumerate}
\item \(\displaystyle y \in D_3
\Leftrightarrow f_2(y,r_2) \in D_2
\Leftrightarrow
\underbrace{f_1(f_2(y,r_2),r_1)}_{f(y,r_1,r_2)}
\in D_1\),
\item \(\begin{array}[t]{lll} R_3(y,v)
&\Rightarrow& R_2(f_2(y,r_2),
g_2(y,v,r_2))\\[1mm] &\Rightarrow&
R_1(\underbrace{f_1(f_2(y,r_2),r_1)}_{f(y,r_2
r_1)}, \underbrace{g_1(f_2(y,r_2),
g_2(y,v,r_2), r_1)}_{g(y,v,r_2
r_1)}),
\end{array}\)
\item \(\displaystyle |y| \leq
p_2(p_1(|f_1(f_2(y,r_2),r_1)|)) \leq
p_2(|f_2(y,r_2)|)\), and
\item \(\displaystyle \mu_1 \pdequiv
\phi(x)\), where \(\phi\) is the
distribution induced by \(f\), \(q\), and
\(\mu_3\)\@. Indeed, since \(|f_2(y,r_2)|\)
is polynomially bounded in \(|y|\),
there exists a polynomial \(q\) such
that \(q_2(|y|) + q_1(|f_2(y,r_2)|)
\leq q(|y|)\) for all \(y,r_2\) with
\(f_2(y,r_2)\neq\bot\)\@. For a given \(x\),
consider the following sets:
\begin{align*}
A(x) &=
\set{(y,r_1,r_2)}{f(y,r_1,r_2)=x,
|r_2|=q_2(|y|),
|r_1|=q(|y|)-q_2(|y|)}, \\ %
B(x) &=
\set{(y,r_1,r_2)}{f(y,r_1,r_2)=x,
|r_2|=q_2(|y|),
|r_1|=q_1(|f_2(y,r_2)|)}, \\ %
C_1(x) &= \set{(z,r_1)}{f_1(z,r_1)=x,
|r_1|=q_1(|z|)}, \\ %
C_2(z) &= \set{(y,r_2)}{f_2(y,r_2)=z,
|r_2|=q_2(|y|)}.
\end{align*}
For \(i=1, 2\), let \(\phi_i\) be the
distribution induced by \(f_i\), \(q_i\), and
\(\mu_{i+1}\)\@. Since \(\m