%MWM 18 July 1999
% Preamble: 12pt article.  Originally the obsolete LaTeX~2.09
% \documentstyle, which forces LaTeX2e into 2.09 compatibility mode;
% every command used in this file (\newcommand, \newenvironment,
% \newtheorem, two-letter font switches) is valid native LaTeX2e, so
% \documentclass is the correct modern form.
\documentclass[12pt]{article}
%% Whoa!!!
% Allow hyphenation breaks no closer than 2 chars from the left /
% 3 chars from the right end of a word.
\lefthyphenmin=2
\righthyphenmin=3
%au query: numbered author-query margin notes.
\newcounter{querno}
\setcounter{querno}{1}
% \auquery{text}: margin note "<n>. au: text", then advance the counter.
\newcommand{\auquery}[1]{\marginpar{\raggedright\scriptsize
\thequerno. au: #1}\addtocounter{querno}{1}}
% \query{text}: unnumbered margin note.
\newcommand{\query}[1]{\marginpar{\raggedright\scriptsize #1}}
%operators and MM macros
% \operator{name} typesets "name" as a multi-letter math operator
% (italic, with \mathop spacing); \nolimits keeps sub/superscripts at
% the side rather than above/below in display style.
\def\operator#1{\mathop{\it#1}\nolimits}
% Call/response event names used in the Definitions section.
\newcommand{\peek}{{\operator{peek}}}
\newcommand{\arglist}{{\operator{arglist}}}
\newcommand{\call}{{\operator{call}}}
% The "it" suffix avoids the TeX primitives \read and \write
% (which \newcommand would refuse to redefine).
\newcommand{\readit}{{\operator{read}}}
\newcommand{\writeit}{{\operator{write}}}
\newcommand{\resp}{{\operator{resp}}}
\newcommand{\retlist}{{\operator{retlist}}}
\newcommand{\ret}{{\operator{ret}}}
\newcommand{\ack}{{\operator{ack}}}
% Convention (stated in the Definitions section): uppercase names
% denote generic operations, lowercase names their instantiations.
\newcommand{\OP}{{\operator{OP}}}
\newcommand{\op}{{\operator{op}}}
\newcommand{\ops}{{\operator{ops}}}
\newcommand{\WRITE}{{\operator{WRITE}}}
\newcommand{\READ}{{\operator{READ}}}
\newcommand{\RSUC}{{\operator{RSUC}}}
\newcommand{\RCUS}{{\operator{RCUS}}}
\newcommand{\rsuc}{{\operator{rsuc}}}
\newcommand{\rcus}{{\operator{rcus}}}
\newcommand{\RCUL}{{\operator{RCUL}}}
\newcommand{\RLUS}{{\operator{RLUS}}}
\newcommand{\rcul}{{\operator{rcul}}}
\newcommand{\rlus}{{\operator{rlus}}}
\newcommand{\uc}{{\operator{uc}}}
\newcommand{\UC}{{\operator{UC}}}
\newcommand{\rs}{{\operator{rs}}}
\newcommand{\RS}{{\operator{RS}}}
\newcommand{\Maxdom}{{\operator{Maxdom}}}
\newcommand{\NC}{{\operator{NC}}}
\newcommand{\CG}{{\operator{CG}}}
\newcommand{\RCG}{{\operator{RCG}}}
\newcommand{\VC}{{\operator{VC}}}
\newcommand{\MOP}{{\operator{MOP}}}
\newcommand{\mop}{{\operator{mop}}}
\newcommand{\AOP}{{\operator{AOP}}}
\newcommand{\aop}{{\operator{aop}}}
\newcommand{\FIND}{{\operator{FIND}}}
\newcommand{\INS}{{\operator{INS}}}
\newcommand{\UP}{{\operator{UP}}}
\newcommand{\DoMop}{{\operator{DoMop}}}
\newcommand{\DoOp}{{\operator{DoOp}}}
\newcommand{\INC}{{\operator{INC}}}
\newcommand{\HALF}{{\operator{HALF}}}
\newcommand{\FOP}{{\operator{FOP}}}
\newcommand{\fop}{{\operator{fop}}}
\newcommand{\SELFOP}{{\operator{SELFOP}}}
\newcommand{\selfop}{{\operator{selfop}}}
\newcommand{\scratch}{{\operator{scratch}}}
\newcommand{\shift}{{\operator{shift}}}
\newcommand{\WOP}{{\operator{WOP}}}
\newcommand{\Wop}{{\operator{Wop}}}
\newcommand{\SOP}{{\operator{SOP}}}
\newcommand{\Sop}{{\operator{Sop}}}
\newcommand{\WADD}{{\operator{WADD}}}
\newcommand{\WMUL}{{\operator{WMUL}}}
\newcommand{\Wadd}{{\operator{Wadd}}}
\newcommand{\Wmul}{{\operator{Wmul}}}
% NOTE(review): \wMUL expands to uppercase "WMUL", duplicating \WMUL
% instead of following the case convention above; presumably a typo
% for {\operator{wmul}} -- confirm against its uses before changing.
\newcommand{\wMUL}{{\operator{WMUL}}}
\newcommand{\WREAD}{{\operator{WREAD}}}
\newcommand{\Wread}{{\operator{Wread}}}
\newcommand{\WAOP}{{\operator{WAOP}}}
\newcommand{\Waop}{{\operator{Waop}}}
% Template for adding further operator names:
%\newcommand{\}{{\operator{}}}
%\setlength{\textheight}{9.0in}
%\setlength{\topmargin}{0.25in}
%\makeatletter
%\makeatother
% NOTE(review): presets the footnote counter to 1, so the first
% \footnote in the document prints as 2 (the counter is stepped before
% use) -- presumably footnote 1 is reserved elsewhere; confirm intent.
\setcounter{footnote}{1}
% All theorem-like environments share the lemma counter, giving a
% single numbering sequence across lemmas, claims, theorems,
% corollaries, and rules.
\newtheorem{lemma}{Lemma}
\newtheorem{claim}[lemma]{Claim}
\newtheorem{theorem}[lemma]{Theorem}
\newtheorem{corollary}[lemma]{Corollary}
\newtheorem{formalrule}[lemma]{Rule}
% End-of-proof marker: a filled 2mm x 2mm rule pushed to the right margin.
\newcommand{\qedsymb}{\mbox{ }~\hfill~{\rule{2mm}{2mm}}}
% proof: trivlist item labelled "Proof", closed with \qedsymb.
\newenvironment{proof}{\begin{trivlist}
\item[\hspace{\labelsep}{\bf\noindent Proof\,}]
}{\qedsymb\end{trivlist}}
% proofof{X}: like proof, but labelled "Proof of X".
\newenvironment{proofof}[1]{\begin{trivlist}
\item[\hspace{\labelsep}{\bf\noindent Proof of #1\,}]
}{\qedsymb\end{trivlist}}
% Article title; the commented-out spacing/bold tweaks are retained
% from drafting.
\title{%\setlength{\baselineskip}{1pc}
%\bf
Time Bounds for Strong and Hybrid Consistency for Arbitrary Abstract
Data Types\\}
% Author block.  Postal-address lines are kept but commented out;
% the email address below the URL is typeset as plain text.
\author{
{Martha J. Kosa} \\
%{Department of Computer Science}\\
{(Tennessee Technological University)}\\
\texttt{http://www.csc.tntech.edu/\char`\~mjkosa/}\\
mjk9504@tntech.edu
%{Box 5101}\\
%{Cookeville, TN 38505}\\
%{mjkosa@tntech.edu}\\%email addr correct --MM
}
\date{2 September 1999}
\begin{document}
%%
%% Generic Titlepage, Insert after \begin{document}
%%
% \registered: registered-trademark symbol as a superscript, built by
% overlaying a tiny upright "R" on the \mathhexbox20D circle via the
% plain-TeX \ooalign trick; the trailing % signs suppress spurious
% spaces inside the definition.
\def\registered{\({}^{\ooalign%
{\hfil\raise.07ex\hbox{{\tiny\textup{R}}}%
\hfil\crcr\scriptsize\mathhexbox20D\cr}}\)}
\begin{titlepage}
\pagenumbering{roman}
\null\vfil\vskip 60pt
\begin{center}
{\Huge\bf Chicago\nolinebreak[1] Journal of Theoretical Computer\nolinebreak[1] Science\par}
\vskip 3em
{\huge\it The MIT Press\par}
\end{center}
\par\vskip 1.5em
{\Large
\begin{center}
Volume 1999, Article 9\\
\textit{Time Bounds for Strong and Hybrid Consistency for Arbitrary Abstract Data Types}
\end{center}
}
\par\noindent
ISSN 1073-0486. MIT Press Journals, Five Cambridge Center, Cambridge, MA
02142-1493 USA; (617)253-2889;
\emph{journals-orders@mit.edu, journals-info@mit.edu}.
Published one article at a time in \LaTeX\ source form on the
Internet. Pagination varies from copy to copy. For more
information and other articles see:
\begin{itemize}
\item \emph{http://mitpress.mit.edu/CJTCS/}
\item \emph{http://www.cs.uchicago.edu/publications/cjtcs/}
\item \emph{ftp://mitpress.mit.edu/pub/CJTCS}
\item \emph{ftp://cs.uchicago.edu/pub/publications/cjtcs}
\end{itemize}
\par\noindent
The \emph{Chicago Journal of Theoretical Computer Science} is
abstracted or indexed in \emph{
Research Alert,\registered
SciSearch, \registered
Current Contents\registered/Engineering
Computing \& Technology}, and \emph{CompuMath Citation Index.\registered}
\par\noindent\copyright 1999 The Massachusetts Institute of Technology.
Subscribers are licensed to use journal articles in a variety of
ways, limited only as required to ensure fair attribution to authors
and the journal, and to prohibit use in a competing commercial
product. See the journal's World Wide Web site for further
details. Address inquiries to the Subsidiary Rights Manager, MIT
Press Journals; (617)253-2864;
\emph{journals-rights@mit.edu}.\pagebreak[2]
\par\noindent The \emph{Chicago Journal of Theoretical Computer Science}
is a peer-reviewed scholarly journal in theoretical computer
science. The journal is committed to providing a forum for
significant results on theoretical aspects of all topics in computer
science.
\par
\begin{trivlist}
\item[\emph{Editor-in-Chief:}] Janos Simon
\item[\emph{Consulting Editors:}]
Joseph Halpern, Eric Allender, Raimund Seidel
\item[\emph{Editors:}]
\begin{tabular}[t]{lll}
Martin Abadi & Greg Frederickson & John Mitchell \\
Pankaj Agarwal & Andrew Goldberg & Ketan Mulmuley \\
Georg Gottlob & Gil Neiger & Tetsuo Asano \\
Vassos Hadzilacos & David Peleg & Laszl\'o Babai \\
Juris Hartmanis & Andrew Pitts & Eric Bach \\
Maurice Herlihy & James Royer & Stephen Brookes \\
Ted Herman & Alan Selman & Jin-Yi Cai \\
Stephen Homer & Nir Shavit & Anne Condon \\
Neil Immerman & Eva Tardos & Cynthia Dwork \\
Howard Karloff & Sam Toueg & David Eppstein \\
Philip Klein & Moshe Vardi & Ronald Fagin \\
Phokion Kolaitis & Stuart Kurtz & Jennifer Welch \\
Lance Fortnow & Stephen Mahaney & Pierre Wolper \\
Steven Fortune & Michael Merritt
\end{tabular}
\vspace{1ex}
\item[\emph{Managing Editor:}] Michael J.\ O'Donnell
\vspace{1ex}
\item[\emph{Electronic Mail:}] \emph{chicago-journal@cs.uchicago.edu}
\end{trivlist}
\vfil\null
\end{titlepage}
\maketitle
\pagenumbering{arabic}
%%
%% End Titlepage code
%%
\thispagestyle{empty}
\subsection*{\centering Abstract}
We compare the costs of three well-known consistency guarantees (sequential
consistency, linearizability, and hybrid consistency) for shared memory objects
of {\it arbitrary\/} data types, generalizing previous work on specific
types. We
evaluate the impact of the desired consistency guarantee, the amount
of synchrony among the users of the objects, and algebraic properties
of a type on the worst-case time complexity of
operations of the type. These algebraic properties are sufficient for proving
many lower bounds and some upper bounds (even 0,
meaning only local computation is required) on the worst-case times. Under
certain reasonable assumptions, sequential consistency and linearizability are
equally costly in perfectly synchronized systems, linearizable operations are
more expensive in approximately synchronized systems than in perfectly
synchronized systems, sequentially consistent operations are cheaper than
linearizable operations in approximately synchronized systems, and hybrid
consistency is not necessarily cheaper than sequential consistency or
linearizability.
\section{Introduction}
\subsection{Background material}
In concurrent systems, processes
need to share information with each other.
Because of software engineering concerns,
using shared data objects
from arbitrary abstract data types is a popular
method for organizing this information.
However, processes may not have access to a
physical shared memory, because they may be running on computers
separated by long distances or their computer architecture may not
provide a physical shared
memory. They must simulate a physical shared
memory with a virtual shared memory.
A consistency
guarantee tells the processes
using the virtual shared objects what they can
expect about the values returned
as the result of applying operations, even when
operations are executed concurrently
on the same virtual object. Researchers
have defined many types of consistency
guarantees---some strong, some
weak, and some in between. A
strong guarantee is at least as expensive to
implement as a weaker guarantee,
but it may be impossible (or very hard) to
solve a problem by using virtual shared objects providing a weaker
guarantee (see~\cite{cj99-09-04}).
Sequential consistency and linearizability are two strong
consistency guarantees. They ensure
that operations appear to have executed
atomically in some sequential order
that reflects the order in which operations
were executed at each process. In addition,
linearizability ensures that
this sequential order preserves the relative ordering of all
nonoverlapping operations, even
those executed by different processes.
Sequential consistency is a very
popular consistency guarantee, used for
virtual shared memories and multiprocessor
caches (see~\cite{cj99-09-02,cj99-09-05}). However,
linearizability is a
stronger, more intuitive guarantee,
because sequential consistency provides
no clues about the relative ordering of
nonoverlapping operations performed
by different processes. Although linearizability is more
powerful than sequential
consistency, it is still interesting to study both
guarantees; the difference between their definitions is very slight.
Attiya and Friedman~\cite{cj99-09-04} investigated
practical shared memory consistency
models (refer to~\cite{cj99-09-01,cj99-09-07,cj99-09-09,cj99-09-10})
and formally
defined hybrid consistency, a generalization of those models;
it systematically
weakens
either sequential consistency or linearizability.
In hybrid
consistency, there are strong and weak operations.
Strong operations appear to satisfy
a strong consistency guarantee, while
weak operations can appear to execute
in different orders at different
processes as long as the relative
ordering of weak and strong operations
executed by the same process is preserved.
System designers should understand the costs of providing these consistency
guarantees in order to choose
a cost-effective consistency guarantee that
best suits the needs of their applications.
We focus on (distributed)
message-passing implementations
because they are scalable. Each process has a local memory, and all
processes run a protocol to emulate a physical shared memory
with a given consistency guarantee.
Attiya and Welch~\cite{cj99-09-05} compared the costs
of implementing sequential consistency and linearizability
for basic read/write
objects, queues, and stacks. They measured the worst-case
time for operations
to complete, giving upper and
lower bounds to show that the amount of
process synchrony caused the cost difference between
sequential consistency and linearizability to vary. With perfect
synchrony, their costs are
equal. However, sequential consistency is cheaper
when processes are only approximately synchronized.
Mavronicolas and
Roth~\cite{cj99-09-17} continued
the Attiya-Welch
comparative study, improving some of their
lower bounds and giving a distributed implementation of linearizable
read/write objects in a system
with only approximately synchronized processes.
Attiya and Friedman~\cite{cj99-09-04} compared the cost of hybrid
consistency with the costs of sequential consistency and
linearizability for read/write
objects, showing that hybrid consistency is
cheaper when mostly weak operations are executed.
Friedman~\cite{cj99-09-08} compared the cost of
hybrid consistency with the costs of stronger guarantees
for read/modify/write objects, queues, and stacks,
showing that hybrid consistency is not cheaper, even when mostly weak
operations are executed.
\subsection{Overview of results}
Instead of concentrating on specific data types, as in earlier
work, we study the worst-case response times for operations of
{\it arbitrary\/} abstract
data types in sequentially consistent, linearizable,
and hybrid consistent implementations. We show that
algebraic properties of
the operations of an abstract data type
are sufficient for proving many lower
bounds and some upper bounds on
the worst-case response times. Our work
generalizes and unifies previously
known results, for
example, \cite{cj99-09-04,cj99-09-05,cj99-09-08,cj99-09-17}.
Some of these algebraic
properties, such as equivalence and commutativity,
were defined in Weihl's study of concurrency control and recovery
in transaction systems~\cite{cj99-09-18}.
Weihl's definitions helped us to identify
interesting new algebraic properties of operations.
We now give a brief description of our results.
Let $d$ be the maximum message delay of
a system of processes connected by a
network, and let $u$ be the
uncertainty in the message delay ($0 \le u < d$). This
implies that the actual message delay may vary between $d-u$ and $d$.
In the first part of the paper, we
assume that processes have perfectly synchronized clocks and constant
message delays ($u=0$). Any lower bounds can
hold in systems with weaker, more realistic timing assumptions.
We consider interactions among operations
to determine the worst-case completion times for operations
of the abstract data type to be implemented.
We find several algebraic properties that
cause either a single operation or
a pair (collectively) to have a
worst-case completion time of at least $d$ or $2d$.
It is often the case that a particular operation
or group of operations of an
abstract data type is
known to be used most frequently. Thus it is
desirable to optimize (in terms
of worst-case completion time) that
operation or group of operations. We investigate conditions that
allow this operation or the operations of the group to have
worst-case completion times of $0$ (to be {\em fast}),
meaning that an operation can return immediately based on
current local information at its invoking process, in a linearizable
implementation.
Making one operation fast may require another operation to be
{\em slow}, that is, to have
a worst-case completion time of $\Omega(d)$.
Operations may be classified as accessors,
pure mutators, self-oblivious
operations, immediately
self-commuting operations, or none of the above.
An {\em accessor\/} returns information about
an object without changing
it. A {\em pure mutator\/} changes an object
without returning any information
about it. The information returned by a
{\em self-oblivious\/} operation
does not depend on operations with the same name that
are executing at nearly the
same time. The information returned by an
{\em immediately self-commuting\/}
operation does not depend on operations with
the same name that are executing simultaneously.
Figure~\ref{op-rels} shows the relationships among these
classes of operations and
describes the smallest possible worst-case completion
time (fast or slow) of a single
operation
in these classes.\footnote{In the case of
accessors and pure mutators, time is given for a
group of operations.}
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.01in}
\begin{picture}(300,300)(100,100)
\thicklines
\put(225,225){\oval(300,300)}
\put(225,225){\oval(250,250)}
\put(225,225){\oval(200,200)}
\put(225,225){\oval(90,50)}
\put(225,287.5){\oval(90,50)}
\put(195,212){\shortstack{pure\\mutator}}
\put(195,284){\shortstack{accessor}}
\put(135,331){\shortstack{immediately self-commuting}}
\put(122,358){\shortstack{not immediately self-commuting}}
\put(187,158){\shortstack{self-oblivious}}
\put(135,215){\shortstack{f\\a\\s\\t}}
\put(105.5,200){\shortstack{u\\n\\k\\n\\o\\w\\n}}
\put(79,215){\shortstack{s\\l\\o\\w}}
\end{picture}
\end{center}
\caption{Operations and smallest worst-case completion
times\label{op-rels}}
\end{figure}%1
Our general results for systems with perfectly
synchronized clocks and no
uncertainty in the message delay are that any single
self-oblivious operation
can be made fast and that no operation that does not
immediately commute with itself can be made
fast. Still unknown is the
smallest possible worst-case completion time for a single operation
that is not self-oblivious but does immediately commute with itself. However,
a large class of
well-known abstract data types has self-oblivious operations.
We can use the lower and upper bounds proven to deduce that
sequential consistency and
linearizability are equally costly in systems with
perfectly synchronized clocks when the expected
completion time to perform all
operations in a linearizable implementation
matches the lower bound for
all operations under sequential consistency.
Computing the expected completion
time requires knowledge of the frequency
of executing each operation for the abstract data type. If
$\op_1, \ldots, \op_n$ are the operations for an abstract data type and
$p_i$ is the frequency of execution for each $\op_i$,
where \(\sum_{i=1}^{n} p_i = 1\),
the expected completion time for a single
operation is \(\sum_{i=1}^{n} p_i |\OP_i| \).
In the second part of the paper, we
consider systems with positive uncertainty in the message delay
and approximately synchronized clocks.
Our goal is to find a list of
algebraic properties that cause operations
(which satisfy a property in the list)
to have positive worst-case completion
times ($\Omega(u)$) in linearizable implementations
but worst-case completion times of $0$ in
sequentially consistent implementations.
Sequential consistency is then less expensive than linearizability
in this model of process synchrony.
We exhibit two properties that cause
operations to have worst-case completion
times of at least $u/2$ in linearizable implementations.
An operation satisfying one property or
a restriction of the other property can
be implemented with a worst-case
completion time of
$0$ in a sequentially consistent implementation.
These properties hold for a large class of
well-known abstract data types. Under these conditions, sequential
consistency is cheaper than linearizability.
Finally, we
consider the weaker guarantee of hybrid consistency.
We use many of the algebraic properties used for
sequential consistency
to yield similar lower bounds for hybrid consistency,
and we deduce that hybrid consistency is not necessarily
cheaper than stronger guarantees.
\subsection{Organization of this paper}
Section~\ref{defs} contains general definitions about abstract data
types and definitions of the three correctness
guarantees. Section~\ref{perfect} discusses the costs of the strong
guarantees in systems
with perfectly synchronized clocks, with subsections for lower bounds
(sequential consistency) and upper bounds (linearizability).
Section~\ref{approx} discusses the
costs of the strong guarantees in systems
with only approximately
synchronized clocks, with
subsections for linearizability (lower bounds) and
sequential consistency
(upper bounds). Section~\ref{study-hc} discusses the
costs of hybrid consistency.
We summarize our results in
Section~\ref{summary}.
\section{Definitions}
\label{defs}
A {\em sequential specification\/}
(see \cite{cj99-09-11}) for an abstract data type contains a
set of {\em operations}, which
are ordered pairs of call-and-response events, and a set of legal
operation sequences. The legal
sequences of operations reflect the semantics of the abstract data
type. For example, for read/write objects, each read operation in a
legal sequence returns the value written by the last preceding write
operation in the sequence.
The call event represents the
call for the corresponding operation from the abstract data type,
while the response event represents
the value returned by applying the
operation. We refer to call
events and their matching response events
separately because they may not
occur atomically; they may be separated in time.
For operation $\op$, a call event is of the form $\call(\arglist)$,
where $\arglist$ is
the argument list (possibly empty) for the operation.
For example, a call event for a read operation (respectively,
write operation)
on a read/write object is of the form $\readit()$
(respectively, $\writeit(v)$).
Similarly, a response event is of the form
$\resp(\retlist)$, where $\retlist$
is the list of values returned by the
operation and is possibly empty. For example, a response event
for a read operation (respectively, write
operation) on a read/write object
is of the form $\ret(v)$ (respectively, $\ack()$).
We can combine the call-and-response events to yield
$\op(\arglist)(\retlist)$.
In our running example,
we get $\readit()(v)$ and $\writeit(v)()$.
We can partition all operations
into equivalence classes. All operations
with the same name for their call events are in the same
{generic operation}, or operation type.
$\OP_i$ denotes a generic operation; $\op_i$ and
$\op_i^j$ denote instantiations
of $\OP_i$ (i.e., nongeneric operations).
The notion of sequential specification implicitly assumes some initialization
of the object;
for example, $\readit()(3)$ is
legal for a read/write object that has $3$
as its initial value. We assume the ability to explicitly initialize an
object $O$ using any sequence of operations $\rho$ that is legal for $O$.
Formally, the object $O_\rho$ is
a {\em $\rho$-initialized version of $O$} if it
has the same set of operations as $O$, and the
operation sequence $\sigma$ is legal
for $O_\rho$ if and only if $\rho \circ \sigma$
is legal
for $O$. The assumption of arbitrary explicit initialization is not
unreasonable,
since initialization normally occurs at the beginning of program executions.
We present two basic algebraic properties of operations from~\cite{cj99-09-18}.
Let $\alpha$ and $\beta$ be operation sequences. $\alpha$
{\em looks like} $\beta$ if for every operation
sequence $\gamma$,
$\alpha \circ \gamma$ is legal only if $\beta \circ \gamma$
is legal.
If $\alpha$ looks like $\beta$, then the user of the abstract data type
never sees the result of an operation that allows the user to distinguish
$\beta$ from $\alpha$ after $\alpha$ is executed. If $\alpha$ looks like
$\beta$ and $\beta$ looks like $\alpha$, then $\alpha$ and $\beta$ are
{\em equivalent}.
Future operations cannot distinguish between $\alpha$ and
$\beta$.
The preceding definitions are stated in terms of sequential
operations on a single object. Now we need to enhance our
notation to handle multiple shared objects in concurrent systems. If $e$ is a
call event, response event, or whole operation,
then $e[O,p]$ denotes that $e$ is performed by a process $p$ on an
object $O$. If $\alpha$ is a sequence of operations, then
$\alpha[O,p]$ denotes that all operations in
$\alpha$ are performed by a process
$p$ on an object $O$. A
sequence $\tau$ of operations for a set of objects is
legal if, for
each object $O$, the restriction of $\tau$ to operations of
$O$, denoted $\tau|O$, is legal for $O$'s abstract data
type.
We now describe the system model.\footnote{Our system model is
the same as in~\cite{cj99-09-05}.}
A {\em memory consistency system\/} (MCS)
is a set of processes $P$ and a set of
clocks $\cal C$, one for each $p$ in $P$.
Our assumed system consists of a collection of nodes connected by a network.
An application program, a real-time clock, and an
MCS process are running on each node.
An MCS process can read the real-time
clock residing at its node. A {\em clock} is a monotonically increasing
function from real time to clock time (both sets are the set of real numbers).
A process cannot modify the real-time clock. Processes can only obtain
information about time from their clocks.
The following events can occur at the MCS process on node $p$:
\begin{itemize}
\item A {\em call event\/} occurs when the application program on node $p$
accesses a shared object.
\item A {\em response event\/} occurs when the MCS process on node $p$ gives
a response from a shared object to node $p$'s application program.
\item {\em Message receive events\/} are of the form
receive$(p,m,q)$ for all
messages $m$ and all nodes $q$. A message receive event occurs when the MCS
process on node $p$ receives message $m$ from the MCS process on node $q$.
\item {\em Message send events\/} are of the form send$(p,m,q)$ for all messages
$m$ and all nodes $q$. A message send event happens when the MCS process on
node $p$ sends message $m$ to the MCS process on node $q$.
\item{\em Timer set events\/} are of the form timerset$(p,T)$ for all clock
times $T$. This means that $p$ sets a timer to go off when its clock reads
$T$.
\item{\em Timer events\/}
are of the form timer$(p,T)$ for all clock times $T$.
This means that a timer that was set for time $T$ on $p$'s clock goes off.
\end{itemize}
Call, message receive, and timer events are {\em interrupt events}.
An {\em MCS process\/} (or just {\em process})
is an automaton with a set of states,
including an initial state, and a transition function. Each interrupt event
causes the transition function to be applied. The transition function is
a function from states, clock times, and interrupt events to states, sets of
response events, sets of message send events, and sets of timer set events
(for future clock times). This means that the transition function takes as
input the current state, clock time, and interrupt event and then produces a
new state, a set of response events for the application process, a set of
messages to be sent, and a set of timers to be set for the future.
A {\em step\/} of $p$ is a tuple $(s,T,i,s',R,M,S)$, where $s$ and $s'$ are
states ($s$ is the current state and $s'$ is the new state), $T$ is a clock
time, $i$ is an interrupt event, $R$ is a set of
response events, $M$ is a set of message send events, $S$ is a set of timer
set events, and $s'$, $R$, $M$,
and $S$ are the results of $p$'s transition function
acting on $s$, $T$, and $i$.
A {\em history\/} of a process $p$ with clock $C$ is a countable sequence of
steps such that the following hold:
\begin{itemize}
\item Steps are ordered by $T$, their time components, in increasing order.
\item The old state in the first step is $p$'s initial state.
\item The old state of each subsequent step is the new state of the previous
step.
\item For the subsequence of steps with time component $T = t$, all nontimer
events are ordered before any timer event, and there is at most one timer
event.
\end{itemize}
An {\em execution\/} of an MCS is a
set of histories, one for
each process $p$ in $P$ with clock $C_p$ in $\cal C$,
which satisfies the following
two conditions: First, for all pairs of processes
$p$ and $q$, every message sent from $p$ to $q$ is received by $q$, and
every message received by $q$ from $p$ was actually sent by $p$
(reliable message transmission and no duplicated messages). We use this
one-to-one correspondence to define the {\em delay\/} of any message in an
execution to be the difference between the real time of receipt and the real
time of sending. Second, a timer is received by $p$ at clock time $T$ if and
only if $p$ has previously set a timer for $T$.
An execution $\rho$ is {\em admissible\/} if the following are true:
\begin{itemize}
\item For every $p$ and $q$, every message in $\rho$ from $p$ to $q$ has
delay in the range $[d-u,d]$, for fixed nonnegative integers $d$ and $u$,
$u \le d$.
\item For every $p$, at most one call at $p$ is pending (lacks a matching
response) at any given time.
\end{itemize}
Our definitions of the correctness conditions are identical to those
found in~\cite{cj99-09-05,cj99-09-04}.
Given an execution $\sigma$, let $\ops(\sigma)$ be the sequence of
call-and-response events
appearing in $\sigma$ in real-time order. We need to specify
a tie-breaking mechanism for ordering events that
occur at the same real time
$t$. In this ordering, the first group of events is formed by the response
events that
happen at time $t$ and whose matching call events happen before
time $t$, ordered by their process identifiers. The second
group of events in
the ordering is formed by the call events that happen at time $t$ and whose
matching response events happen at time $t$, ordered by their process
identifiers with a call event immediately preceding its matching response
event. The third group of events in the ordering is formed by the call events
that happen at time $t$ and whose matching response events happen
after time $t$, ordered by their process identifiers.
We now define sequential consistency,
linearizability, and hybrid consistency. These definitions
all imply that every
call eventually has
a matching response and that call events and response events alternate
at a given process (a process never has more than one pending
call). If $s$ is
a sequence of operations and $p$ is a process, then we denote the restriction
of $s$ to operations of process $p$ by $s | p$. Given an execution $\rho$, a
sequence of operations $s$ is a {\em serialization\/} of $\rho$ if it is a
permutation of $\ops(\rho)$.
Sequential consistency ensures that all processes agree on some ordering of
the execution at the granularity of entire operations. In this ordering, the
operations for each process appear in the order in which they were executed
at that process.
An execution $\rho$ is {\em sequentially consistent\/} if there exists a legal
serialization $\tau$ of $\rho$
such that $\ops(\rho)|p = \tau|p$ for each process $p$.
Like sequential consistency, linearizability ensures that all processes agree
on some ordering of the execution (at the granularity of entire operations) that
preserves the operation sequences of the processes. In addition, the ordering
must preserve the actual timings of operations.
An execution $\rho$ is {\em linearizable\/} if there exists a legal
serialization $\tau$ of $\rho$ such that
$\ops(\rho)|p = \tau|p$ for each process $p$, and
$\op_1$ precedes $\op_2$ in $\tau$ if the
response for $\op_1$ precedes the call for $\op_2$ in $\ops(\rho)$.
Hybrid consistency tries to give us the best of strong consistency
(operations appear to execute everywhere in some fixed order) and weak
consistency (there is no fixed order,
admitting fast implementations). Each operation has a strong version and
a weak version. We want all processes to perceive
the same order of execution for all strong operations and
the same relative order of execution for each pair of operations
executed by the same process, where at least one operation in the pair is
strong.
An execution $\rho$ is {\em hybrid consistent\/} if there exists a serialization
$\sigma$ of the strong operations of $\rho$ such that for each process $p$,
there exists a legal sequence $\tau_p$ of operations such that the following
four statements are true:
(1) $\tau_p$ is a permutation of $\ops(\rho)$.
(2) If $\op_1$ and $\op_2$ are executed by
the same process, $\op_1$ precedes $\op_2$
in $\rho$, and at least one of $\op_1$
and $\op_2$ is a strong operation, then $\op_1$ precedes $\op_2$ in $\tau_p$.
(3) If $\op_1$ precedes $\op_2$ in $\sigma$ and both are strong,
then $\op_1$ precedes $\op_2$ in $\tau_p$.
(4) We have $\tau_p | p = \rho | p$.
An MCS is a sequentially consistent (respectively, linearizable or hybrid
consistent) implementation
of a set of objects if any admissible execution of the MCS is sequentially
consistent (respectively, linearizable or hybrid consistent).
We measure the efficiency of an implementation by the worst-case response
time for any operation on any object in the set. Let $O$ be an object and
$\OP$ be a generic operation. $|\OP(O)|$ is the maximum time taken by
an $\op$ operation
on $O$ in any admissible execution. $|\OP|$, the worst-case time for
the generic operation $\OP$ to
be completed, is the maximum of $|\OP(O)|$
over all objects implemented by the MCS.
\section{Perfect clocks}
\label{perfect}
In this section we assume that all processes have perfectly synchronized
(perfect) clocks and a constant, known message delay $d$. (These two concepts
are
equivalent; see~\cite{cj99-09-05}.)
We model constant message delay by letting the message uncertainty be
$u = 0$. We give lower and upper bounds on the costs of providing
sequentially consistent and linearizable implementations of virtual shared
objects. These lower bounds hold under weaker, more realistic
assumptions about process synchrony.
We prove our lower bounds on the costs of operations in sequentially
consistent implementations, implying corresponding lower bounds for
linearizable implementations. Perfect clocks are necessary for the algorithms
demonstrating our upper bounds. Our algorithms are primarily of theoretical
interest.
Subsection~\ref{spt-lb} describes lower bounds on the costs of implementing
single operations, pairs of operations, and larger groups of operations from
abstract
data types. Subsection~\ref{lb-all} presents lower bounds on the costs of
implementing all operations from abstract data types.
Subsection~\ref{opt-one} contains implementations
of classes of abstract data types in which a single operation or group of
operations is optimized and describes
abstract data types for which the bounds in Subsection~\ref{spt-lb} are tight.
\subsection{Lower bounds for sequential consistency for singles, pairs, and
groups}
\label{spt-lb}
We give lower bounds on the costs in sequentially consistent implementations
of single operations, pairs of operations, and larger groups of operations
satisfying certain algebraic properties, which
we present as needed. The
properties are also useful in Section~\ref{study-hc}, where we consider
hybrid consistency.
If $\beta$ and $\gamma$ are
operation sequences, then $\beta$ and $\gamma$
{\em commute\/}~\cite{cj99-09-18}
if, for every operation sequence $\alpha$ such that
$\alpha \circ \beta$ and $\alpha \circ \gamma$ are legal,
$\alpha \circ \beta \circ \gamma$ and $\alpha \circ \gamma \circ \beta$ are
legal and equivalent.\footnote{The
concept is called {\em commute forward\/} in~\cite{cj99-09-18}.}
Formally we say that two generic operations {\em do not commute\/}
by negating
the previous definition. $\OP_1$ and $\OP_2$ do not commute if
there exist operation instances $\op_1$ and $\op_2$
and a sequence of operations
$\alpha$ such that $\alpha \circ \op_1$
and $\alpha \circ \op_2$ are legal and (at least) one of the following is true:
\begin{enumerate}
\item $\alpha \circ \op_1 \circ \op_2$ is not legal.
\item $\alpha \circ \op_2 \circ \op_1$ is not legal.
\item $\alpha \circ \op_1 \circ \op_2$ does not look like
$\alpha \circ \op_2 \circ \op_1$, which means that
there exists an operation sequence $\gamma$ such that
$\alpha \circ \op_1 \circ \op_2 \circ \gamma$
is legal but $\alpha \circ \op_2 \circ \op_1 \circ \gamma$ is not.
\item $\alpha \circ \op_2 \circ \op_1$ does not look like
$\alpha \circ \op_1 \circ \op_2$, which means that
there exists an operation sequence $\gamma$ such that
$\alpha \circ \op_2 \circ \op_1 \circ \gamma$
is legal but $\alpha \circ \op_1 \circ \op_2 \circ \gamma$ is not.
\end{enumerate}
If item 1 or item 2 is true, then $\OP_1$ and
$\OP_2$ {\em immediately do not commute}.\footnote{If $\op_1$ and $\op_2$ are
the same instantiation of the same generic operation $\OP$, then $\OP$
{\em immediately does not commute with itself.}}
If item 3 or item 4 is true, then
$\OP_1$ and $\OP_2$ {\em eventually do not commute}.
If items 1 and 2 are true, $\OP_1$ and $\OP_2$
are {\em cyclically dependent}.
Appendix~\ref{adt-examples} contains several examples of abstract data types
with their commutativity properties.
The proofs of the theorems in this subsection are similar to each other.
They show that some legal executions, each performed by a single process, may
not be combined into a globally legal execution if minimum time bounds for
certain operations are not obeyed. This results in a violation of sequential
consistency.
We justify the existence of these legal executions and our
combining technique in Appendix~\ref{axioms}. With each proof, we
provide a picture of the illegal global
execution.
In each picture, time moves from left to right, with critical times shown at
the bottom, and each legal individual execution has its own line. Also, the
periods of time for message transit are shown. For each operation (sequence),
we show the object on which it is performed and the process performing it.
Each process completes its operations based on
the information available when the operations are executed. We
choose starting times for operations so that the finishing times
(based on the lower bounds) are too small for the processes to use
the information in messages sent by other processes.
We now give a condition under which an {\it individual\/} (generic)
operation of an abstract data type must be slow.
\begin{theorem}
\label{slow-op}
Let $T$ be an abstract data type with a generic
operation $\OP$ that immediately does
not commute with itself. In any
sequentially consistent implementation of objects of type $T$, $|\OP| \ge d$.
\end{theorem}
\begin{proof}
The following proof generalizes the proof in~\cite{cj99-09-05} that a dequeue
operation of the queue abstract data type must take at least time $d$.
Let $A$ be an object of type $T$. Let processes $1$ and $2$
access $A$.
Since $\OP$ immediately does not commute with itself, there exist
a sequence $\rho$ of operations and an operation instance $\op$ such that
$\rho \circ \op$ is legal but
$\rho \circ \op \circ \op$ is not legal.
Suppose in contradiction that there is a sequentially consistent
implementation of $A_\rho$
for which $|\OP| < d$.\footnote{The use of the explicit initialization
assumption is necessary in this proof, because we must use
serialized operation sequences of the form $\rho \circ \gamma$ to prove a
violation of sequential consistency, and the definition of sequential
consistency does not require that $\rho$ appear as a prefix of the
serialization of the execution. The necessity of the explicit initialization
assumption is an open question.}
Figure~\ref{t:t1} shows possible admissible executions $\alpha_1$ and $\alpha_2$.
Since no messages are received
in $\alpha_1$ and $\alpha_2$, replacing $p_2$'s history in $\alpha_1$
with its history in $\alpha_2$ results in another admissible execution,
$\alpha$.
By assumption, $\alpha$ is sequentially consistent. Thus, there is a
$\tau$ that
is a permutation of $\ops(\alpha)$ and is legal for $A_{\rho}$.
However, all possible permutations of $\ops(\alpha)$ are of the form
$\op \circ \op$, which is not legal for $A_{\rho}$.
\end{proof}
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.00083300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(6150,3360)(1426,-3163)
\thicklines
\put(2401,-2761){\vector( 1, 0){4800}}
\put(2851,-2611){\line( 0,-1){300}}
\put(6676,-2611){\line( 0,-1){300}}
\put(2851,-1936){\line( 1, 0){3825}}
\put(2851,-1786){\line( 0,-1){300}}
\put(6676,-1786){\line( 0,-1){300}}
\put(2851,-1186){\line( 1, 0){1725}}
\put(2851,-1036){\line( 0,-1){300}}
\put(4576,-1036){\line( 0,-1){300}}
\put(2851,-211){\line( 0,-1){300}}
\put(4576,-211){\line( 0,-1){300}}
\put(2851,-361){\line( 1, 0){1725}}
\put(1426, 89){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution}}}
\put(1651,-361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_2$}}}
\put(1651,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_1$}}}
\put(3301,-1711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\alpha_1$ and $\alpha_2$}}}
\put(1426,-2761){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(2851,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}0}}}
\put(6676,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$d$}}}
\put(3301,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op[A_\rho,2]$}}}
\put(3301,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op[A_\rho,1]$}}}
\end{picture}
\end{center}
\caption{Counterexample for lower bound for an operation that immediately
does not commute with itself (Theorem~\ref{slow-op})}
\label{t:t1}
\end{figure}%2
For read/write objects, $\writeit(v)()$
immediately commutes with
$\writeit(w)()$, but the two writes eventually do not commute if $v \ne w$.
Therefore, the previous
theorem does not apply
for $|\WRITE|$. In fact,~\cite{cj99-09-05,cj99-09-17}
present algorithms in which writes take less than time $d$.
Next we give a condition under which a {\it pair\/}
of distinct (generic) operations from an abstract data type must be slow.
\begin{theorem}
\label{sc-pair}
Let $T$ be an abstract data type, and let
$\OP_1$ and $\OP_2$ be distinct generic operations
on $T$ that immediately do not
commute. In any sequentially consistent implementation of at least
two objects of type $T$,
$|\OP_1| + |\OP_2| \ge d$.
\end{theorem}
\begin{proof}
This proof generalizes the proof in~\cite{cj99-09-05} that
$|\READ| + |\WRITE| \ge d$ for read/write objects.
Since $\OP_1$ and $\OP_2$ immediately do not commute, there is a sequence of
operations $\alpha$ and operation instances $\op_1$ and $\op_2$ such that
$\alpha \circ \op_1$ and $\alpha \circ \op_2$ are
legal, but (without loss of generality)
$\alpha \circ \op_1 \circ \op_2$ is not legal.
Let $A$ and $B$ be two objects of type $T$, and let processes $1$ and $2$ use
$A$ and $B$. Suppose in contradiction that there is a sequentially consistent
implementation of $A$ and $B$ for which
$|\OP_1| + |\OP_2| < d$.
Figure~\ref{t:t2} shows possible admissible executions $\sigma_1$ and $\sigma_2$.
Since no messages are received in $\sigma_1$ and $\sigma_2$ after time $t$,
replacing process $2$'s history in $\sigma_1$ with its history in $\sigma_2$
results in another admissible execution, $\sigma$. Then $\ops(\sigma)$
consists of the operations $\op_1[A,1]$ followed by $\op_2[B,1]$, and $\op_1[B,2]$
followed by $\op_2[A,2]$, where both pairs are preceded by $\alpha[A,1]$ and
$\alpha[B,2]$.
By assumption, $\sigma$ is sequentially consistent. Thus there exists a legal
operation sequence $\tau$ in which the following hold:
\begin{itemize}
\item The operations in $\alpha[A,1]$ are followed by $\op_1[A,1]$, and
$\op_1[A,1]$ is followed by $\op_2[B,1]$.
\item The operations in $\alpha[B,2]$ are followed by $\op_1[B,2]$, and
$\op_1[B,2]$ is followed by $\op_2[A,2]$.
\end{itemize}
Since $\alpha \circ \op_1 \circ \op_2$ is not legal, $\tau$ must have $\op_1[A,1]$
follow $\op_2[A,2]$. But that causes $\op_2[B,1]$ to follow $\op_1[B,2]$, which is
not legal.
\end{proof}
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.00063300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(7737,3594)(1,-3397)
\thicklines
\put(826,-1186){\line( 1, 0){1725}}
\put(2551,-1036){\line( 0,-1){300}}
\put(826,-361){\line( 1, 0){1725}}
\put(826,-211){\line( 0,-1){300}}
\put(2551,-211){\line( 0,-1){300}}
\put(826,-1036){\line( 0,-1){300}}
\put(826,-2761){\vector( 1, 0){6900}}
\put(2776,-1936){\line( 1, 0){4125}}
\put(2776,-1786){\line( 0,-1){300}}
\put(4726,-1186){\line( 1, 0){1125}}
\put(4726,-361){\line( 1, 0){1125}}
\put(2776,-1186){\line( 1, 0){1725}}
\put(2776,-1036){\line( 0,-1){300}}
\put(4501,-1036){\line( 0,-1){300}}
\put(2776,-361){\line( 1, 0){1725}}
\put(2776,-211){\line( 0,-1){300}}
\put(4501,-211){\line( 0,-1){300}}
\put(2776,-2611){\line( 0,-1){300}}
\put(4726,-1036){\line( 0,-1){300}}
\put(4726,-2611){\line( 0,-1){300}}
\put(5851,-211){\line( 0,-1){300}}
\put(4726,-211){\line( 0,-1){300}}
\put(5851,-1036){\line( 0,-1){300}}
\put(5851,-2611){\line( 0,-1){300}}
\put(6901,-2611){\line( 0,-1){300}}
\put(6901,-1786){\line( 0,-1){300}}
\put(226,-361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\sigma_2$}}}
\put(226,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\sigma_1$}}}
\put( 1, 89){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution}}}
\put( 1,-2761){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(1101,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha[A,1]\alpha[B,2]$}}}
\put(1101,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha[A,1]\alpha[B,2]$}}}
\put(3126,-1711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\op_1$ and $\op_2$}}}
\put(3226,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op_1[A,2]$}}}
\put(3226,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op_1[B,1]$}}}
\put(2776,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t$}}}
\put(4526,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+|\OP_1|$}}}
\put(5651,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+|\OP_1|+$}}}
\put(5651,-3411){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$|\OP_2|$}}}
\put(4951,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op_2[B,2]$}}}
\put(4951,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op_2[A,1]$}}}
\put(6901,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+d$}}}
\end{picture}
\end{center}
\caption{Counterexample for lower bound for a pair of operations that
immediately do not commute (Theorem~\ref{sc-pair})}
\label{t:t2}
\end{figure}%3
Cyclic dependences cause a pair of operations
to be even slower than the previous lower bound.
\begin{theorem}
\label{cd-pair}
Let $T$ be an abstract data type with cyclically dependent generic
operations $\OP_1$ and $\OP_2$.
In any sequentially consistent implementation of an object
of type $T$, $|\OP_1| + |\OP_2| \ge 2d$.
\end{theorem}
\begin{proof}
Since $\OP_1$ and $\OP_2$
are cyclically dependent, there is a sequence of
operations $\rho$ and operation instances $\op_1$ and $\op_2$ such that
$\rho \circ \op_1$
and $\rho \circ \op_2$ are legal, but
$\rho \circ \op_1 \circ \op_2$ and
$\rho \circ \op_2 \circ \op_1$ are not legal.
Let $A$ be an object of type $T$. Let processes $1$ and $2$ access $A$.
Assume in contradiction that there exists a sequentially consistent
implementation of $A_\rho$ for which
$|\OP_1| + |\OP_2| < 2d$. We can assume without loss
of generality that $|\OP_1| \ge |\OP_2|$.
Figure~\ref{t:t3} shows possible
admissible executions $\alpha_1$ and $\alpha_2$.
Since no messages are received in $\alpha_1$ and $\alpha_2$ before time
$d$,
replacing process $1$'s history in $\alpha_2$ with its history in $\alpha_1$
results in another admissible and sequentially consistent execution, $\alpha$.
Thus, there is a legal permutation of $\ops(\alpha)$
for $A_{\rho}$. However, because of cyclic dependency,
neither permutation of
$\ops(\alpha)$ is legal for $A_{\rho}$.
\end{proof}
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.00083300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(5787,3360)(1426,-3163)
\thicklines
\put(2401,-2761){\vector( 1, 0){4800}}
\put(2851,-2611){\line( 0,-1){300}}
\put(6676,-2611){\line( 0,-1){300}}
\put(2851,-1936){\line( 1, 0){3825}}
\put(2851,-1786){\line( 0,-1){300}}
\put(6676,-1786){\line( 0,-1){300}}
\put(2851,-1186){\line( 1, 0){1725}}
\put(2851,-1036){\line( 0,-1){300}}
\put(4576,-1036){\line( 0,-1){300}}
\put(2851,-211){\line( 0,-1){300}}
\put(4576,-211){\line( 0,-1){300}}
\put(2851,-361){\line( 1, 0){1725}}
\put(1651,-361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_2$}}}
\put(1651,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_1$}}}
\put(3301,-1711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\alpha_1,\alpha_2$}}}
\put(1476,-2761){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(2851,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}0}}}
\put(6676,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$d$}}}
\put(3301,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op[A_\rho,2]$}}}
\put(3301,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op[A_\rho,1]$}}}
\put(1426, 89){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution}}}
\end{picture}
\end{center}
\caption{Counterexample for lower bound for a pair of cyclically
dependent operations (Theorem~\ref{cd-pair})}
\label{t:t3}
\end{figure}%4
Dequeue and enqueue are not cyclically dependent,
because an enqueue is always
legal. Likewise, read and write are not cyclically dependent because
a write is always legal.
\cite{cj99-09-05} shows
that the lower bounds of Theorem~\ref{sc-pair} are tight.
We now give an example of
a type, BANKACCOUNT2,
with cyclically dependent operations. The object of the type consists of
values denoting savings and checking account balances.
The operations are $\RSUC$ and $\RCUS$.
$\rsuc(v)(w)$
adds $v$ to the checking account balance and returns the value of the
savings account balance in $w$. $\rcus(v)(w)$ adds
$v$ to the savings account balance and returns the value of the checking
account balance in $w$. $\RSUC$ and $\RCUS$ are cyclically dependent.
Initialize the checking account balance to 500 and the savings
account balance to 1000. An $\rsuc$ instance executed alone would return
1000, and an $\rcus$ instance executed alone would return 500. If the $\rcus$
instance followed the $\rsuc$ instance, the $\rcus$ instance would not be
legal, and vice versa.
We generalize cyclic dependence for larger groups of operations.
If there exists an operation sequence $\alpha$ and operation instances
$\op_1,\ldots,\op_n$ such that $\alpha \circ \op_1$, $\ldots$,
$\alpha \circ \op_n$ are legal, but each operation sequence formed by
$\alpha$ followed by a permutation of $\op_1,\ldots,\op_n$ is illegal, then
$\OP_1,\ldots,\OP_n$ are {\em $n$-cyclically dependent}.
The previous definition of cyclic dependence corresponds to 2-cyclic
dependence.
\begin{theorem}
\label{not-all-less}
Let $T$ be an abstract data type with operations $\OP_1,\ldots,\OP_n$.
If $\OP_1,\ldots,\OP_n$ are $n$-cyclically dependent, then one of
$|\OP_1|,\ldots,|\OP_n|$ must be at least $d$ in any sequentially
consistent implementation
of objects of type $T$, where at least $n$ processes
are using the objects.
\end{theorem}
\begin{proof}
Let $A$ be an object of type $T$. Let processes $1,\ldots,n$ access $A$.
Since $\OP_1,\ldots,\OP_n$ are $n$-cyclically
dependent, there exist an operation sequence $\rho$ and operation instances
$\op_1,\ldots,\op_n$ such that they can individually, but not all together,
legally follow $\rho$.
Assume in contradiction that there exists an implementation of $A_\rho$ where
$|\OP_1| < d,\ldots,|\OP_n| < d$.
Figure~\ref{t:t4} shows admissible executions
$\alpha_1,\ldots,\alpha_n$.
Since no messages are received in $\alpha_1,\ldots,\alpha_n$,
for each $i > 1$, replacing $p_i$'s history in $\alpha_1$ with its history in
$\alpha_i$ results in another admissible and sequentially consistent execution,
$\alpha$. Thus, there is a $\tau$ that is a
permutation of $\ops(\alpha)$ and is legal for $A_\rho$. However, no possible
permutation of $\ops(\alpha)$ is legal for $A_\rho$.
\end{proof}
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.00083300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(5787,3360)(1426,-3163)
\thicklines
\put(2401,-2761){\vector( 1, 0){4800}}
\put(2851,-2611){\line( 0,-1){300}}
\put(6676,-2611){\line( 0,-1){300}}
\put(2851,-1936){\line( 1, 0){3825}}
\put(2851,-1786){\line( 0,-1){300}}
\put(6676,-1786){\line( 0,-1){300}}
\put(2851,-1186){\line( 1, 0){1725}}
\put(2851,-1036){\line( 0,-1){300}}
\put(4576,-1036){\line( 0,-1){300}}
\put(2851,-211){\line( 0,-1){300}}
\put(4576,-211){\line( 0,-1){300}}
\put(2851,-361){\line( 1, 0){1725}}
\put(1651,-361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_n$}}}
\put(1651,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_1$}}}
\put(3301,-1711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\alpha_1,\ldots,\alpha_n$}}}
\put(1476,-2761){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(2851,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}0}}}
\put(6676,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$d$}}}
\put(3301,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op[A_\rho,n]$}}}
\put(3301,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op[A_\rho,1]$}}}
\put(1426, 89){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution}}}
\end{picture}
\end{center}
\caption{Counterexample for lower bound for $n$-cyclically dependent
operations (Theorem~\ref{not-all-less})}
\label{t:t4}
\end{figure}%5
We now give an example of a type, BANKACCOUNT3, with $3$-cyclically dependent
operations. The object of the type consists of
values denoting savings, checking, and loan account balances.
The
operations are $\RSUC$, $\RCUL$, and $\RLUS$.
$\rsuc(v)(w)$
adds $v$ to the checking account balance and returns the value of the
savings account balance in $w$. $\rcul(v)(w)$ adds
$v$ to the loan account balance and returns the value of the checking
account balance in $w$.
$\rlus(v)(w)$
adds $v$ to the savings account balance and returns the value of
the loan account balance in $w$. Initialize the savings account balance to
500, the checking account balance to 1000, and the loan account balance to
1500. An $\rsuc$ instance executed alone would return 500, an $\rcul$ instance
executed alone would return 1000, and an $\rlus$ instance executed alone would
return 1500.
In any permutation of all three instances, at least one instance would be
illegal if some account balance has changed.
The number of accessing processes is important. If only two processes
access objects of type BANKACCOUNT3, there exists an implementation of
BANKACCOUNT3
where $|\RSUC| = d/2$, $|\RCUL| = d/2$, and $|\RLUS| = d/2$, matching the lower
bound of $3d/2$ given in Theorem~\ref{clique}. The implementation described
in Theorem~\ref{half-all} suffices.
The next theorem gives a larger lower bound for $3$-cyclically dependent
operations.
\begin{theorem}
\label{3c2d}
If $T$ is an abstract data type with operations $\OP_1$, $\OP_2$,
and $\OP_3$ that
are $3$-cyclically dependent, then $|\OP_1| + |\OP_2| + |\OP_3| \ge 2d$ in any sequentially
consistent implementation of objects of type $T$, where there are at least
three accessing processes.
\end{theorem}
\begin{proof}
By Theorem~\ref{not-all-less}, at least one of $|\OP_1|$, $|\OP_2|$,
and $|\OP_3|$
must be at least $d$. Without loss of generality, assume
that $|\OP_1| \ge d$.
Assume in contradiction that $|\OP_2| + |\OP_3| < d$. We consider $A_\rho$,
the $\rho$-initialized version of $A$.
Figure~\ref{t:t5} shows admissible executions $\alpha_1$, $\alpha_2$, and
$\alpha_3$.
Since no messages are received in $\alpha_1$, $\alpha_2$, and $\alpha_3$ before
time $d$, replacing process 1's history in $\alpha_3$ with its history
in $\alpha_1$ and replacing process 2's history in $\alpha_3$ with its
history in $\alpha_2$ results
in another admissible and sequentially consistent
execution $\alpha$.
Thus, there is a legal permutation
of $\ops(\alpha)$ for $A_\rho$. However, no possible permutation
of $\ops(\alpha)$ is legal for $A_\rho$.
\end{proof}
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.00063300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(7087,5810)(1426,-5613)
\thicklines
\put(4126,-211){\line( 0,-1){300}}
\put(5251,-211){\line( 0,-1){300}}
\put(4126,-361){\line( 1, 0){1125}}
\put(4726,-4711){\line( 0,-1){300}}
\put(5326,-4711){\line( 0,-1){300}}
\put(6601,-4711){\line( 0,-1){300}}
\put(3526,-2686){\line( 1, 0){3675}}
\put(3526,-2536){\line( 0,-1){300}}
\put(3526,-1936){\line( 1, 0){3075}}
\put(6601,-1786){\line( 0,-1){300}}
\put(2851,-3286){\line( 1, 0){3750}}
\put(2851,-3136){\line( 0,-1){300}}
\put(6601,-3136){\line( 0,-1){300}}
\put(2851,-4036){\line( 1, 0){3225}}
\put(6076,-3886){\line( 0,-1){300}}
\put(4126,-1036){\line( 0,-1){300}}
\put(7801,-1036){\line( 0,-1){300}}
\put(2851,-3886){\line( 0,-1){300}}
\put(3526,-1786){\line( 0,-1){300}}
\put(2851,-4711){\line( 0,-1){300}}
\put(6076,-4711){\line( 0,-1){300}}
\put(3526,-4711){\line( 0,-1){300}}
\put(4126,-4711){\line( 0,-1){300}}
\put(7201,-2536){\line( 0,-1){300}}
\put(7201,-4711){\line( 0,-1){300}}
\put(7801,-4711){\line( 0,-1){300}}
\put(4126,-1186){\line( 1, 0){3675}}
\put(2401,-4861){\vector( 1, 0){6100}}
\put(1651,-361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_3$}}}
\put(4126,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op_3[A_\rho,2]$}}}
\put(1426, 89){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution}}}
\put(1426,-4861){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(2851,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}0}}}
\put(4326,-4600){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\frac{|\OP_1|+|\OP_3|}{2}$}}}
\put(3776,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\frac{|\OP_1|-|\OP_3|}{2}$}}}
\put(5026,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\frac{|\OP_1|+|\OP_2|}{2}$}}}
\put(6076,-4600){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$d$}}}
\put(6426,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$|\OP_1|$}}}
\put(1651,-1936){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_2$}}}
\put(3751,-2461){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\op_2$}}}
\put(1651,-3286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_1$}}}
\put(3301,-3061){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op_1[A_\rho,1]$}}}
\put(3976,-1711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op_2[A_\rho,2]$}}}
\put(3201,-3811){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\op_1$}}}
\put(4301,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\op_3$}}}
\put(3201,-4600){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\frac{|\OP_1|-|\OP_2|}{2}$}}}
\put(6726,-4600){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\frac{|\OP_1|-|\OP_2|}{2}+d$}}}
\put(7426,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\frac{|\OP_1|-|\OP_3|}{2}+d$}}}
\end{picture}
\caption{Counterexample for lower bound for 3-cyclically
dependent operations (Theorem~\ref{3c2d})}
\end{center}
\label{t:t5}
\end{figure}%6
We now define a condition causing
either a pair of operations or an individual operation in a trio of
operations to be slow.
A generic operation $\OP$ is
{\em noninterleavable with respect to $\OP_1$ preceding $\OP_2$}
if there exist an operation sequence $\rho$ and operation instances
$\op$, $\op_1$, and $\op_2$ such that $\rho \circ \op$
and $\rho \circ \op_1 \circ \op_2$ are legal, but
none of $\rho \circ \op \circ \op_1 \circ \op_2$,
$\rho \circ \op_1 \circ \op \circ \op_2$, and
$\rho \circ \op_1 \circ \op_2 \circ \op$ is legal.
\begin{theorem}
\label{sc-big-cycle}
Let $T$ be an abstract data type, and let $\OP_1$, $\OP_2$, and $\OP_3$ be
generic
operations of $T$ such that $\OP_3$ is
noninterleavable with respect to $\OP_1$
preceding $\OP_2$.
Then in any sequentially consistent implementation of objects of type $T$,
$|\OP_1| + |\OP_2| \ge d$ or $|\OP_3| \ge d$.
\end{theorem}
\begin{proof}
Since $\OP_3$ is
noninterleavable with respect to $\OP_1$ preceding $\OP_2$, there
exists an operation sequence $\rho$ and operation instances $\op_1$, $\op_2$,
and $\op_3$ such that $\rho \circ \op_3$ and $\rho \circ \op_1 \circ \op_2$
are legal, but none of $\rho \circ \op_3 \circ \op_1 \circ \op_2$,
$\rho \circ \op_1 \circ \op_3 \circ \op_2$, and
$\rho \circ \op_1 \circ \op_2 \circ \op_3$ is legal.
Let $A$ be an object of type $T$. Let processes $1$ and $2$ access $A$.
Assume in contradiction that there exists a sequentially consistent
implementation of $A_\rho$ for which $|\OP_1| + |\OP_2| < d$ and
$|\OP_3| < d$.
Figure~\ref{t:t6} shows admissible executions $\alpha_1$ and $\alpha_2$.
Since no messages are received in $\alpha_1$ and $\alpha_2$,
replacing process $1$'s history in $\alpha_2$ with its history in $\alpha_1$
results in another admissible and sequentially consistent execution, $\alpha$.
Thus, there is a permutation of $\ops(\alpha)$,
$\tau$, which preserves the order of process $1$'s
operations in $\alpha$ and is legal for $A_{\rho}$. However, because of
noninterleavability, none of the three permutations of
$\ops(\alpha)$ that satisfy the order of process $1$'s operations is
legal for $A_{\rho}$.
\end{proof}
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.00083300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(5787,3360)(1426,-3163)
\thicklines
\put(2401,-2761){\vector( 1, 0){4800}}
\put(2851,-2611){\line( 0,-1){300}}
\put(6676,-2611){\line( 0,-1){300}}
\put(2851,-1936){\line( 1, 0){3825}}
\put(2851,-1786){\line( 0,-1){300}}
\put(6676,-1786){\line( 0,-1){300}}
\put(2851,-1186){\line( 1, 0){1725}}
\put(4801,-1186){\line( 1, 0){1725}}
\put(2851,-1036){\line( 0,-1){300}}
\put(4576,-1036){\line( 0,-1){300}}
\put(6526,-1036){\line( 0,-1){300}}
\put(2851,-211){\line( 0,-1){300}}
\put(5251,-211){\line( 0,-1){300}}
\put(2851,-361){\line( 1, 0){2400}}
\put(4801,-1036){\line( 0,-1){300}}
\put(1651,-361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_2$}}}
\put(1651,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\alpha_1$}}}
\put(3301,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$op_3[A_\rho,2]$}}}
\put(3301,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$op_1[A_\rho,1]$}}}
\put(3301,-1711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $op_1$ and $op_3$}}}
\put(1426,-2761){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(2851,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}0}}}
\put(6676,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$d$}}}
\put(1426, 89){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution}}}
\put(5251,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$op_2[A_\rho,1]$}}}
\end{picture}
\end{center}
\caption{Counterexample for noninterleavability lower
bound (Theorem~\ref{sc-big-cycle})}
\label{t:t6}
\end{figure}%7
Consider our BANKACCOUNT2 objects, and define the additional
operations $\UC$
(which adds a certain amount
to the checking account balance) and $\RS$ (which
reads and returns the value of the savings account balance). $\RCUS$ is
noninterleavable with respect to $\UC$ preceding $\RS$. Initialize the checking
account balance to 500 and the savings account balance to 1000. An $\rcus$
instance executed alone would return 500, and an $\rs$ instance following a
$\uc$ instance would return 1000. Including all three instances would make
either the $\rcus$ or the $\rs$ illegal, depending on where
the $\rcus$ instance was placed.
\begin{table}
\caption{Corollaries for Subsection~\ref{spt-lb}\label{t:spt-cors}}
\begin{center}\footnotesize
\begin{tabular}{llll}\hline
Operations & Type/table references & Theorems & Citations\\ \hline
\it DEQ & Table~\ref{queue-rel} & ~\ref{slow-op} & ~\cite{cj99-09-05}\\
\it POP & Table~\ref{stack-rel} & ~\ref{slow-op} & ~\cite{cj99-09-05}\\
\it DEL & Table~\ref{bst-rel} & ~\ref{slow-op} & \\
\it WITHDRAW & Table~\ref{bank-acct} & ~\ref{slow-op} & \\
\it READ, WRITE & Read/write objects & ~\ref{sc-pair} & ~\cite{cj99-09-05,cj99-09-15}\\
Any two distinct generic & Tables~\ref{queue-rel}--\ref{bank-acct} & ~\ref{sc-pair} & \\
operations & & & \\
\it RSUC, RCUS & BANKACCOUNT2 & ~\ref{cd-pair} & \\
\it RSUC, RCUL, RLUS & BANKACCOUNT3 & ~\ref{not-all-less},~\ref{3c2d}
& \\
\it RCUS, UC, RS & BANKACCOUNT2 & ~\ref{sc-big-cycle}
& \\ \hline
\end{tabular}
\end{center}
\end{table}
Table~\ref{t:spt-cors}
displays types applicable to the theorems in this subsection.
For each row of the table, the operation from the first column, which
is from the type or table reference listed in the second column, meets the
hypotheses for the theorem(s) listed in the third column. Any references to a
specific lower bound corresponding to the given operation are
presented in the fourth column.
\subsection{Lower bounds for sequential consistency for all
operations of a type}
\label{lb-all}
We now use the previous results
and the structure of the commutativity properties to deduce
lower bounds on the worst-case time complexity for {\it all\/}
operations of abstract data types in sequentially consistent implementations.
An alternate way to represent the commutativity properties of an abstract
data type $T$ is to use a
{\em commutativity graph} $\CG(T)$, with the
generic operations as nodes. An edge exists between two nodes if their
corresponding operations immediately do not commute. A loop exists at a
node if the corresponding operation immediately does not commute with itself.
Figure~\ref{t:cgpic}
shows some examples of commutativity graphs.
\begin{figure}[tbh]
\begin{center}
\setlength{\unitlength}{0.00083300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\thicklines
\begin{picture}(5270,5673)(-11,-4822)
\put(4701,-136){\circle{750}}
\put(1651,-3661){\circle{750}}
\put(3301,-1861){\circle{750}}
\put(4876,-3661){\circle{750}}
\put(1651,-136){\circle{750}}
\put(2026,-136){\line( 1, 0){2325}}
\put(3001,-2086){\line(-5,-6){1088.115}}
\put(3601,-2086){\line( 5,-6){1057.377}}
\put(4501,-3736){\line(-1, 0){2475}}
\put(1351,4){\line( 0, 1){385}}
\put(1351,389){\line( 1, 0){600}}
\put(1951,389){\line( 0,-1){385}}
\put(1426,-211){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}dequeue}}}
\put(4426,-211){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}enqueue}}}
\put(1450,-1111){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}(a) Commutativity
graph for queue operations}}}
\put(1070,-4786){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}(b) Commutativity
graph for reference-count set operations}}}
\put(1376,-3736){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}update}}}
\put(4726,-3736){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}find}}}
\put(3076,-1936){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}insert}}}
\end{picture}
\end{center}
\caption{Examples of commutativity graphs}
\label{t:cgpic}
\end{figure}%8
$\NC(T)$ is the subset of nodes in $\CG(T)$ such that each node's
corresponding operation immediately does not commute with itself.
$\RCG(T)$ (the reduced commutativity graph for $T$) is the subgraph of
$\CG(T)$ formed by deleting all nodes in $\NC(T)$ and their incident edges.
$\Maxdom(\RCG(T))$ is a subgraph of $\RCG(T)$ formed by a maximum
independent edge dominating set of $\RCG(T)$. Notice that a maximum
independent
edge dominating set of a graph is a largest subset of edges of the graph
such that
distinct edges in the subset do not have nodes in common and all other edges in
the graph have a node in common with one of the edges in the set. It is clear
that the size of a maximum independent edge dominating set is at most half the
number of nodes in the graph.
This result gives lower bounds on the costs of implementing abstract data types
with general commutativity graphs.
\begin{theorem}
\label{cgt}
Let $T$ have operations $\OP_1, \OP_2, \ldots, \OP_n$
and commutativity graph $\CG(T)$. In
any sequentially consistent implementation of
$T$,
$$
\sum_{i=1}^{n} |\OP_i|
\ge (|\NC(T)| + |\Maxdom(\RCG(T))|) d.
$$
\end{theorem}
\begin{proof}
If $\OP_i$ immediately does not commute with itself, then
$|\OP_i| \ge d$
by Theorem~\ref{slow-op}.
Thus, \(\sum_{\OP_i \in \NC(T)} |\OP_i| \)
$\ge |\NC(T)| d$.
Let $(\OP_i,\OP_j)$ be an edge in $\Maxdom(\RCG(T))$.
By the definition of
$\Maxdom(\RCG(T))$, $\OP_i$ and $\OP_j$ immediately do not commute.
By Theorem~\ref{sc-pair},
$|\OP_i| + |\OP_j| \ge d$.
Adding these inequalities yields the desired result.
\end{proof}
We now give a lower bound on the time required for all operations of an
abstract data type with a {\it clique\/} in its commutativity graph.
\begin{theorem}
\label{clique}
Let $T$ have operations $\OP_1, \OP_2, \ldots, \OP_n$
such that for all $i \in \{1,\ldots,n\}$, $\OP_i$ immediately does not
commute with $\OP_j$ if $i \ne j$. In any sequentially consistent
implementation of $T$,
\(\sum_{i=1}^{n} |\OP_i| \ge |\NC(T)| d+ (n-|\NC(T)|) d/2 \).
\end{theorem}
\begin{proof}
Let $s = |\NC(T)|$.
Let $\OP_{i_1}, \ldots, \OP_{i_s}$ be the operations
that immediately do not
commute with themselves. By Theorem~\ref{slow-op},
$|\OP_{i_k}| \ge d$ for all $k$ in $\{1,\ldots,s\}$. Thus,
\(\sum_{k=1}^{s} |\OP_{i_k}| \ge s d \). Let
$\OP_{j_1}, \ldots, \OP_{j_{n-s}}$
be the remaining operations. We can assume without loss of generality that
$t = n-s > 2$, because Theorem~\ref{sc-pair} handles the $t=2$ case.
Since $\OP_{j_a}$ and $\OP_{j_b}$
do not commute for all $a,b$ in $\{1,\ldots,t\}$
where $a \ne b$,
by Theorem~\ref{sc-pair}, we get a system of
$C(t,2) = t(t-1)/2$ inequalities
of the form $|\OP_{j_a}| + |\OP_{j_b}| \ge d$, where
$a \neq b$.\footnote{$C(t,2)$ means the
number of ways to
choose two distinct items from a set of $t$ items.}
We want to minimize \(\sum_{k=1}^{t} |\OP_{j_k}| \) given the above
constraints and the constraints $|\OP_{j_k}| \ge 0$ for all
$k \in \{1,\ldots,t\}$. We want to minimize the sum, because
we want the sum of each $|\OP_{j_a}|$
and $|\OP_{j_b}|$ in the global sum
to be as close as possible to $d$ from the corresponding individual
constraint. If the system of equations of the form
$|\OP_{j_a}| + |\OP_{j_b}| = d$
has a solution, the solution would yield a lower
bound for \(\sum_{k=1}^{t} |\OP_{j_k}| \), because each $|\OP_{j_k}|$ is
nonnegative. Let us use the system of
equations. Without loss of generality,
we consider the following:
\begin{itemize}
\item $|\OP_{j_1}| + |\OP_{j_2}| = d$.
\item $|\OP_{j_1}| + |\OP_{j_3}| = d$.
\item $|\OP_{j_2}| + |\OP_{j_3}| = d$.
\end{itemize}
We can subtract the third equation from the second to yield the
equation $|\OP_{j_1}| - |\OP_{j_2}| = 0$. Adding the new equation
to the first yields $2 |\OP_{j_1}| = d$. We now get that
$|\OP_{j_1}| = d/2$. It easily follows that
$|\OP_{j_k}| = d/2$ for all $k \in \{1,\ldots,t\}$.
Thus,
\(\sum_{i=1}^{n} |\OP_i| = \sum_{k=1}^{s} |\OP_{i_k}| +
\sum_{k=1}^{t} |\OP_{j_k}| \ge s d + (n-s) d/2 \), as desired.
\end{proof}
Most of our lower bounds do not restrict the costs of
individual operations, provided that certain sums of costs exceed the lower
bounds. Let us consider implementations of abstract data types where each
operation takes either time $0$ or time $d$. Thus each implementation
corresponds
to a labeling of
the commutativity graph where each node is labeled with a $0$
or a $d$, a
{\em 0-$d$ assignment}. The total cost of the implementation
equals $d$ multiplied by the number of $d$'s in the labeling.
We now show that the minimum number of $d$'s required in a labeling
is the size of a minimum vertex cover
($\VC$).\footnote{A minimum vertex cover is a smallest subset
of nodes of a graph
such that each edge in the graph has an endpoint contained
in the subset.}
\begin{theorem}
\label{vcodt}
Let $T$ have operations $\OP_1, \ldots, \OP_n$. In
any sequentially consistent implementation of $T$ corresponding to a 0-$d$
assignment,
$$
\sum_{i=1}^{n} |\OP_i| \ge |\VC(\CG(T))|d.
$$
\end{theorem}
\begin{proof}
Assume in contradiction that there exists a sequentially consistent
implementation of $T$ corresponding to a 0-$d$ assignment such that
$$
\sum_{i=1}^{n} |\OP_i| < |\VC(\CG(T))| d.
$$
If $(\OP_i,\OP_j)$ is an edge in $\CG(T)$, then $\OP_i$ and
$\OP_j$ immediately
do not commute by the definition of $\CG(T)$. Thus
$|\OP_i| + |\OP_j| \ge d$ by
Theorem~\ref{sc-pair}, implying that
at least one of $\OP_i$ and $\OP_j$ must be
labeled with $d$ in the 0-$d$ assignment. Let $S$ be the set of operations
labeled with $d$. $|S| < |\VC(\CG(T))|$.
Since $\VC(\CG(T))$ is a smallest
set of nodes that cover the edges of $\CG(T)$, $S$
does not cover the edges. Thus there exists an edge
$(\OP_i,\OP_j)$ in $\CG(T)$ such that
$\OP_i$ and $\OP_j$ are not in $S$, that is,
they are labeled with $0$ in
the 0-$d$
assignment. By the definition of $\CG(T)$,
$\OP_i$ and $\OP_j$ immediately do not
commute, but
$|\OP_i| + |\OP_j| = 0$, violating Theorem~\ref{sc-pair}.
\end{proof}
Table~\ref{t:all-cors} displays types applicable to the theorems in this
subsection. For each row of the table, the type referenced in the first
column meets the hypotheses of the theorem(s) listed in the second column,
with commutativity graph properties as shown in the remaining columns.
\begin{table}
\caption{Corollaries for Subsection~\ref{lb-all}}
\begin{center}
\begin{tabular}{lccccc} \hline
Type/Table reference & Theorems & $|\Maxdom|$ & $|\NC|$ & $n$ &
$|\VC|$ \\ \hline
Table~\ref{bst-2-rel} & ~\ref{cgt},~\ref{vcodt} & 1 & & & 2 \\
Table~\ref{bst-2-rel} $+$ delete operation & ~\ref{clique} &
& 2 & 5 & \\ \hline
\end{tabular}
\end{center}
\label{t:all-cors}
\end{table}
\subsection{Upper bounds for linearizability}
\label{opt-one}
We have used various algebraic properties of operations to prove lower bounds
on their worst-case completion times in linearizable implementations.
Now we show
that the lower bounds are tight. First we explain how certain classes of
operations and individual operations can be made fast. Then we present
types where the sum of the worst-case completion times for all operations is
optimized. Finally we discuss the effects of the results on the relative
costs of sequential consistency and linearizability.
\subsubsection{Optimizing a class of operations or a single operation}
The worst-case completion time of an
operation is at least $d$ if it immediately does not commute with itself.
Given an operation that immediately commutes with itself,
can we implement it so that it performs only local computation,
for which the time is assumed to be negligible compared to the message
delay in
the communication network?
We demonstrate three algebraic properties allowing
an operation or group of operations to be made fast.
Making a single operation from an abstract data type fast has proved to
be a nontrivial task. Although we have not quite developed an exact
characterization of when a single operation can be made fast, we
have determined a property (self-obliviousness)
that is sufficiently general
in allowing a single operation to be made fast.
Any operation that immediately does not commute with
itself cannot be made fast. If an operation is
noninterleavable with
respect to a pair of operations, then some slowdown must
occur (either the individual operation or the pair). A lack of
self-obliviousness is related to both of these properties. If an operation
immediately does not commute with itself, then it is not self-oblivious. If
an operation is not self-oblivious, then it may be
noninterleavable with respect to another operation and itself.
We now define more properties of generic operations. Although the
properties are noted in the object-oriented programming literature, we
are not aware of any formal definitions like ours. Let $\alpha$ and
$\beta$ be arbitrary operation sequences. If the legality of
$\alpha \circ \aop \circ \beta$ implies the legality of $\alpha \circ \beta$
for any instance $\aop$ of generic operation $\AOP$ and if the legality of
$\alpha \circ \beta$ implies the legality of $\alpha \circ \aop^* \circ \beta$
for some instance $\aop$ of generic operation $\AOP$
(where $\aop^*$ denotes $0$
or more copies of $\aop$), then $\AOP$ is an {\em accessor}. Informally,
an accessor does not change an object's state. $\FIND$ for reference-count sets
(Table~\ref{bst-2-rel}) is an accessor.
$\MOP$ is a {\em mutator\/}
if there exist operation
sequences $\alpha$ and $\beta$ such that $\alpha \circ \mop \circ \beta$ is
legal but $\alpha \circ \beta$ is not legal for some instance $\mop$ of $\MOP$.
Informally, a mutator
changes an object's state, and this change can be detected. $\INS$ and $\UP$ for
reference-count sets are mutators.
For any abstract data type, we can always implement accessor operations
such that they only perform local computation. Other operations take
time $d$ in the implementation.
\begin{theorem}
\label{fast-pure-acc}
Let $T$ be an abstract data type. If $T$ has generic accessor operations
$\AOP_1, \ldots, \AOP_n$, then there exists a linearizable implementation
of objects of type $T$ where
$|\AOP_1| = \cdots = |\AOP_n| = 0$ and $|\MOP_1| = \cdots = |\MOP_m| = d$ for
all other generic operations $\MOP_1,\ldots,\MOP_m$.
\end{theorem}
\begin{proof}
We exhibit an implementation where
$|\AOP_1| = \cdots = |\AOP_n| = 0$ and
$|\MOP_1| = \cdots = |\MOP_m| = d$. Each process
keeps a copy of all objects in its local memory. When an $\aop_i$ is invoked
at process $p$, $p$ performs the operation locally and returns
the result from
the operation. When a $\mop_j$ operation on object $X$ is invoked at process
$p$, $p$ sends a message $\DoMop_j(X)$ with the argument list for the operation
to all processes (including itself), waits $d$ time, and returns the result of
performing the operation. When a process receives any form of $\DoMop$ message,
it performs the operation on $X$ in its local memory. If the message was
sent by that process, it saves the result so that the process can return it.
We can break ties in the following way: $\DoMop_k$ is handled
before $\DoMop_l$ if $k < l$, and we use process identifiers to break any
remaining ties.
Our correctness proof is very similar to
a proof in~\cite{cj99-09-05} for read/write objects.
Let $\rho$ be an admissible execution. We systematically construct the
desired $\tau$. Each operation in $\rho$ {\it occurs\/} at the time of its
response. Let $\tau$ be the sequence of operations in $\rho$ ordered by the
times of occurrence, breaking ties by placing $\mop$ operations before $\aop$
operations, $\aop_k$ before $\aop_l$ if $k < l$, $\mop_k$ before $\mop_l$ if
$k < l$, and using process identifiers to break any remaining ties. By
construction, $\rho|p = \tau|p$ for all $p$, and $\tau$ preserves the relative
ordering of nonoverlapping operations.
We now must show that $\tau$ is legal or that, for each object $X$, $\tau|X$ is
in the sequential specification of $T$.
Consider an accessor operation. It returns based on its local state. Its
local state reflects all changes made by mutators occurring up to the time
of the accessor in $\rho$. Thus the accessor operation returns a legal value
list in $\tau$.
Consider a mutator operation that returns a value list. It returns based on
its local state at its response time in $\rho$. Its local state reflects all
changes
made by mutators occurring up to the time of the mutator. Let the sequence
of mutators occurring up to the time of the mutator in $\rho$ be $\alpha$.
The mutator is legal after $\alpha$, which is a subsequence of $\tau$.
Any accessors interleaved with $\alpha$ in $\tau$
do not affect the legality
of the mutator. Thus the mutator
operation returns a legal value list in $\tau$.
\end{proof}
We now define a restriction of mutator operations. If the
legality of arbitrary operation sequence $\alpha$ implies the legality of
$\alpha \circ \mop$ for any instance $\mop$
of generic mutator operation $\MOP$,
then $\MOP$ is a {\em pure mutator}. Informally, the return value
lists of pure mutators do not depend on the states of the objects
on which they are invoked. A push operation for an unbounded stack is a pure
mutator.
For any abstract data type, we can make pure mutator operations
fast. Other operations take time $d$ in the
implementation.
\begin{theorem}
\label{fast-pure-mod}
Let $T$ be an abstract data type. If $T$ has generic pure mutator operations
$\MOP_1, \ldots, \MOP_n$, then there exists a linearizable implementation of
objects of type $T$ where
$|\MOP_1| = \cdots = |\MOP_n| = 0$ and $|\OP_1| = \cdots =
|\OP_m| = d$ for all
other generic operations $\OP_1,\ldots,\OP_m$.
\end{theorem}
\begin{proof}
We exhibit an implementation where
$|\MOP_1| = \cdots = |\MOP_n| = 0$ and
all other operations ($\OP_1, \ldots, \OP_m$) take time $d$. Each process
keeps a copy of all objects in its local memory. When a $\mop_i$ is invoked
at process $p$, $p$ sends a message $\DoMop_i(X)$
with the argument list for the
operation to all processes (including itself) and returns immediately.
When a process receives any form of $\DoMop$ message,
it performs the operation on $X$ in its local memory. When an $\op_j$
on object
$X$ is invoked at process $p$, $p$ sends a message $\DoOp_j(X)$ with the
argument list for the operation to all processes (including
itself), waits $d$
time, and returns the result of performing the operation. When a process
receives any form of $\DoOp$
message, it performs the operation on $X$ in its
local memory. If the message was sent by that process, it saves the result so
that the process can return it.
We can break ties in the following way: $\DoMop$
messages are handled before
$\DoOp$ messages, $\DoMop_k$ is handled
before $\DoMop_l$ if $k < l$, $\DoOp_k$ is
handled before $\DoOp_l$ if $k < l$,
and we use process identifiers to break any remaining ties.
The correctness proof is very similar to
a proof in~\cite{cj99-09-05} for read/write objects.
Let $\rho$ be an admissible execution. We systematically construct the
desired $\tau$. Each operation in $\rho$ {\it occurs\/}
time $d$ after the time
of its call. Let $\tau$ be the sequence of operations in $\rho$
ordered by the
times of occurrence, breaking ties by placing $\mop$ operations before $\op$
operations, $\mop_k$ before $\mop_l$ if $k < l$, $\op_k$ before $\op_l$ if
$k < l$, and by using process identifiers to break any remaining ties. By
construction, $\rho|p = \tau|p$ for all $p$, and $\tau$ preserves the relative
ordering of nonoverlapping operations.
We now must show that $\tau$ is legal or that, for each object $X$, $\tau|X$ is
in the sequential specification of $T$.
All $\mop$ operations are legal because they are pure mutators.
Consider an $\op_k$ that returns a value list. It returns based on its local
state at its response time in $\rho$. Its local state reflects all changes
made by
operations occurring up to the time of the $\op_k$ in $\rho$. Thus
the $\op_k$
returns a legal value list in $\tau$.
\end{proof}
\begin{corollary}
There exists a linearizable implementation of increment-half objects
(Table~\ref{inc-half}), where $|\READ| = d$, $|\INC| = 0$,
and $|\HALF| = 0$.
\end{corollary}
Accessors and pure mutators can be (separately) optimized.
Can we optimize a self-commuting
operation that
both accesses and modifies the states of the objects on which it
is invoked?
We must be careful, because two fast
operation instances do not know about each other if they are executed less
than time $d$ apart.
To optimize generic operation $\FOP$, we show it is
sufficient for $\FOP$ to
be {\em self-oblivious}. Intuitively, this means that a $\fop$
operation instance does
not indirectly affect another $\fop$ operation instance.
Let $\alpha_1, \alpha_2, \ldots$ be operation
sequences.
$\FOP$ is self-oblivious if, whenever $\alpha_1 \circ \fop^1,
\alpha_1 \circ \alpha_2 \circ \fop^2, \ldots,
\alpha_1 \circ \alpha_2 \circ \cdots \circ \alpha_n \circ \fop^n,
\ldots$ are legal, there exists an
instantiation of return values for the operations in $\alpha_i$ for each
$i \ge 1$, creating new
sequences of operations $\alpha_i'$ for each $i \ge 1$, such that
$\alpha_1' \circ \fop^1 \circ \alpha_2' \circ \fop^2
\circ \cdots \circ \alpha_n' \circ \fop^n \cdots$ is legal. It is
assumed that no $\alpha_i$ contains a $\fop$ instance.
An operation that immediately does not commute with itself is
not self-oblivious, because the $\alpha$'s can be empty, with the
possible exception of $\alpha_1'$.
What kinds of operations are self-oblivious? Accessors are
self-oblivious because they do not change object state information. A pure
mutator is self-oblivious because it
is always legal after a legal sequence of operations.
However,
determining whether an arbitrary operation is self-oblivious involves
the semantics for all operations of its type. An operation
that immediately
commutes with itself is self-oblivious if all other operations
of its type do not perform conditional updates (whether and how
to perform updates are based on object state information).
For read/write objects, reads and writes are self-oblivious.
Reads are self-oblivious because they are accessors.
We now show how writes satisfy the self-oblivious
definition. Each write in
an operation sequence is legal. If $\alpha_1 \circ \writeit^1,
\alpha_1 \circ \alpha_2 \circ \writeit^2, \ldots,
\alpha_1 \circ \alpha_2 \cdots \alpha_n \circ \writeit^n,
\ldots$ are
legal, then
$\alpha_1 \circ \writeit^1 \circ \alpha_2' \circ
\writeit^2 \cdots \alpha_n'
\circ \writeit^n$ is legal,
where the return value for the reads in $\alpha_i'$
for $i \ge 2$ is the value written by $\writeit^{i-1}$.
For unbounded
queues and
stacks, enqueues and pushes are self-oblivious.
We now show how enqueues satisfy the self-oblivious
definition. The argument
for pushes is analogous. If $\alpha_1 \circ enqueue^1,
\alpha_1 \circ \alpha_2 \circ enqueue^2, \ldots,
\alpha_1 \circ \alpha_2 \cdots \alpha_n \circ enqueue^n,
\ldots$ are legal, then
$\alpha_1 \circ enqueue^1 \circ \alpha_2' \circ enqueue^2 \cdots \alpha_n'
\circ enqueue^n$ is legal, where the return value for the first dequeue
returning $\perp$ in $\alpha_i$
for $i \ge 2$ is changed in $\alpha_i'$ to the value enqueued by
$enqueue^{i-1}$.
For BANKACCOUNT2 objects,
$\RSUC$ and $\RCUS$ are
self-oblivious, because each immediately commutes with
itself and does not perform conditional updates. We now show how $\RSUC$
satisfies the self-oblivious definition. The argument for
$\RCUS$ is analogous.
An $\RSUC$ operation in an
operation sequence is legal if the value returned is
the value written by the last $\RCUS$ preceding the $\RSUC$ in the
sequence. If
$\alpha_1 \circ \rcus^1,
\alpha_1 \circ \alpha_2 \circ \rcus^2$, $\ldots,
\alpha_1 \circ \alpha_2 \cdots \alpha_n \circ \rcus^n,
\ldots$ are legal, then
$\alpha_1 \circ \rcus^1 \circ \alpha_2' \circ \rcus^2 \cdots \alpha_n'
\circ \rcus^n$ is legal, where the return value for the $\RSUC$ operations
in $\alpha_i'$ for $i \ge 2$ is the value written by $\rcus^{i-1}$.
\begin{theorem}
\label{selfopthm}
Let $T$ be an abstract data type with a self-oblivious generic operation
$\SELFOP$.
There exists a linearizable implementation of objects of type $T$ where
$|\SELFOP| = 0$ and $|\OP| = 2d$ for all other generic operations $\OP$.
\end{theorem}
We instantiate the algorithm in Figure~\ref{opt-fig}
\auquery{In Figure~\ref{opt-fig}, Is everything italic that should be?
Also, check capitalization and punctuation. Put periods at ends of
sentences?}
to yield our
implementation of objects of type $T$. Each assignment
statement is executed locally. Each process keeps both an actual copy and a
scratch copy of each object.
In addition, each process maintains an ordered set of message slots, where
each slot is indexed by a time. A slot holds messages that are either sent
or received at a given time.
{\footnotesize
\begin{figure}%[tbh]
${\it fastop}({\it args})[X,p]$: \\
$\scratch_X := X$ \\
Handle all unhandled messages in slots up to (current time) \\
about $X$ in message slot order, \\
updating $\scratch_X$ as necessary \\
Determine return value based on $\scratch_X$\\
Send handle-fastop$(X,{\it args},p)$ to all processes \\
${\it fastopret}({\it return value})[X,p]$ \\
${\it otherop}({\it args})[X,p]$: \\
send handle-otherop$(X,{\it args},p)$ to all processes \\
receive a message: \\
if it is a handle-fastop, then \\
\null\quad Insert in slot ($\mbox{current time} - d$), tiebreaking after
other \\
\null\quad message types and then by process id \\
else /* handle-otherop message */ \\
\null\quad Insert in slot (current time)
with a fixed tiebreaking \\
\null\quad order among message types
and then by process id \\
\null\quad timerset($p,d$) \\
when timer goes off: \\
for each unhandled message in slots up to
($\mbox{current time} - d$), \\
considered in message slot order, do: \\
\null\quad Use it to update the actual copy of its object \\
\null\quad if it is not a handle-fastop message, then \\
\null\quad /* it is of the form handle-otherop$(X,{\it args},q)$ */ \\
\null\qquad if $p = q$, then \\
\null\qquad\quad Determine return value based on $X$ \\
\null\qquad\quad ${\it otheropret}({\it return~value})[X,p]$ \\
\null\quad Mark message as handled
\caption{Algorithm for optimizing one operation: Code for
process $p$\label{opt-fig}}
\end{figure}%9
}
When a $\selfop$ operation is invoked, the
invoking process sends a message about the operation to all processes
(including itself), determines the
return value list (possibly by updating its scratch copy of the
object based on messages it has received), and returns. When an instance of
another generic operation is invoked, the invoking process sends a message
about the operation to all processes (including itself).
Messages about $\selfop$
have a higher priority than messages about other operations.
If a message about a
self-oblivious operation is received at time $t$, then it is placed in slot
$t-d$. If any other message
is received at time $t$, then it is placed in slot
$t$.
If a received message is not about a $\selfop$
operation, then a timer for $d$ later is set. When a timer goes off, all
messages are handled that are
in slots indexed by time up to $d$ before the current time,
updating actual copies of objects
as necessary. At this time, if the
process has a pending operation for which its message has been handled, the
process completes its pending operation. The timers and slots are also used
to give $\selfop$ higher
priority than other operations because of its need to
complete quickly.
\begin{proof}
We must prove that the implementation guarantees
linearizable executions.
Let $\rho$ be an admissible execution. To form
$\tau$, we place all operations
in order according to their message slots and, if their message slots
contain multiple messages,
according to their positions in the message slots.
First, we show that $\tau$ preserves the relative ordering of nonoverlapping
operations in $\rho$. Suppose that $\op_1$ precedes $\op_2$ in $\rho$; that
is, $t_1$, the finishing time for $\op_1$, is less than $t_2$, the starting
time for $\op_2$. We
must analyze the ordering based on the classification of $\op_1$ and $\op_2$.
Because $\op_1$ may or may not be a $\selfop$ operation
and $\op_2$ may or may not be
a $\selfop$ operation, we have four cases to check:
\begin{itemize}
\item[a.] $\op_1$ and $\op_2$ are $\selfop$ operations. The starting time
for $\op_1$ is $t_1$. $\op_1$'s message slot
is $(t_1 + d) - d = t_1$, and
$\op_2$'s message slot is $(t_2 + d) - d = t_2$.
We have $t_1 < t_2$, and thus
$\op_1$ is correctly placed before $\op_2$ in $\tau$.
\item[b.] $\op_1$ is a $\selfop$ operation, but $\op_2$ is not a $\selfop$
operation. As before, $\op_1$'s message slot is $t_1$, and $\op_2$'s message
slot is $t_2 + d$. We have $t_1 < t_2$,
and so $t_1 < t_2 + d$. Thus, $\op_1$ is
correctly placed before $\op_2$ in $\tau$.
\item[c.] $\op_1$ and $\op_2$ are not $\selfop$
operations. The starting time
for $\op_1$ is $t_1 - 2d$. $\op_1$'s message slot is
$(t_1 - 2d) + d = t_1 - d$, and $\op_2$'s message slot is $t_2 + d$. We have
$t_1 < t_2$, and so $t_1 - d < t_2 + d$. Thus, $\op_1$ is
correctly placed before $\op_2$ in $\tau$.
\item[d.] $\op_1$ is not a $\selfop$ operation, but $\op_2$ is.
As before, $\op_1$'s message slot is $t_1 - d$, and $\op_2$'s message
slot is $(t_2 + d) - d = t_2$. We have $t_1 < t_2$,
and so $t_1 - d < t_2$. Thus,
$\op_1$ is correctly placed before $\op_2$ in $\tau$.
\end{itemize}
We now prove the legality of
$\tau = \op_1 \circ \op_2 \circ \cdots \circ
\op_n \cdots$ by induction.
We must first show that $\op_1$ is legal. Suppose $\op_1$ was performed on
object $X$. If $\op_1$ is a $\selfop$ operation,
then $\op_1$'s return value is based on the sequence of operations that are
executed on $\scratch_X$ at $\op_1$'s process by the time $\op_1$ is
invoked in $\rho$. Since $\op_1$ is the first operation placed, it is in
the earliest time slot. Thus we have an empty sequence of operations that
are
executed on $\scratch_X$ at $\op_1$'s process by the time $\op_1$ is invoked
in $\rho$. $\op_1$ is legal, because the return value is based on the
semantics of $\op_1$'s abstract data type. If $\op_1$ is not a $\selfop$
operation, then $\op_1$'s
return value is based on the sequence of operations
that are executed at $\op_1$'s process in $\rho$ by the time $\op_1$
returns in $\rho$. As before, $\op_1$ is legal.
The induction step comes next. Suppose
that $\op_1 \circ \cdots \circ \op_k$
is legal. We must prove that
$\op_1 \circ \cdots \circ \op_k \circ \op_{k+1}$
is legal. We have two cases to consider. Either $\op_{k+1}$ is a $\selfop$
operation, or it is not. Suppose that $\op_{k+1}$ is
performed on object $X$.
Case 1. If $\op_{k+1}$ is a $\selfop$ operation, then
its return value is based on the
sequence of operations that are executed on $\scratch_X$ at $\op_{k+1}$'s
process by the time $\op_{k+1}$ is invoked in $\rho$. We must now analyze
the structure of $\op_1 \circ \cdots \circ \op_k$ to continue.
Either $\op_1 \circ \cdots \circ \op_k$ has no $\selfop$ operation, or the sequence
has at least one $\selfop$ operation.
Case 1.1. If $\op_1 \circ \cdots \circ \op_k$
has no $\selfop$ operation,
then the message
slots of $\op_1, \ldots, \op_k$ are earlier than or at the same time as
$\op_{k+1}$'s message slot. Thus, $\op_1, \ldots, \op_k$ have
been performed, or their messages have been received at $\op_{k+1}$'s
process, implying that they have been performed on $\scratch_X$ (if
they use $X$ at all). $\scratch_X$ reflects the work done by
$\op_1, \ldots, \op_k$. Thus, $\op_{k+1}$ is legal, because it was performed
based on a legal sequence of operations and the semantics of $\op_{k+1}$'s
abstract data type.
Case 1.2. Let $\op_l$
be the last $\selfop$ operation that starts before
$\op_{k+1}$. Either $\op_l$ starts more than time $d$ before $\op_{k+1}$ or
$\op_l$ starts less than time $d$ before $\op_{k+1}$.
Case 1.2.1. If $\op_l$ is performed on $X$, then $\scratch_X$ reflects
the change to $X$, implying that $\op_{k+1}$ is legal.
Case 1.2.2.
Consider the operations that start less than time $d$ before $\op_{k+1}$
and are placed before $\op_{k+1}$ in $\tau$. The time slot for any operation
$\op$ that
is not a $\selfop$ is $t_{\op} + d$, where $t_{\op}$ is the starting
time for $\op$. The time slot for $\op_{k+1}$ is
$t_{\op_{k+1}}$, its starting
time, since $\op_{k+1}$ is a $\selfop$. Because
$t_{\op_{k+1}} - t_{\op} < d$, we have
$t_{\op_{k+1}} < t_{\op} + d$. Thus, $\op$ is placed after $\op_{k+1}$ in
$\tau$.
This means that any operation starting less than time $d$ before $\op_{k+1}$
and placed before $\op_{k+1}$ in $\tau$ is a $\selfop$. In particular, $\op_k$
is a $\selfop$. Let $\op_f$ be the first $\selfop$
starting less than time $d$
before $\op_{k+1}$ and placed before $\op_{k+1}$.
$\op_f, \ldots, \op_{k+1}$
are all $\selfop$ operations. By the induction hypothesis,
$\op_1 \cdots \op_k$ is legal. Let $\alpha_1 = \op_1 \cdots \op_{f-1}$.
$\alpha_1 \circ \op_f$ is legal,
$\alpha_1 \circ \op_{f+1}$ is legal, $\ldots, \alpha_1 \circ
\op_k$ is legal, and $\alpha_1 \circ \op_{k+1}$ is legal,
because the processes
performing $\op_f, \ldots, \op_k, \op_{k+1}$
have received information about all operations in $\alpha_1$. Each
operation is performed based on a legal sequence of operations and the
semantics of the operation's abstract data type. We now use the
self-oblivious property. Let each of
$\alpha_2, \ldots, \alpha_{k-f+2}$ be the empty
sequence. The largest subscript on an $\alpha_i$ is $k-f+2$, because
we need to place $\op_f, \ldots, \op_{k+1}$, a total of $k-f+2$
operations. $\alpha_1 \circ \op_f \circ \alpha_2 \circ \op_{f+1} \cdots
\alpha_{k-f+1} \circ
\op_k \circ \alpha_{k-f+2} \circ \op_{k+1}$ is legal
by the definition.
Case 2. If $\op_{k+1}$ is not a $\selfop$
operation, then its return
value is based on the sequence of operations that execute at
$\op_{k+1}$'s process in $\rho$ by the time $\op_{k+1}$ returns in $\rho$.
This sequence is a prefix of $\tau$,
because the execution order of operations
is the message slot ordering. By the induction hypothesis, the sequence is
legal. Since $\op_{k+1}$
is executed based on a legal sequence of operations and the semantics of
$\op_{k+1}$'s abstract data type, it is legal.
\end{proof}
In our implementation optimizing a self-oblivious operation, all nonoptimized
operations have a worst-case response time of $2d$. Our implementation
assumes that the optimized operation has cyclic dependences with all other
operations. Thus, for certain abstract data types, there are
gaps between our
upper bound and the lower bounds.
\subsubsection{Types having a tight lower bound for all operations}
We now exhibit some abstract data types with implementations
in which the total time for {\em all\/} operations matches the lower bounds
from Subsection~\ref{lb-all}.
Minimizing the total time required for all operations
may help when the frequencies of invoking each operation are approximately
equal.
A pure modify-read (PMR) object is a variable $X$ that can
be read or modified by a pure mutator operation. This is a generalization
of the pseudo--read-modify-write (PRMW) object,
because the pure mutator operation may be such that
it eventually does not commute with itself.
(A PRMW object is a variable that can be
read, written, or modified by a pure mutator operation that is a commutative
arithmetic operation; see~\cite{cj99-09-03}.)
There exists a linearizable implementation of PMR objects with the
total worst-case response time for all operations matching the lower bound
on the total worst-case response time for all operations. In the
implementation, the pure mutators take time $0$, and the read operation takes
time $d$.
\begin{theorem}
\label{prmw-sc-lin}
For any set of PMR objects with operations $\READ$ and
pure mutator operations
$\MOP_1,\ldots,\MOP_n$, there exists a
linearizable implementation of the set
with $|\READ| = d$ and
$|\MOP_1| = \cdots = |\MOP_n| = 0$.
\end{theorem}
\begin{proof}
Since all nonread operations are pure mutators, we can instantiate the
implementation described in Theorem~\ref{fast-pure-mod} to yield an
implementation achieving the desired time bounds.
\end{proof}
An $n$-component array object is an array of $n$ items with each item
capable of being read and/or written by some operation. For example,
$R1W2R3(v_2)(v_1,v_3)$ reads $v_1$ from item $1$, writes $v_2$ to item $2$,
and reads $v_3$ from item $3$.
Thus,
we combine the call and response events, as described in
Section~\ref{defs}.
Any operation in which the items being read and the
items being written are disjoint sets is easily seen to be immediately
self-commuting, and any operation that is not immediately self-commuting
must read and write the same item. If $\OP_1$ reads item $i$ and $\OP_2$
writes item $i$, then $\OP_1$ and $\OP_2$ immediately do not commute.
We now exhibit some implementations of component array data types without
$n$-cyclic dependences that match the lower bounds given earlier.
\begin{theorem}
\label{half-all}
For any component array data type $T$ with immediately self-commuting
operations $\OP_1,\ldots,\OP_n$ and
no $k$-cyclic dependences (for each $k \ge 2$), there exists a linearizable
implementation of $T$ such that \(\sum_{i=1}^{n} |\OP_i| = nd/2 \).
\end{theorem}
\begin{proof}
We exhibit an implementation where $|\OP_1| = \cdots = |\OP_n| = d/2$.
We need our assumption of no $k$-cyclic dependences here because of
Theorem~\ref{cd-pair}. Each
process keeps a copy of all objects in its local memory. When an $\op_i$ on
object $X$ is invoked at process $p$, $p$
sends a message $\DoOp_i(X)$ with the
argument list for the operation to all processes (including itself), waits
$d/2$ time, and returns
the result of performing the operation. When a process
receives any form of $\DoOp$ message, it performs the operation on the
appropriate object in its local memory. We can break ties in the following
way: $\DoOp_l$ is handled before $\DoOp_m$ if $\op_l$ reads from a component that
$\op_m$ writes. We
use process identifiers to break any remaining ties. The
assumption about no $k$-cyclic dependences always allows ties to be broken.
We now prove that this algorithm is correct. Let $\rho$ be an admissible
execution. We
systematically construct $\tau$. Each operation that does not
write occurs at its response time. Every other operation occurs
$d/2$ time after its response time.
Let $\tau$ be the sequence of operations
in $\rho$ ordered by times of occurrence. We break ties by placing $\op_l$
before $\op_m$ if $\op_l$ reads a component that $\op_m$ writes
and by using process
identifiers to break any remaining ties.
By construction, $\rho | p = \tau | p$ for all $p$, and $\tau$ preserves the
relative ordering of nonoverlapping operations.
We must now show that
$\tau = \op_1 \circ \op_2 \circ \cdots$ is legal. Consider $\op_i$,
which returns based on the local
state of its process $d/2$ time after its invocation time, $t$. The local
state reflects all changes made by other operations starting by time $t - d/2$.
Any operation starting after time $t - d/2$ does not appear in $\tau$ before
$\op_i$ by construction
unless it does not write. Therefore, it does not affect
the legality of $\op_i$.
\end{proof}
If $\OP_i$ and $\OP_j$
immediately do not commute for all $i \ne j$, then our
upper bound matches the lower bound from Theorem~\ref{clique}.
The following abstract data type satisfies the hypothesis of
Theorem~\ref{half-all}.
Consider a 2-component array object with operations $R1R2$, $W1R2$, and $W2$.
The abstract data type has a $3$-clique as its commutativity graph. Each
operation is immediately self-commuting. No cyclic dependences exist. This
type is similar to BANKACCOUNT2.
We can optimize implementations of certain data
types with 0-$d$ assignments if they have no $n$-cyclic dependences.
\begin{theorem}
\label{vct}
For any component array data type $T$ with immediately self-commuting
operations $\OP_1,\ldots,\OP_n$ and
no $k$-cyclic dependences (for each $k \ge 2$),
there exists a linearizable
implementation of $T$ such that \(\sum_{i=1}^{n} |\OP_i| = |\VC(T)| d \).
\end{theorem}
\begin{proof}
Assume that $\VC(T)=\{\OP_1,\ldots,\OP_{|\VC(T)|}\}$.
We exhibit an implementation where $|\OP_1| = \cdots = |\OP_{|\VC(T)|}| = d$
and
$|\OP_{|\VC(T)|+1}| = \cdots = |\OP_n| = 0$. By the definitions of
vertex cover and commutativity graph,
$\OP_i$ and $\OP_j$ immediately
commute for all $i,j$ in $\{|\VC(T)|+1, \ldots, n\}$.
Each
process keeps a copy of all objects in its local memory. When an $\op_i$ on
object $X$ is invoked at process $p$, $p$ sends a
message $\DoOp_i(X)$ with the
argument list for the operation to all processes (including itself). If
$i > |\VC(T)|$, the process immediately returns the result of the operation
(reading the components that should be read). If $i \le |\VC(T)|$, the
process waits
$d$ time and then
returns the result of performing the operation. When a process
receives any form of $\DoOp$ message, it performs the operation on the
appropriate object in its local memory. We can break ties in the following
way: $\DoOp_i$ is handled before $\DoOp_j$ if $\OP_i \not \in \VC(T)$ and
$\OP_j \in \VC(T)$. Otherwise, $\DoOp_i$ is handled before $\DoOp_j$ if
$i < j$. We use process identifiers to break any remaining ties.
We now prove that this algorithm is correct. Let $\rho$ be an admissible
execution. We systematically construct $\tau$. Each operation occurs
at its response time.
Let $\tau$ be the sequence of operations
in $\rho$ ordered by times of occurrence, breaking ties by placing $\op_k$
before $\op_l$ and using the same method as in message handling. By construction,
$\rho | p = \tau|p$ for all $p$, and $\tau$ preserves the relative ordering
of nonoverlapping operations.
We must now show that $\tau$ is legal. We have
$\tau = \op_{1i_1} \circ \op_{2i_2} \circ \cdots$, where $ki_k$ means that
the $k$th operation in $\tau$ is an $\op_{i_k}$ operation.
In $\op_{ki_k}$, if
$i_k \le |\VC(T)|$, then the operation returns based on the local state of its
process $d$ time after its invocation time, $t$. The local
state reflects all changes made by other operations starting by time $t$.
Any operation starting after time $t$ does not appear in $\tau$ before
$\op_{ki_k}$ by construction unless it immediately commutes with $\op_{ki_k}$.
Therefore, it does
not affect the legality of $\op_{ki_k}$. If $i_k > |\VC(T)|$,
then the operation returns based on the local state of its process at its
invocation time, $t$. The local state reflects all changes made by other
operations starting by time $t-d$. Any operation starting after time $t-d$
does not appear in $\tau$ before $\op_{ki_k}$ by construction unless it
immediately commutes with $\op_{ki_k}$. Therefore, it does not affect the
legality of $\op_{ki_k}$.
\end{proof}
If $T$'s commutativity graph has a small vertex cover ($|\VC(T)| < n/2$), then
the implementation described in the proof of Theorem~\ref{vct} is faster than
that in the proof of Theorem~\ref{half-all} with
respect to the total worst-case time complexity for all operations.
The following
abstract data type satisfies the hypothesis of Theorem~\ref{vct}.
Consider a 3-component array object with operations $W1$, $R2$, $R3$, $R1W2W3$,
and $R1R2W3$.
Figure~\ref{t:vctpic} shows
the commutativity graph for the abstract data type.
The set $\{R1W2W3, R1R2W3\}$ forms a vertex cover for the graph.
Each operation is immediately self-commuting. No cyclic dependences exist.
This type is similar to BANKACCOUNT3.
\begin{center}
\begin{figure}[tbh]
\setlength{\unitlength}{0.00083300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(5270,4895)(-11,-4044)
\thicklines
\put(1651,-3661){\circle{750}}
\put(3301,-1861){\circle{750}}
\put(4876,-3661){\circle{750}}
\put(4701,-136){\circle{750}}
\put(1651,-136){\circle{750}}
\put(3001,-2086){\line(-5,-6){1088.115}}
\put(3601,-2086){\line( 5,-6){1057.377}}
\put(4501,-3736){\line(-1, 0){2475}}
\put(1876,-436){\line( 1,-1){1200}}
\put(4726,-511){\line( 0,-1){2850}}
\put(4651,-511){\line(-1,-1){1125}}
\put(3026,-1936){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$r1w2w3$}}}
\put(4651,-3736){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$r1r2w3$}}}
\put(1576,-3736){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$w1$}}}
\put(1576,-211){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$r2$}}}
\put(4651,-211){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$r3$}}}
\end{picture}
\caption{Example commutativity graph for Theorem~\ref{vct}}
\label{t:vctpic}
\end{figure}
\end{center}
Theorems~\ref{half-all} and~\ref{vct} hold for abstract data types
without $n$-cyclic dependences. However,
some abstract data types do have $n$-cyclic dependences. Improving the
bounds for implementations of those types is much more complicated than
improving the bounds for implementations of types without $n$-cyclic
dependences. We show a tight bound for component array data types
with three immediately self-commuting operations and a $3$-cyclic dependence.
\begin{theorem}
\label{2dimpcd}
If $T$ is a component array data type with
operations $\OP_1$, $\OP_2$, and
$\OP_3$ having a $3$-cyclic dependence,
where $\OP_1$ is
immediately self-commuting and has no cyclic dependences with
$\OP_2$ and $\OP_3$,
then there exists a linearizable
implementation of $T$ such that \(\sum_{i=1}^{3} |\OP_i| = 2d \).
\end{theorem}
\begin{proof}
We exhibit an implementation where $|\OP_1| = 0$, $|\OP_2| = d$, and
$|\OP_3| =
d$. $|\OP_1| + |\OP_2| + |\OP_3| \ge 2d$ by
Theorem~\ref{3c2d}. At least one of
$|\OP_1|, |\OP_2|,$ and $|\OP_3|$ must be at least $d$ by
Theorem~\ref{sc-big-cycle}.
The assumption that $\OP_1$ is
immediately self-commuting is necessary because
of Theorem~\ref{slow-op}.
Each process keeps a copy of all objects in its local memory. When an
$\op_1$ is invoked at process $p$, $p$ sends a message $\DoOp_1(X)$ with the
argument list for the operation to all processes (including itself) and returns
immediately. When a process receives any form of $\DoOp_1$ message, it performs
the operation on $X$ in its local memory. When an $\op_j$
($j = 2$ or $3$) on
object $X$ is invoked at process $p$, $p$ sends a message $\DoOp_j(X)$ with the
argument list for the operation to all processes (including itself), waits $d$
time, and returns the result of performing the operation. When a process
receives any form of $\DoOp_j$ ($j = 2$ or $3$) message, it performs the
operation on $X$ in its local memory.
We can break ties in the
following way: $\DoOp_i$ messages are handled before
$\DoOp_j$ messages if $i <
j$, and we use process identifiers to break any remaining ties.
We now prove that this algorithm is correct.
Let $\rho$ be an admissible execution. We systematically construct the
desired $\tau$. Each operation in $\rho$ occurs time $d$ after the time
of its call. Let $\tau$ be the sequence of
operations in $\rho$ ordered by the
times of occurrence, breaking ties by placing
$\op_k$ before $\op_l$ if
$k < l$, and using process identifiers to break any remaining ties. By
construction, $\rho|p = \tau|p$ for all $p$, and $\tau$ preserves the relative
ordering of nonoverlapping operations.
We now must show that $\tau$ is legal, or that for each object $X$, $\tau|X$ is
in the sequential specification of $T$.
Consider an $\op_k$ that returns a value list. It returns based on the local
state of its process at its response time in $\rho$. The local state reflects
all changes made by
operations occurring up to the time of the $\op_k$ in $\rho$. Thus
the $\op_k$
returns a legal value list in $\tau$.
\end{proof}
The BANKACCOUNT3 abstract data type satisfies the hypothesis of
Theorem~\ref{2dimpcd}.
\subsubsection{Relative costs}
Let us summarize this section's findings. We know
several algebraic properties of operations causing individual
operations
or groups of operations satisfying one of those properties to have a worst-case
response time of $\Omega(d)$.
Let $L_1$ be the list of properties.
We have linearizable implementations of classes of abstract data types
in which the worst-case response time for one operation or a group of
operations is optimized (the worst-case response time is $0$), provided that
the operation or group satisfies one of three algebraic properties.
Let $L_2$ be the list of three properties.
If not satisfying any property in $L_2$ implies satisfying a property in
$L_1$, then we could say that
sequential consistency and linearizability are equally costly. Unfortunately,
$L_1$ and $L_2$ do not quite satisfy that desired relationship.
However,
if the operation or group of operations to be optimized is known to be
invoked most frequently, then sequential consistency and
linearizability are equally costly for those abstract data types.
In addition, now we have linearizable implementations of
certain classes of abstract data types with the total worst-case completion
time for all operations matching the sequential consistency lower bound for
all operations. This implies that
sequential consistency and linearizability are equally costly for these
classes.
\section{Imperfect clocks}
\label{approx}
In this section we assume that clocks run at the same rate as real time but
are not initially synchronized, and that message delays are in the range
$[d-u,d]$ for some $u > 0$.
We prove that many operations in linearizable implementations of virtual
shared objects must have worst-case response times that are $\Omega(u)$,
under reasonable assumptions about the amount of sharing of objects by
processes. We show that for many abstract data types, we can provide
operations with worst-case time complexities of $0$ in sequentially
consistent implementations. This shows that linearizability can be more
expensive than sequential consistency under our assumptions about process
synchrony. Subsection~\ref{lin-lb}
contains our lower bounds on the costs of
operations under linearizability, and
Subsection~\ref{sc-ub} contains
our sequentially consistent implementations of
classes of abstract data types.
To prove our lower bounds, we use shifting techniques
introduced in~\cite{cj99-09-16}
to prove lower bounds on the precision achieved by
clock synchronization algorithms. These techniques are used
in~\cite{cj99-09-05} to prove $\Omega(u)$ worst-case
response times for read, write,
and enqueue operations. Shifting changes the
timing and ordering of system events without changing the local
view of each process.
In an execution with a certain set of clocks, if process $p$'s history is
changed so that the real times at which the events occur are shifted by some
amount $s$ and if $p$'s clock is also shifted by amount $s$, then the result
is another execution in
which every process ``sees'' the same events happening
at the same real times. Because its clock has changed by the same amount,
$p$ cannot detect the changes in the real times at
which events occur.
The {\em view\/} of process $p$ in history $\pi$ of $p$ with clock $C$ is the
concatenation of the sequences of steps in $\pi$, arranged in real-time
order. The view does not contain the real times
when the steps occurred. History
$h$ of process $p$ with clock $C$ and history $h'$ of process $p$ with
clock $C'$ are {\em equivalent\/}
if $p$'s view is the same in both histories.
Execution $\rho$ of the system $(P,\cal C)$ and execution $\rho'$ of the system
$(P,\cal C')$ are equivalent if the component histories for $p$ in $\rho$ and
$\rho'$ are equivalent for all $p$ in $P$. This means that the processes
cannot tell the difference between the two executions.
Given a history $\pi$ of a process $p$ with clock $C$ and real number $s$, a
new history $\pi' = \shift(\pi,s)$ is defined by $\pi'(t) = \pi(t+s)$ for
all $t$. This means that all tuples are shifted earlier in $\pi'$ by $s$ if
$s > 0$ and later in $\pi'$ by $-s$ if $s < 0$. Given a clock $C$ and a real
number $s$, a new clock $C' =
\shift(C,s)$ is defined by $C'(t) = C(t) + s$ for
all $t$. This means that the clock is shifted forward by $s$ if $s > 0$ and
backward by $-s$ if $s < 0$.
Shifting a history of process $p$ and $p$'s clock by the same amount produces
another history.
\begin{lemma}[\cite{cj99-09-16}]
\label{new-hist}
Let $\pi$ be a history of a
process $p$ with clock $C$, and let $s$ be a real
number. Then $\shift(\pi,s)$ is a history of $p$ with clock $\shift(C,s)$.
\end{lemma}
Given an execution $\rho$ of a system $(P,\cal C)$ and a real number $s$, we
define a new execution $\rho' = \shift(\rho,p,s)$ by replacing $\pi$ ($p$'s
history in $\rho$) with $\shift(\pi,s)$
and by retaining the same correspondence
between sends and receives of messages. (We redefine the correspondence so
that a pairing in $\rho$ that involves $p$'s event at time $t$ involves
$p$'s event at time $t-s$ in $\rho'$.) All tuples for a
process $p$ are shifted
by $s$, but no other tuples are changed.
Given a set of clocks
${\cal C} = \{C_q\}_{q \in P}$ and a
real number $s$, we define a new set of
clocks ${\cal C'} = \shift({\cal C},p,s)$ by replacing $C_p$ with
$\shift(C_p,s)$.
Process $p$'s clock is shifted forward by $s$, but no other clocks are changed.
Shifting one process's history and its clock by the same amount in an execution
results in an execution that is equivalent to the original.
\begin{lemma}[\cite{cj99-09-16}]
\label{shift-equiv}
Let $\rho$ be an execution of system $(P,\cal C)$,
let $p$ be a process, and let $s$
be a real number. Let ${\cal C'} = \shift({\cal C},p,s)$ and
$\rho' = \shift(\rho,p,s)$.
Then $\rho'$ is an execution of $(P,{\cal C'})$, and
$\rho'$ is equivalent to $\rho$.
\end{lemma}
The following lemma tells us the amount of change encountered in
message delays when an execution
is shifted. Shifting an admissible execution may produce a nonadmissible
execution.
\begin{lemma}[\cite{cj99-09-16}]
\label{shift}
Let $\rho$ be an execution of the system $(P,\cal C)$, let
$p$ be a process, and let $s$
be a real number. Let ${\cal C'} = \shift({\cal C},p,s)$ and
$\rho' = \shift(\rho,p,s)$. Suppose $x$ is the delay of message $m$ from
process $q$ to process $r$ in $\rho$. Then the delay of $m$ in $\rho'$ is
$x$ if $q \neq p$ and $r \neq p$, $x-s$ if $r=p$, and $x+s$ if $q=p$.
\end{lemma}
\subsection{Lower bounds for linearizability}
\label{lin-lb}
We now give two algebraic properties of operations that cause individual
operations to have a worst-case time complexity of
$\Omega(u)$ in linearizable
implementations of objects of their abstract data types, under reasonable
assumptions about the amount of sharing of objects by processes.
This first result is that an operation must have worst-case
time complexity
of $\Omega(u)$ if the legality of some sequence depends on the
order in which two instances of the operation are executed.
\begin{theorem}
\label{imp-no-commute}
Let $T$ be an abstract data type with an operation $\OP$
such that
$\rho \circ \op^1 \circ \op^2$ does not look like
$\rho \circ \op^2 \circ \op^1$ for some operation sequence $\rho$ and some
operation instances $\op^1$ and $\op^2$.
For any linearizable implementation of an object of type $T$ in a system
with at least
three different processes, $|\OP| \ge u/2$.
\end{theorem}
\begin{proof}
We generalize the proofs in~\cite{cj99-09-05}
that a write for a read/write object and an enqueue for
a queue must take time at least $u/2$.
Since $\rho \circ \op^1 \circ \op^2$ does not look like
$\rho \circ \op^2 \circ \op^1$, there exists an operation sequence $\gamma$
such that $\rho \circ \op^1 \circ \op^2 \circ \gamma$ is legal but
$\rho \circ \op^2 \circ \op^1 \circ \gamma$ is not legal.
Let $A$ be an object of $T$.
Let $p_1$ and $p_2$ be
two processes that modify $A$, and let $p_3$ be a process that performs
operations on $A$.
Assume in contradiction that there is a linearizable implementation of $A$ for
which $|\OP| < u/2$.
By the specification of $A$ and Lemma~\ref{axone}, there is an
admissible execution $\alpha$ such that the following hold:
\begin{itemize}
\item $\ops(\alpha)$ is $\rho[A,p_3] \circ \op^1[A,p_1] \circ
\op^2[A,p_2] \circ \gamma[A,p_3]$.
\item $\rho[A,p_3]$ starts at time $0$ and ends at time $t$, $\op^1[A,p_1]$
starts at time $t$, $\op^2[A,p_2]$
starts at time $t+u/2$, and $\gamma[A,p_3]$ starts at time $t+u$.
\item The message delays in $\alpha$ are $d$ from $p_1$ to $p_2$, $d-u$ from
$p_2$ to $p_1$, and $d-u/2$ for all other ordered pairs of processes.
\end{itemize}
Let $\beta = \shift(\shift(\alpha,p_1,-u/2),p_2,u/2)$
(shift $p_1$ later by $u/2$
and $p_2$ earlier by $u/2$). We have admissible $\beta$ because,
by Lemma~\ref{shift},
the delay of a message from $p_1$ or to $p_2$ is $d-u$,
the delay of a message
from $p_2$ or to $p_1$ is $d$, and all other delays are unchanged. But the
linearization of $\ops(\beta)$ is
$\rho[A,p_3] \circ \op^2[A,p_2] \circ \op^1[A,p_1] \circ \gamma[A,p_3]$,
which is not legal.
\end{proof}
\begin{center}
\begin{figure}%[tbh]
\setlength{\unitlength}{0.00063300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(7212,7785)(301,-7063)
\thicklines
\put(1501,-3886){\line( 1, 0){2100}}
\put(1501,-3736){\line( 0,-1){300}}
\put(3601,-3736){\line( 0,-1){300}}
\put(6001,-3886){\line( 1, 0){1500}}
\put(1501,-5986){\vector( 1, 0){6000}}
\put(1501,-5836){\line( 0,-1){300}}
\put(3601,-5836){\line( 0,-1){300}}
\put(4801,-5836){\line( 0,-1){300}}
\put(6001,-5836){\line( 0,-1){300}}
\put(6001,-3736){\line( 0,-1){300}}
\put(1501,239){\line( 1, 0){2100}}
\put(1501,389){\line( 0,-1){300}}
\put(4801,-361){\line( 0,-1){300}}
\put(4576,-1111){\line( 0,-1){300}}
\put(6001,239){\line( 1, 0){1500}}
\put(4801,-511){\line( 1, 0){975}}
\put(3601,-1111){\line( 0,-1){300}}
\put(3601,-1261){\line( 1, 0){975}}
\put(1501,-1861){\vector( 1, 0){6000}}
\put(1501,-1711){\line( 0,-1){300}}
\put(3601,-1711){\line( 0,-1){300}}
\put(4801,-1711){\line( 0,-1){300}}
\put(6001,-1711){\line( 0,-1){300}}
\put(6001,389){\line( 0,-1){300}}
\put(5776,-361){\line( 0,-1){300}}
\put(5776,-5236){\line( 0,-1){300}}
\put(4801,-5236){\line( 0,-1){300}}
\put(4801,-5386){\line( 1, 0){975}}
\put(3601,-4486){\line( 0,-1){300}}
\put(3601,-4636){\line( 1, 0){975}}
\put(4576,-4486){\line( 0,-1){300}}
\put(3601,389){\line( 0,-1){300}}
\put(7501,-3736){\line( 0,-1){300}}
\put(7501,389){\line( 0,-1){300}}
\put(301,-3961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_3$}}}
\put(301,-4711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_2$}}}
\put(301,-5461){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_1$}}}
\put(2401,-3811){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\rho$}}}
\put(301,-3511){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Process}}}
\put(301,-6061){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(1501,-6361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$0$}}}
\put(3601,-6361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t$}}}
\put(4651,-6361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+u/2$}}}
\put(5851,-6361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+u$}}}
\put(4201,-7036){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution $\beta$}}}
\put(301,164){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_3$}}}
\put(301,-586){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_2$}}}
\put(301,-1336){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_1$}}}
\put(2401,314){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\rho$}}}
\put(301,614){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Process}}}
\put(301,-1936){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(5101,-436){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op^2$}}}
\put(3901,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op^1$}}}
\put(1501,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$0$}}}
\put(3601,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t$}}}
\put(4651,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+u/2$}}}
\put(5851,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+u$}}}
\put(4201,-2911){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution $\alpha$}}}
\put(5101,-5311){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op^1$}}}
\put(3901,-4561){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\op^2$}}}
\put(6451,314){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\gamma$}}}
\put(6451,-3811){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\gamma$}}}
\end{picture}
\caption{Counterexample for
Theorem~\ref{imp-no-commute}\label{newfig}}
\end{figure}
\end{center}
A variant of Theorem~\ref{imp-no-commute} can be obtained if the operation
instances in the statement of the theorem
are not in the
same generic operation.
See Figure~\ref{newfig}.
The conclusion is that at least one of the generic
operations must take time at least $u/2$. We omit the proof because of its
similarity to the proof of Theorem~\ref{imp-no-commute}.
\begin{theorem}
Let $T$ be an abstract data type with operations $\OP_1$ and $\OP_2$
such that
$\rho \circ \op_1 \circ \op_2$ does not look like
$\rho \circ \op_2 \circ \op_1$ for some operation sequence $\rho$ and some
operation instances $\op_1$ and $\op_2$.
For any linearizable implementation of an object of type $T$ in a system
with at least
three different processes, $|\OP_1| \ge u/2$ or $|\OP_2| \ge u/2$.
Figure~\ref{newfig} shows $\alpha$ and~$\beta$.
\end{theorem}
The next result is that an accessor operation must have worst-case time
complexity of $\Omega(u)$ if the legality of an instance of the accessor
depends on whether an instance of another
operation is executed.
\begin{theorem}
\label{cantell}
Let $T$ be an abstract data type with an operation $\MOP$
and an accessor $\AOP$
such that $\rho \circ \aop^{\it beforemop}$ and
$\rho \circ \mop \circ \aop^{\it aftermop}$
are legal but $\rho \circ \aop^{\it aftermop}$ and
$\rho \circ \mop \circ \aop^{\it beforemop}$
are not legal for some operation
sequence $\rho$ and some operation instances
$\mop$, $\aop^{\it beforemop}$, and $\aop^{\it aftermop}$.
For any linearizable implementation of an object of type $T$
in a system with at least three different processes, $|\AOP| \ge u/2$.
\end{theorem}
\begin{proof}
The following proof generalizes the proof in~\cite{cj99-09-17}
that $|\READ| \ge u/2$ for read/write objects. Their proof improved a lower
bound of $u/4$ in~\cite{cj99-09-05}.
Let $A$ be an object of type $T$. Let
$p_1$ and $p_2$ be two processes that access $A$, and let $p_3$ be a process
that performs operations on $A$.
Assume in contradiction that there exists
a linearizable implementation of $A$
for which $|\AOP| < u/2$.
Let $w = \lceil |\MOP|/u \rceil$. By the specification
of $A$ and Lemma~\ref{axone}, there exists an admissible timed execution
$\alpha$ that is as follows:
\begin{itemize}
\item $\ops(\alpha)|p_3$ = $\rho[A,p_3] \circ \mop[A,p_3]$, where $\rho$
starts at time $0$ and ends at time $t$, $\mop$'s call
occurs at time $t + u/2$, and its response occurs at or before
time $t + u/2 + |\MOP|$.
\item $\ops(\alpha)|p_1$ is a sequence of $w+1$ operations
$\aop^{2i}[A,p_1]$, where $i$ ranges from $0$ to $w$
and the $i$th call occurs at time $t + iu$.
\item $\ops(\alpha)|p_2$ is a sequence of $w+1$ operations
$\aop^{2i+1}[A,p_2]$, where $i$ ranges from $0$ to $w$
and the $i$th call occurs at time $t + iu+u/2$.
\end{itemize}
We assume that $\alpha$ has the following message delays: messages from $p_1$
to $p_2$ have delay $d$, messages from $p_2$ to $p_1$ have delay $d-u$, and
all others have delay $d-u/2$.
By the definition of $w$, $t + u/2 + |\MOP| < t + u/2+wu$. Thus, $\mop$
completes before $\aop^{2w+1}$ begins. This fact, together with the
linearizability of $\alpha$ and the accessor property of $\AOP$,
allows
$\aop^{2w+1}$ to be an $\aop^{\it aftermop}$
(i.e., it has the same return value). Also,
$\rho$ finishes before $\aop^0$
begins, $\aop^0$ completes before time $t+u/2$, and $\mop$
does not begin until
time $t+u/2$, and therefore
$\aop^0$ is an $\aop^{\it beforemop}$
by the linearizability of $\alpha$ and the accessor property of
$\AOP$. Thus, by the linearizability of
$\alpha$, there exists an index $i$, $0 \le i \le 2w$, such that
$\aop^i$ is an $\aop^{\it beforemop}$ and $\aop^{i+1}$
is an $\aop^{\it aftermop}$.
This implies that the linearization of $\alpha$ is
$\rho \circ \aop^0 \circ \cdots \circ \aop^i \circ \mop \circ \aop^{i+1}
\circ \cdots \circ \aop^{2w+1}$.
Since
$\AOP$ is an accessor and the linearization is legal,
$\aop^1,\ldots,\aop^{i-1}$ are of the form $\aop^{\it beforemop}$, and
$\aop^{i+1},\ldots,\aop^{2w}$ are of the form $\aop^{\it aftermop}$.
Without loss of generality, we can assume that $i$ is even, so that $\aop^i$
is performed by $p_1$.
Let $\beta = \shift(\shift(\alpha,p_1,-u/2),p_2,u/2)$.
Now $\beta$ is admissible,
because by Lemma~\ref{shift}, the delay of a message
from $p_1$ is $d-u$, the delay of a message from $p_2$ is $d$, and all other
message delays are unchanged. In $\beta$, $\rho$ precedes all $\AOP$ operations,
and the order of the $\AOP$ operations performed on
$A$ by $p_1$ and $p_2$ is
$\aop^1,\aop^0,\aop^3,\aop^2,\ldots,\aop^{i+1},\aop^i,\ldots$.
If $\mop$ is linearized before $\aop^i$ in $\beta$, then by the accessor property
of $\AOP$, $\rho \circ \mop \circ \aop^i$ is legal, a contradiction.
If $\mop$ is linearized after $\aop^i$ in $\beta$,
then by the accessor property
of $\AOP$, $\rho \circ \aop^{i+1}$ is legal, a
contradiction.
\end{proof}
\begin{center}
\begin{figure}%[tbh]
\setlength{\unitlength}{0.00063300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(7212,7710)(301,-6988)
\thicklines
\put(1501,-3811){\line( 1, 0){900}}
\put(1501,-3661){\line( 0,-1){300}}
\put(1501,-5911){\vector( 1, 0){6000}}
\put(1501,-5761){\line( 0,-1){300}}
\put(2401,-5761){\line( 0,-1){300}}
\put(2401,-3661){\line( 0,-1){300}}
\put(3001,-3661){\line( 0,-1){300}}
\put(3001,-3811){\line( 1, 0){3750}}
\put(6751,-3661){\line( 0,-1){300}}
\put(3601,-5761){\line( 0,-1){300}}
\put(4201,-5761){\line( 0,-1){300}}
\put(4801,-5761){\line( 0,-1){300}}
\put(6226,-5761){\line( 0,-1){300}}
\put(6826,-5761){\line( 0,-1){300}}
\put(3001,-5761){\line( 0,-1){300}}
\put(1501,239){\line( 1, 0){900}}
\put(1501,389){\line( 0,-1){300}}
\put(1501,-1861){\vector( 1, 0){6000}}
\put(1501,-1711){\line( 0,-1){300}}
\put(2401,-1711){\line( 0,-1){300}}
\put(2401,389){\line( 0,-1){300}}
\put(3001,389){\line( 0,-1){300}}
\put(3001,239){\line( 1, 0){3750}}
\put(6751,389){\line( 0,-1){300}}
\put(3601,-1711){\line( 0,-1){300}}
\put(6826,-361){\line( 0,-1){300}}
\put(7326,-361){\line( 0,-1){300}}
\put(6826,-511){\line( 1, 0){500}}
\put(4201,-1711){\line( 0,-1){300}}
\put(4801,-1711){\line( 0,-1){300}}
\put(6226,-1111){\line( 0,-1){300}}
\put(6726,-1111){\line( 0,-1){300}}
\put(6226,-1261){\line( 1, 0){500}}
\put(6226,-1711){\line( 0,-1){300}}
\put(6826,-1711){\line( 0,-1){300}}
\put(3001,-1711){\line( 0,-1){300}}
\put(3601,-1111){\line( 0,-1){300}}
\put(4101,-1111){\line( 0,-1){300}}
\put(3601,-1261){\line( 1, 0){500}}
\put(2401,-1111){\line( 0,-1){300}}
\put(2901,-1111){\line( 0,-1){300}}
\put(2401,-1261){\line( 1, 0){500}}
\put(3501,-361){\line( 0,-1){300}}
\put(3001,-511){\line( 1, 0){500}}
\put(3001,-361){\line( 0,-1){300}}
\put(4201,-361){\line( 0,-1){300}}
\put(4701,-361){\line( 0,-1){300}}
\put(4201,-511){\line( 1, 0){500}}
\put(2901,-4411){\line( 0,-1){300}}
\put(2401,-4561){\line( 1, 0){500}}
\put(2401,-4411){\line( 0,-1){300}}
\put(3601,-4411){\line( 0,-1){300}}
\put(4101,-4411){\line( 0,-1){300}}
\put(3601,-4561){\line( 1, 0){500}}
\put(6226,-4411){\line( 0,-1){300}}
\put(6726,-4411){\line( 0,-1){300}}
\put(6226,-4561){\line( 1, 0){500}}
\put(6826,-5161){\line( 0,-1){300}}
\put(7326,-5161){\line( 0,-1){300}}
\put(6826,-5311){\line( 1, 0){500}}
\put(4201,-5161){\line( 0,-1){300}}
\put(4701,-5161){\line( 0,-1){300}}
\put(4201,-5311){\line( 1, 0){500}}
\put(3001,-5161){\line( 0,-1){300}}
\put(3501,-5161){\line( 0,-1){300}}
\put(3001,-5311){\line( 1, 0){500}}
\put(301,-3886){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_3$}}}
\put(301,-5386){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_1$}}}
\put(301,-3436){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Process}}}
\put(301,-5986){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(1501,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$0$}}}
\put(4201,-6961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution $\beta$}}}
\put(1951,-3736){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\rho$}}}
\put(4501,-3736){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\mop$}}}
\put(301,-4636){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_2$}}}
\put(2851,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+\frac{u}{2}$}}}
\put(3451,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+u$}}}
\put(5326,-6211){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\ldots$}}}
\put(4051,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+\frac{3u}{2}$}}}
\put(4651,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+2u$}}}
\put(6026,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+wu$}}}
\put(6726,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+\frac{u}{2}+wu$}}}
\put(2401,-6286){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t$}}}
\put(301,164){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_3$}}}
\put(301,-1336){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_1$}}}
\put(301,614){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Process}}}
\put(301,-1936){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(1501,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$0$}}}
\put(4201,-2911){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution $\alpha$}}}
\put(1951,314){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\rho$}}}
\put(4501,314){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\mop$}}}
\put(301,-586){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$p_2$}}}
\put(2851,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+\frac{u}{2}$}}}
\put(3451,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+u$}}}
\put(6901,-436){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^{2w+1}$}}}
\put(5326,-2161){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\ldots$}}}
\put(4051,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+\frac{3u}{2}$}}}
\put(4651,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+2u$}}}
\put(6301,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^{2w}$}}}
\put(6026,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+wu$}}}
\put(6726,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t+\frac{u}{2}+wu$}}}
\put(2401,-2236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$t$}}}
\put(3676,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^2$}}}
\put(4501,-1261){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\ldots$}}}
\put(2476,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^0$}}}
\put(3076,-436){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^1$}}}
\put(4951,-511){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\ldots$}}}
\put(4276,-436){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^3$}}}
\put(2476,-4486){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^1$}}}
\put(4351,-4561){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\ldots$}}}
\put(3676,-4486){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^3$}}}
\put(6301,-4486){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^{2w+1}$}}}
\put(6901,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^{2w}$}}}
\put(4276,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^2$}}}
\put(5101,-5311){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\ldots$}}}
\put(3076,-5236){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\aop^0$}}}
\end{picture}
\caption{Counterexample for
Theorem~\ref{cantell}\label{otherfig}}
\end{figure}
\end{center}
\subsection{Sequentially consistent bounds}
\label{sc-ub}
In a system with approximately synchronized clocks and uncertainty in message
delay, we cannot automatically assume that processes receive and handle
messages in the same order.
Since sequential consistency needs a legal global ordering for operations
in an execution that
respects the individual local orderings of operations
by the same process, sequential consistency may be easier to implement in a
distributed system with a globally consistent message ordering.
The
{\em atomic broadcast\/} communication
primitive (see~\cite{cj99-09-06}) delivers all
broadcast messages in the same order at all processes, ensuring that two
messages broadcast by the same process are delivered in the order in which
they are broadcast. \cite{cj99-09-05} uses atomic broadcast in a modular
fashion to yield a sequentially consistent implementation of queues, where
enqueues are fast, and two implementations of read/write objects, one with
fast reads (and slow writes) and the other with fast writes (and slow reads).
The work in~\cite{cj99-09-12} corrects potential deadlock problems in the
atomic broadcast algorithm given in~\cite{cj99-09-05}.
Accessor operations can be made fast.
\begin{theorem}
\label{fast-acc}
Let $T$ be an abstract data type with $m$ generic accessor operations ($\AOP_i$)
and $n$ generic mutator operations ($\MOP_j$).
Then there exists a sequentially
consistent implementation of $T$ with $|\AOP_i| = 0$ for each $i$ in
$\{1,\ldots,m\}$ and $|\MOP_j| = h$ for each $j$ in $\{1,\ldots,n\}$,
where $h$ is the maximum time for message delivery by the underlying atomic
broadcast algorithm.
Figure~\ref{otherfig} shows $\alpha$ and~$\beta$.
\end{theorem}
We now explain the details of the algorithm.
For each accessor operation, the invoking process applies just the operation
to its local copy of the object and returns the result. For each mutator
operation, the invoking process uses the atomic broadcast algorithm to send
a message to all processes (including itself) containing the invoking process's
identification, the name of the operation, the object on which the operation is
invoked, and the argument list for the operation. The mutator operation does
not complete until the invoking process receives the message and handles it.
\begin{proof}
Our proof of sequential consistency generalizes the proof in~\cite{cj99-09-05} for
read/write objects.
\begin{claim}
\label{all-done}
For every admissible execution and every process $p$, $p$'s local copies are
updated to reflect all update operations, all update operations occur in the
same order at every process, and this order preserves the order of update
operations on a per-process basis.
\end{claim}
\begin{proofof}{Claim~\ref{all-done}}
By the algorithm, an atomic broadcast send is performed
exactly once for each update operation. By the guarantees of atomic broadcast,
each process gets exactly one message for each update operation, these messages
are received in the same order at all processes, and this order agrees with the
sending order with respect to each individual process.
\end{proofof}
\begin{claim}
\label{local-f}
For every admissible execution, every process $p$, and all objects $X$ and
$Y$, if accessor $A$ of object $Y$ follows mutator $M$ of object $X$ in
$\ops(\sigma)|p$, then $A$'s access of $p$'s local copy of $Y$ follows $M$'s
modification of $p$'s local copy of $X$.
\end{claim}
\begin{proofof}{Claim~\ref{local-f}}
$M$ does not complete until its update is performed at
$p$, the process that begins it.
\end{proofof}
\begin{formalrule}\label{newrule}
Let $\rho$
be an admissible execution. We systematically build the desired $\tau$.
We first order all mutator
operations in the order that the atomic broadcast algorithm assigns to their
messages. We determine where to place the accessor operations, starting
from the beginning of $\rho$.
We have that $\aop_i^j[X,p]$ goes immediately
after the later
of (1) the previous operation for process $p$ or (2) the
mutator that caused the latest update of $p$'s copy of $X$ before the
generation of the response for $\aop_i^j[X,p]$.
We use process identifiers
to break any ties.
\end{formalrule}
Now we prove that $\ops(\rho) | p = \tau | p$ for all processes $p$. Choose
some $p$. By the definition of $\tau$, the relative ordering of two accessor
operations in $\ops(\rho) | p$ is the same as in $\tau | p$.
Claim~\ref{all-done} guarantees that the relative ordering of two mutator
operations in $\ops(\rho) | p$ is the same as in $\tau | p$. If accessor $A$
follows mutator $M$ in $\ops(\rho) | p$, then $A$ follows $M$ in $\tau$ by
the definition of $\tau$.
Suppose that accessor $A$ precedes mutator $M$
in $\ops(\rho) | p$. Suppose in contradiction that $M$ precedes $A$ in $\tau$.
We follow the above rule for placing $A$ in $\tau$. Rule~\ref{newrule}(1)
does not apply,
because $M$ is not a previous operation for process $p$.
Rule~\ref{newrule}(2) applies.
Thus, in $\rho$ there is some accessor
$A'$ and some mutator $M'$ such that the following hold:
\begin{enumerate}
\item $A'$ is $A$ or precedes $A$ in $\rho$.
\item $M'$ is $M$ or follows $M$ in the ordering of mutators generated by
the atomic broadcast algorithm.
\item $M'$ causes the
latest update to $p$'s copy of $A$'s object that precedes $A'$.
\end{enumerate}
$A'$ finishes before $M$ starts in $\rho$. Since mutator operations are
performed in $\rho$ in atomic broadcast order (Claim~\ref{all-done}),
$A'$ does not see the update
performed by $M'$, a contradiction.
We must now prove that $\tau$ is legal. We must check all operations,
mutators and accessors, for legality.
By Claim~\ref{all-done}, mutators are performed at every process in atomic
broadcast order. Thus,
each mutator returns correctly, based on all updates handled before it,
ensuring that each mutator is legal.
Consider accessor
$A = \aop_i^j[X,p]$ in $\tau$. Let $M$ be the mutator
(performed by process $q$) in
$\rho$ that causes the latest update to $p$'s copy of $X$ preceding $A$'s
access of $p$'s copy of $X$. $A$ follows $M$ in $\tau$ by the definition
of $\tau$. We must show that no other mutator operation on $X$ is placed
between $M$ and $A$ in $\tau$. Suppose in contradiction that a mutator~$M'$
performed by process $r$
does.
By Claim~\ref{all-done}, the update for $M'$ follows the
update for $M$ at each process in~$\rho$.
Suppose that $r = p$. $M'$ precedes $A$ in $\rho$ by
Rule~\ref{newrule}.
The update for $M'$ follows the update for $M$ in $\rho$. Thus $A$ sees
$M'$'s update and not $M$'s, contradicting the choice of $M$.
Suppose that $r \ne p$.
By
Rule~\ref{newrule}
($A$ immediately follows the previous
operation for process $p$), there is some
operation
in $\ops(\rho) | p$ that precedes $A$ and follows $M'$ in $\tau$; otherwise
$A$ would not follow $M'$. Let $O$ be the first such operation. Thus, $O$
immediately follows $M'$ in $\tau$.
Suppose $O$ is a mutator operation on some object $Y$. By
Claim~\ref{local-f},
$O$'s update to $p$'s
copy of $Y$ precedes $A$'s access of $p$'s copy of $X$. Since updates are
performed in atomic broadcast order, the update for $M'$ occurs at $p$ before
the update for $O$, and also before $A$'s access, contradicting the choice of
$M$.
Suppose that $O$ is an accessor operation. By
Rule~\ref{newrule}(2), $O$ is
an accessor of $X$, and $M'$'s update to $p$'s copy of $X$ is the latest one
preceding $O$'s access; otherwise $O$ would not immediately follow $M'$.
Since updates
are performed in atomic broadcast order, the value from $M'$ supersedes the
value from $M$, contradicting the choice of $M$.
\end{proof}
Pure mutators can be made fast, too.
\begin{theorem}
\label{fast-pm}
Let $T$ be an abstract data type with $m$ pure mutator operations $(\MOP_i)$
and $n$ other operations
$(\OP_j)$. Then there exists a sequentially consistent
implementation of $T$ with $|\MOP_i| = 0$ for each $i$ in
$\{1,\ldots,m\}$ and $|\OP_j| = h$ for each $j$ in $\{1,\ldots,n\}$,
where $h$ is the maximum time for message delivery by the underlying atomic
broadcast algorithm.
\end{theorem}
\begin{proof}
We now explain the details of the algorithm. For each operation,
the invoking process uses the atomic
broadcast algorithm to send a message to
all processes (including itself) containing the invoking process's
identification, the name of the operation, the
name of the object on which the
operation is invoked, and the argument list for the operation. $\MOP$
operations return immediately,
while an $\OP_j$ operation does not complete (and return a result) until
all pending updates by its invoking process have been completed. This is
managed by maintaining
a count of pending updates (initialized to 0) and waiting
for its value to become 0.
We now explain why
the algorithm guarantees sequential consistency. Let $\rho$
be an admissible execution. We construct $\tau$ by ordering all operations
in the order that the atomic broadcast algorithm assigned to their messages.
Since atomic broadcast preserves the per-process message orders,
$\tau|p = \ops(\rho)|p$
for each process $p$. We now show why $\tau$ is legal.
All pure mutator operations are legal in $\tau$,
because the return values for pure mutator operations do not depend on the
states of the objects on which they are invoked. Now we must show that
each $\op_j^i$ is legal. In $\rho$, $\op_j^i[X,p]$ returns based on the state of
$p$'s copy of object $X$ when its message is handled. The state of $p$'s
copy of object $X$ reflects all changes made at all processes before $\op_j^i$'s
message is handled. Thus $\op_j^i[X,p]$ returns a legal value list in $\tau$.
\end{proof}
\begin{table}
\caption{Corollaries for Section~\ref{approx}}
\begin{center}
\begin{tabular}{llll} \hline
Operations & Type/table references & Theorems & Citations\\
\hline
${\it ENQ}$ & Table~\ref{queue-rel} & \ref{imp-no-commute},~\ref{fast-pm} &
\cite{cj99-09-05} \\
${\it PUSH}$ & Table~\ref{stack-rel} & \ref{imp-no-commute},~\ref{fast-pm} &
\cite{cj99-09-05} \\
${\it WRITE}$ & Read/write objects & \ref{imp-no-commute},~\ref{fast-pm} &
\cite{cj99-09-05} \\
${\it UP}$ & Table~\ref{bst-2-rel} & \ref{imp-no-commute} & \\
${\it PEEK}$ & Tables~\ref{queue-rel},~\ref{stack-rel} &
\ref{cantell},~\ref{fast-acc} & \\
${\it READ}$ & Read/write objects & \ref{cantell},~\ref{fast-acc} &
\cite{cj99-09-05,cj99-09-17} \\
${\it SEARCH}$ & Table~\ref{bst-rel} & \ref{cantell},~\ref{fast-acc} &
\\
${\it BALANCE}$ & Table~\ref{bank-acct} & \ref{cantell},~\ref{fast-acc} &
\\
${\it FIND}$ & Table~\ref{bst-2-rel} & \ref{cantell},~\ref{fast-acc} &
\\
${\it DEPOSIT}$ & Table~\ref{bank-acct} & \ref{fast-pm} & \\
${\it INC,HALF}$ & Table~\ref{inc-half} & \ref{fast-pm} & \\ \hline
\end{tabular}
\label{t:approx-cors}
\end{center}
\end{table}
Table~\ref{t:approx-cors} displays
types applicable to the theorems in this section.
For each row of the table, the operation from the first column, which
is from the type or table reference listed in the second column, meets the
hypotheses for the theorem(s) listed in the third column. Any references to a
specific lower or upper bound corresponding to the given operation are
presented in the fourth column.
We now discuss the implications of the results in this section with respect to
the relative costs of sequential consistency and linearizability in systems
with only approximately synchronized clocks and uncertainty in the network
message delay. Now we have lower bounds on the costs in linearizable
implementations of single operations satisfying certain algebraic properties.
The necessary property for Theorem~\ref{imp-no-commute} is very general. In
order for an abstract data type to be at all useful, all operation sequences
cannot look like each other. The necessary property for Theorem~\ref{cantell}
is reasonably general, too. Accessor instances should not be freely
interchangeable after sequences
including mutator instances.
We also have
two sequentially consistent implementations of abstract data types, with
accessors taking
time $0$ to complete in one (Theorem~\ref{fast-acc}) and pure
mutators taking
time $0$ to complete in the other (Theorem~\ref{fast-pm}). In
many abstract data types, pure mutators satisfy the hypothesis of
Theorem~\ref{imp-no-commute}, and accessors satisfy the hypothesis of
Theorem~\ref{cantell}, requiring at least $u/2$ time in linearizable
implementations of their abstract data types. Thus, our results imply that
sequential consistency is less expensive than linearizability for a reasonably
large class of abstract data types.
\section{Hybrid consistency}
\label{study-hc}
Attiya and Friedman~\cite{cj99-09-04} show that if weak operations
are used mostly, hybrid consistency is cheaper than sequential consistency
or linearizability for read/write objects.
We show that hybrid consistency is not necessarily
cheaper than stronger guarantees.
The lower bounds in Table~\ref{t:hca} are analogous to the lower
bounds for sequential consistency.
We omit proofs because they are very similar to previous proofs.
We assume all processes have perfectly synchronized
clocks.
$\WOP$ indicates
$\OP$'s weak version, $\SOP$ indicates $\OP$'s
strong version, and $*\OP$ indicates
an arbitrary version of $\OP$. $\Wop$ indicates
a weak instance of $\OP$, and $\Sop$ indicates a strong instance of $\OP$.
{\footnotesize
\begin{table}
\caption{Hybrid consistency analogues}
\begin{center}
\begin{tabular}{ll} \hline
Theorem for sequential consistency & Result for hybrid consistency \\ \hline
Theorem~\ref{slow-op} & $|*\OP| \ge d$ \\
Theorem~\ref{sc-pair} & $|\SOP_1| + |\WOP_2| \ge d$ \\
Theorem~\ref{cd-pair} & $|*\OP_1| + |*\OP_2| \ge 2d$ \\
Theorem~\ref{not-all-less} & $|\SOP_i| \ge d$ for some $i$ in $\{1,\ldots,n\}$ \\
& $|\WOP_i| \ge d$ for some $i$ in $\{1,\ldots,n\}$ \\
Theorem~\ref{3c2d} & $|*\OP_1| + |*\OP_2| + |*\OP_3| \ge 2d$ \\
Theorem~\ref{sc-big-cycle} & $|*\OP_1| + |*\OP_2| \ge d$, or \\
& $|*\OP_3| \ge d$ \\ \hline
Theorem~\ref{cgt} & \(\sum_{i=1}^{n} (|\SOP_i| + |\WOP_i|) \)
$\ge 2d$ \\
& $(|\NC(T)| + |\Maxdom(\RCG(T))|)$ \\
Theorem~\ref{clique} &
\(\sum_{i=1}^{n} (|\SOP_i| + |\WOP_i|) \) $\ge$ \\
Omit the $-1$ if $n-|\NC(T)|$ is even. & $2|\NC(T)|d+ (n-|\NC(T)|-1) d$
\\ \hline
\end{tabular}
\end{center}
\label{t:hca}
\end{table}
}
Since
hybrid consistency is weaker than sequential consistency,
intuition tells us that there may be some abstract data types in which
hybrid consistent implementations of weak operations are faster than their
sequentially consistent (or linearizable)
counterparts. Indeed, \cite{cj99-09-04} describes a
hybrid consistent
implementation of read/write objects in which weak reads and writes are
fast, and strong reads and writes take time $O(d)$.
Is it always possible to develop hybrid consistent
implementations with similar time bounds (ideally, weak operations that are
faster than strong operations)?
The answer is negative.
Why could weak operations for read/write objects be optimized? One plausible
reason is that read/write objects have simple semantics. It may be
possible to optimize weak operations for other abstract data types with simple
semantics. Let us consider the PMR objects defined in
Subsection~\ref{opt-one}; in particular, consider read/add/multiply
objects. $\readit()(v)[X,p]$
is legal if the value stored in $X$ is $v$. The effect of
${\it add}(v)()[X,p]$ is to add $v$ to the value stored in $X$. The effect of
${\it mul}(v)()[X,p]$ is to multiply the value stored in $X$ by $v$.
Let $X$ be one such object, initialized to $1$. Suppose there is a hybrid
consistent implementation of $X$ for which
$|\WADD| = |\wMUL| = |\WREAD| = 0$. Consider the
admissible execution $\rho$:
\begin{itemize}
\item $\ops(\rho)|p_1$ is $\Wadd(5)()[X,p_1] \circ \Wread()(6)[X,p_1]$,
where the
add starts at time $0$ and the read completes before time $d$.
\item $\ops(\rho)|p_2$ is $\Wmul(3)()[X,p_2] \circ \Wread()(3)[X,p_2]$,
where the
multiply starts at time $0$ and the read completes before time $d$.
\end{itemize}
This execution is not hybrid.
$p_1$'s read must return $6$, because the execution is
indistinguishable to $p_1$ from an execution in which $p_1$ is running alone.
Similarly, $p_2$'s read must return $3$.
There are two ways to order the operations that are not reads:
\begin{itemize}
\item $\Wmul(3)()[X,p_2] \circ \Wadd(5)()[X,p_1]$.
$\Wread()(6)[X,p_1]$ must be placed after
$\Wadd(5)()[X,p_1]$ for hybrid consistency, making it illegal.
\item $\Wadd(5)()[X,p_1] \circ \Wmul(3)()[X,p_2]$.
$\Wread()(3)[X,p_2]$ must be placed after
$\Wmul(3)()[X,p_2]$ for hybrid consistency, making it illegal.
\end{itemize}
Both reads cannot be legally placed.
We use this example to derive a new lower bound
on the worst-case time complexity for weak operations in hybrid consistent
implementations of abstract data types.
A generic operation $\OP$ is
{\em doubly noninterleavable with respect to $\OP_1$ and $\OP_2$}
if there exist
operation sequence $\rho$ and operation instances $\op^1, \op^2, \op_1,\op_2$
such that $\rho \circ \op_1 \circ \op^1$ and $\rho \circ \op_2 \circ \op^2$ are
legal, but there is no way to place both $\op^1$ and $\op^2$ after $\rho$ in
$\rho \circ \op_1 \circ \op_2$ and
$\rho \circ \op_2 \circ \op_1$ to form a legal
sequence.
\begin{theorem}
\label{dn}
Let $T$ be an abstract data type, and let $\AOP$, $\OP_1$, and $\OP_2$ be
operations on objects of type $T$ in a system with at least two
different processes, where
$\AOP$ is a generic accessor operation.
Suppose
$\AOP$ is doubly
noninterleavable with respect to $\OP_1$ and $\OP_2$.
Then, in any hybrid consistent implementation of objects of type $T$,
$|\WOP_1| + |\WAOP| \ge d$ or
$|\WOP_2| + |\WAOP| \ge d$.
\end{theorem}
\begin{proof}
Let $A$ be an object of type $T$ in a system with at least two
different processes. Let processes
1 and 2 use $A$.
Since $\AOP$ is doubly noninterleavable with respect to $\OP_1$ and $\OP_2$,
there exists an operation sequence $\alpha$ and operation instances
$\aop^1, \aop^2, \op_1, \op_2$ such that $\alpha \circ \op_1 \circ \aop^1$
and $\alpha \circ \op_2 \circ \aop^2$ are legal, but there is no way to place
both $\aop^1$ and $\aop^2$ after $\alpha$ in $\alpha \circ \op_1 \circ \op_2$ and
$\alpha \circ \op_2 \circ \op_1$ to form a legal sequence.
Assume in contradiction that there exists some hybrid consistent implementation
of $A_\alpha$ for which $|\WOP_1| + |\WAOP| < d$ and $|\WOP_2| + |\WAOP| < d$.
By the sequential specification for $A_{\alpha}$, there exists an admissible
execution $\rho_1$ with $\ops(\rho_1)$ equal to
$\Wop_1[A_\alpha,1] \circ \Waop^1[A_\alpha,1]$. $\Wop_1[A_\alpha,1]$ starts at
real time $0$,
and $\Waop^1[A_\alpha,1]$ starts immediately after $\Wop_1[A_\alpha,1]$ finishes.
Because, by our assumption that $|\WOP_1| + |\WAOP| < d$, the execution
$\rho_1$ ends before real time $d$, no process receives a message during $\rho_1$.
By the sequential specification for $A_{\alpha}$, there exists an admissible
execution $\rho_2$ with $\ops(\rho_2)$ equal to
$\Wop_2[A_\alpha,2] \circ
\Waop^2[A_\alpha,2]$. Then $\Wop_2[A_\alpha,2]$ starts at
real time $0$,
and $\Waop^2[A_\alpha,2]$ starts immediately after $\Wop_2[A_\alpha,2]$ finishes.
Because, by our assumption that $|\WOP_2| + |\WAOP| < d$, the execution
$\rho_2$ ends before real time $d$, no process receives a message during $\rho_2$.
Figure~\ref{t:double} shows admissible executions $\rho_1$ and $\rho_2$.
Since no messages are received in $\rho_1$ and $\rho_2$, we can produce an
admissible hybrid execution $\rho$ by replacing
process 2's history in $\rho_1$
with its history in $\rho_2$.
\begin{center}
\begin{figure}%[tbh]
\setlength{\unitlength}{0.00083300in}%
\begingroup\makeatletter\ifx\SetFigFont\undefined
\def\x#1#2#3#4#5#6#7\relax{\def\x{#1#2#3#4#5#6}}%
\expandafter\x\fmtname xxxxxx\relax \def\y{splain}%
\ifx\x\y % LaTeX or SliTeX?
\gdef\SetFigFont#1#2#3{%
\ifnum #1<17\tiny\else \ifnum #1<20\small\else
\ifnum #1<24\normalsize\else \ifnum #1<29\large\else
\ifnum #1<34\Large\else \ifnum #1<41\LARGE\else
\huge\fi\fi\fi\fi\fi\fi
\csname #3\endcsname}%
\else
\gdef\SetFigFont#1#2#3{\begingroup
\count@#1\relax \ifnum 25<\count@\count@25\fi
\def\x{\endgroup\@setsize\SetFigFont{#2pt}}%
\expandafter\x
\csname \romannumeral\the\count@ pt\expandafter\endcsname
\csname @\romannumeral\the\count@ pt\endcsname
\csname #3\endcsname}%
\fi
\fi\endgroup
\begin{picture}(6150,3360)(1426,-3163)
\thicklines
\put(2401,-2761){\vector( 1, 0){4800}}
\put(2851,-2611){\line( 0,-1){300}}
\put(6676,-2611){\line( 0,-1){300}}
\put(2851,-1936){\line( 1, 0){3825}}
\put(2851,-1786){\line( 0,-1){300}}
\put(6676,-1786){\line( 0,-1){300}}
\put(2851,-1186){\line( 1, 0){1725}}
\put(4801,-1186){\line( 1, 0){1725}}
\put(2851,-1036){\line( 0,-1){300}}
\put(4576,-1036){\line( 0,-1){300}}
\put(4801,-1036){\line( 0,-1){300}}
\put(6526,-1036){\line( 0,-1){300}}
\put(4801,-361){\line( 1, 0){1725}}
\put(2851,-361){\line( 1, 0){1725}}
\put(2851,-211){\line( 0,-1){300}}
\put(4576,-211){\line( 0,-1){300}}
\put(4801,-211){\line( 0,-1){300}}
\put(6526,-211){\line( 0,-1){300}}
\put(1426, 89){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Execution}}}
\put(1651,-361){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\rho_2$}}}
\put(1651,-1186){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\rho_1$}}}
\put(3301,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\Wop_2[A_\alpha,2]$}}}
\put(3301,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\Wop_1[A_\alpha,1]$}}}
\put(3301,-1711){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}earliest message transit for $\rho_1$ and $\rho_2$}}}
\put(5251,-961){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\Waop^1[A_\alpha,1]$}}}
\put(5251,-136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$\Waop^2[A_\alpha,2]$}}}
\put(1426,-2761){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}Time}}}
\put(2851,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}0}}}
\put(6676,-3136){\makebox(0,0)[lb]{\smash{\SetFigFont{10}{14.4}{rm}$d$}}}
\end{picture}
\caption{Counterexample for double noninterleavability lower bound
(Theorem~\ref{dn})}
\label{t:double}
\end{figure}
\end{center}
By assumption, $\rho$ is hybrid. Consider $\tau_1$. In $\tau_1$,
$\Wop_1[A_\alpha,1]$ precedes
$\Waop^1[A_\alpha,1]$. If $\Wop_1[A_\alpha,1]$ precedes $\Wop_2[A_\alpha,2]$, then
by the double noninterleavability
property, all possible placements of both $\Waop^1[A_\alpha,1]$ and
$\Waop^2[A_\alpha,2]$ in
$\Wop_1[A_\alpha,1] \circ \Wop_2[A_\alpha,2]$ are illegal for $A_{\alpha}$. If
$\Wop_2[A_\alpha,2]$ precedes $\Wop_1[A_\alpha,1]$, then by the double
noninterleavability property,
all possible placements of both $\Waop^1[A_\alpha,1]$ and $\Waop^2[A_\alpha,2]$
in $\Wop_2[A_\alpha,2] \circ \Wop_1[A_\alpha,1]$
are illegal for $A_{\alpha}$. Thus,
we cannot produce a $\tau_1$ that is legal for $A_{\alpha}$,
because we need to place all operations.
\end{proof}
\begin{corollary}
In any implementation of {\em read/add/multiply} objects that is hybrid
consistent,
$|\WADD| + |\WREAD| \ge d$ or $|\wMUL| + |\WREAD| \ge d$.
\end{corollary}
For the above classes of PMR objects, the lower bound on total time complexity
of the operations matches the upper bound given in the
linearizable implementations. Thus, for these classes of
objects, sequential consistency and linearizability cost no more than hybrid
consistency.
\section{Summary}
\label{summary}
We have studied the impact of algebraic properties of operations, the degree of
process synchronization, and the type of consistency guarantee on the time
complexities of distributed implementations of abstract data types.
We have shown that sequential consistency and linearizability
are equally costly in systems with perfectly synchronized clocks under certain
reasonable assumptions; that sequential consistency is cheaper than
linearizability in systems with only approximately synchronized clocks under
certain reasonable assumptions; and that hybrid consistency is not necessarily
cheaper than the stronger consistency guarantees.
We attempted to find an algebraic property of operations such that an
operation could be optimized in a linearizable implementation assuming
perfect process synchrony if it satisfied the property and it could not
be optimized otherwise.
We determined a reasonably general algebraic property, self-obliviousness,
such that if an operation is self-oblivious, then it could be optimized
in a linearizable implementation assuming perfect process synchrony.
However, we still do not know if
we can optimize an operation that is not self-oblivious.
The results in~\cite{cj99-09-14} support the hypothesis that
self-obliviousness is necessary for optimizing a single operation.
Optimizing a given operation or group of
operations
is beneficial when it is frequently used. Although we do not have a complete
characterization of when a given operation or group of operations cannot be
optimized, we have made reasonable progress on this problem, and
we hope that our work can be used to determine this
elusive complete characterization.
\appendix
% Section numbering switches to \Alph{section} automatically after \appendix.
\section{Examples of abstract data types}
\label{adt-examples}
The {\em augmented queue\/} abstract data type (Table~\ref{queue-rel}) has
enqueue, dequeue, and peek operations. A peek returns the front item of a
queue without changing the queue.
Queues can be of arbitrary length. A $\perp$ is returned by a dequeue
or peek invoked on an empty queue.
An $e$ (respectively, $d$) in entry $(i,j)$ of the table means that
operation $i$ and operation $j$ eventually (respectively,
immediately) do not
commute. No relationship is assumed between the argument lists for operation
$i$ and operation $j$. No item in an argument list has $\perp$ for its value.
Any operation returning $\perp$ is {\em abnormal}. All other operations are
{\em normal}. These definitions hold for all
tables.
\begin{table}%[tbh]
\caption{Commutativity for the augmented queue abstract data type}
\begin{center}
\begin{tabular}{ lccccc}
\hline
Queue operation & ${\it enq}(x) \cdot$ & ${\it deq}() \cdot$ &
${\it deq}() \cdot$ &
$\peek() \cdot$ & $\peek() \cdot$
\\
& ${\it ok}()$ & $\ret(x)$ & $\ret(\perp)$ & $\ret(x)$
& $\ret(\perp)$ \\ \hline
${\it enq}(y) \circ {\it ok}()$ & $e$ & & $d$ & & $d$ \\
${\it deq}() \circ \ret(y)$ & & $d$ & $d$ & $d$ & $d$ \\
${\it deq}() \circ \ret(\perp)$ & $d$ & $d$ & & $d$ & \\
$\peek() \circ \ret(y)$ & & $d$ & $d$ & & $d$ \\
$\peek() \circ \ret(\perp)$ & $d$ & $d$ & & $d$ & \\ \hline
\end{tabular}
\end{center}
\label{queue-rel}
\end{table}
The {\em augmented stack\/} abstract data type (Table~\ref{stack-rel})
has push, pop, and peek operations. A peek returns the top item of a stack
without changing the stack.
Stacks can be of arbitrary height. $\perp$ is returned by a pop
or peek operation invoked on an empty stack.
\begin{table}%[tbh]
\caption{Commutativity for the augmented stack abstract data type}
\begin{center}
\begin{tabular}{ l ccccc}
\hline
Stack operation & ${\it push}(x) \cdot$ & ${\it pop}() \cdot$ &
${\it pop}() \cdot$ &
$\peek() \cdot$ & $\peek() \cdot$
\\
& ${\it ok}()$ & $\ret(x)$ & $\ret(\perp)$ & $\ret(x)$
& $\ret(\perp)$ \\ \hline
${\it push}(y) \circ {\it ok}()$ & $e$ & $d$ & $d$ & & $d$ \\
${\it pop}() \circ \ret(y)$ & & $d$ & $d$ & $d$ & $d$ \\
${\it pop}() \circ \ret(\perp)$ & $d$ & $d$ & & $d$ & \\
$\peek() \circ \ret(y)$ & & $d$ & $d$ & & $d$ \\
$\peek() \circ \ret(\perp)$ & $d$ & $d$ & & $d$ & \\ \hline
\end{tabular}
\end{center}
\label{stack-rel}
\end{table}
The {\em dictionary set} abstract data type (Table~\ref{bst-rel}) has
insert (an element and its key), delete (an element
and its key), and search (for an element and return its key) operations.
A set can have an arbitrary number of (element,key) pairs.
$\perp$ is
returned by a delete or search operation that is performed when the
input argument is not an element in the set.
\begin{table}[tbh]
\caption{Commutativity for the dictionary set abstract data type}
\begin{center}
\begin{tabular}{lccccc}
\hline
Set operation & ${\it ins}(x,v) \cdot$ & ${\it del}(x) \cdot$ &
${\it del}(x) \cdot$ &
${\it search}(x) \cdot$ & ${\it search}(x) \cdot$ \\
& ${\it ok}()$ & $\ack({\it ok})$ &
$\ack(\perp)$ &
$\ret(v)$ & $\ret(\perp)$ \\ \hline
${\it ins}(y,w) \circ {\it ok}()$ & & $d$ & $d$ & & $d$ \\
${\it del}(y) \circ \ack({\it ok})$ & $d$ & $d$ & $d$ & $d$ & \\
${\it del}(y) \circ \ack(\perp)$ & $d$ & $d$ & & & \\
${\it search}(y) \circ \ret(w)$ & & $d$ & & & \\
${\it search}(y) \circ \ret(\perp)$ & $d$ & & & & \\ \hline
\end{tabular}
\end{center}
\label{bst-rel}
\end{table}
The bank account
abstract data type~\cite{cj99-09-18} (Table~\ref{bank-acct}) has
deposit, withdraw, and balance operations.
$\perp$ is returned by a withdraw operation performed when the account
contains less money than the amount specified in the input argument.
\begin{table}[tbh]
\caption{Commutativity for the bank account abstract data type}
\begin{center}
\begin{tabular}{lcccc}
\hline
Bank account & ${\it deposit}(j) \cdot$ & ${\it withdraw}(j) \cdot$ &
${\it withdraw}(j) \cdot$ &
${\it balance}() \cdot$ \\
operation & ${\it ok}()$ & $\ack({\it ok})$ &
$\ack(\perp)$ &
$\ret(j)$ \\ \hline
${\it deposit}(i) \cdot$ & & & $d$ & $d$ \\
${\it ok}()$ & & & & \\
${\it withdraw}(i) \cdot$ & & $d$ & & $d$ \\
$\ack({\it ok})$ & & & & \\
${\it withdraw}(i) \cdot$ & $d$ & & & \\
$\ack(\perp)$ & & & & \\
${\it balance}() \cdot$ & $d$ & $d$ & & \\
$\ret(i)$ & & & & \\
\hline
\end{tabular}
\end{center}
\label{bank-acct}
\end{table}
The {\em reference-count set\/}
abstract data type (Table~\ref{bst-2-rel}) has
insert, update, and find (search) operations.
A reference-count set can have an arbitrary number of elements.
When an item is inserted
into this
set, its data field is initialized to $1$. All inserts are
normal. When an
item is updated, its data field is incremented by $1$ (if the item is present).
When an item is
searched, its data field is returned (if the item is present).
$\perp$ is returned by an update or search operation performed when the
input argument is not in the set.
\begin{table}[t]
\caption{Commutativity for the reference-count set}
\begin{center}
\begin{tabular}{lccccc}
\hline
Reference set & ${\it ins}(x) \cdot$ & ${\it up}(x) \cdot$ &
${\it up}(x) \cdot$ &
${\it find}(x) \cdot$ & ${\it find}(x) \cdot$ \\
operation & ${\it ok}()$ & $\ack({\it ok})$ &
$\ack(\perp)$ &
$\ret(v)$ & $\ret(\perp)$ \\ \hline
${\it ins}(y) \circ {\it ok}()$ & & & $d$ & & $d$ \\
${\it up}(y) \circ \ack({\it ok})$ & & & & $d$ & \\
${\it up}(y) \circ \ack(\perp)$ & $d$ & & & & \\
${\it find}(y) \circ \ret(w)$ & & $d$ & & & \\
${\it find}(y) \circ \ret(\perp)$ & $d$ & & & & \\ \hline
\end{tabular}
\end{center}
\label{bst-2-rel}
\end{table}
In the {\em increment-half} abstract data type (Table~\ref{inc-half}), the
values are real numbers,
and the operations are read, increment, and half. The half operation causes
the value of the object on which it is invoked to become half of its previous
value.
\begin{table}[t]
\caption{Commutativity for the increment-half abstract data type}
\begin{center}
\begin{tabular}{lccc}
\hline
INC-HALF & $\readit() \circ \ret(v)$ & ${\it inc}(x) \circ {\it ok}()$ &
${\it half}() \circ {\it ok}()$ \\ \hline
$\readit() \circ \ret(v)$ & & $d$ & $d$ \\
${\it inc}(x) \circ {\it ok}()$ & $d$ & & $e$ \\
${\it half}() \circ {\it ok}()$ & $d$ & $e$ & \\ \hline
\end{tabular}
\end{center}
\label{inc-half}
\end{table}
\section{Axioms about executions}
\label{axioms}
\begin{lemma}
\label{axone}
If $\alpha$ is a legal sequence of operations for a set of objects
$\cal O$, and $\alpha$ is of the form
$\op_1[{\cal O}_1,p_1] \cdots \op_n[{\cal O}_n,p_n]$,
then there exists an admissible execution $\sigma$ such that $\ops(\sigma)$
is $\alpha$.\footnote{$\op_i$'s are not
necessarily instantiations of different
generic operations, ${\cal O}_i$'s are not necessarily distinct
objects, and the $p_i$'s are not necessarily distinct processes.}
\end{lemma}
\begin{proof}
Choose nonnegative numbers $t_{1s}, t_{1f}, \ldots, t_{ns}, t_{nf}$, where
the numbers form a nondecreasing sequence.
$\op_i$ starts at time $t_{is}$ and finishes at time $t_{if}$.
Let all messages have delay in the range $[d-u,d]$. By construction,
$\ops(\sigma) = \alpha$, and at most one call per process is pending at a time.
We have satisfied the definition of admissibility.
\end{proof}
\begin{lemma}
If $\alpha$ is
a legal sequence of operations for a set of objects ${\cal O}_1$
that are performed by process $p_1$, and $\beta$ is a legal sequence of
operations for a set of objects ${\cal O}_2$ that are performed by process
$p_2$,
where ${\cal O}_1$ and ${\cal O}_2$ are disjoint, then there exists an admissible
execution $\sigma$ such that $\ops(\sigma) = \alpha \circ
\beta$.\footnote{$p_1$ and $p_2$ do not have to be distinct
processes.}
\end{lemma}
\begin{proof}
Suppose $\alpha$ has $n$ operations in it. By Lemma~\ref{axone}, there is
an admissible execution $\sigma_1$ such that $\ops(\sigma_1) = \alpha$. Let
the last operation finish at time $t_{nf}$. By Lemma~\ref{axone}, there
is an admissible execution $\sigma_2$ such that $\ops(\sigma_2) = \beta$.
Add $t_{nf} + \epsilon$
for some $\epsilon \ge 0$ to the start and finish times
for all operations
in $\sigma_2$. Consider $\sigma_1 \circ \sigma_2$. It is easy
to see that $\sigma_1 \circ \sigma_2$ satisfies the definition of
admissibility.
\end{proof}
\begin{lemma}
Any prefix of an admissible execution is admissible.
\end{lemma}
\begin{proof}
Let $\rho \circ \sigma$ be an admissible execution. $\rho$ is a
prefix of $\rho \circ \sigma$. The message delays for the operations
in $\rho$ are the same as what they were in $\rho \circ \sigma$, and
no process has multiple simultaneous pending operations in $\rho$ if
it does not have them in $\rho \circ \sigma$.
\end{proof}
\begin{lemma}
If $\beta$ and $\gamma$ are admissible executions with a common prefix
$\alpha$, and $(\beta - \alpha)$,\footnote{If $z = x \circ y$,
then $y = z - x$.}
and $(\gamma - \alpha)$ are such that
\begin{enumerate}
\item any messages sent in $(\beta - \alpha)$ would not be received until
after the completion time of $\gamma$, and
\item any messages sent in $(\gamma - \alpha)$ would not be received until
after the completion time of $\beta$,
\end{enumerate}
then, for each process $i$,
\begin{enumerate}
\item replacing process $i$'s history after $\alpha$ in
$\alpha (\beta - \alpha)$ with its history in $(\gamma - \alpha)$ results in
an admissible execution $\beta_{i\gamma}$, and
\item replacing process $i$'s history after $\alpha$ in
$\alpha (\gamma - \alpha)$ with its history in $(\beta - \alpha)$ results in
admissible execution $\gamma_{i\beta}$.
\end{enumerate}
\end{lemma}
\begin{proof}
The message delays remain unchanged in $\beta_{i\gamma}$ and
$\gamma_{i\beta}$ from their respective delays in $\beta$ and $\gamma$.
Process $i$ is the only process for which we need to
check for multiple simultaneous pending operations, because all other
process histories are unchanged. Process $i$'s history
in $\beta_{i\gamma}$ is the same as its history in $\gamma$, and its history
in $\gamma_{i\beta}$ is the same as its history in $\beta$. Since both
$\beta$ and $\gamma$ are admissible, process $i$ has no multiple simultaneous
pending operations in $\beta_{i\gamma}$ and $\gamma_{i\beta}$. Thus,
$\beta_{i\gamma}$ and $\gamma_{i\beta}$ are admissible.
\end{proof}
\section*{Acknowledgments}
We thank Soma Chaudhuri, Roy Friedman, Donald Stanat, Jennifer
Welch, and the referees for their helpful comments.
\subsection*{Acknowledgment of support}
Part of this work was performed
at the University of North Carolina at Chapel Hill, supported in part by
National Science Foundation
grant CCR-9010730 and an IBM Faculty Development Award. This
work was partially
presented in~\cite{cj99-09-13}.
\bibliographystyle{abbrv}
\bibliography{cj99-09}
\end{document}