@jsquyres
Created June 1, 2015 19:03
MPI 3.1 Final Release candidate
Index: chap-io/io-2.tex
===================================================================
--- chap-io/io-2.tex (revision 2030)
+++ chap-io/io-2.tex (working copy)
@@ -2,6 +2,7 @@
% Version of Wed Jun 18 20:15:00 PDT 1997
\chapter{I/O}
+\mpitermtitleindex{IO@I/O}
\label{chap:io-2}
\label{sec:io-2}
@@ -55,20 +56,20 @@
\label{subsec:io-2:definitions}
\begin{description}
-\item[file]
+\item[\mpitermdef{file}]
An \MPI/ file is an ordered collection of typed data items.
\MPI/ supports random or sequential access to any integral set of these items.
A file is opened collectively by a group of processes.
All collective I/O calls on a file are collective over this group.
-\item[displacement]
-A file \mpiterm{displacement} is an absolute byte position
+\item[\mpitermdef{displacement}]
+A file \mpitermni{displacement} is an absolute byte position
relative to the beginning of a file.
-The displacement defines the location where a \mpiterm{view} begins.
+The displacement defines the location where a \mpiterm{view}\mpitermindex{file!view} begins.
Note that a ``file displacement'' is distinct from a ``typemap displacement.''
-\item[etype]
-An \mpiterm{etype} (\mpiterm{elementary} datatype)
+\item[\mpitermdef{etype}]
+An \mpitermni{etype} (\mpitermni{elementary} datatype\mpitermdefindex{elementary datatype})
is the unit of data access and positioning.
It can be any \MPI/ predefined or derived datatype.
Derived etypes can be constructed
@@ -86,8 +87,8 @@
a data item of that type,
or the extent of that type.
-\item[filetype]
-A \mpiterm{filetype} is the basis for partitioning a file among processes
+\item[\mpitermdefni{filetype}\mpitermdefindex{file!filetype}]
+A \mpitermni{filetype} is the basis for partitioning a file among processes
and defines a template for accessing the file.
A filetype is either a single etype or a derived \MPI/ datatype
constructed from multiple instances of the same etype.
@@ -97,8 +98,8 @@
The displacements in the typemap of the filetype are not required to be distinct,
but they must be non-negative and monotonically nondecreasing.
-\item[view]
-A \mpiterm{view} defines the current set of data visible
+\item[\mpitermdef{view}\mpitermdefindex{file!view}]
+A \mpitermni{view} defines the current set of data visible
and accessible from an open file as an ordered set of etypes.
Each process has its own view of the file,
defined by three quantities:
@@ -132,8 +133,8 @@
\label{fig:io-comp-filetypes}
\end{figure}
-\item[offset]
-An \mpiterm{offset} is a position
+\item[\mpitermdef{offset}\mpitermdefindex{file!offset}]
+An \mpitermni{offset} is a position
in the file
relative to the current view,
expressed as a count of etypes.
@@ -146,24 +147,24 @@
An ``explicit offset'' is an offset that is used as an argument
in explicit data access routines.
-\item[file size and end of file]
-The \mpiterm{size} of an \MPI/ file is measured in bytes from the
+\item[\mpitermdefni{file size}\mpitermdefindex{file!size} and \mpitermdef{end of file}\mpitermdefindex{file!end of file}]
+The \mpitermni{size} of an \MPI/ file is measured in bytes from the
beginning of the file. A newly created file has a size of zero
bytes. Using the size as an absolute displacement gives
the position of the byte immediately following the last byte in
-the file. For any given view, the \mpiterm{end of file} is the
+the file. For any given view, the \mpitermni{end of file} is the
offset of the first etype accessible in the current view starting
after the last byte in the file.
-\item[file pointer]
-A \mpiterm{file pointer} is an implicit offset maintained by \MPI/.
+\item[\mpitermdefni{file pointer}\mpitermdefindex{file!pointer}]
+A \mpitermni{file pointer} is an implicit offset maintained by \MPI/.
``Individual file pointers'' are file pointers that are local to
each process that opened the file.
A ``shared file pointer'' is a file pointer that is shared by
the group of processes that opened the file.
-\item[file handle]
-A \mpiterm{file handle} is an opaque object created by \mpifunc{MPI\_FILE\_OPEN}
+\item[\mpitermdefni{file handle}\mpitermdefindex{file!handle}]
+A \mpitermni{file handle} is an opaque object created by \mpifunc{MPI\_FILE\_OPEN}
and freed by \mpifunc{MPI\_FILE\_CLOSE}.
All operations on an open file
reference the file through the file handle.
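
As an aside for readers of this patch, the terms defined above compose in practice as follows. A minimal C sketch (not part of the patch; the file name, the 1024-byte header, and the round-robin tiling are illustrative assumptions) in which each process's view selects every nprocs-th int of a shared file:

    #include <mpi.h>

    int main(int argc, char *argv[])
    {
        MPI_File fh;
        MPI_Datatype filetype;
        int rank, nprocs, buf[100];
        MPI_Status status;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

        /* filetype: one visible int followed by a hole of nprocs-1 ints */
        MPI_Type_create_resized(MPI_INT, 0,
                                (MPI_Aint)(nprocs * sizeof(int)), &filetype);
        MPI_Type_commit(&filetype);

        MPI_File_open(MPI_COMM_WORLD, "datafile", MPI_MODE_RDONLY,
                      MPI_INFO_NULL, &fh);
        /* the displacement skips a 1024-byte header and staggers the
           processes; the etype MPI_INT is the unit of access and
           positioning */
        MPI_File_set_view(fh, 1024 + (MPI_Offset)(rank * sizeof(int)),
                          MPI_INT, filetype, "native", MPI_INFO_NULL);
        MPI_File_read(fh, buf, 100, MPI_INT, &status);

        MPI_File_close(&fh);
        MPI_Type_free(&filetype);
        MPI_Finalize();
        return 0;
    }
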
@@ -171,6 +172,7 @@
\end{description}
\section{File Manipulation}
+\mpitermtitleindex{file!manipulation}
%==========================
\label{sec:io-filecntl}
@@ -646,6 +648,7 @@
\end{example}
\subsection{File Info}
+\mpitermtitleindex{info object!file info}
%---------------------
\label{sec:io-info}
@@ -867,6 +870,7 @@
\end{description}
\section{File Views}
+\mpitermtitleindexmainsub{file}{view}
%===================
\label{sec:io-view}
@@ -926,7 +930,7 @@
\begin{rationale}
For some sequential files,
such as those corresponding to magnetic tapes or streaming network connections,
-the \emph{displacement} may not be meaningful.
+the \mpiterm{displacement} may not be meaningful.
\const{MPI\_DISPLACEMENT\_CURRENT} allows the view to be changed
for these types of files.
\end{rationale}
@@ -957,7 +961,7 @@
\end{figure}
\end{users}
-An \mpiterm{etype} (\mpiterm{elementary} datatype)
+An \mpiterm{etype} (\mpitermni{elementary} datatype\mpitermindex{elementary datatype})
is the unit of data access and positioning.
It can be any \MPI/ predefined or derived datatype.
Derived etypes can be constructed
@@ -971,7 +975,8 @@
\begin{users}
In order to ensure interoperability in a heterogeneous environment,
-additional restrictions must be observed when constructing the \mpiarg{etype}
+additional restrictions must be observed when constructing the
+\mpishortarg{etype}
(see \sectionref{sec:io-file-interop}).
\end{users}
@@ -1057,6 +1062,7 @@
committed state.
\section{Data Access}
+\mpitermtitleindex{file!data access}
%====================
\label{sec:io-access}
@@ -1080,49 +1086,49 @@
\tiny%%ALLOWLATEX%
\begin{tabular}{|l||l||l|l|}
\hline
-\textbf{positioning} & \textbf{synchronism} & \multicolumn{2}{c|}\textbf{coordination} \\
+\textbf{positioning} & \textbf{synchronism} & \multicolumn{2}{c|}{\textbf{coordination}} \\
\cline{3-4}
- & & \emph{noncollective} & \emph{collective} \\
+ & & \emph{noncollective} & \mpiterm{collective} \\
\hline
\hline %-------------------------------------------------------------
-\emph{explicit} & \emph{blocking}
+\emph{explicit} & \mpiterm{blocking}
& \mpifunc{MPI\_FILE\_READ\_AT} & \mpifunc{MPI\_FILE\_READ\_AT\_ALL} \\
\emph{offsets} &
& \mpifunc{MPI\_FILE\_WRITE\_AT} & \mpifunc{MPI\_FILE\_WRITE\_AT\_ALL} \\
\cline{2-4}
-& \emph{nonblocking}
+& \mpiterm{nonblocking}
& \mpifunc{MPI\_FILE\_IREAD\_AT} & \mpifunc{MPI\_FILE\_IREAD\_AT\_ALL} \\
& & \mpifunc{MPI\_FILE\_IWRITE\_AT} & \mpifunc{MPI\_FILE\_IWRITE\_AT\_ALL} \\
\cline{2-4}
-& \emph{split collective} & {N/A} & \mpifunc{MPI\_FILE\_READ\_AT\_ALL\_BEGIN} \\
+& \mpiterm{split collective} & {N/A} & \mpifunc{MPI\_FILE\_READ\_AT\_ALL\_BEGIN} \\
& & & \mpifunc{MPI\_FILE\_READ\_AT\_ALL\_END} \\
& & & \mpifunc{MPI\_FILE\_WRITE\_AT\_ALL\_BEGIN} \\
& & & \mpifunc{MPI\_FILE\_WRITE\_AT\_ALL\_END} \\
\hline %-------------------------------------------------------------
-\emph{individual} & \emph{blocking}
+\emph{individual} & \mpiterm{blocking}
& \mpifunc{MPI\_FILE\_READ} & \mpifunc{MPI\_FILE\_READ\_ALL} \\
\emph{file pointers} &
& \mpifunc{MPI\_FILE\_WRITE} & \mpifunc{MPI\_FILE\_WRITE\_ALL} \\
\cline{2-4}
-& \emph{nonblocking}
+& \mpiterm{nonblocking}
& \mpifunc{MPI\_FILE\_IREAD} & \mpifunc{MPI\_FILE\_IREAD\_ALL} \\
& & \mpifunc{MPI\_FILE\_IWRITE} & \mpifunc{MPI\_FILE\_IWRITE\_ALL} \\
\cline{2-4}
-& \emph{split collective} & {N/A} & \mpifunc{MPI\_FILE\_READ\_ALL\_BEGIN} \\
+& \mpiterm{split collective} & {N/A} & \mpifunc{MPI\_FILE\_READ\_ALL\_BEGIN} \\
& & & \mpifunc{MPI\_FILE\_READ\_ALL\_END} \\
& & & \mpifunc{MPI\_FILE\_WRITE\_ALL\_BEGIN} \\
& & & \mpifunc{MPI\_FILE\_WRITE\_ALL\_END} \\
\hline %-------------------------------------------------------------
-\emph{shared} & \emph{blocking}
+\emph{shared} & \mpiterm{blocking}
& \mpifunc{MPI\_FILE\_READ\_SHARED} & \mpifunc{MPI\_FILE\_READ\_ORDERED} \\
\emph{file pointer} &
& \mpifunc{MPI\_FILE\_WRITE\_SHARED} & \mpifunc{MPI\_FILE\_WRITE\_ORDERED} \\
\cline{2-4}
-& \emph{nonblocking}
+& \mpiterm{nonblocking}
& \mpifunc{MPI\_FILE\_IREAD\_SHARED} & {N/A} \\
& & \mpifunc{MPI\_FILE\_IWRITE\_SHARED} & \\
\cline{2-4}
-& \emph{split collective} & {N/A} & \mpifunc{MPI\_FILE\_READ\_ORDERED\_BEGIN} \\
+& \mpiterm{split collective} & {N/A} & \mpifunc{MPI\_FILE\_READ\_ORDERED\_BEGIN} \\
& & & \mpifunc{MPI\_FILE\_READ\_ORDERED\_END} \\
& & & \mpifunc{MPI\_FILE\_WRITE\_ORDERED\_BEGIN} \\
& & & \mpifunc{MPI\_FILE\_WRITE\_ORDERED\_END} \\
@@ -1147,7 +1153,7 @@
%--------------------------
\MPI/ provides three types of positioning for data access routines:
-explicit offsets, individual file pointers, and shared file pointers.
+\mpitermdef{explicit offsets}, \mpitermdef{individual file pointers}, and \mpitermdef{shared file pointers}.
The different positioning methods may be mixed within the same program
and do not affect each other.
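
A short sketch of the three positioning methods coexisting on one file handle (fh is assumed to be open with a view whose etype is MPI_INT, as in the examples later in this chapter); none of the three calls disturbs the state used by the others:

    MPI_Status status;
    int a, b, c;

    /* explicit offset: neither uses nor moves any file pointer */
    MPI_File_read_at(fh, 10, &a, 1, MPI_INT, &status);
    /* individual file pointer: still at etype 0; advances to 1 */
    MPI_File_read(fh, &b, 1, MPI_INT, &status);
    /* shared file pointer: one per group; also still at etype 0 */
    MPI_File_read_shared(fh, &c, 1, MPI_INT, &status);
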
@@ -1185,14 +1191,14 @@
More formally,
\[
- new\_file\_offset = old\_file\_offset +
+ \textit{new\_file\_offset} = \textit{old\_file\_offset} +
\frac{elements(datatype)}{elements(etype)} \times count
\]
where $count$ is the number of $datatype$ items to be accessed,
$elements(X)$ is the number of predefined datatypes in the typemap of $X$,
-and $old\_file\_offset$ is
+and \textit{old\_file\_offset} is
the value of the implicit offset before the call.
-The file position, $new\_file\_offset$, is in terms
+The file position, \textit{new\_file\_offset}, is in terms
of a count of etypes relative to the current view.
\subsubsection{Synchronism}
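
A worked instance of the offset-update formula above (values assumed for illustration): with etype MPI_INT and datatype a contiguous pair of ints, elements(datatype) = 2 and elements(etype) = 1, so reading count = 3 pairs advances the offset by (2/1) * 3 = 6 etypes:

    MPI_Datatype pair;
    int buf[6];
    MPI_Status status;

    MPI_Type_contiguous(2, MPI_INT, &pair);
    MPI_Type_commit(&pair);
    /* fh is assumed open with etype MPI_INT; the individual file
       pointer moves from old_file_offset to old_file_offset + 6 */
    MPI_File_read(fh, buf, 3, pair, &status);
    MPI_Type_free(&pair);
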
@@ -1200,14 +1206,14 @@
\MPI/ supports blocking and nonblocking I/O routines.
-A \mpiterm{blocking} I/O call will
+A \mpitermni{blocking}\mpitermindex{blocking!I/O} I/O call will
not return
until the I/O request is completed.
-A \mpiterm{nonblocking} I/O call initiates an I/O operation, but does not
+A \mpitermni{nonblocking}\mpitermindex{nonblocking!I/O} I/O call initiates an I/O operation, but does not
wait for it to complete. Given suitable hardware, this allows the
transfer of data out of and into the user's buffer to proceed concurrently with
-computation. A separate \mpiterm{request complete} call
+computation. A separate \mpitermni{request complete}\mpitermindex{request complete!I/O} call
(\mpifunc{MPI\_WAIT}, \mpifunc{MPI\_TEST}, or any of their variants) is
needed to complete the I/O request,
i.e., to confirm that the data has been read or written and that
@@ -1353,6 +1359,8 @@
unless an error is raised (or a read reaches the end of file).
\subsection{Data Access with Explicit Offsets}
+\mpitermtitleindex{explicit offsets}
+\mpitermtitleindex{file!data access!explicit offsets}
%---------------------------------------------
\label{sec:io-explicit}
@@ -1526,6 +1534,8 @@
\mpifunc{MPI\_FILE\_WRITE\_AT\_ALL}.
\subsection{Data Access with Individual File Pointers}
+\mpitermtitleindex{individual file pointers}
+\mpitermtitleindex{file!data access!individual file pointers}
%-----------------------------------------------------
\label{sec:io-indiv-ptr}
@@ -1868,6 +1878,8 @@
is returned in \mpiarg{disp}.
\subsection{Data Access with Shared File Pointers}
+\mpitermtitleindex{shared file pointers}
+\mpitermtitleindex{file!data access!shared file pointers}
%-------------------------------------------------
\label{sec:io-shared-ptr}
@@ -1984,6 +1996,8 @@
of the \mpifunc{MPI\_FILE\_WRITE\_SHARED} interface.
\subsubsection{Collective Operations}
+\mpitermtitleindex{collective communication!file data access operations}
+\mpitermtitleindex{file!data access!collective operations}
%-- - - - - - - - - - - - - - - - - -
\label{sec:io-shared-ptr-col}
@@ -2064,6 +2078,7 @@
\mpifunc{MPI\_FILE\_WRITE\_SHARED} interface.
\subsubsection{Seek}
+\mpitermtitleindexmainsub{file!data access}{seek}
%-- - - - - - - - -
\label{sec:io-shared-ptr-seek}
@@ -2140,6 +2155,7 @@
\end{users}
\subsection{Split Collective Data Access Routines}
+\mpitermtitleindexmainsub{file!data access}{split collective}
%-----------------------------------------------------
\label{sec:io-split-collective}
@@ -2421,6 +2437,7 @@
\mpicppemptybind{MPI::File::Write\_ordered\_end(const~void*~buf)}{void}
\section{File Interoperability}
+\mpitermtitleindexmainsub{file}{interoperability}
%==============================
\label{sec:io-file-interop}
@@ -2512,6 +2529,7 @@
\begin{description}
\item[``native'']\index{CONST:native}
+\mpitermdefindex{native -- file data representation}%
Data in this representation is stored in a file exactly
as it is in memory.
The advantage of this data representation is that
@@ -2534,6 +2552,7 @@
\end{implementors}
\item[``internal'']\index{CONST:internal}
+\mpitermdefindex{internal -- file data representation}%
This data representation can be used for I/O operations in a homogeneous or
heterogeneous environment; the implementation will perform type
conversions if necessary. The implementation is free to store data in
@@ -2559,6 +2578,7 @@
\end{implementors}
\item[``external32'']\index{CONST:external32}
+\mpitermdefindex{external32 -- file data representation}%
This data representation states that read and write operations
convert all data from
and to the ``external32''
@@ -2841,6 +2861,7 @@
\subsection{User-Defined Data Representations}
+\mpitermtitleindex{user-defined data representations}
%---------------------------------------------
\label{sec:io-datarep}
@@ -3067,6 +3088,7 @@
\mpiarg{position}.
An implementation will only invoke the callback routines in this section
+\flushline
(\mpiarg{read\_conversion\_fn}, \mpiarg{write\_conversion\_fn},
and \mpiarg{dtype\_file\_extent\_fn})
when one of the read or write routines in \sectionref{sec:io-access},
@@ -3139,6 +3161,7 @@
\end{users}
\section{Consistency and Semantics}
+\mpitermtitleindex{semantics!file consistency}
%==================================
\label{sec:io-semantics}
@@ -3464,6 +3487,7 @@
semantics set forth in \sectionref{sec:nbcoll}.
\subsection{Type Matching}
+\mpitermtitleindex{matching!type}
%-------------------------
The type matching rules for I/O mimic the type matching rules
@@ -3545,12 +3569,13 @@
when a file is created (see \sectionref{sec:io-info}).
\subsection{File Size}
+\mpitermtitleindex{file size}
%--------------------------
\label{sec:io-consistency-filesize}
The size of a file may be increased by writing to the file after the
current end of file. The size may also be changed by calling
-\MPI/ \mpiterm{size changing} routines,
+\MPI/ \mpitermni{size changing}\mpitermindex{size changing!I/O} routines,
such as \mpifunc{MPI\_FILE\_SET\_SIZE}. A call to a size changing routine
does not necessarily change the file size. For example, calling
\mpifunc{MPI\_FILE\_PREALLOCATE} with a size less than the current size does
@@ -3559,7 +3584,7 @@
Consider a set of bytes that has been written to a file since
the most recent call to a size changing routine,
or since \mpifunc{MPI\_FILE\_OPEN} if no such routine has been called.
-Let the \mpiterm{high byte} be the byte
+Let the \mpitermni{high byte} be the byte
in that set with the largest displacement. The file size
is the larger of
\begin{itemize}
@@ -3627,18 +3652,18 @@
%%ENDHEADER
\begin{verbatim}
/* Process 0 */
-int i, a[10] ;
+int i, a[10];
int TRUE = 1;
for ( i=0;i<10;i++)
- a[i] = 5 ;
+ a[i] = 5;
MPI_File_open( MPI_COMM_WORLD, "workfile",
- MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh0 ) ;
-MPI_File_set_view( fh0, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_File_set_atomicity( fh0, TRUE ) ;
-MPI_File_write_at(fh0, 0, a, 10, MPI_INT, &status) ;
-/* MPI_Barrier( MPI_COMM_WORLD ) ; */
+ MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh0 );
+MPI_File_set_view( fh0, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_File_set_atomicity( fh0, TRUE );
+MPI_File_write_at(fh0, 0, a, 10, MPI_INT, &status);
+/* MPI_Barrier( MPI_COMM_WORLD ); */
\end{verbatim}
%%HEADER
%%LANG: C
@@ -3647,14 +3672,14 @@
%%ENDHEADER
\begin{verbatim}
/* Process 1 */
-int b[10] ;
+int b[10];
int TRUE = 1;
MPI_File_open( MPI_COMM_WORLD, "workfile",
- MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh1 ) ;
-MPI_File_set_view( fh1, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_File_set_atomicity( fh1, TRUE ) ;
-/* MPI_Barrier( MPI_COMM_WORLD ) ; */
-MPI_File_read_at(fh1, 0, b, 10, MPI_INT, &status) ;
+ MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh1 );
+MPI_File_set_view( fh1, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_File_set_atomicity( fh1, TRUE );
+/* MPI_Barrier( MPI_COMM_WORLD ); */
+MPI_File_read_at(fh1, 0, b, 10, MPI_INT, &status);
\end{verbatim}
A user may guarantee that the write on process \constskip{0}
precedes the read on process \constskip{1} by imposing temporal order
@@ -3675,17 +3700,17 @@
%%ENDHEADER
\begin{verbatim}
/* Process 0 */
-int i, a[10] ;
+int i, a[10];
for ( i=0;i<10;i++)
- a[i] = 5 ;
+ a[i] = 5;
MPI_File_open( MPI_COMM_WORLD, "workfile",
- MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh0 ) ;
-MPI_File_set_view( fh0, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_File_write_at(fh0, 0, a, 10, MPI_INT, &status ) ;
-MPI_File_sync( fh0 ) ;
-MPI_Barrier( MPI_COMM_WORLD ) ;
-MPI_File_sync( fh0 ) ;
+ MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh0 );
+MPI_File_set_view( fh0, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_File_write_at(fh0, 0, a, 10, MPI_INT, &status );
+MPI_File_sync( fh0 );
+MPI_Barrier( MPI_COMM_WORLD );
+MPI_File_sync( fh0 );
\end{verbatim}
%%HEADER
@@ -3695,14 +3720,14 @@
%%ENDHEADER
\begin{verbatim}
/* Process 1 */
-int b[10] ;
+int b[10];
MPI_File_open( MPI_COMM_WORLD, "workfile",
- MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh1 ) ;
-MPI_File_set_view( fh1, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_File_sync( fh1 ) ;
-MPI_Barrier( MPI_COMM_WORLD ) ;
-MPI_File_sync( fh1 ) ;
-MPI_File_read_at(fh1, 0, b, 10, MPI_INT, &status ) ;
+ MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh1 );
+MPI_File_set_view( fh1, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_File_sync( fh1 );
+MPI_Barrier( MPI_COMM_WORLD );
+MPI_File_sync( fh1 );
+MPI_File_read_at(fh1, 0, b, 10, MPI_INT, &status );
\end{verbatim}
The ``sync-barrier-sync'' construct is required because:
\begin{itemize}
@@ -3726,16 +3751,16 @@
\begin{verbatim}
/* ---------------- THIS EXAMPLE IS ERRONEOUS --------------- */
/* Process 0 */
-int i, a[10] ;
+int i, a[10];
for ( i=0;i<10;i++)
- a[i] = 5 ;
+ a[i] = 5;
MPI_File_open( MPI_COMM_WORLD, "workfile",
- MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh0 ) ;
-MPI_File_set_view( fh0, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_File_write_at(fh0, 0, a, 10, MPI_INT, &status ) ;
-MPI_File_sync( fh0 ) ;
-MPI_Barrier( MPI_COMM_WORLD ) ;
+ MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh0 );
+MPI_File_set_view( fh0, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_File_write_at(fh0, 0, a, 10, MPI_INT, &status );
+MPI_File_sync( fh0 );
+MPI_Barrier( MPI_COMM_WORLD );
\end{verbatim}
%%HEADER
@@ -3745,13 +3770,13 @@
%%ENDHEADER
\begin{verbatim}
/* Process 1 */
-int b[10] ;
+int b[10];
MPI_File_open( MPI_COMM_WORLD, "workfile",
- MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh1 ) ;
-MPI_File_set_view( fh1, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_Barrier( MPI_COMM_WORLD ) ;
-MPI_File_sync( fh1 ) ;
-MPI_File_read_at(fh1, 0, b, 10, MPI_INT, &status ) ;
+ MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh1 );
+MPI_File_set_view( fh1, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_Barrier( MPI_COMM_WORLD );
+MPI_File_sync( fh1 );
+MPI_File_read_at(fh1, 0, b, 10, MPI_INT, &status );
/* ---------------- THIS EXAMPLE IS ERRONEOUS --------------- */
\end{verbatim}
@@ -3789,12 +3814,12 @@
\begin{verbatim}
int a = 4, b, TRUE=1;
MPI_File_open( MPI_COMM_WORLD, "myfile",
- MPI_MODE_RDWR, MPI_INFO_NULL, &fh ) ;
-MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-/* MPI_File_set_atomicity( fh, TRUE ) ; Use this to set atomic mode. */
-MPI_File_iwrite_at(fh, 10, &a, 1, MPI_INT, &reqs[0]) ;
-MPI_File_iread_at(fh, 10, &b, 1, MPI_INT, &reqs[1]) ;
-MPI_Waitall(2, reqs, statuses) ;
+ MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
+MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+/* MPI_File_set_atomicity( fh, TRUE ); Use this to set atomic mode. */
+MPI_File_iwrite_at(fh, 10, &a, 1, MPI_INT, &reqs[0]);
+MPI_File_iread_at(fh, 10, &b, 1, MPI_INT, &reqs[1]);
+MPI_Waitall(2, reqs, statuses);
\end{verbatim}
For asynchronous data access operations, \MPI/ specifies
that the access occurs at any time between the call to the asynchronous
@@ -3818,13 +3843,13 @@
\begin{verbatim}
int a = 4, b;
MPI_File_open( MPI_COMM_WORLD, "myfile",
- MPI_MODE_RDWR, MPI_INFO_NULL, &fh ) ;
-MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-/* MPI_File_set_atomicity( fh, TRUE ) ; Use this to set atomic mode. */
-MPI_File_iwrite_at(fh, 10, &a, 1, MPI_INT, &reqs[0]) ;
-MPI_File_iread_at(fh, 10, &b, 1, MPI_INT, &reqs[1]) ;
-MPI_Wait(&reqs[0], &status) ;
-MPI_Wait(&reqs[1], &status) ;
+ MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
+MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+/* MPI_File_set_atomicity( fh, TRUE ); Use this to set atomic mode. */
+MPI_File_iwrite_at(fh, 10, &a, 1, MPI_INT, &reqs[0]);
+MPI_File_iread_at(fh, 10, &b, 1, MPI_INT, &reqs[1]);
+MPI_Wait(&reqs[0], &status);
+MPI_Wait(&reqs[1], &status);
\end{verbatim}
If atomic mode is set, either \constskip{2} or \constskip{4} will be read
into \variable{b}. Again, \MPI/ does not guarantee sequential consistency
@@ -3839,12 +3864,12 @@
\begin{verbatim}
int a = 4, b;
MPI_File_open( MPI_COMM_WORLD, "myfile",
- MPI_MODE_RDWR, MPI_INFO_NULL, &fh ) ;
-MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_File_iwrite_at(fh, 10, &a, 1, MPI_INT, &reqs[0]) ;
-MPI_Wait(&reqs[0], &status) ;
-MPI_File_iread_at(fh, 10, &b, 1, MPI_INT, &reqs[1]) ;
-MPI_Wait(&reqs[1], &status) ;
+ MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
+MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_File_iwrite_at(fh, 10, &a, 1, MPI_INT, &reqs[0]);
+MPI_Wait(&reqs[0], &status);
+MPI_File_iread_at(fh, 10, &b, 1, MPI_INT, &reqs[1]);
+MPI_Wait(&reqs[1], &status);
\end{verbatim}
defines the same ordering as:
%%HEADER
@@ -3855,10 +3880,10 @@
\begin{verbatim}
int a = 4, b;
MPI_File_open( MPI_COMM_WORLD, "myfile",
- MPI_MODE_RDWR, MPI_INFO_NULL, &fh ) ;
-MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL ) ;
-MPI_File_write_at(fh, 10, &a, 1, MPI_INT, &status ) ;
-MPI_File_read_at(fh, 10, &b, 1, MPI_INT, &status ) ;
+ MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
+MPI_File_set_view( fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL );
+MPI_File_write_at(fh, 10, &a, 1, MPI_INT, &status );
+MPI_File_read_at(fh, 10, &b, 1, MPI_INT, &status );
\end{verbatim}
Since
\begin{itemize}
@@ -3874,9 +3899,9 @@
%%SKIP
%%ENDHEADER
\begin{verbatim}
-MPI_File_iwrite_all(fh,...) ;
-MPI_File_iread_all(fh,...) ;
-MPI_Waitall(...) ;
+MPI_File_iwrite_all(fh,...);
+MPI_File_iread_all(fh,...);
+MPI_Waitall(...);
\end{verbatim}
In addition, as mentioned in \sectionref{sec:io-semantics-nb-collective},
@@ -3889,10 +3914,10 @@
%%SKIP
%%ENDHEADER
\begin{verbatim}
-MPI_File_write_all_begin(fh,...) ;
-MPI_File_iread(fh,...) ;
-MPI_Wait(fh,...) ;
-MPI_File_write_all_end(fh,...) ;
+MPI_File_write_all_begin(fh,...);
+MPI_File_iread(fh,...);
+MPI_Wait(fh,...);
+MPI_File_write_all_end(fh,...);
\end{verbatim}
Recall that constraints governing consistency and semantics are not
@@ -3901,10 +3926,10 @@
%%SKIP
%%ENDHEADER
\begin{verbatim}
-MPI_File_write_all_begin(fh,...) ;
-MPI_File_read_all_begin(fh,...) ;
-MPI_File_read_all_end(fh,...) ;
-MPI_File_write_all_end(fh,...) ;
+MPI_File_write_all_begin(fh,...);
+MPI_File_read_all_begin(fh,...);
+MPI_File_read_all_end(fh,...);
+MPI_File_write_all_end(fh,...);
\end{verbatim}
since split collective operations on the same file handle may not overlap
(see \sectionref{sec:io-split-collective}).
@@ -3914,6 +3939,7 @@
\section{I/O Error Handling}
+\mpitermtitleindex{error handling!I/O}
%===========================
\label{sec:io-errhandlers}
@@ -3975,6 +4001,7 @@
\section{I/O Error Classes}
+\mpitermtitleindex{error handling!I/O}
%==========================
\label{sec:io-errors}
@@ -4100,11 +4127,11 @@
/* buffer initialization */
buffer1 = (float *)
- malloc(bufcount*sizeof(float)) ;
+ malloc(bufcount*sizeof(float));
buffer2 = (float *)
- malloc(bufcount*sizeof(float)) ;
- compute_buf_ptr = buffer1 ; /* initially point to buffer1 */
- write_buf_ptr = buffer1 ; /* initially point to buffer1 */
+ malloc(bufcount*sizeof(float));
+ compute_buf_ptr = buffer1; /* initially point to buffer1 */
+ write_buf_ptr = buffer1; /* initially point to buffer1 */
/* DOUBLE-BUFFER prolog:
@@ -4163,7 +4190,7 @@
25--49, etc.; see Figure~\ref{fig:io-array-file}).
To create the filetypes for each process one could
use the following C program
-(see \section~\ref{sec:io-const-array}):
+(see Section~\ref{sec:io-const-array}):
\exindex{MPI\_TYPE\_CREATE\_SUBARRAY}%
%%HEADER
Index: mpi-sys-macs.tex
===================================================================
--- mpi-sys-macs.tex (revision 2030)
+++ mpi-sys-macs.tex (working copy)
@@ -182,7 +182,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% For correct usage of \_ together with pdflatex:
-% This macro enables that all "_" (underscore) characters in the pfd
+% This macro enables that all "_" (underscore) characters in the pdf
% file are searchable, and that cut&paste will copy the "_" as underscore.
% Without the following macro, the \_ is treated in searches and cut&paste
% as a " " (space character).
@@ -339,6 +339,7 @@
\makeatother
\newcommand{\uu}[1]{\underline{\hyperpage{#1}}}
+\newcommand{\bold}[1]{\textbf{\hyperpage{#1}}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Theorems have \em text; we want \rm. The easiest way to fix this,
Index: indextomap.pl
===================================================================
--- indextomap.pl (revision 2030)
+++ indextomap.pl (working copy)
@@ -28,10 +28,15 @@
# Remove trailing spaces
$name =~ s/\s+//;
#print $pagetype . "\n";
- if ($pagetype eq "hyperindexformat{\\uu}") {
+ # Only include MPI Function names in the index map file
+ # (including all other terms can cause problems for the name mapping
+ # program because some indexed names are too common)
+ if ($name =~ /^MPI/ && $pagetype eq "hyperindexformat{\\uu}") {
if (defined($nameToURL{$name})) {
if ($nameToURL{$name} != $page) {
print STDERR "Multiple primary definitions for $name\n";
+ # Use only the first definition
+ next
}
}
$nameToURL{$name} = $page;
Index: chap-context/context.tex
===================================================================
--- chap-context/context.tex (revision 2030)
+++ chap-context/context.tex (working copy)
@@ -1,4 +1,8 @@
\chapter{Groups, Contexts, Communicators, and Caching}
+\mpitermtitleindex{group}
+\mpitermtitleindex{context}
+\mpitermtitleindex{communicator}
+\mpitermtitleindex{caching}
\label{sec:context}
\label{chap:context}
@@ -52,20 +56,20 @@
The corresponding
concepts that \MPI/ provides, specifically to support robust libraries, are
as follows:
-\begin{itemize} \item \mpiterm{Contexts} of communication,
-\item \mpiterm{Groups} of processes,
-\item \mpiterm{Virtual topologies},
-\item \mpiterm{Attribute caching},
-\item \mpiterm{Communicators}.
+\begin{itemize} \item \mpitermdefni{Contexts}\mpitermdefindex{context} of communication,
+\item \mpitermdefni{Groups} of processes\mpitermdefindex{group},
+\item \mpitermdefni{Virtual topologies}\mpitermdefindex{virtual topology},
+\item \mpitermdefni{Attribute caching}\mpitermdefindex{attribute!caching},
+\item \mpitermdefni{Communicators}\mpitermdefindex{communicator}.
\end{itemize}
-\mpiterm{Communicators} (see \cite{communicator,zipcode1,Skj93b}) encapsulate all of
+\mpitermdefni{Communicators} (see \cite{communicator,zipcode1,Skj93b}) encapsulate all of
these ideas in order to provide the appropriate scope for all communication
operations in \MPI/. Communicators are divided into two kinds:
intra-communicators for operations within a single group of processes and
inter-communicators for operations between two groups of
processes.
-\paragraph{Caching.} Communicators (see
+\paragraph{Caching.}\mpitermdefindex{caching} Communicators (see
below) provide a ``caching'' mechanism that allows one to
associate new attributes with communicators, on
par with \MPI/ built-in
@@ -74,7 +78,7 @@
virtual-topology functions described in
Chapter~\ref{chap:topol} are likely to be supported this way.
-\paragraph{Groups.} Groups
+\paragraph{Groups.}\mpitermdefindex{group} Groups
define an ordered collection of processes, each with a rank, and it is this
group that defines the low-level names for inter-process communication (ranks
are used for sending and receiving). Thus, groups define a scope for process
@@ -83,14 +87,14 @@
communicators in \MPI/, but only communicators can be used in
communication operations.
-\paragraph{Intra-communicators.} The most commonly used means for message
+\paragraph{Intra-communicators.}\mpitermdefindex{intra-communicator} The most commonly used means for message
passing in \MPI/ is via intra-communicators. Intra-communicators contain an
instance of a group, contexts of communication for both point-to-point and
collective communication, and the ability to include virtual topology and
other attributes.
These features work as follows:
\begin{itemize}
-\item \mpiterm{Contexts} provide the ability to have separate safe ``universes''
+\item \mpitermdefni{Contexts}\mpitermdefindex{context} provide the ability to have separate safe ``universes''
of message-passing in \MPI/. A context is akin to an additional
tag that differentiates messages.
The system manages this differentiation process.
@@ -104,16 +108,16 @@
communications are also guaranteed not to interfere with collective
communications within a single communicator.
-\item \mpiterm{Groups} define the participants in the communication (see above)
+\item \mpitermdefni{Groups}\mpitermdefindex{group} define the participants in the communication (see above)
of a communicator.
-\item A \mpiterm{virtual topology} defines a special mapping of the ranks in a
+\item A \mpitermdef{virtual topology} defines a special mapping of the ranks in a
group to and from a topology. Special constructors for
communicators are defined in Chapter~\ref{chap:topol} to provide
this feature. Intra-communicators as described in this chapter do
not have topologies.
-\item \mpiterm{Attributes} define the local information that the user or
+\item \mpitermdefni{Attributes}\mpitermdefindex{attribute} define the local information that the user or
library has added to a communicator for later reference.
\end{itemize}
@@ -131,9 +135,9 @@
\end{users}
\paragraph{Inter-communicators.}
-The discussion has dealt so far with \mpiterm{intra-communication}:
+The discussion has dealt so far with \mpitermdef{intra-communication}:
communication
-within a group. \MPI/ also supports \mpiterm{inter-communication}:
+within a group. \MPI/ also supports \mpitermdef{inter-communication}:
communication
between two non-overlapping groups. When an application is built by composing
several parallel modules, it is convenient to allow one module to communicate
@@ -144,7 +148,7 @@
not all processes are preallocated at initialization time. In such a
situation, it becomes necessary to support communication across ``universes.''
Inter-communication is supported by objects called
-\mpiterm{inter-communicators}.
+\mpitermdefni{inter-communicators}\mpitermdefindex{inter-communicator}.
These objects bind two groups together with communication contexts shared by
both groups.
For inter-communicators, these features work as follows:
@@ -191,11 +195,13 @@
\subsection{Groups}
\label{sec:context:groups}
-A \mpiterm{group} is an ordered set of process identifiers (henceforth
-processes); processes are implementation-dependent objects. Each
-process in a group is associated with an integer \mpiterm{rank}. Ranks are
+A \mpitermdef{group} is an ordered set of process identifiers (henceforth
+processes); processes are
+implementation\hskip0pt-\hskip0pt\relax{}dependent
+objects. Each
+process in a group is associated with an integer \mpitermdef{rank}. Ranks are
contiguous and start from zero.
-Groups are represented by opaque \mpiterm{group objects}, and hence cannot
+Groups are represented by opaque \mpitermdef{group objects}, and hence cannot
be directly transferred from one process to another. A group is used
within a communicator to describe the participants in a communication
``universe'' and to rank such participants (thus giving them unique names
@@ -228,7 +234,7 @@
\subsection{Contexts}
\label{sec:context:contexts}
-A \mpiterm{context} is a property of communicators (defined next) that allows
+A \mpitermdef{context} is a property of communicators (defined next) that allows
partitioning of the communication space. A message sent in one context cannot
be received in another context. Furthermore, where permitted, collective
operations are independent of pending point-to-point operations.
@@ -281,7 +287,7 @@
communication, and provides machine-independent process addressing through
ranks.
-Intra-communicators are represented by opaque \mpiterm{intra-communicator
+Intra-communicators are represented by opaque \mpitermdef{intra-communicator
objects}, and hence cannot be directly transferred from one process to
another.
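
A brief sketch of working with these opaque objects, using only standard calls: extracting the group behind a communicator and querying it locally:

    MPI_Group world_group;
    int grank, gsize;

    MPI_Comm_group(MPI_COMM_WORLD, &world_group);
    MPI_Group_size(world_group, &gsize);
    MPI_Group_rank(world_group, &grank);  /* MPI_UNDEFINED if not a member */
    MPI_Group_free(&world_group);         /* group objects must be freed */
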
@@ -827,8 +833,8 @@
called the \emph{left} and \emph{right} groups. A process in an
intercommunicator is a member of either the left or the right group. From the
point of view of that process, the
-group that the process is a member of is called the \emph{local} group; the
-other group (relative to that process) is the \emph{remote} group.
+group that the process is a member of is called the \mpiterm{local group}; the
+other group (relative to that process) is the \mpiterm{remote group}.
The left and right group labels give us a way to describe the two groups in
an intercommunicator that is not relative to any particular process (as the
local and remote groups are).
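
A sketch of querying both sides of an intercommunicator from one process (intercomm is assumed to have been created already, e.g., with MPI_Intercomm_create):

    int flag, local_size, remote_size;

    MPI_Comm_test_inter(intercomm, &flag);          /* nonzero: intercommunicator */
    MPI_Comm_size(intercomm, &local_size);          /* size of the local group  */
    MPI_Comm_remote_size(intercomm, &remote_size);  /* size of the remote group */
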
@@ -1619,15 +1625,15 @@
{
int me, count, count2;
void *send_buf, *recv_buf, *send_buf2, *recv_buf2;
- MPI_Group MPI_GROUP_WORLD, grprem;
+ MPI_Group group_world, grprem;
MPI_Comm commslave;
static int ranks[] = {0};
...
MPI_Init(&argc, &argv);
- MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
+ MPI_Comm_group(MPI_COMM_WORLD, &group_world);
MPI_Comm_rank(MPI_COMM_WORLD, &me); /* local */
- MPI_Group_excl(MPI_GROUP_WORLD, 1, ranks, &grprem); /* local */
+ MPI_Group_excl(group_world, 1, ranks, &grprem); /* local */
MPI_Comm_create(MPI_COMM_WORLD, grprem, &commslave);
if(me != 0)
@@ -1642,7 +1648,7 @@
MPI_Reduce(send_buf2, recv_buf2, count2,
MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
- MPI_Group_free(&MPI_GROUP_WORLD);
+ MPI_Group_free(&group_world);
MPI_Group_free(&grprem);
MPI_Finalize();
return 0;
@@ -1686,14 +1692,14 @@
int me;
MPI_Request request[2];
MPI_Status status[2];
- MPI_Group MPI_GROUP_WORLD, subgroup;
+ MPI_Group group_world, subgroup;
int ranks[] = {2, 4, 6, 8};
MPI_Comm the_comm;
...
MPI_Init(&argc, &argv);
- MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
+ MPI_Comm_group(MPI_COMM_WORLD, &group_world);
- MPI_Group_incl(MPI_GROUP_WORLD, 4, ranks, &subgroup); /* local */
+ MPI_Group_incl(group_world, 4, ranks, &subgroup); /* local */
MPI_Group_rank(subgroup, &me); /* local */
MPI_Comm_create(MPI_COMM_WORLD, subgroup, &the_comm);
@@ -1711,7 +1717,7 @@
MPI_Comm_free(&the_comm);
}
- MPI_Group_free(&MPI_GROUP_WORLD);
+ MPI_Group_free(&group_world);
MPI_Group_free(&subgroup);
MPI_Finalize();
return 0;
@@ -1846,7 +1852,7 @@
int main(int argc, char *argv[])
{
int ma, mb;
- MPI_Group MPI_GROUP_WORLD, group_a, group_b;
+ MPI_Group group_world, group_a, group_b;
MPI_Comm comm_a, comm_b;
static int list_a[] = {0, 1};
@@ -1860,10 +1866,10 @@
...
MPI_Init(&argc, &argv);
- MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
+ MPI_Comm_group(MPI_COMM_WORLD, &group_world);
- MPI_Group_incl(MPI_GROUP_WORLD, size_list_a, list_a, &group_a);
- MPI_Group_incl(MPI_GROUP_WORLD, size_list_b, list_b, &group_b);
+ MPI_Group_incl(group_world, size_list_a, list_a, &group_a);
+ MPI_Group_incl(group_world, size_list_b, list_b, &group_b);
MPI_Comm_create(MPI_COMM_WORLD, group_a, &comm_a);
MPI_Comm_create(MPI_COMM_WORLD, group_b, &comm_b);
@@ -1888,7 +1894,7 @@
MPI_Comm_free(&comm_b);
MPI_Group_free(&group_a);
MPI_Group_free(&group_b);
- MPI_Group_free(&MPI_GROUP_WORLD);
+ MPI_Group_free(&group_world);
MPI_Finalize();
return 0;
}
@@ -1970,8 +1976,8 @@
All communication described thus far has involved
communication between processes that are members of the same group. This type
-of communication is called ``int\-ra-com\-mun\-i\-cat\-ion'' and the
-communicator used is called an ``intra-communicator,'' as we have noted
+of communication is called ``\mpitermdefni{int\-ra-com\-mun\-i\-cat\-ion}\mpitermdefindex{intra-communication}'' and the
+communicator used is called an ``\mpitermdef{intra-communicator},'' as we have noted
earlier in the chapter.
In modular and multi-disciplinary applications, different process groups
@@ -1984,10 +1990,10 @@
process group that uses the services of one or more servers. It is again most
natural to specify the target process by rank within the target group in these
applications. This type of communication is called
-``int\-er-com\-mun\-i\-cat\-ion'' and the communicator used is called an
-``inter-communicator,'' as introduced earlier.
+``\mpitermdefni{int\-er-com\-mun\-i\-cat\-ion}'' and the communicator used is called an
+``\mpitermdef{inter-communicator},'' as introduced earlier.
-An int\-er-com\-mun\-i\-cat\-ion is a point-to-point communication
+An \mpitermdef{inter-communication} is a point-to-point communication
between processes in different groups. The group containing a process that
initiates an int\-er-com\-mun\-i\-cat\-ion operation is called the ``local
group,'' that is, the sender in a send and the receiver in a receive. The
@@ -2055,7 +2061,7 @@
\mpiterm{source} is the rank of the process in the local group.
For intra-communicators, \mpiterm{group} is the communicator group
(remote=local), \mpiterm{source} is the rank of the process in this group,
-and \mpiterm{send context} and \mpiterm{receive context} are identical.
+and \mpitermni{send context}\mpitermindex{send!context} and \mpitermni{receive context}\mpitermindex{receive!context} are identical.
A group
can be
represented by a rank-to-absolute-address translation table.
@@ -2088,7 +2094,7 @@
Assume that \textbf{Q} posts a receive with an explicit source argument
using the inter-communicator. Then \textbf{Q} matches
-\mpiterm{receive\_context} to the message context and source argument to the
+\textbf{receive\_context} to the message context and source argument to the
message source.
The same algorithm is appropriate for intra-communicators as well.
@@ -2469,10 +2475,11 @@
\end{verbatim}
\section{Caching}
+\mpitermtitleindex{caching}
\label{sec:caching}
\MPI/ provides a ``caching'' facility that allows an application to
-attach arbitrary pieces of information, called \mpiterm{attributes}, to
+attach arbitrary pieces of information, called \mpitermdefni{attributes}\mpitermdefindex{attribute}, to
three kinds of \MPI/ objects, communicators,
windows, and datatypes.
More precisely, the caching
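
As an aside, a minimal sketch of the caching facility just introduced (the attribute value 42 is arbitrary; no copy or delete callbacks are used):

    int keyval, *val, flag;
    static int payload = 42;

    MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN,
                           &keyval, NULL);
    MPI_Comm_set_attr(MPI_COMM_WORLD, keyval, &payload);
    MPI_Comm_get_attr(MPI_COMM_WORLD, keyval, &val, &flag);
    /* here flag is nonzero and *val == 42 */
    MPI_Comm_delete_attr(MPI_COMM_WORLD, keyval);
    MPI_Comm_free_keyval(&keyval);
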
@@ -3422,6 +3429,7 @@
\section{Naming Objects}
+\mpitermtitleindex{naming objects}
\label{sec:ei-naming}
There are many occasions on which it would be useful to allow a user
@@ -3638,6 +3646,7 @@
\section{Formalizing the Loosely Synchronous Model}
+\mpitermtitleindex{loosely synchronous model}
\label{sec:formalizing}
In this section, we make further statements about the loosely
synchronous model, with particular attention to intra-communication.
@@ -3666,13 +3675,13 @@
\label{sec:context:models-of-execution}
In the loosely synchronous model, transfer of control to a
-\mpiterm{parallel procedure} is effected by having each executing process
+\mpitermdef{parallel procedure} is effected by having each executing process
invoke the procedure. The invocation is a collective operation: it
is executed by all processes in the execution group, and invocations
are similarly ordered at all processes. However, the invocation need
not be synchronized.
-We say that a parallel procedure is \emph{active} in a process if the process
+We say that a parallel procedure is \mpiterm{active} in a process if the process
belongs to a group that may collectively execute the procedure, and
some member of that group is currently executing the procedure code.
If a parallel procedure is active in a process, then this process may
Index: instr.tex
===================================================================
--- instr.tex (revision 2030)
+++ instr.tex (working copy)
@@ -107,6 +107,7 @@
For compatibility with the widest variety of editors, text should be
wrapped to fit with 80 columns. Edits should avoid reflowing text as
this complicates identifying real changes in the document.
+The document follows the conventions and spelling of American English.
\subsection{Basic Formatting}
@@ -132,9 +133,9 @@
addition, the use of the page reference is often misleading, as the
page number will refer to the beginning of the section but the typical
use of these is to point to the entire body of the section, which
-almost certainly spans multiple pages.
-See
-Section~\ref{sec:not-to-do} for some examples.
+almost certainly spans multiple pages, or to a specific page within
+the section, but not necessarily the first page of the section.
+See Section~\ref{sec:not-to-do} for some examples.
LaTeX defines many environments and many others may be added to
LaTeX. To preserve a uniform appearance, use only these environments
@@ -354,6 +355,14 @@
consistent style is used in the document.
Do not use $\ldots$ for this purpose.
+\subsection{\texorpdfstring{\MPI/}{MPI} Terms}
+\label{sec:terms}
+The \MPI/ document introduces a number of terms, such as ``message''
+and ``send buffer.''
+These should be marked as \verb+\mpitermdef{message}+ where the term
+is first used and defined, and as \verb+\mpiterm{send buffer}+ at
+subsequent uses. These macros will generate an index entry for each use.
+
\subsection{Standard Names}
\label{sec:standard-names}
@@ -937,9 +946,20 @@
It is incorrect to use an en dash as punctuation, and it is incorrect
to use a hyphen in a number range.
+\subsection{Using Quotes}\label{sec:using-quotes}
+TeX uses the characters \verb+`+ and \verb+'+ for open and close
+quotes respectively.
+For double quotes, use two of the appropriate quote; do \emph{not} use
+the double quote character \verb+"+.
+
+Because this document uses the standards of American English,
+punctuation after a quoted phrase is placed within the quotation.
+For example, ``terma,'' ``termb,'' and ``termc.''
+
+
\subsection{And so on}
The above are not the only things to avoid --- the recommendation is
-to stick to the commands outline in this document and to contact the
-document master/editor if something else is needed.
+to stick to the commands outlined in this document and to contact the
+document master or editor if something else is needed.
\end{document}
Index: chap-coll/coll.tex
===================================================================
--- chap-coll/coll.tex (revision 2030)
+++ chap-coll/coll.tex (working copy)
@@ -1,4 +1,6 @@
\chapter{Collective Communication}
+\mpitermtitleindex{communication!collective}
+\mpitermtitleindex{collective communication}
\label{sec:coll}
\label{chap:coll}
\label{chap:collective-2}
@@ -211,13 +213,15 @@
Instead, there is a communicator argument.
Groups and communicators are discussed in full detail in Chapter~\ref{chap:context}.
For the purposes of this chapter, it is sufficient to know that there
-are two types of communicators: \emph{intra-communicators} and \emph{inter-communicators}.
+are two types of communicators: \mpitermni{intra-communicators}\mpitermindex{intra-communicator}
+and \mpitermni{inter-communicators}\mpitermindex{inter-communicator}.
An intracommunicator can be thought of as an identifier for a single group of processes
linked with a context. An intercommunicator identifies two distinct groups of processes
linked with a context.
\subsection{Specifics for Intracommunicator Collective Operations}
+\mpitermtitleindex{intra-communicator!collective operations}
All processes in the group identified by the intracommunicator must call
the collective routine.
@@ -252,6 +256,7 @@
\end{users}
\subsection{Applying Collective Operations to Intercommunicators}
+\mpitermtitleindex{inter-communicator!collective operations}
\label{sec:collective-2}
\label{sec:MPI-coll}
@@ -357,6 +362,7 @@
\end{figure}
\subsection{Specifics for Intercommunicator Collective Operations}
+\mpitermtitleindex{inter-communicator!collective operations}
All processes in both groups identified by the intercommunicator must call
the collective routine.
@@ -392,6 +398,7 @@
\end{rationale}
\section{Barrier Synchronization}
+\mpitermtitleindex{barrier synchronization}
\label{sec:coll-barrier}
\begin{funcdef}{MPI\_BARRIER(comm)}
@@ -418,6 +425,7 @@
have entered the call.
\section{Broadcast}
+\mpitermtitleindex{broadcast}
\label{sec:coll-broadcast}
\begin{funcdef}{MPI\_BCAST(buffer, count, datatype, root, comm)}
@@ -503,6 +511,7 @@
\end{example}
\section{Gather}
+\mpitermtitleindex{gather}
\label{sec:coll-gather}
\begin{funcdef}{MPI\_GATHER(sendbuf, sendcount, sendtype, recvbuf,
@@ -542,12 +551,13 @@
\begin{mpicodeblock}
MPI\_Send(sendbuf, sendcount, sendtype, root , ...),
\end{mpicodeblock}
-and the
+\noindent and the
root had executed \mpicode{n} calls to
\begin{mpicodeblock}
MPI\_Recv(recvbuf+i$\cdot$ recvcount$\cdot$ extent(recvtype), recvcount, recvtype, i,...),
\end{mpicodeblock}
-where \mpicode{extent(recvtype)} is the type extent obtained from a call to
+\noindent where
+\mpicode{extent(recvtype)} is the type extent obtained from a call to
\mpicode{MPI\_Type\_get\_extent}.
An alternative description is that the \mpicode{n} messages sent by the
@@ -648,7 +658,7 @@
\begin{mpicodeblock}
MPI\_Send(sendbuf, sendcount, sendtype, root, ...),
\end{mpicodeblock}
-and the root executes \mpicode{n} receives,
+\noindent and the root executes \mpicode{n} receives,
\begin{mpicodeblock}
MPI\_Recv(recvbuf+displs[j]$\cdot$ extent(recvtype), recvcounts[j],
recvtype, i, ...).
@@ -1118,6 +1128,7 @@
\end{example}
\section{Scatter}
+\mpitermtitleindex{scatter}
\label{sec:coll-scatter}
\begin{funcdef}{MPI\_SCATTER(sendbuf, sendcount, sendtype, recvbuf,
@@ -1154,7 +1165,7 @@
MPI\_Send(sendbuf+i$\cdot$ sendcount$\cdot$ extent(sendtype), sendcount,
sendtype, i,...),
\end{mpicodeblock}
-and each process executed a receive,
+\noindent and each process executed a receive,
\begin{mpicodeblock}
MPI\_Recv(recvbuf, recvcount, recvtype, i,...).
\end{mpicodeblock}
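
A concrete sketch of the scatter just described (sizes illustrative; sendbuf is significant only at the root):

    int nprocs, mine;
    int sendbuf[64];   /* filled at the root; one int per process */

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Scatter(sendbuf, 1, MPI_INT, &mine, 1, MPI_INT,
                0, MPI_COMM_WORLD);
    /* process i now holds the root's sendbuf[i] in mine */
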
@@ -1265,7 +1276,7 @@
MPI\_Send(sendbuf+displs[i]$\cdot$ extent(sendtype), sendcounts[i],
sendtype, i,...),
\end{mpicodeblock}
-and each process executed a receive,
+\noindent and each process executed a receive,
\begin{mpicodeblock}
MPI\_Recv(recvbuf, recvcount, recvtype, i,...).
\end{mpicodeblock}
@@ -1456,6 +1467,7 @@
\end{figure}
\section{Gather-to-all}
+\mpitermtitleindex{gather-to-all}
\label{sec:coll-allcast}
\begin{funcdef}{MPI\_ALLGATHER(sendbuf, sendcount, sendtype, recvbuf,
@@ -1645,6 +1657,7 @@
\end{example}
\section{All-to-All Scatter/Gather}
+\mpitermtitleindex{all-to-all}
\label{sec:coll-alltoall}
\begin{funcdef}{MPI\_ALLTOALL(sendbuf, sendcount, sendtype, recvbuf,
@@ -1694,10 +1707,10 @@
MPI\_Send(sendbuf+i$\cdot$ sendcount$\cdot$
extent(sendtype),sendcount,sendtype,i, ...),
\end{mpicodeblock}
-and a receive from every other process
+\noindent and a receive from every other process
with a call to,
\begin{mpicodeblock}
-MPI\_Recv(recvbuf+i$\cdot$ recvcount$\cdot$ extent(recvtype),recvcount,recvtype,i,...).
+MPI\_Recv(recvbuf+i $\cdot$ recvcount $\cdot$ extent(recvtype),recvcount,recvtype,i,...).
\end{mpicodeblock}
All arguments
@@ -1797,7 +1810,7 @@
\begin{mpicodeblock}
MPI\_Send(sendbuf+sdispls[i]$\cdot$ extent(sendtype),sendcounts[i],sendtype,i,...),
\end{mpicodeblock}
-and received a message from every other process with
+\noindent and received a message from every other process with
a call to
\begin{mpicodeblock}
MPI\_Recv(recvbuf+rdispls[i]$\cdot$ extent(recvtype),recvcounts[i],recvtype,i,...).
@@ -1906,7 +1919,7 @@
\begin{mpicodeblock}
MPI\_Send(sendbuf+sdispls[i],sendcounts[i],sendtypes[i] ,i,...),
\end{mpicodeblock}
-and received a message from every other process with a call to
+\noindent and received a message from every other process with a call to
\begin{mpicodeblock}
MPI\_Recv(recvbuf+rdispls[i],recvcounts[i],recvtypes[i] ,i,...).
\end{mpicodeblock}
@@ -1938,6 +1951,7 @@
\end{rationale}
\section{Global Reduction Operations}
+\mpitermtitleindex{reduction operations}
\label{global-reduce}
The functions in this section perform a global reduce operation
@@ -1959,6 +1973,7 @@
functionality of a reduce and of a scatter operation.
\subsection{Reduce}
+\mpitermtitleindex{reduce}
\label{subsec:coll-reduce}
\begin{funcdef}{MPI\_REDUCE(sendbuf, recvbuf, count, datatype, op,
@@ -2094,6 +2109,7 @@
buffer arguments are significant at the root.
\subsection{Predefined Reduction Operations}
+\mpitermtitleindexsubmain{predefined}{reduction operations}
\label{coll-predefined-op}
The following predefined operations are supplied for \mpifunc{MPI\_REDUCE}
@@ -2585,6 +2601,7 @@
\end{rationale}
\subsection{User-Defined Reduction Operations}
+\mpitermtitleindexsubmain{user-defined}{reduction operations}
\label{subsec:coll-user-ops}
\begin{funcdef}{MPI\_OP\_CREATE(user\_fn, commute, op)}
@@ -2612,7 +2629,7 @@
commutative and associative. If \mpiarg{commute} $=$ \mpicode{false},
then the order of operands is fixed and is defined to be in ascending, process
rank order, beginning with process zero. The order of evaluation can be
-changed, talking advantage of the associativity of the operation. If
+changed, taking advantage of the associativity of the operation. If
\mpiarg{commute} $=$ \mpicode{true} then the order of evaluation can be changed,
taking advantage of commutativity and associativity.
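
For illustration, a sketch of a commutative user-defined operation (pointwise integer maximum, equivalent to the predefined MPI_MAX) and its registration; sendbuf, recvbuf, and count are assumed declared:

    void int_max_fn(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
    {
        int i, *in = (int *)invec, *inout = (int *)inoutvec;
        for (i = 0; i < *len; i++)
            if (in[i] > inout[i])
                inout[i] = in[i];   /* inout[i] := in[i] op inout[i] */
    }

    /* registration and use */
    MPI_Op myop;
    MPI_Op_create(int_max_fn, 1 /* commute */, &myop);
    MPI_Reduce(sendbuf, recvbuf, count, MPI_INT, myop, 0, MPI_COMM_WORLD);
    MPI_Op_free(&myop);
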
@@ -2637,7 +2654,7 @@
to \mpifunc{MPI\_REDUCE}.
The user reduce function should be written such that the following
holds:
-Let \mpicode{u[0], $\ldots$, u[len-1]} be the \mpiarg{len} elements in the
+Let \mpicode{u[0], $\ldots$ , u[len-1]} be the \mpiarg{len} elements in the
communication buffer described by the arguments \mpiarg{invec, len}
and \mpiarg{datatype} when the function is invoked;
let \mpicode{v[0], $\ldots$ , v[len-1]} be \mpiarg{len} elements in the
@@ -2652,7 +2669,7 @@
Informally, we can think of
\mpiarg{invec} and \mpiarg{inoutvec} as arrays of \mpiarg{len} elements that
\mpiarg{user\_fn}
-is combining. The result of the reduction over-writes values in
+is combining. The result of the reduction overwrites values in
\mpiarg{inoutvec}, hence the name. Each invocation of the function results in
the pointwise evaluation of the reduce operator on \mpiarg{len}
elements:
@@ -2883,6 +2900,7 @@
\end{example}
\subsection{All-Reduce}
+\mpitermtitleindex{all-reduce}
\label{subsec:coll-all-reduce}
\MPI/ includes
@@ -2979,6 +2997,7 @@
\end{example}
\subsection{Process-Local Reduction}
+\mpitermtitleindex{reduction operations!process-local}
\label{subsec:coll-process-local-reduction} % Sect. 5.9.7 p.173 NEWsection
The functions in this section are of importance to library implementors
@@ -3028,6 +3047,7 @@
\section{Reduce-Scatter}
+\mpitermtitleindex{reduce-scatter}
\label{sec:coll-reduce-scatter}
\MPI/ includes variants of the reduce operations where the result is scattered
@@ -3182,8 +3202,10 @@
\end{rationale}
\section{Scan}
+\mpitermtitleindexmainsub{reduction operations}{scan}
\label{sec:coll-scan}
\subsection{Inclusive Scan}
+\mpitermtitleindexsubmain{inclusive}{scan}
\begin{funcdef}{MPI\_SCAN(sendbuf, recvbuf, count, datatype, op, comm)}
\funcarg{\IN}{sendbuf}{starting address of send buffer (choice)}
@@ -3224,6 +3246,7 @@
This operation is invalid for intercommunicators.
\subsection{Exclusive Scan}
+\mpitermtitleindexsubmain{exclusive}{scan}
\label{subsec:coll-exscan}
\label{coll-exscan} % Sect. 5.11.2 p.175 newlabel
@@ -3395,6 +3418,7 @@
\section{Nonblocking Collective Operations}
+\mpitermtitleindex{collective communication!nonblocking}
\label{sec:nbcoll}
As described in Section~\ref{sec:pt2pt-nonblock}, performance of many
applications can be improved by overlapping communication and
@@ -3542,6 +3566,7 @@
\subsection{Nonblocking Barrier Synchronization}
+\mpitermtitleindex{barrier synchronization!nonblocking}
\label{sec:nbcoll-ibarrier}
\begin{funcdef}{MPI\_IBARRIER(comm , request)}
@@ -3575,6 +3600,7 @@
\subsection{Nonblocking Broadcast}
+\mpitermtitleindex{broadcast!nonblocking}
\label{sec:nbcoll-ibroadcast}
\begin{funcdef}{MPI\_IBCAST(buffer, count, datatype, root, comm, request)}
@@ -3629,6 +3655,7 @@
\subsection{Nonblocking Gather}
+\mpitermtitleindex{gather!nonblocking}
\label{sec:nbcoll-igather}
\begin{funcdef2}{MPI\_IGATHER(sendbuf, sendcount, sendtype, recvbuf,
@@ -3694,6 +3721,7 @@
Section~\ref{sec:coll-gather}).
\subsection{Nonblocking Scatter}
+\mpitermtitleindex{scatter!nonblocking}
\label{sec:nbcoll-iscatter}
\begin{funcdef2}{MPI\_ISCATTER(sendbuf, sendcount, sendtype, recvbuf,
@@ -3756,6 +3784,7 @@
\subsection{Nonblocking Gather-to-all}
+\mpitermtitleindex{gather-to-all!nonblocking}
\label{sec:nbcoll-iallcast}
\begin{funcdef2}{MPI\_IALLGATHER(sendbuf, sendcount, sendtype, recvbuf,
@@ -3815,6 +3844,7 @@
\subsection{Nonblocking All-to-All Scatter/Gather}
+\mpitermtitleindex{all-to-all!nonblocking}
\label{sec:nbcoll-ialltoall}
\begin{funcdef}{MPI\_IALLTOALL(sendbuf, sendcount, sendtype, recvbuf,
@@ -3912,6 +3942,7 @@
Section~\ref{sec:coll-alltoall}).
\subsection{Nonblocking Reduce}
+\mpitermtitleindex{reduce!nonblocking}
\label{subsec:nbcoll-ireduce}
\begin{funcdef}{MPI\_IREDUCE(sendbuf, recvbuf, count, datatype, op,
@@ -3961,6 +3992,7 @@
\subsection{Nonblocking All-Reduce}
+\mpitermtitleindex{all-reduce!nonblocking}
\label{subsec:nbcoll-all-reduce}
\begin{funcdef}{MPI\_IALLREDUCE(sendbuf, recvbuf, count, datatype, op, comm, request)}
@@ -3990,6 +4022,7 @@
\subsection{Nonblocking Reduce-Scatter with Equal Blocks}
+\mpitermtitleindex{reduce-scatter!nonblocking}
\label{sec:nbcoll-reduce-scatter-block}
\begin{funcdef}{MPI\_IREDUCE\_SCATTER\_BLOCK(sendbuf, recvbuf, recvcount,
@@ -4015,6 +4048,7 @@
\subsection{Nonblocking Reduce-Scatter}
+\mpitermtitleindex{reduce-scatter!nonblocking}
\label{sec:nbcoll-reduce-scatter}
\begin{funcdef}{MPI\_IREDUCE\_SCATTER(sendbuf, recvbuf, recvcounts,
@@ -4044,6 +4078,7 @@
\subsection{Nonblocking Inclusive Scan}
+\mpitermtitleindex{inclusive scan!nonblocking}
\label{subsec:nbcoll-iscan}
\begin{funcdef}{MPI\_ISCAN(sendbuf, recvbuf, count, datatype, op, comm, request)}
@@ -4068,6 +4103,7 @@
Section~\ref{sec:coll-scan}).
\subsection{Nonblocking Exclusive Scan}
+\mpitermtitleindex{exclusive scan!nonblocking}
\label{subsec:nbcoll-iexscan}
@@ -4093,6 +4129,7 @@
Section~\ref{subsec:coll-exscan}).
\section{Correctness}
+\mpitermtitleindex{collective communication!correctness}
\label{coll:correct}
A correct, portable program must invoke collective communications so
Index: chap-terms/terms-2.tex
===================================================================
--- chap-terms/terms-2.tex (revision 2030)
+++ chap-terms/terms-2.tex (working copy)
@@ -60,10 +60,11 @@
\item
The names of certain actions have been standardized. In
-particular, \mpiterm{Create} creates a new object, \mpiterm{Get}
-retrieves information about an object, \mpiterm{Set} sets
-this information, \mpiterm{Delete} deletes information,
-\mpiterm{Is} asks whether or not an object has a certain property.
+particular, \mpitermdefni{Create}\mpitermdefindex{create -- in function names}
+creates a new object, \mpitermdefni{Get}\mpitermdefindex{get -- in function names}
+retrieves information about an object, \mpitermdefni{Set}\mpitermdefindex{set -- in function names} sets
+this information, \mpitermdefni{Delete}\mpitermdefindex{delete -- in function names} deletes information,
+\mpitermdefni{Is}\mpitermdefindex{is -- in function names} asks whether or not an object has a certain property.
\end{enumerate}
@@ -71,8 +72,8 @@
some \MPI/ functions (that were defined during the \MPII/ process)
violate these rules
in several cases. The most common exceptions are the omission
-of the \mpiterm{Class} name from the routine and the omission of
-the \mpiterm{Action} where one can be inferred.
+of the \mpitermdefni{Class}\mpitermdefindex{class -- in function names} name from the routine and the omission of
+the \mpitermdefni{Action}\mpitermdefindex{action -- in function names} where one can be inferred.
\mpi/ identifiers are limited to 30 characters (31 with the profiling
interface). This is done to avoid exceeding the limit on some
@@ -189,38 +190,38 @@
terms are used.
\begin{description}
-\item[\mpiterm{nonblocking}] A procedure is nonblocking if it may return before the associated
+\item[\mpitermdef{nonblocking}] A procedure is nonblocking if it may return before the associated
operation completes, and before the user is allowed to reuse
resources (such as buffers) specified in the call.
The word complete is used with respect to operations and any associated requests and/or
-communications. An \mpiterm{operation completes} when the user is allowed
+communications. An \mpitermdef{operation completes}\mpitermdefindex{completes -- operation} when the user is allowed
to reuse resources, and any output buffers have been updated.
-\item[\mpiterm{blocking}] A procedure is blocking if return from the procedure indicates the user
+\item[\mpitermdef{blocking}] A procedure is blocking if return from the procedure indicates the user
is allowed to reuse resources specified in the call.
-\item[\mpiterm{local}]
+\item[\mpitermdef{local}]
A procedure is local if completion of the procedure depends only on the
local executing process.
-\item[\mpiterm{non-local}]
+\item[\mpitermdef{non-local}]
A procedure is non-local if completion of the operation may require
the execution of some \MPI/ procedure on another process. Such an
operation may require
communication occurring with another user process.
-\item[\mpiterm{collective}]
+\item[\mpitermdef{collective}]
A procedure is collective if all processes in a process group need to invoke the procedure. A
collective call may or may not be synchronizing.
Collective calls over the same communicator
must be executed in the same order by all members of the process
group.
-\item[\mpiterm{predefined}]
+\item[\mpitermdefni{predefined}\mpitermdefindex{predefined datatype}]
A predefined datatype is a datatype with a predefined (constant) name
(such as \consti{MPI\_INT}, \consti{MPI\_FLOAT\_INT}, or \consti{MPI\_PACKED})
or a datatype constructed with \mpifunc{MPI\_TYPE\_CREATE\_F90\_INTEGER},
\mpifunc{MPI\_TYPE\_CREATE\_F90\_REAL}, or
-\mpifunc{MPI\_TYPE\_CREATE\_F90\_COMPLEX}. The former are \mpiterm{named}
-whereas the latter are \mpiterm{unnamed}.
-\item[\mpiterm{derived}]
+\mpifunc{MPI\_TYPE\_CREATE\_F90\_COMPLEX}. The former are \mpitermdefni{named}\mpitermdefindex{named datatype}
+whereas the latter are \mpitermdefni{unnamed}\mpitermdefindex{unnamed datatype}.
+\item[\mpitermdefni{derived}\mpitermdefindex{derived datatype}]
A derived datatype is any datatype that is not predefined.
-\item[\mpiterm{portable}]
+\item[\mpitermdefni{portable}\mpitermdefindex{portable datatype}]
A datatype is portable if it is a predefined datatype, or it is derived
from a portable datatype using only the type constructors
\mpifunc{MPI\_TYPE\_CONTIGUOUS}, \mpifunc{MPI\_TYPE\_VECTOR},
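To make the blocking/nonblocking distinction defined in the hunk above concrete, here is a small C sketch (not from the patch; tag values are illustrative). The key point is when the user may reuse the buffer.

    #include <mpi.h>

    /* Blocking: the buffer may be reused as soon as MPI_Send returns.
       Nonblocking: only after the completion call, when the operation
       completes in the sense defined above. */
    void send_both_ways(double *buf, int n, int dest, MPI_Comm comm)
    {
        MPI_Request req;

        MPI_Send(buf, n, MPI_DOUBLE, dest, /*tag=*/0, comm);
        buf[0] = 1.0;                      /* safe: blocking send returned */

        MPI_Isend(buf, n, MPI_DOUBLE, dest, /*tag=*/1, comm, &req);
        /* buf must not be modified here; the operation has not completed */
        MPI_Wait(&req, MPI_STATUS_IGNORE); /* operation completes */
        buf[0] = 2.0;                      /* safe again */
    }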
@@ -242,7 +243,7 @@
These displacements are unlikely to be chosen correctly if they fit
data layout on one memory, but are used for data layouts on another
process, running on a processor with a different architecture.
-\item[\mpiterm{equivalent}]
+\item[\mpitermdefni{equivalent}\mpitermdefindex{equivalent datatypes}]
Two datatypes are equivalent if they appear to have been created with
the same sequence of calls (and arguments) and thus have the same
typemap. Two equivalent datatypes do not necessarily have the same
@@ -254,14 +255,15 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Opaque Objects}
+\mpitermtitleindex{opaque objects}
\label{terms:opaque-objects}
-\MPI/ manages \mpiterm{system memory} that is used for buffering
+\MPI/ manages \mpitermdef{system memory}\mpitermdefindex{memory!system} that is used for buffering
messages and for storing internal representations of various \MPI/ objects
such as groups, communicators, datatypes, etc.
This memory is not directly accessible to the user, and objects stored
-there are \mpiterm{opaque}: their size and shape is not visible to the
-user. Opaque objects are accessed via \mpiterm{handles}, which exist in
+there are \mpitermdefni{opaque}: their size and shape is not visible to the
+user. Opaque objects are accessed via \mpitermdef{handles}, which exist in
user space. \MPI/ procedures that operate on opaque objects are
passed handle arguments to access these objects.
In addition to their use by \MPI/ calls for object access, handles can
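A short C sketch of the handle/opaque-object relationship described above (illustrative; the datatype shape is arbitrary). The user holds only the handle; the object itself lives in system memory.

    #include <mpi.h>

    void handle_demo(void)
    {
        MPI_Datatype pair;                         /* a handle, not the object */
        MPI_Type_contiguous(2, MPI_DOUBLE, &pair); /* create the opaque object */
        MPI_Type_commit(&pair);                    /* ready for communication */
        /* ... use pair in sends and receives ... */
        MPI_Type_free(&pair);  /* handle is set to MPI_DATATYPE_NULL */
    }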
@@ -413,6 +415,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Array Arguments}
+\mpitermtitleindex{array arguments}
\label{subsec:array-arguments}
An \MPI/ call may need an argument that is an array of opaque objects,
@@ -431,6 +434,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{State}
+\mpitermtitleindex{state}
\MPI/ procedures use at various places arguments with \emph{state} types. The
values of such a data type are all identified by names, and no operation is
@@ -441,6 +445,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Named Constants}
+\mpitermtitleindex{constants}
\label{subsec:named-constants}
\MPI/ procedures sometimes assign a special meaning to a special value of a
@@ -500,23 +505,19 @@
\end{obeylines}
The constants that cannot be used in initialization expressions or
-assignments in Fortran are:
+assignments in Fortran are as follows:
\begin{obeylines}
-\tt%%ALLOWLATEX%
- MPI\_BOTTOM
- MPI\_STATUS\_IGNORE
- MPI\_STATUSES\_IGNORE
- MPI\_ERRCODES\_IGNORE
- MPI\_IN\_PLACE
- MPI\_ARGV\_NULL
- MPI\_ARGVS\_NULL
- MPI\_UNWEIGHTED
- MPI\_WEIGHTS\_EMPTY
+\sf\small%%ALLOWLATEX%
+ MPI\_BOTTOM\cdeclindex{MPI\_BOTTOM}
+ MPI\_STATUS\_IGNORE\cdeclindex{MPI\_STATUS\_IGNORE}
+ MPI\_STATUSES\_IGNORE\cdeclindex{MPI\_STATUSES\_IGNORE}
+ MPI\_ERRCODES\_IGNORE\cdeclindex{MPI\_ERRCODES\_IGNORE}
+ MPI\_IN\_PLACE\cdeclindex{MPI\_IN\_PLACE}
+ MPI\_ARGV\_NULL\cdeclindex{MPI\_ARGV\_NULL}
+ MPI\_ARGVS\_NULL\cdeclindex{MPI\_ARGVS\_NULL}
+ MPI\_UNWEIGHTED\cdeclindex{MPI\_UNWEIGHTED}
+ MPI\_WEIGHTS\_EMPTY\cdeclindex{MPI\_WEIGHTS\_EMPTY}
\end{obeylines}
-\cdeclindex{MPI\_BOTTOM}\cdeclindex{MPI\_STATUS\_IGNORE}%
-\cdeclindex{MPI\_STATUSES\_IGNORE}\cdeclindex{MPI\_ERRCODES\_IGNORE}%
-\cdeclindex{MPI\_IN\_PLACE}\cdeclindex{MPI\_ARGV\_NULL}%
-\cdeclindex{MPI\_ARGVS\_NULL}\cdeclindex{MPI\_UNWEIGHTED}%
\begin{implementors}
In Fortran the implementation of these special constants may require the
@@ -535,6 +536,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Choice}
+\mpitermtitleindex{choice}
\label{sub:choice}
\MPI/ functions sometimes use arguments with a \emph{choice} (or union) data
@@ -559,11 +561,15 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Absolute Addresses and Relative Address Displacements}
+\mpitermtitleindexsubmain{absolute}{addresses}
+\mpitermtitleindex{relative displacement}
+\mpitermtitleindex{addresses!relative displacement}
\label{subsec:displacement}
-Some \MPI/ procedures use \emph{address} arguments that represent an absolute
-address in the calling program,
-or relative displacement arguments that represent differences of two absolute addresses.
+Some \MPI/ procedures use \mpitermni{address} arguments that represent an
+\mpitermni{absolute address} in the calling program,
+or \mpitermni{relative displacement}
+arguments that represent differences of two absolute addresses.
The datatype of such arguments
\cdeclmainindex{MPI\_Aint}%
is \type{MPI\_Aint} in C and \ftype{INTEGER
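A small C sketch of taking absolute addresses and forming a relative displacement portably, using the MPI_Aint manipulation routines that the intro hunk above says were added in MPI-3.1; the struct is illustrative.

    #include <mpi.h>

    struct particle { double x; int id; };

    /* Take two absolute addresses with MPI_Get_address and subtract them
       with MPI_Aint_diff rather than with the C "-" operator. */
    void displacement_demo(struct particle *p, MPI_Aint *disp)
    {
        MPI_Aint base, field;
        MPI_Get_address(p, &base);          /* absolute address */
        MPI_Get_address(&p->id, &field);    /* absolute address */
        *disp = MPI_Aint_diff(field, base); /* relative displacement */
    }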
@@ -583,6 +589,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{File Offsets}
+\mpitermtitleindexmainsub{file}{offset}
For I/O there is a need to give the size, displacement, and offset
into a file. These quantities can easily be larger than 32 bits which
@@ -599,6 +606,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Counts}
+\mpitermtitleindex{counts}
\label{subsec:count}
As described above, \MPI/ defines types (e.g., \type{MPI\_Aint}) to
@@ -630,6 +638,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Language Binding}
+\mpitermtitleindex{language binding}
\label{subsec:lang}
\label{subsec:binding}
@@ -662,6 +671,8 @@
where any of the letters are either upper or lower case.
\subsection{Deprecated and Removed Names and Functions}
+\mpitermtitleindex{deprecated names and functions}
+\mpitermtitleindex{removed names and functions}
\label{sec:deprecated}
A number of chapters refer to deprecated or replaced \MPI/ constructs.
These are
@@ -769,6 +780,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Fortran Binding Issues}
+\mpitermtitleindex{Fortran -- language binding}
\label{sec:fortran-binding-issues}
Originally,
@@ -786,9 +798,7 @@
capitals. Programs must not declare names, e.g., for
variables, subroutines,
functions, parameters, derived types, abstract interfaces, or modules,
-beginning with the prefix \code{MPI\_},
-with the exception of \code{MPI\_} routines written by the user
-to make use of the profiling interface.
+beginning with the prefix \code{MPI\_}.
To avoid
conflicting with the profiling interface, programs must also avoid subroutines and functions with the prefix \code{PMPI\_}.
This is mandated to avoid possible name collisions.
@@ -827,6 +837,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{C Binding Issues}
+\mpitermtitleindex{C -- language binding}
\label{sec:c-binding-issues}
We use the ISO C
@@ -838,8 +849,6 @@
beginning with
any prefix of the form \code{MPI\_},
where any of the letters are either upper or lower case.
-An exception are \code{MPI\_} routines written by the user to
-make use of the profiling interface.
To support the profiling interface, programs must not declare
functions with names beginning with any prefix of the form \code{PMPI\_},
where any of the letters are either upper or lower case.
@@ -863,6 +872,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Functions and Macros}
+\mpitermtitleindex{macros}
\label{sec:macros}
An implementation is allowed to implement
@@ -885,6 +895,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Processes}
+\mpitermtitleindex{processes}
An \MPI/ program consists of autonomous processes, executing their own
code, in
@@ -920,6 +931,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Error Handling}
+\mpitermtitleindex{error handling}
\MPI/ provides the user with reliable message transmission.
A message sent is always received
@@ -936,12 +948,12 @@
Similarly, \MPI/ itself provides no mechanisms for
handling processor failures.
-Of course, \MPI/ programs may still be erroneous. A \mpiterm{program error} can
+Of course, \MPI/ programs may still be erroneous. A \mpitermdef{program error}\mpitermdefindex{error handling!program error} can
occur when an \MPI/ call is made with an incorrect argument (non-existing
destination in a send operation, buffer too small in a receive
operation, etc.).
This type of error would occur in any implementation.
-In addition, a \mpiterm{resource error} may occur when a program exceeds the amount
+In addition, a \mpitermdef{resource error}\mpitermdefindex{error handling!resource error} may occur when a program exceeds the amount
of available system resources (number of pending messages, system buffers,
etc.). The occurrence of this type of error depends on the amount of
available resources in the system and the resource allocation mechanism used;
@@ -1072,6 +1084,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Interaction with Signals}
+\mpitermtitleindex{signals}
\MPI/ does not specify the interaction of processes with signals and does
not require that \MPI/ be signal safe. The
Index: chap-intro/intro.tex
===================================================================
--- chap-intro/intro.tex (revision 2030)
+++ chap-intro/intro.tex (working copy)
@@ -290,10 +290,11 @@
\section{Background of \texorpdfstring{\MPIIIIDOTI/}{MPI-3.1}}
\MPIIIIDOTI/ is a minor update to the \MPI/ standard. Most of the updates
are corrections and clarifications to the standard,
-especially for the Fortran binding. New functions added
-include routines to portably manipulate \code{MPI\_Aint} values, nonblocking
+especially for the Fortran bindings. New functions added
+include routines to manipulate \code{MPI\_Aint} values in a portable manner, nonblocking
collective I/O routines, and routines to get the index value by name for
-the \mpiskipfunc{MPI\_T} performance and control variables.
+\mpiskipfunc{MPI\_T} performance and control variables.
+A general index was also added.
\section{Who Should Use This Standard?}
Index: chap-topol/topol.tex
===================================================================
--- chap-topol/topol.tex (revision 2030)
+++ chap-topol/topol.tex (working copy)
@@ -1,4 +1,5 @@
\chapter{Process Topologies}
+\mpitermtitleindex{topologies}
\label{sec:topol}
\label{chap:topol}
@@ -58,6 +59,7 @@
\end{rationale}
\section{Virtual Topologies}
+\mpitermtitleindexsubmain{virtual}{topology}
The communication pattern of a set of processes can be represented by a
graph. The nodes
represent processes,
@@ -110,8 +112,11 @@
\section{Overview of the Functions}
\label{subsec:topol-overview}
-MPI supports three topology types: Cartesian,
-graph, and distributed graph. The function \mpifunc{MPI\_CART\_CREATE}
+MPI supports three topology types:
+\mpitermdefni{Cartesian}\mpitermdefindex{Cartesian -- topology}\mpitermdefindex{topology!Cartesian},
+\mpitermdefni{graph}\mpitermdefindex{graph -- topology}\mpitermdefindex{topology!graph}, and
+\mpitermdefni{distributed graph}\mpitermdefindex{distributed graph -- topology}\mpitermdefindex{topology!distributed graph}.
+The function \mpifunc{MPI\_CART\_CREATE}
is used to create Cartesian topologies, the function
\mpifunc{MPI\_GRAPH\_CREATE} is used to create graph topologies, and the
functions \mpifunc{MPI\_DIST\_GRAPH\_CREATE\_ADJACENT} and
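As a sketch of the Cartesian constructor named above, in C (the 2-D periodic shape is an arbitrary choice, not mandated by the text):

    #include <mpi.h>

    /* Create a 2-D periodic Cartesian topology over all processes.
       MPI_Dims_create picks a balanced grid shape; reorder=1 lets the
       implementation renumber ranks to fit the hardware. */
    void make_grid(MPI_Comm comm, MPI_Comm *grid)
    {
        int nprocs, dims[2] = {0, 0}, periods[2] = {1, 1};
        MPI_Comm_size(comm, &nprocs);
        MPI_Dims_create(nprocs, 2, dims);
        MPI_Cart_create(comm, 2, dims, periods, /*reorder=*/1, grid);
    }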
@@ -206,6 +211,8 @@
\label{subsec:topol-construct}
\subsection{Cartesian Constructor}
+\mpitermtitleindex{Cartesian -- topology}
+\mpitermtitleindex{topology!Cartesian}
\label{subsec:topol-cartesian-constructor}
\begin{funcdef}{MPI\_CART\_CREATE(comm\_old, ndims, dims, periods, reorder, comm\_cart)}
@@ -294,6 +301,8 @@
\end{example}
\subsection{Graph Constructor}
+\mpitermtitleindex{graph -- topology}
+\mpitermtitleindex{topology!graph}
\label{subsec:topol-graph-constructor}
\begin{funcdef}{MPI\_GRAPH\_CREATE(comm\_old, nnodes, index, edges, reorder, comm\_graph)}
@@ -428,6 +437,8 @@
\end{implementors}
\subsection{Distributed Graph Constructor}
+\mpitermtitleindex{distributed graph -- topology}
+\mpitermtitleindex{topology!distributed graph}
\label{subsec:topol-distgraph-constructor} % Sect. 7.5.3a p.247
\mpifunc{MPI\_GRAPH\_CREATE} requires that each process passes the
@@ -1412,6 +1423,7 @@
% The full section title is too long for the running head
\section[Neighborhood Collective Communication]{Neighborhood Collective Communication on Process Topologies}
+\mpitermtitleindexsubmain{neighborhood}{collective communication}
\label{sec:sparsecoll}
MPI process topologies specify a communication graph, but they
@@ -1910,6 +1922,7 @@
operations as described in Section~\ref{sec:nbcoll}.
\subsection{Nonblocking Neighborhood Gather}
+\mpitermtitleindex{neighborhood collective communication!nonblocking}
\begin{funcdef}{MPI\_INEIGHBOR\_ALLGATHER(sendbuf, sendcount, sendtype,
recvbuf, recvcount, recvtype, comm, request)}
Index: chap-inquiry/inquiry.tex
===================================================================
--- chap-inquiry/inquiry.tex (revision 2030)
+++ chap-inquiry/inquiry.tex (working copy)
@@ -12,6 +12,7 @@
\label{sec:inquiry-impl}
\subsection{Version Inquiries}
+\mpitermtitleindex{version inquiries}
\label{subsec:inquiry-version}
In order to cope with changes to the \MPI/ Standard, there are both compile-time
and run-time ways to determine which version of the standard is in use in the
@@ -52,8 +53,8 @@
\mpifunc{MPI\_GET\_VERSION} can be called
before \mpifunc{MPI\_INIT} and after \mpifunc{MPI\_FINALIZE}.
-This function is callable from threads without restriction,
-see Section~\ref{sec:ei-threads}.
+This function must always be thread-safe, as defined in
+Section~\ref{sec:ei-threads}.
Valid (\const{MPI\_VERSION}, \const{MPI\_SUBVERSION}) pairs in
this and previous versions of the \MPI/ standard
are (3,1), (3,0), (2,2), (2,1), (2,0), and (1,2).
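A minimal C sketch of the compile-time and run-time version checks; per the text above, the run-time query is callable even before MPI_INIT.

    #include <mpi.h>
    #include <stdio.h>

    void report_version(void)
    {
        int version, subversion;
        MPI_Get_version(&version, &subversion);
        printf("compiled against %d.%d, running %d.%d\n",
               MPI_VERSION, MPI_SUBVERSION, version, subversion);
    }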
@@ -93,10 +94,11 @@
\mpifunc{MPI\_GET\_LIBRARY\_VERSION} can be called
before \mpifunc{MPI\_INIT} and after \mpifunc{MPI\_FINALIZE}.
-This function is callable from threads without restriction,
-see Section~\ref{sec:ei-threads}.
+This function must always be thread-safe, as defined in
+Section~\ref{sec:ei-threads}.
\subsection{Environmental Inquiries}
+\mpitermtitleindex{environmental inquiries}
\label{subsec:inquiry-inquiry}
A set of attributes that describe the execution environment are attached to
@@ -136,6 +138,7 @@
The required parameter values are discussed in more detail below:
\subsubsection{Tag Values}
+\mpitermtitleindex{tag values}
Tag values range from \code{0} to the value returned for \const{MPI\_TAG\_UB},
inclusive.
These values are guaranteed to be unchanging during the execution of an \MPI/
@@ -150,17 +153,19 @@
of \const{MPI\_COMM\_WORLD}.
\subsubsection{Host Rank}
-The value returned for \const{MPI\_HOST} gets the rank of the \mpiterm{HOST} process in the group associated
+\mpitermtitleindex{host rank}
+The value returned for \const{MPI\_HOST} is the rank of the \mpitermni{HOST} process in the group associated
with communicator \const{MPI\_COMM\_WORLD}, if there is such.
\const{MPI\_PROC\_NULL} is returned if there is no host.
\MPI/ does not specify what it
-means for a process to be a \mpiterm{HOST}, nor does it requires that a \mpiterm{HOST}
+means for a process to be a \mpitermni{HOST}, nor does it require that a \mpitermni{HOST}
exists.
The attribute \const{MPI\_HOST} has the same value on all processes
of \const{MPI\_COMM\_WORLD}.
\subsubsection{IO Rank}
+\mpitermtitleindex{IO rank}
The value returned for \const{MPI\_IO} is the rank of a processor that can
provide language-standard I/O facilities. For Fortran, this means that all of
the Fortran I/O operations are supported (e.g., \code{OPEN}, \code{REWIND},
@@ -187,6 +192,7 @@
\end{users}
\subsubsection{Clock Synchronization}
+\mpitermtitleindex{clock synchronization}
\label{subsubsec:inquiry-clock-sync}
The value returned for \const{MPI\_WTIME\_IS\_GLOBAL} is 1 if clocks
@@ -210,6 +216,7 @@
processes of \const{MPI\_COMM\_WORLD}.
\subsubsection{Inquire Processor Name}
+\mpitermtitleindex{processor name}
\begin{funcdef}{MPI\_GET\_PROCESSOR\_NAME( name, resultlen )}
\funcarg{\OUT}{name}{A unique specifier for the actual (as
opposed to virtual) node.}
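A minimal C usage sketch for this inquiry:

    #include <mpi.h>
    #include <stdio.h>

    void where_am_i(void)
    {
        char name[MPI_MAX_PROCESSOR_NAME];
        int resultlen;
        MPI_Get_processor_name(name, &resultlen);
        printf("running on %.*s\n", resultlen, name);
    }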
@@ -259,6 +266,7 @@
\end{users}
\section{Memory Allocation}
+\mpitermtitleindex{memory!allocation}
\label{sec:misc-memalloc}
In some systems, message-passing and remote-memory-access (\RMA/) operations
@@ -455,6 +463,7 @@
\end{example}
\section{Error Handling}
+\mpitermtitleindex{error handling}
\label{sec:errorhandler}
An \MPI/ implementation cannot or may choose not to handle some errors
@@ -462,7 +471,7 @@
exceptions or traps, such as floating point errors or access
violations.
The set of errors that are handled by \MPI/ is implementation-dependent.
-Each such error generates an \mpiterm{\MPI/ exception}.
+Each such error generates an \mpitermdefni{\MPI/ exception}\mpitermdefindex{exception}.
The above text takes precedence over any text on error handling within this
document. Specifically, text that states that errors \emph{will} be handled
@@ -595,6 +604,7 @@
%new stuff collecting types of error handlers by Marc
\subsection{Error Handlers for Communicators}
+\mpitermtitleindex{error handling!error handlers}
\label{subsec:inquiry-errhdlr-comm}
\begin{funcdef}{MPI\_COMM\_CREATE\_ERRHANDLER(comm\_errhandler\_fn, errhandler)}
@@ -603,7 +613,8 @@
\end{funcdef}
\cdeclmainindex{MPI\_Errhandler}%
-\mpibind{MPI\_Comm\_create\_errhandler(MPI\_Comm\_errhandler\_function~*comm\_errhandler\_fn, MPI\_Errhandler~*errhandler)}
+%% No tie in the first argument; a tie makes line breaking impossible
+\mpibind{MPI\_Comm\_create\_errhandler(MPI\_Comm\_errhandler\_function *comm\_errhandler\_fn, MPI\_Errhandler~*errhandler)}
\mpifnewbind{MPI\_Comm\_create\_errhandler(comm\_errhandler\_fn, errhandler, ierror) \fargs PROCEDURE(MPI\_Comm\_errhandler\_function) :: comm\_errhandler\_fn \\ TYPE(MPI\_Errhandler), INTENT(OUT) :: errhandler \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_COMM\_CREATE\_ERRHANDLER(COMM\_ERRHANDLER\_FN, ERRHANDLER, IERROR)\fargs EXTERNAL COMM\_ERRHANDLER\_FN \\ INTEGER ERRHANDLER, IERROR}
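A C sketch of creating and installing a communicator error handler with the binding shown above; the handler body is illustrative.

    #include <mpi.h>
    #include <stdio.h>

    /* A user-defined handler matching MPI_Comm_errhandler_function:
       report the error class, then abort. */
    static void report_and_abort(MPI_Comm *comm, int *errcode, ...)
    {
        int eclass;
        MPI_Error_class(*errcode, &eclass);
        fprintf(stderr, "MPI error, class %d\n", eclass);
        MPI_Abort(*comm, *errcode);
    }

    void install_handler(MPI_Comm comm)
    {
        MPI_Errhandler eh;
        MPI_Comm_create_errhandler(report_and_abort, &eh);
        MPI_Comm_set_errhandler(comm, eh);
        MPI_Errhandler_free(&eh);  /* the communicator keeps its reference */
    }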
@@ -703,7 +714,8 @@
\end{funcdef}
\cdeclindex{MPI\_Errhandler}%
-\mpibind{MPI\_Win\_create\_errhandler(MPI\_Win\_errhandler\_function~*win\_errhandler\_fn, MPI\_Errhandler~*errhandler)}
+%% No tie in the first argument; a tie makes line breaking impossible
+\mpibind{MPI\_Win\_create\_errhandler(MPI\_Win\_errhandler\_function *win\_errhandler\_fn, MPI\_Errhandler~*errhandler)}
\mpifnewbind{MPI\_Win\_create\_errhandler(win\_errhandler\_fn, errhandler, ierror) \fargs PROCEDURE(MPI\_Win\_errhandler\_function) :: win\_errhandler\_fn \\ TYPE(MPI\_Errhandler), INTENT(OUT) :: errhandler \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_WIN\_CREATE\_ERRHANDLER(WIN\_ERRHANDLER\_FN, ERRHANDLER, IERROR) \fargs EXTERNAL WIN\_ERRHANDLER\_FN \\ INTEGER ERRHANDLER, IERROR}
@@ -777,7 +789,8 @@
\end{funcdef}
\cdeclindex{MPI\_Errhandler}%
-\mpibind{MPI\_File\_create\_errhandler(MPI\_File\_errhandler\_function~*file\_errhandler\_fn, MPI\_Errhandler~*errhandler)}
+%% No tie in the first argument; a tie makes line breaking impossible
+\mpibind{MPI\_File\_create\_errhandler(MPI\_File\_errhandler\_function *file\_errhandler\_fn, MPI\_Errhandler~*errhandler)}
\mpifnewbind{MPI\_File\_create\_errhandler(file\_errhandler\_fn, errhandler, ierror) \fargs PROCEDURE(MPI\_File\_errhandler\_function) :: file\_errhandler\_fn \\ TYPE(MPI\_Errhandler), INTENT(OUT) :: errhandler \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_FILE\_CREATE\_ERRHANDLER(FILE\_ERRHANDLER\_FN, ERRHANDLER, IERROR)\fargs EXTERNAL FILE\_ERRHANDLER\_FN \\ INTEGER ERRHANDLER, IERROR}
@@ -899,6 +912,7 @@
\end{rationale}
\section{Error Codes and Classes}
+\mpitermtitleindex{error handling!error codes and classes}
\label{sec:ei-error-classes}
The error codes returned by \MPI/ are left entirely to the
implementation (with the
@@ -1083,6 +1097,8 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Error Classes, Error Codes, and Error Handlers}
+\mpitermtitleindex{error handling!error codes and classes}
+\mpitermtitleindex{error handling!error handlers}
\label{sec:ei-error}
Users may want to write a layered library on top of an existing \MPI/
@@ -1325,6 +1341,7 @@
\section{Timers and Synchronization}
+\mpitermtitleindex{timers and synchronization}
\MPI/ defines a timer. A timer is specified even though it is not
``message-passing,'' because timing parallel programs is important in
``performance debugging'' and because existing timers (both in POSIX
@@ -1387,6 +1404,7 @@
\section{Startup}
+\mpitermtitleindex{startup}
\label{sec:inquiry-startup}
\label{sec:misc-init}
@@ -1446,7 +1464,7 @@
about the execution environment by querying the predefined info object
\const{MPI\_INFO\_ENV}.
The following keys are predefined for this object, corresponding to the
-arguments of \mpifunc{MPI\_COMM\_SPAWN} or of \mpifunc{mpiexec}:
+arguments of \mpifunc{MPI\_COMM\_SPAWN} or of \mpifunc{mpiexec}\mpitermindex{mpiexec}:
\begin{description}
\item[\infokey{command}] Name of program executed.
\item[\infokey{argv}] Space separated arguments to command.
@@ -1744,8 +1762,8 @@
called does not affect the behavior of \mpifunc{MPI\_INITIALIZED}.
It is one of the few routines that may be called before
\mpifunc{MPI\_INIT} is called.
-This function is callable from threads without restriction,
-see Section~\ref{sec:ei-threads}.
+This function must always be thread-safe, as defined in
+Section~\ref{sec:ei-threads}.
\begin{funcdef}{MPI\_ABORT(comm, errorcode)}
\funcarg{\IN}{comm}{communicator of tasks to abort}
@@ -1783,10 +1801,12 @@
\begin{users}
Whether the \mpiarg{errorcode} is returned from the executable or from the
\mpifuncindex{mpiexec}%
+\mpitermindex{mpiexec}%
\MPI/ process startup mechanism (e.g., \code{mpiexec}), is an aspect of quality
of the \MPI/ library but not mandatory.
\end{users}
\mpifuncindex{mpiexec}%
+\mpitermindex{mpiexec}%
\begin{implementors}
Where possible, a high-quality implementation will try to return the
\mpiarg{errorcode} from the \MPI/ process startup mechanism
@@ -1794,6 +1814,7 @@
\end{implementors}
\subsection{Allowing User Functions at Process Termination}
+\mpitermtitleindex{user functions at process termination}
\label{subsec:inquiry-startup-userfunc}
There are times in which it would be convenient to have actions happen
@@ -1830,6 +1851,7 @@
\subsection{Determining Whether \texorpdfstring{\mpi/}{MPI} Has Finished}
+\mpitermtitleindex{finished}
One of the goals of \mpi/ was to allow for layered libraries. In
order for a library to do this cleanly, it needs to know if \mpi/ is
@@ -1854,8 +1876,8 @@
This routine returns \mpiarg{true} if \mpifunc{MPI\_FINALIZE} has completed.
It is valid to call \mpifunc{MPI\_FINALIZED}
before \mpifunc{MPI\_INIT} and after \mpifunc{MPI\_FINALIZE}.
-This function is callable from threads without restriction,
-see Section~\ref{sec:ei-threads}.
+This function must always be thread-safe, as defined in
+Section~\ref{sec:ei-threads}.
\begin{users}
\mpi/ is ``active'' and it is thus safe to call \mpi/ functions if
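A minimal C sketch of the "active" test suggested above for layered libraries; both inquiry calls are valid at any time.

    #include <mpi.h>

    /* Safe pattern for a layered library: call MPI functions only while
       MPI is initialized and not yet finalized. */
    int mpi_is_active(void)
    {
        int initialized, finalized;
        MPI_Initialized(&initialized);
        MPI_Finalized(&finalized);
        return initialized && !finalized;
    }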
@@ -1869,6 +1891,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Portable \texorpdfstring{\MPI/}{MPI} Process Startup}
+\mpitermtitleindex{startup!portable}
A number of implementations of \mpi/ provide a startup command for \MPI/ programs
that is of the form
@@ -1893,6 +1916,7 @@
In order that the ``standard'' command not be confused with existing
practice, which is not standard and not portable among implementations,
\mpifuncindex{mpirun}%
+\mpitermindex{mpirun}%
\mpifuncindex{mpiexec}%
instead of \code{mpirun} \MPI/ specifies \code{mpiexec}.
@@ -1906,6 +1930,7 @@
+\mpitermdefindex{mpiexec}%
It is suggested that\mpifuncmainindex{mpiexec}
%%HEADER
%%SKIP
Index: MAKE-FUNC-INDEX
===================================================================
--- MAKE-FUNC-INDEX (revision 2030)
+++ MAKE-FUNC-INDEX (working copy)
@@ -39,6 +39,8 @@
touch temp
chmod +w temp
+create_index 'General Index' '-e' '\{TERM:' 's/TERM://' \
+ 'This index mainly lists terms of the \MPI/ specification. Underlined page numbers refer to the definitions, or parts of the definitions, of the terms. Boldface numbers mark section titles.'
create_index 'Examples Index' '-e' '\{EXAMPLES:' 's/EXAMPLES://' \
'This index lists code examples throughout the text. Some examples are referred to by content; others are listed by the major \MPI/ function that they are demonstrating. \MPI/ functions listed in all capital letter are Fortran examples; \MPI/ functions listed in mixed case are C examples.'
create_index 'MPI Constant and Predefined Handle Index' '-e' '\{CONST:MPI[^|}]*[ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789][|}]' 's/CONST://' \
Index: Makefile
===================================================================
--- Makefile (revision 2030)
+++ Makefile (working copy)
@@ -505,7 +505,7 @@
# function definition within the standard. Without this step, there is no
# change to the text
-bindinglinks: mpi-report.idx \
+bindinglinks: mpi-report.idx indextomap.pl \
appLang-CNames.tex appLang-FNames.tex appLang-F2008Names.tex
if [ -x $(MAPNAMES) ] ; then \
./indextomap.pl mpi-report.idx > map.cit ; \
Index: mpi-user-macs.tex
===================================================================
--- mpi-user-macs.tex (revision 2030)
+++ mpi-user-macs.tex (working copy)
@@ -347,9 +347,39 @@
%
% Use mpiterm when introducing a term that you want emphasized and indexed.
% Use mpitermni for terms that should not be indexed (ni = not indexed)
-\def\mpiterm#1{\emph{#1}\index{#1}}
-\def\mpitermni#1{\emph{#1}}
+% Place the index first to allow emph to (possibly) correct spacing.
+% mpitermdefindex produces only an index entry. The combination of ni and
+% index allows index entries that differ from the text.
+% References to section titles are marked with titleindex.
+%
+% An attempt was made to detect whether a term was the first use. However,
+% the presense of \_ in some of the terms caused the simple code (using
+% \csname #1 \endcsname) to fail, and code to sanitize the argument is
+% complex. So we fell back to the simple choice here.
+\def\mpiterm#1{\index{TERM:#1}\emph{#1}}
+\def\mpitermni#1{\index{TERMnoindex:#1}\emph{#1}}
+\def\mpitermindex#1{\index{TERM:#1}}
+\def\mpitermdef#1{\textbf{#1}\index{TERM:#1|uu}}
+\def\mpitermdefni#1{\textbf{#1}\index{TERMnoindex:#1|uu}}
+\def\mpitermdefindex#1{\index{TERM:#1|uu}}
+% Special macro for lb\_marker and ub\_marker that are a term
+% which is a very special catgory and should be printed with sf
+\def\mpiublb#1{\textsf{#1}\index{TERM:#1}}
+
+\def\mpitermtitleindex#1{\index{TERM:#1|bold}}
+\def\mpitermtitleindexsubmain#1#2{\index{TERM:#1 #2|bold}\index{TERM:#2!#1|bold}}
+% e.g. \mpitermtitleindexsubmain{Point-to-Point}{Communication}
+% results in: Point-to-Point Communication, 23
+% Communication,
+% Point-to-Point, 23
+\def\mpitermtitleindexmainsub#1#2{\index{TERM:#1!#2|bold}\index{TERM:#2|bold}}
+% e.g. \mpitermtitleindexmainsub{Message}{Envelope}
+% results in: Message
+% Envelope, 27
+% Envelope, 27
+
+
%
% Use flushline to force a linebreak without right justifying the line.
\def\flushline{\hfill\hbox{}\linebreak}
@@ -505,8 +535,10 @@
%%
%% Language-independent code block environment
+%% This needs to be used with care. If it is used inline, follow it with
+%% \noindent to keep TeX from starting a new paragraph.
\newenvironment{mpicodeblock}{\ifvmode\else\par\fi\vspace{\codeSpace}%
-\noindent\sf}{}
+\noindent\sf\quad}{\ifvmode\else\par\fi\vspace{\codeSpace}}
%%
%% Use \XXX/ for a ``function name'' wildcard
Index: chap-changes/changes.tex
===================================================================
--- chap-changes/changes.tex (revision 2030)
+++ chap-changes/changes.tex (working copy)
@@ -1,4 +1,5 @@
\chapter{Change-Log}
+\mpitermtitleindex{change-log}
\label{sec:change}
\label{chap:change}
@@ -63,7 +64,8 @@
\MPIIIIDOTO/ Chapters 3-17, Annex A.3 on page 707, and Example 5.21 on page 187.
\newline
Within the \code{mpi\_f08} Fortran support method, \code{BIND(C)} was removed from
-all \code{SUBROUTINE}, \code{FUNCTION}, and \code{ABSTRACT INTERFACE} definitions.
+all \flushline
+\code{SUBROUTINE}, \code{FUNCTION}, and \code{ABSTRACT INTERFACE} definitions.
% 02.--- MPI-3.0-erratum Ticket 415
\item
@@ -130,11 +132,14 @@
% 09.--- MPI-3.0-erratum Ticket 362
\item
-Section~\ref{sec:winalloc} on page~\pageref{sec:winalloc}, and
+Section~\ref{chap:one-side-2:win_create} on page~\pageref{chap:one-side-2:win_create}, and
\MPIIIIDOTO/ Section~11.2.2 on page 407.
\newline
-The \infokey{same\_size} info key can be used with all window flavors.
+The \infokey{same\_size} info key can be used with all window flavors,
+and requires that all processes in the process group of the communicator
+have provided this info key with the same value.
+
% 10.--- MPI-3.0-erratum Ticket 350
\item
Section~\ref{sec:1sided-accumulate} on page~\pageref{sec:1sided-accumulate}, and
@@ -268,7 +273,7 @@
\mpifunc{MPI\_QUERY\_THREAD}, \mpifunc{MPI\_IS\_THREAD\_MAIN},
\mpifunc{MPI\_GET\_VERSION}, and \mpifunc{MPI\_GET\_LIBRARY\_VERSION}
are callable from threads without restriction (in the sense of
-MPI\_THREAD\_MULTIPLE), irrespective of the actual level of thread support
+\const{MPI\_THREAD\_MULTIPLE}), irrespective of the actual level of thread support
provided, in the case where the implementation supports threads.
% 03.--- MPI-3.1 Ticket 369
@@ -282,8 +287,10 @@
Sections~\ref{sec:io-explicit} and~\ref{sec:io-indiv-ptr}
on pages~\pageref{sec:io-explicit} and~\pageref{sec:io-indiv-ptr}.
\newline
-Added \mpifunc{MPI\_File\_iread\_at\_all}, \mpifunc{MPI\_File\_iwrite\_at\_all},
-\mpifunc{MPI\_File\_iread\_all}, and \mpifunc{MPI\_File\_iwrite\_all}
+%% WDG - Corrected to refer to the language-neutral names, as required
+%% by the standard and the mpifunc macro
+Added \mpifunc{MPI\_FILE\_IREAD\_AT\_ALL}, \mpifunc{MPI\_FILE\_IWRITE\_AT\_ALL},
+\mpifunc{MPI\_FILE\_IREAD\_ALL}, and \mpifunc{MPI\_FILE\_IWRITE\_ALL}
% 05.--- MPI-3.1 Ticket 378
\item
@@ -372,9 +379,9 @@
\item
Sections~\ref{subsec:pt2pt-messagedata},
\ref{coll-predefined-op},
-\ref{subsec:ext32} Table \ref{table:io:extsizes},
+\ref{subsec:ext32} Table~\ref{table:io:extsizes},
%-REMOVED-C++ \ref{sec:c++datatypes} Table \ref{tab:cpp-basic-datatypes},
-and Annex \ref{subsec:annexa-const}
+and Annex~\ref{subsec:annexa-const}
on pages~\pageref{subsec:pt2pt-messagedata},
\pageref{coll-predefined-op},
\pageref{table:io:extsizes},
@@ -511,7 +518,7 @@
% 06.--- MPI-3.0 Ticket 265, 1st entry
\item
-Sections \ref{subsec:count}, \ref{subsec:pt2pt-messagedata},
+Sections~\ref{subsec:count}, \ref{subsec:pt2pt-messagedata},
\ref{table:pttopt:datatypes:c_f}, \ref{coll-predefined-op},
on pages~\pageref{subsec:count}, \pageref{subsec:pt2pt-messagedata},
\pageref{table:pttopt:datatypes:c_f}, \pageref{coll-predefined-op},
@@ -525,8 +532,8 @@
\pageref{subsec:pt2pt-true-extent}, % MPI_TYPE_GET_TRUE_EXTENT_X
\pageref{subsec:pt2pt-datatypeuse}, % MPI_GET_EXTENTS_X
\pageref{func:mpi-status-set-elements-x}, % MPI_STATUS_SET_ELEMENTS_X
-and Annex
-\ref{subsec:annexa-const} on page \pageref{subsec:annexa-const}.
+and
+Annex~\ref{subsec:annexa-const} on page \pageref{subsec:annexa-const}.
\newline
New inquiry functions, \mpifunc{MPI\_TYPE\_SIZE\_X}, \mpifunc{MPI\_TYPE\_GET\_EXTENT\_X},
\mpifunc{MPI\_TYPE\_GET\_TRUE\_EXTENT\_X}, and \mpifunc{MPI\_GET\_ELEMENTS\_X}, return their results
Index: chap-pt2pt/pt2pt.tex
===================================================================
--- chap-pt2pt/pt2pt.tex (revision 2030)
+++ chap-pt2pt/pt2pt.tex (working copy)
@@ -1,4 +1,5 @@
\chapter{Point-to-Point Communication}
+\mpitermtitleindexsubmain{point-to-point}{communication}
\label{sec:pt2pt}
\label{chap:pt2pt}
@@ -7,8 +8,8 @@
Sending and receiving of messages by processes is the basic \MPI/
communication mechanism.
-The basic point-to-point communication operations are \mpiterm{send} and
-\mpiterm{receive}. Their use is illustrated in the example below.
+The basic point-to-point communication operations are \mpitermdef{send} and
+\mpitermdef{receive}. Their use is illustrated in the example below.
%%HEADER
%%LANG: C
@@ -40,26 +41,27 @@
In this example, process zero (\code{myrank = 0}) sends a message to process one
using the
-\mpiterm{send} operation \mpifunc{MPI\_SEND}. The
-operation specifies a \mpiterm{send buffer} in the sender memory from which the
+\mpitermdef{send} operation \mpifunc{MPI\_SEND}. The
+operation specifies a \mpitermdefni{send buffer}\mpitermdefindex{send!buffer} in the sender memory from which the
message data is taken. In the example above, the send buffer consists of the
-storage containing the variable \mpiterm{message} in the memory of process zero.
+storage containing the variable \mpiarg{message}\mpitermindex{message} in the memory of process zero.
The location, size and type of the send buffer are specified by the first three
parameters of the send operation. The message sent will contain the 13
characters of this variable.
-In addition, the send operation associates an \mpiterm{envelope} with the
+In addition, the send operation associates an \mpitermdef{envelope} with the
message. This envelope specifies the message destination and contains
-distinguishing information that can be used by the \mpiterm{receive} operation to
+distinguishing information that can be used by the \mpitermdef{receive} operation to
select a particular message.
The last three parameters of the send operation, along with the rank of the
sender,
specify the envelope for the message sent.
Process one (\code{myrank = 1}) receives this message with the
-\mpiterm{receive} operation \mpifunc{MPI\_RECV}.
+\mpitermdef{receive} operation \mpifunc{MPI\_RECV}.
The message to be received is selected according to the value of its
-envelope, and the message data is stored into the \mpiterm{receive
-buffer}. In the example above, the receive buffer consists of the storage
-containing the string \code{message} in the memory of process one.
+envelope, and the message data is stored into the
+\mpitermdefni{receive buffer}\mpitermdefindex{receive!buffer}.
+In the example above, the receive buffer consists of the storage
+containing the string \mpiarg{message} in the memory of process one.
The first three parameters of the receive operation specify the location, size
and type of the receive buffer. The next three
parameters are used for selecting the incoming message. The last parameter is
@@ -78,6 +80,7 @@
\section{Blocking Send and Receive Operations}
\label{sec:pt2pt-basicsendrecv}
\subsection{Blocking Send}
+\mpitermtitleindex{send}
\label{subsec:pt2pt-basicsend}
The syntax of the blocking send operation is given below.
@@ -101,6 +104,7 @@
The blocking semantics of this call are described in Section~\ref{sec:pt2pt-modes}.
\subsection{Message Data}
+\mpitermtitleindexmainsub{message}{data}
\label{subsec:pt2pt-messagedata}
@@ -306,12 +310,14 @@
\subsection{Message Envelope}
+\mpitermtitleindex{message!envelope}
+\mpitermtitleindex{envelope}
\label{subsec:pt2pt-envelope}
In addition to the data part, messages carry information that can be used to
distinguish messages and selectively receive them. This information consists
of a fixed number of fields, which we collectively call
-the \mpiterm{message envelope}. These fields are
+the \mpitermdefni{message envelope}. These fields are
\begin{center}
source \\
destination \\
@@ -336,7 +342,7 @@
described in Chapter~\ref{chap:environment}. \MPI/ requires that
\mpicode{UB} be no less than 32767.
-The \mpiarg{comm} argument specifies the \mpiterm{communicator} that is used for
+The \mpiarg{comm} argument specifies the \mpitermdef{communicator} that is used for
the send operation.
Communicators are explained in Chapter~\ref{chap:context}; below is a brief
summary of their usage.
@@ -348,7 +354,7 @@
sent, and messages sent in different contexts do not interfere.
The communicator also specifies the set of processes that share this
-communication context. This \mpiterm{process group}
+communication context. This \mpitermdef{process group}
is ordered and processes are identified by their
rank within this group. Thus, the range of valid values for \mpiarg{dest} is
$0, \ldots, n-1 \cup \{\const{MPI\_PROC\_NULL}\}$, where $n$ is the number of
@@ -386,6 +392,7 @@
\end{implementors}
\subsection{Blocking Receive}
+\mpitermtitleindex{receive}
\label{subsec:pt2pt-basicreceive}
@@ -474,7 +481,7 @@
with that same communicator (remote process group, for intercommunicators).
Thus, the range of valid values for the
\mpiarg{source} argument is
-\{$0,\ldots,n-1\}\cup\{\const{MPI\_ANY\_SOURCE}\},\cup\{\const{MPI\_PROC\_NULL}\}$, where
+\{$0,\ldots,n-1\}\cup\{\const{MPI\_ANY\_SOURCE}\}\cup\{\const{MPI\_PROC\_NULL}\}$, where
$n$ is the number of processes in this group.
Note the asymmetry between send and receive operations: A receive
@@ -503,6 +510,7 @@
\sectionref{sec:pt2pt-nullproc}.
\subsection{Return Status}
+\mpitermtitleindex{status}
\label{subsec:pt2pt-status}
The source or tag of a received message may not be known if wildcard
@@ -667,6 +675,7 @@
\mpifunc{MPI\_RECV} operations described in this section.
\subsection{Passing \texorpdfstring{\const{MPI\_STATUS\_IGNORE}}{MPI\_STATUS\_IGNORE} for Status}
+\mpitermtitleindex{status!ignore}
\label{sec:pt2pt-status-ignore}
Every call to \mpifunc{MPI\_RECV} includes a \mpiarg{status} argument, wherein
@@ -738,6 +747,7 @@
\section{Data Type Matching and Data Conversion}
\label{sec:pt2pt-typematch}
\subsection{Type Matching Rules}
+\mpitermtitleindexsubmain{type}{matching}
\label{subsec:pt2pt-typematch}
One can think of message transfer as consisting of the following three phases.
@@ -978,6 +988,8 @@
\subsection{Data Conversion}
+\mpitermtitleindex{data conversion}
+\mpitermtitleindex{conversion}
\label{subsec:pt2pt-conversion}
One of the goals of \MPI/ is to support parallel computations across
@@ -1076,9 +1088,10 @@
\section{Communication Modes}
+\mpitermtitleindexmainsub{communication}{modes}
\label{sec:pt2pt-modes}
The send call described in Section~\ref{subsec:pt2pt-basicsend}
-is \mpiterm{blocking}:
+is \mpitermdef{blocking}:
it does not return until the message data
and envelope have been safely stored away so that the sender is
free to modify
@@ -1098,7 +1111,7 @@
The send call described in Section~\ref{subsec:pt2pt-basicsend}
uses
-the \mpiterm{standard} communication mode. In this mode,
+the \mpitermdef{standard} communication mode. In this mode,
it is up to \MPI/ to decide whether outgoing
messages will be buffered. \MPI/ may
buffer outgoing messages. In such a case, the send call may complete
@@ -1130,7 +1143,7 @@
There are three additional communication modes.
-A \mpiterm{buffered} mode send operation can be started whether or not a
+A \mpitermdef{buffered} mode send operation can be started whether or not a
matching receive has been posted.
It may complete before a matching receive is posted. However, unlike
the standard send, this operation is \mpiterm{local}, and its
@@ -1142,7 +1155,7 @@
Buffer allocation by the user may be required for the buffered mode to be
effective.
-A send that uses the \mpiterm{synchronous} mode can be started whether or
+A send that uses the \mpitermdef{synchronous} mode can be started whether or
not a matching receive was posted. However, the send will complete
successfully only if a matching receive is posted, and the
receive operation has started to receive the message sent by the
@@ -1158,7 +1171,7 @@
at either end before both processes rendezvous at the
communication. A send executed in this mode is \mpiterm{non-local}.
-A send that uses the \mpiterm{ready} communication mode
+A send that uses the \mpitermdef{ready} communication mode
may be started \emph{only} if the matching receive is already posted.
Otherwise, the operation is erroneous and its outcome is undefined.
On some systems, this allows the removal of a hand-shake
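A C sketch of buffered mode, the one additional mode above that requires user action (attaching a buffer); sizes and tag are illustrative.

    #include <mpi.h>
    #include <stdlib.h>

    /* Buffered mode: the send is local and completes even if no matching
       receive has been posted, provided the attached buffer is large enough. */
    void buffered_send(double *data, int n, int dest, MPI_Comm comm)
    {
        int size;
        void *buf;
        MPI_Pack_size(n, MPI_DOUBLE, comm, &size);
        size += MPI_BSEND_OVERHEAD;
        buf = malloc(size);
        MPI_Buffer_attach(buf, size);
        MPI_Bsend(data, n, MPI_DOUBLE, dest, /*tag=*/0, comm);
        MPI_Buffer_detach(&buf, &size); /* blocks until buffered data is sent */
        free(buf);
    }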
@@ -1307,6 +1320,7 @@
\section{Semantics of Point-to-Point Communication}
+\mpitermtitleindex{semantics!point-to-point communication}
\label{sec:pt2pt-semantics}
A valid \MPI/ implementation guarantees certain general properties of
@@ -1415,7 +1429,7 @@
\paragraph*{Fairness}
-\MPI/ makes no guarantee of \emph{fairness} in the handling of
+\MPI/ makes no guarantee of \mpiterm{fairness} in the handling of
communication. Suppose that a send is posted. Then it is possible
that the destination process repeatedly posts a receive that matches this
send, yet the message is never received, because it is each time overtaken by
@@ -1602,6 +1616,7 @@
\end{users}
\section{Buffer Allocation and Usage}
+\mpitermtitleindex{buffer allocation}
\label{sec:pt2pt-buffer}
A user may specify a buffer to be used for buffering messages sent in buffered
@@ -1783,6 +1798,7 @@
\end{itemize}
\section{Nonblocking Communication}
+\mpitermtitleindex{nonblocking}
\label{sec:pt2pt-nonblock}
One can improve performance on many systems by overlapping
@@ -1790,23 +1806,23 @@
where communication can be executed autonomously by an intelligent
communication controller. Light-weight threads are one mechanism for
achieving such overlap. An alternative mechanism that often leads to
-better performance is to use \mpiterm{nonblocking communication}. A
-nonblocking \mpiterm{send start} call initiates the send operation, but does not
+better performance is to use \mpitermdefni{nonblocking communication}\mpitermdefindex{nonblocking!communication}. A
+nonblocking \mpitermdefni{send start}\mpitermdefindex{send!start} call initiates the send operation, but does not
complete it. The send start call
can
return before the message was copied out of the send buffer.
-A separate \mpiterm{send complete}
+A separate \mpitermdefni{send complete}\mpitermdefindex{send!complete}
call is needed to complete the communication, i.e., to verify that the
data has been copied out of the send buffer. With
suitable hardware, the transfer of data out of the sender memory
may proceed concurrently with computations done at the sender after
the send was initiated and before it completed.
-Similarly, a nonblocking \mpiterm{receive start call} initiates the receive
+Similarly, a nonblocking \mpitermdefni{receive start call}\mpitermdefindex{receive!start call} initiates the receive
operation, but does not complete it. The call
can
return before
-a message is stored into the receive buffer. A separate \mpiterm{receive
-complete} call
+a message is stored into the receive buffer. A separate
+\mpitermdefni{receive complete}\mpitermdefindex{receive!complete} call
is needed to complete the receive operation and verify that the data has
been received into the receive buffer.
With suitable hardware, the transfer of data into the receiver memory
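A C sketch of the start/complete structure described above, overlapping an exchange with computation; neighbor ranks and the tag are illustrative.

    #include <mpi.h>

    /* Post the receive first, start the send, compute on unrelated data,
       then complete both operations. */
    void exchange(double *sendbuf, double *recvbuf, int n,
                  int left, int right, MPI_Comm comm)
    {
        MPI_Request reqs[2];
        MPI_Irecv(recvbuf, n, MPI_DOUBLE, left,  7, comm, &reqs[0]);
        MPI_Isend(sendbuf, n, MPI_DOUBLE, right, 7, comm, &reqs[1]);
        /* ... computation that touches neither buffer ... */
        MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    }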
@@ -1820,7 +1836,7 @@
\mpiterm{ready}. These carry
the same meaning.
Sends of all modes, \mpiterm{ready} excepted, can be started whether a matching
-receive has been posted or not; a nonblocking \mpiterm{ready}
+receive has been posted or not; a nonblocking \mpitermdefni{ready}\mpitermdefindex{ready!nonblocking}
send can be started only if
a matching receive is posted. In all cases, the send start call
is local: it returns immediately, irrespective of the
@@ -1836,7 +1852,7 @@
send buffer.
It may carry additional meaning, depending on the send mode.
-If the send mode is \mpiterm{synchronous}, then the
+If the send mode is \mpitermdefni{synchronous}\mpitermdefindex{synchronous!nonblocking}, then the
send can complete only if a matching receive has started. That
is, a receive has
been posted, and has been matched with the send. In this case,
@@ -1846,13 +1862,13 @@
``knows'' the transfer will complete, but before the receiver ``knows'' the
transfer will complete.)
-If the send mode is \mpiterm{buffered} then the
+If the send mode is \mpitermdefni{buffered}\mpitermdefindex{buffered!nonblocking} then the
message must be buffered if there is no pending receive. In this case,
the send-complete
call is local, and must succeed irrespective of the status of a matching
receive.
-If the send mode is \mpiterm{standard} then the send-complete call may
+If the send mode is \mpitermdefni{standard}\mpitermdefindex{standard!nonblocking} then the send-complete call may
return before a matching receive
is posted,
if the message is buffered. On the other hand, the
@@ -1892,9 +1908,10 @@
\end{users}
\subsection{Communication Request Objects}
+\mpitermtitleindexmainsub{nonblocking}{request objects}
\label{subsec:pt2pt-commobject}
-Nonblocking communications use opaque \mpiterm{request} objects to
+Nonblocking communications use opaque \mpitermdefni{request} objects to
identify communication operations and match the operation that
initiates the communication with the operation that terminates it.
These are system objects that are accessed via a handle.
@@ -1906,12 +1923,13 @@
information about the status of the pending communication operation.
\subsection{Communication Initiation}
+\mpitermtitleindexmainsub{nonblocking}{initiation}
\label{subsec:pt2pt-commstart}
We use the same naming conventions as for blocking communication: a
prefix of \mpicode{B}, \mpicode{S}, or \mpicode{R} is used for
-\mpiterm{buffered}, \mpiterm{synchronous} or \mpiterm{ready} mode.
-In addition a prefix of \mpicode{I} (for \mpiterm{immediate}) indicates
+\mpitermdef{buffered}, \mpitermdef{synchronous} or \mpitermdef{ready} mode.
+In addition a prefix of \mpicode{I} (for \mpitermdef{immediate}) indicates
that the call is nonblocking.
\begin{funcdef}{MPI\_ISEND(buf, count, datatype, dest, tag, comm, request)}
@@ -2067,6 +2085,7 @@
\subsection{Communication Completion}
+\mpitermtitleindexmainsub{nonblocking}{completion}
\label{subsec:pt2pt-commend}
The functions \mpifunc{MPI\_WAIT} and \mpifunc{MPI\_TEST} are used to complete a
@@ -2077,7 +2096,7 @@
of the send buffer unchanged). It does not indicate that the
message has been received,
rather, it may have been buffered by the communication
-subsystem. However, if a \mpiterm{synchronous}
+subsystem. However, if a \mpitermdef{synchronous}
mode send was used, the completion of the
send operation indicates that a matching receive was initiated, and that the
message will eventually be received by this matching receive.
@@ -2089,16 +2108,16 @@
course, that the send was initiated).
We shall use the following terminology:
-A \mpiterm{null} handle is a handle with
+A \mpitermdef{null handle} is a handle with
value\flushline
\const{MPI\_REQUEST\_NULL}.
A persistent
-request and the handle to it are \mpiterm{inactive}
+request and the handle to it are \mpitermdef{inactive}
if the request is not associated with any ongoing
communication (see \sectionref{sec:pt2pt-persistent}).
-A handle is \mpiterm{active} if it is neither null nor inactive.
+A handle is \mpitermdef{active} if it is neither null nor inactive.
An
-\mpiterm{empty} status is a status which is set to return \mpiarg{tag =}
+\mpitermdef{empty} status is a status which is set to return \mpiarg{tag =}
\const{MPI\_ANY\_TAG}, \mpiarg{source =} \const{MPI\_ANY\_SOURCE}, \mpiarg{error =}
\const{MPI\_SUCCESS}, and is also internally configured so that calls to
\mpifunc{MPI\_GET\_COUNT}, \mpifunc{MPI\_GET\_ELEMENTS}, and \mpifunc{MPI\_GET\_ELEMENTS\_X} return
@@ -2333,6 +2352,7 @@
\end{example}
\subsection{Semantics of Nonblocking Communications}
+\mpitermtitleindex{semantics!nonblocking communications}
\label{subsec:pt2pt-semantics}
@@ -2437,6 +2457,8 @@
send.
\subsection{Multiple Completions}
+\mpitermtitleindex{multiple completions}
+\mpitermtitleindex{completion!multiple}
\label{subsec:pt2pt-multiple}
It is convenient to be able to wait for the completion of any, some, or all the
@@ -2677,7 +2699,7 @@
\mpibind{MPI\_Waitsome(int~incount, MPI\_Request~array\_of\_requests[], int~*outcount, int~array\_of\_indices[], MPI\_Status~array\_of\_statuses[])}
\mpifnewbind{MPI\_Waitsome(incount, array\_of\_requests, outcount, array\_of\_indices, array\_of\_statuses, ierror) \fargs INTEGER, INTENT(IN) :: incount \\ TYPE(MPI\_Request), INTENT(INOUT) :: array\_of\_requests(incount) \\ INTEGER, INTENT(OUT) :: outcount, array\_of\_indices(*) \\ TYPE(MPI\_Status) :: array\_of\_statuses(*) \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
-\mpifbind{MPI\_WAITSOME(INCOUNT, ARRAY\_OF\_REQUESTS, OUTCOUNT, ARRAY\_OF\_INDICES,\\\ \ \ \ ARRAY\_OF\_STATUSES, IERROR)\fargs INTEGER INCOUNT, ARRAY\_OF\_REQUESTS(*), OUTCOUNT, ARRAY\_OF\_INDICES(*), ARRAY\_OF\_STATUSES(MPI\_STATUS\_SIZE,*), IERROR}
+\mpifbind{MPI\_WAITSOME(INCOUNT, ARRAY\_OF\_REQUESTS, OUTCOUNT, ARRAY\_OF\_INDICES,\\\ \ \ \ ARRAY\_OF\_STATUSES, IERROR)\fargs INTEGER INCOUNT, ARRAY\_OF\_REQUESTS(*), OUTCOUNT, ARRAY\_OF\_INDICES(*),\\\ \ \ \ ARRAY\_OF\_STATUSES(MPI\_STATUS\_SIZE,*), IERROR}
\mpicppemptybind{MPI::Request::Waitsome(int~incount, MPI::Request~array\_of\_requests[], int~array\_of\_indices[], MPI::Status~array\_of\_statuses[])}{static int}
\mpicppemptybind{MPI::Request::Waitsome(int~incount, MPI::Request~array\_of\_requests[], int~array\_of\_indices[])}{static int}
@@ -2748,7 +2770,7 @@
\mpifunc{MPI\_WAITSOME} will
block until a communication completes, if it was
passed a list that contains at least one active handle. Both calls fulfill a
-\mpiterm{fairness} requirement: If a request for a receive repeatedly
+\mpitermdef{fairness} requirement: If a request for a receive repeatedly
appears in a list of requests passed to \mpifunc{MPI\_WAITSOME} or
\mpifunc{MPI\_TESTSOME}, and a matching send has been posted, then the receive
will eventually succeed, unless the send is satisfied by another receive; and
@@ -2867,6 +2889,7 @@
\subsection{Non-destructive Test of \texorpdfstring{\mpiarg{status}}{status}}
+\mpitermtitleindex{status!test}
\label{subsec:pt2pt-teststatus}
This call is useful for accessing the information associated with a
@@ -2921,6 +2944,7 @@
gracefully.
\subsection{Probe}
+\mpitermtitleindex{probe}
\begin{funcdef}{MPI\_IPROBE(source, tag, comm, flag, status)}
\funcarg{\IN}{source}{rank of source or \const{MPI\_ANY\_SOURCE} (integer)}
@@ -3135,6 +3159,8 @@
\end{implementors}
\subsection{Matching Probe}
+\mpitermtitleindex{matching probe}
+\mpitermtitleindex{probe, matching}
\label{sec:matching-probe}
The function \mpifunc{MPI\_PROBE} checks for incoming messages without
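A C sketch of the matching probe/receive pair this section introduces, used to receive a message of unknown size; the datatype and allocation strategy are illustrative.

    #include <mpi.h>
    #include <stdlib.h>

    /* MPI_Mprobe removes the message from matching; MPI_Mrecv then receives
       exactly that message, which is safe even with concurrent threads. */
    void recv_unknown_size(int source, int tag, MPI_Comm comm)
    {
        MPI_Message msg;
        MPI_Status status;
        int count;
        MPI_Mprobe(source, tag, comm, &msg, &status);
        MPI_Get_count(&status, MPI_INT, &count);
        int *buf = malloc(count * sizeof(int));
        MPI_Mrecv(buf, count, MPI_INT, &msg, &status);
        /* ... use buf ... */
        free(buf);
    }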
@@ -3239,6 +3265,7 @@
\mpifunc{MPI\_PROBE} and \mpifunc{MPI\_IPROBE}.
\subsection{Matched Receives}
+\mpitermtitleindex{matched receives}
\label{sec:matched-receive}
The functions \mpifunc{MPI\_MRECV} and \mpifunc{MPI\_IMRECV} receive
@@ -3335,6 +3362,7 @@
\end{implementors}
\subsection{Cancel}
+\mpitermtitleindex{cancel}
\label{sec:cancel}
\begin{funcdef}{MPI\_CANCEL(request)}
@@ -3447,12 +3475,13 @@
\section{Persistent Communication Requests}
+\mpitermtitleindex{persistent communication requests}
\label{sec:pt2pt-persistent}
Often a communication with the same argument list is repeatedly
executed within the inner loop of a parallel computation. In such a
situation, it may be possible to optimize the communication by
-binding the list of communication arguments to a \mpiterm{persistent} communication
+binding the list of communication arguments to a \mpitermdefni{persistent} communication
request once and, then, repeatedly using
the request to initiate and complete messages. The
persistent request thus created can be thought of as a
@@ -3687,9 +3716,9 @@
rule is followed, then the functions
described in this section will be invoked
in a sequence of the form,
-\(
+\[
\textbf{Create \ (Start \ Complete)$^*$ \ Free}
-\)
+\]
where
$*$ indicates zero or more repetitions.
If the same communication object is used in several concurrent
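A C sketch of the Create (Start Complete)* Free pattern shown in the corrected display formula above; the tag and refill step are placeholders.

    #include <mpi.h>

    void persistent_loop(double *buf, int n, int dest, int iters, MPI_Comm comm)
    {
        MPI_Request req;
        MPI_Send_init(buf, n, MPI_DOUBLE, dest, /*tag=*/0, comm, &req); /* Create */
        for (int i = 0; i < iters; i++) {
            /* ... refill buf ... */
            MPI_Start(&req);                    /* Start */
            MPI_Wait(&req, MPI_STATUS_IGNORE);  /* Complete */
        }
        MPI_Request_free(&req);                 /* Free */
    }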
@@ -3725,9 +3754,10 @@
\section{Send-Receive}
+\mpitermtitleindex{send-receive}
\label{sec:pt2pt-sendrecv}
-The \mpiterm{send-receive} operations combine in one call the sending of a
+The \mpitermdefni{send-receive} operations combine in one call the sending of a
message to one destination and the receiving of another message, from
another process. The two (source and destination) are possibly the same.
A send-receive operation is
@@ -3774,7 +3804,7 @@
\cdeclindex{MPI\_Status}%
\mpibind{MPI\_Sendrecv(const~void~*sendbuf, int~sendcount, MPI\_Datatype~sendtype, int~dest, int~sendtag, void~*recvbuf, int~recvcount, MPI\_Datatype~recvtype, int~source, int~recvtag, MPI\_Comm~comm, MPI\_Status~*status)}
-\mpifnewbind{MPI\_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status, ierror) \fargs TYPE(*), DIMENSION(..), INTENT(IN) :: sendbuf \\ TYPE(*), DIMENSION(..) :: recvbuf \\ INTEGER, INTENT(IN) :: sendcount, dest, sendtag, recvcount, source, recvtag \\ TYPE(MPI\_Datatype), INTENT(IN) :: sendtype, recvtype \\ TYPE(MPI\_Comm), INTENT(IN) :: comm \\ TYPE(MPI\_Status) :: status \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifnewbind{MPI\_Sendrecv(sendbuf, sendcount, sendtype, dest, sendtag, recvbuf, recvcount, recvtype, source, recvtag, comm, status, ierror) \fargs TYPE(*), DIMENSION(..), INTENT(IN) :: sendbuf \\ TYPE(*), DIMENSION(..) :: recvbuf \\ INTEGER, INTENT(IN) :: sendcount, dest, sendtag, recvcount, source,\\\ \ \ \ recvtag \\ TYPE(MPI\_Datatype), INTENT(IN) :: sendtype, recvtype \\ TYPE(MPI\_Comm), INTENT(IN) :: comm \\ TYPE(MPI\_Status) :: status \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_SENDRECV(SENDBUF, SENDCOUNT, SENDTYPE, DEST, SENDTAG, RECVBUF, RECVCOUNT, RECVTYPE, SOURCE, RECVTAG, COMM, STATUS, IERROR)\fargs <type> SENDBUF(*), RECVBUF(*) \\ INTEGER SENDCOUNT, SENDTYPE, DEST, SENDTAG, RECVCOUNT, RECVTYPE,\\\ \ \ \ SOURCE, RECVTAG, COMM, STATUS(MPI\_STATUS\_SIZE), IERROR}
\mpicppemptybind{MPI::Comm::Sendrecv(const void~*sendbuf, int~sendcount, const MPI::Datatype\&~sendtype, int~dest, int~sendtag, void~*recvbuf, int~recvcount, const~MPI::Datatype\&~recvtype, int~source, int~recvtag, MPI::Status\&~status) const}{void}
\mpicppemptybind{MPI::Comm::Sendrecv(const void~*sendbuf, int~sendcount, const MPI::Datatype\&~sendtype, int~dest, int~sendtag, void~*recvbuf, int~recvcount, const~MPI::Datatype\&~recvtype, int~source, int~recvtag) const}{void}
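
A typical use is a ring shift, sketched here in C; combining the send and the receive in one call avoids the cyclic dependency that separate blocking calls could create (error checking elided):

    int rank, size, sendval, recvval;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    sendval = rank;
    /* Send to the right neighbor, receive from the left one. */
    MPI_Sendrecv(&sendval, 1, MPI_INT, (rank + 1) % size, 0,
                 &recvval, 1, MPI_INT, (rank - 1 + size) % size, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
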
@@ -3826,6 +3856,7 @@
\end{implementors}
\section{Null Processes}
+\mpitermtitleindex{null processes}
\label{sec:pt2pt-nullproc}
In many instances, it is convenient to specify a ``dummy'' source or
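
A common use, sketched in C: in a non-periodic shift the boundary ranks pass MPI_PROC_NULL, so the same call works for every rank without special-case code (rank, size, sendval, and recvval as in the ring-shift sketch above):

    int left  = (rank > 0)        ? rank - 1 : MPI_PROC_NULL;
    int right = (rank < size - 1) ? rank + 1 : MPI_PROC_NULL;
    /* A send to or receive from MPI_PROC_NULL completes
       immediately and transfers no data. */
    MPI_Sendrecv(&sendval, 1, MPI_INT, right, 0,
                 &recvval, 1, MPI_INT, left,  0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
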
Index: chap-binding/binding-2.tex
===================================================================
--- chap-binding/binding-2.tex (revision 2030)
+++ chap-binding/binding-2.tex (working copy)
@@ -1,8 +1,11 @@
\chapter{Language Bindings}
+\mpitermtitleindex{language binding}
\label{sec:binding-2}
\label{chap:binding-2}
\section{Fortran Support}
+\mpitermtitleindex{Fortran support}
+\mpitermtitleindex{Fortran -- language binding}
\subsection{Overview}
\label{f90:overview}
@@ -124,6 +127,7 @@
Section~\ref{sec:f90-problems:comparison-with-C} compares the Fortran problems with those in C.
\subsection{Fortran Support Through the \texorpdfstring{\code{mpi\_f08}}{mpi\_f08} Module}
+\mpitermtitleindex{mpi\_f08 module -- Fortran support}
\label{f90:mpif08}
An \MPI/ implementation providing a Fortran interface must
@@ -290,6 +294,7 @@
\end{rationale}
\subsection{Fortran Support Through the \texorpdfstring{\code{mpi}}{mpi} Module}
+\mpitermtitleindex{mpi module -- Fortran support}
\label{f90:extended}
An \MPI/ implementation providing a Fortran interface must provide a module named \code{mpi} that
@@ -308,8 +313,7 @@
otherwise unable to ignore the types of choice buffers, then the
implementation must provide explicit interfaces only for \MPI/
routines with no choice buffer arguments. See
- Section \sectionref{sec:f90:different-fortran-versions} on
- page \pageref{sec:f90:different-fortran-versions} for more
+ \sectionref{sec:f90:different-fortran-versions} for more
details.
\item Define all \MPI/ handles as type \ftype{INTEGER}.
\item Define the derived type \const{MPI\_Status}
@@ -442,6 +446,7 @@
\end{implementors}
\subsection{Fortran Support Through the \texorpdfstring{\code{mpif.h}}{mpif.h} Include File}
+\mpitermtitleindex{mpif.h include file -- Fortran support}
\label{f90:basic}
The use of the \code{mpif.h} include file is strongly discouraged
@@ -580,7 +585,7 @@
A Fortran call to an \MPI/ routine shall result in a call to a procedure
with one of the specific procedure names and calling conventions, as described in
-Table~\ref{tab:specific-fortran-proc-names} on page~\pageref{tab:specific-fortran-proc-names}.
+\namedref{Table}{tab:specific-fortran-proc-names}.
Case is not significant in the names.
\begin{table}[tbp]
@@ -621,14 +626,15 @@
\hline
\end{tabular}
\end{center}
-\caption{Specific Fortran procedure names and related calling conventions.
+\caption[Specific Fortran procedure names and related calling conventions]%
+{Specific Fortran procedure names and related calling conventions.
\mpifunc{MPI\_ISEND} is used as an example.
For routines without choice buffers, only 1A and 2A apply.}
\label{tab:specific-fortran-proc-names}
\end{table}
Note that for the deprecated routines in
-Section~\ref{sec:deprecated:since20} on page~\pageref{sec:deprecated:since20},
+\sectionref{sec:deprecated:since20},
which are reported only in
Annex~\ref{sec:lang:fortran-mpifh-and-mpi-module},
scheme 2A is utilized in the \texttt{mpi} module and \texttt{mpif.h},
@@ -645,7 +651,7 @@
The \texttt{mpi} and \texttt{mpi\_f08} modules and the \texttt{mpif.h}
include file
will each correspond to exactly one implementation scheme from
-Table~\ref{tab:specific-fortran-proc-names} on page~\pageref{tab:specific-fortran-proc-names}.
+\namedref{Table}{tab:specific-fortran-proc-names}.
However, the \MPI/ library may contain multiple implementation schemes from
Table~\ref{tab:specific-fortran-proc-names}.
\begin{implementors}
@@ -921,8 +927,8 @@
An implementation that provides a Fortran interface must provide a
combination of \MPI/ library and module or include file that uses
the specific procedure names as described in
- Table~\ref{tab:specific-fortran-proc-names} on
- page~\pageref{tab:specific-fortran-proc-names} so that the \MPI/
+ \namedref{Table}{tab:specific-fortran-proc-names}
+ so that the \MPI/
Fortran routines are interceptable as described above.
\end{implementors}
@@ -2304,6 +2310,7 @@
is not specified.
\subsection{Fortran Derived Types}
+\mpitermtitleindex{derived datatype}
\label{sec:f90-problems:derived-types}
\MPI/ supports passing Fortran
@@ -2495,6 +2502,7 @@
\label{sec:f90-problems:code-movements}
\subsubsection{Nonblocking Operations}
+\mpitermtitleindex{nonblocking!Fortran problems}
If a variable is local to a Fortran subroutine (i.e., not in
a module or a \code{COMMON} block), the compiler will assume that it cannot be modified
@@ -2584,6 +2592,7 @@
communication call, as well as in each parallel file I/O operation.
\subsubsection{Persistent Operations}
+\mpitermtitleindex{persistent communication requests!Fortran problems}
With persistent requests, the buffer argument is hidden from the
\mpifunc{MPI\_START} and \mpifunc{MPI\_STARTALL} calls, i.e., the
@@ -2593,6 +2602,7 @@
subsection in Section~\ref{sec:f90-problems:code-movements}.
\subsubsection{One-sided Communication}
+\mpitermtitleindex{one-sided communication!Fortran problems}
An example with instruction reordering due to register optimization can be found
in \sectionref{sec:onesided-optimizations}.
@@ -2604,7 +2614,7 @@
etc., that hides the actual
variables involved in the communication.
\consti{MPI\_BOTTOM} with an
-\type{MPI\_Datatype} containing absolute addresses is one example. Creating
+\type{MPI\_Datatype} containing \mpiterm{absolute addresses}\mpitermindex{addresses!absolute} is one example. Creating
a datatype which uses one variable as an anchor and brings along
others by using \mpifunc{MPI\_GET\_ADDRESS} to determine their offsets from
the anchor is another. The anchor variable would be the only one
@@ -2756,6 +2766,7 @@
code movement problems in \MPI/ applications.
\subsubsection{The Fortran ASYNCHRONOUS Attribute}
+\mpitermtitleindex{ASYNCHRONOUS -- Fortran attribute}
\label{sec:f90-problems:asynchronous}
Declaring an actual buffer argument with the \ftype{ASYNCHRONOUS}
@@ -2914,10 +2925,12 @@
Using \mpifunc{MPI\_GET} instead of \mpifunc{MPI\_PUT},
the same calls to \mpifunc{MPI\_F\_SYNC\_REG} are necessary.
+%% Must use \rm before \textbf to get bold in roman fontfamily, since
+%% otherwise, textbf will fail to find bold for \tt.
{\tt%%ALLOWLATEX%
\begin{tabbing}
\rule{3mm}{0mm}\=\rule{60mm}{0mm}\=\kill
-\>\textbf{Source of Process 1} \>\textbf{Source of Process 2}\\[2pt]
+\>{\rm\textbf{Source of Process 1}} \>{\rm\textbf{Source of Process 2}}\\[2pt]
\>bbbb = 777 \> buff = 999 \\
\> \> call MPI\_F\_SYNC\_REG(buff) \\
\>call MPI\_WIN\_FENCE \> call MPI\_WIN\_FENCE \\
@@ -2973,6 +2986,8 @@
rules specified in \sectionref{sec:f90:requirements}.
\subsubsection{Module Variables and COMMON Blocks}
+\mpitermtitleindex{module variables}
+\mpitermtitleindex{COMMON blocks}
An alternative to the previously mentioned methods is to put the buffer or variable into a module or a
common block and access it through a \code{USE} or \code{COMMON} statement in each
@@ -3361,6 +3376,7 @@
\section{Language Interoperability}
+\mpitermtitleindex{language binding!interoperability}
\label{sec:misc-lang-interop}
\subsection{Introduction}
@@ -3491,6 +3507,7 @@
\end{implementors}
\subsection{Transfer of Handles}
+\mpitermtitleindex{handles}
\label{sec:misc-handleconvert}
Handles are passed between
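
For example, a C communicator handle can round-trip through its Fortran representation (a sketch; the variable names are arbitrary):

    MPI_Fint fcomm = MPI_Comm_c2f(MPI_COMM_WORLD); /* C -> Fortran */
    MPI_Comm ccomm = MPI_Comm_f2c(fcomm);          /* Fortran -> C */
    /* ccomm is a valid C handle for the same communicator. */
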
@@ -3656,6 +3673,7 @@
\end{rationale}
\subsection{Status}
+\mpitermtitleindex{status}
\label{sec:conversion:status}
The following two procedures are provided in C to convert from a
@@ -3665,6 +3683,7 @@
The conversion occurs on all the information in status, including that which
is hidden. That is, no status information is lost in the conversion.
+\medskip%ALLOWLATEX%
\cdeclindex{MPI\_Status}%
\mpiemptybindidx{MPI\_Status\_f2c(const MPI\_Fint *f\_status, MPI\_Status *c\_status)}{int}{MPI\_STATUS\_F2C}
@@ -3697,6 +3716,7 @@
To do the conversion in the other direction, we have the following:
+\medskip%ALLOWLATEX%
\cdeclindex{MPI\_Status}%
\mpiemptybindidx{MPI\_Status\_c2f(const MPI\_Status *c\_status, MPI\_Fint *f\_status)}{int}{MPI\_STATUS\_C2F}
@@ -3737,14 +3757,16 @@
\label{fig:fortran:status-conversion-triangle}
\end{figure}
+\medskip%ALLOWLATEX%
\cdeclindex{MPI\_Status}%
-\mpiemptybindidx{MPI\_Status\_f082c(const MPI\_F08\_status *f08\_status, MPI\_Status *c\_status)}{int}{MPI\_STATUS\_F082C}
+\mpiemptybindidx{MPI\_Status\_f082c(const~MPI\_F08\_status~*f08\_status, MPI\_Status~*c\_status)}{int}{MPI\_STATUS\_F082C}
This C routine converts a Fortran \code{mpi\_f08} \ftype{TYPE(MPI\_Status)}\cdeclindex{MPI\_Status}
into a C \ctype{MPI\_Status}\cdeclindex{MPI\_Status}.
+\medskip%ALLOWLATEX%
\cdeclindex{MPI\_Status}%
-\mpiemptybindidx{MPI\_Status\_c2f08(const MPI\_Status *c\_status, MPI\_F08\_status *f08\_status)}{int}{MPI\_STATUS\_C2F08}
+\mpiemptybindidx{MPI\_Status\_c2f08(const~MPI\_Status~*c\_status, MPI\_F08\_status~*f08\_status)}{int}{MPI\_STATUS\_C2F08}
This C routine converts a C \ctype{MPI\_Status}\cdeclindex{MPI\_Status}
into a Fortran \code{mpi\_f08} \ftype{TYPE(MPI\_Status)}\cdeclindex{MPI\_Status}.
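
A minimal C sketch of both conversion directions, e.g. inside a wrapper that receives a status from code using the Fortran mpi module (the function name inspect_f_status is an illustrative assumption):

    void inspect_f_status(MPI_Fint *f_status)
    {
        MPI_Status c_status;
        MPI_Status_f2c(f_status, &c_status);
        /* ... examine c_status.MPI_SOURCE, c_status.MPI_TAG, ... */
        MPI_Status_c2f(&c_status, f_status); /* write changes back */
    }
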
@@ -3808,6 +3830,7 @@
% end of removal
\subsection{\MPI/ Opaque Objects}
+\mpitermtitleindex{opaque objects}
\label{subsec:mpiopaqueobjects}
Unless said otherwise, opaque objects are ``the same'' in all languages:
@@ -3821,6 +3844,7 @@
\MPI/ object.
\subsubsection{Datatypes}
+\mpitermtitleindex{datatypes}
\label{sec:misc-datatypes}
Datatypes encode the same information in all languages. E.g., a
@@ -3977,6 +4001,7 @@
%another language.
\subsubsection{Callback Functions}
+\mpitermtitleindex{callback functions!language interoperability}
\MPI/ calls may associate callback functions with \MPI/
objects: error handlers are associated with communicators and files, attribute copy
@@ -4024,6 +4049,7 @@
\end{users}
\subsubsection{Error Handlers}
+\mpitermtitleindex{error handling!error handlers}
\begin{implementors}
Error handlers have,
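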
@@ -4034,6 +4060,7 @@
\end{implementors}
\subsubsection{Reduce Operations}
+\mpitermtitleindex{reduction operations}
All predefined named and unnamed datatypes as
listed in \sectionref{coll-predefined-op} can be used in the listed
@@ -4048,6 +4075,7 @@
\end{users}
\subsection{Attributes}
+\mpitermtitleindex{attribute}
\label{sec:misc-attr}
@@ -4431,6 +4459,7 @@
\end{implementors}
\subsection{Extra-State}
+\mpitermtitleindex{extra-state}
Extra-state should not be modified by the copy or delete callback
functions. (This is obvious from the C binding, but not obvious from the
@@ -4446,6 +4475,7 @@
\subsection{Constants}
+\mpitermtitleindex{constants}
\label{sec:misc-constants}
\MPI/ constants have the same value in all languages,
@@ -4514,6 +4544,7 @@
%}
\subsection{Interlanguage Communication}
+\mpitermtitleindex{interlanguage communication}
\label{subsec:interlanguage-communication}
The type matching rules for communication in \MPI/ are not changed:
the datatype specification for each item sent should match,
Index: chap-appLang/appLang-Const.tex
===================================================================
--- chap-appLang/appLang-Const.tex (revision 2030)
+++ chap-appLang/appLang-Const.tex (working copy)
@@ -7,6 +7,7 @@
% Last chapter and first Annex should not have same title, therefore title of the Annex changed:
% \chapter{Language Binding}
\chapter{Language Bindings Summary}
+\mpitermtitleindex{language binding!summary}
\label{sec:lang}
\label{chap:lang}
\label{chap:binding}
@@ -22,6 +23,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Defined Constants}
+\mpitermtitleindex{constants}
\label{subsec:annexa-const}
The C and Fortran names are listed below.
@@ -1030,6 +1032,7 @@
\end{center}
\subsection{Types}
+\mpitermtitleindex{types}
\label{subsec:annexa-type} % Sect. A.1.2 p. 499 newlabel
The following are defined C type definitions, included in the file
\code{mpi.h}.
@@ -1084,6 +1087,8 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Prototype Definitions}
+\mpitermtitleindex{prototype definitions}
+\mpitermtitleindex{callback functions!prototype definitions}
\label{subsec:annexa-prototypes} % Sect. A.1.3 p.500 newlabel
%
@@ -1382,6 +1387,8 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Deprecated Prototype Definitions}
+\mpitermtitleindex{prototype definitions!deprecated}
+\mpitermtitleindex{callback functions!prototype definitions!deprecated}
The following are defined C typedefs for deprecated user-defined functions,
also included in the file \code{mpi.h}.
@@ -1428,6 +1435,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Info Keys}
+\mpitermtitleindex{info object!keys}
The following info keys are reserved. They are strings. \\[3pt]
\infoskip{access\_style} \\
@@ -1463,6 +1471,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Info Values}
+\mpitermtitleindex{info object!values}
The following info values are reserved. They are strings. \\[3pt]
\infoskip{false} \\
Index: chap-dynamic/dynamic-2.tex
===================================================================
--- chap-dynamic/dynamic-2.tex (revision 2030)
+++ chap-dynamic/dynamic-2.tex (working copy)
@@ -38,6 +38,7 @@
% Version as of April 27, 1995
\chapter{Process Creation and Management}
+\mpitermtitleindex{process creation}
\label{sec:dynamic-2}
\label{chap:dynamic-2}
@@ -141,6 +142,7 @@
other.
\subsection{Starting Processes}
+\mpitermtitleindex{starting processes}
\MPI/ applications may start new processes through an interface to an external process manager.
@@ -270,6 +272,7 @@
a process may belong to several groups.
\subsection{Starting Processes and Establishing Communication}
+\mpitermtitleindex{starting processes}
The following routine starts a number of \MPI/ processes and
@@ -960,6 +963,7 @@
\section{Establishing Communication}
+\mpitermtitleindex{establishing communication}
\label{sec:client-server}
@@ -1004,6 +1008,8 @@
\end{users}
\subsection{Names, Addresses, Ports, and All That}
+\mpitermtitleindex{names}
+\mpitermtitleindex{ports}
Almost all of the complexity in \MPI/ client/server routines
addresses the question ``how does the client find out how
@@ -1260,6 +1266,7 @@
\subsection{Name Publishing}
+\mpitermtitleindex{names!name publishing}
The routines in this section provide a mechanism for
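
The server side of this mechanism, sketched in C (the service name "my_service" is an illustrative assumption; error checking elided):

    char port[MPI_MAX_PORT_NAME];
    MPI_Comm client;

    MPI_Open_port(MPI_INFO_NULL, port);
    MPI_Publish_name("my_service", MPI_INFO_NULL, port);
    /* A client can now look the port up with MPI_Lookup_name
       and connect with MPI_Comm_connect. */
    MPI_Comm_accept(port, MPI_INFO_NULL, 0, MPI_COMM_SELF, &client);
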
@@ -1610,6 +1617,7 @@
\section{Other Functionality}
\subsection{Universe Size}
+\mpitermtitleindex{universe size}
\label{subsec:universesize}
Many ``dynamic'' \MPI/ applications are expected to exist in a static
@@ -1675,6 +1683,7 @@
\subsection{Singleton \texorpdfstring{\mpifunc{MPI\_INIT}}{MPI\_INIT}}
+\mpitermtitleindex{singleton init}
\label{subsec:singleton}
A high-quality implementation will allow any
@@ -1797,7 +1806,7 @@
in the child do not affect the parent, or vice-versa.
\begin{itemize}
-\item Two processes are \mpiterm{connected} if there is a
+\item Two processes are \mpitermdef{connected} if there is a
communication path (direct or indirect) between them. More precisely:
\begin{enumerate}
\item Two processes are connected if
@@ -1813,7 +1822,7 @@
\end{enumerate}
\item If A is connected to B and B to C, then A is connected to C.
\end{enumerate}
-\item Two processes are \mpiterm{disconnected} (also \mpiterm{independent})
+\item Two processes are \mpitermdef{disconnected} (also \mpitermdef{independent})
if they are not connected.
\item By the above definitions, connectivity is a transitive
property, and divides the universe of \MPI/ processes into
Index: chap-misc/misc-2.tex
===================================================================
--- chap-misc/misc-2.tex (revision 2030)
+++ chap-misc/misc-2.tex (working copy)
@@ -1,6 +1,7 @@
% \chapter{Miscellany}
\chapter{The \texorpdfstring{\mpiarg{Info}}{Info} Object}
+\mpitermtitleindex{info object}
\label{sec:misc-2}
\label{chap:misc-2}
Index: chap-ei/ei-2.tex
===================================================================
--- chap-ei/ei-2.tex (revision 2030)
+++ chap-ei/ei-2.tex (working copy)
@@ -8,7 +8,7 @@
\label{sec:ei-intro}
This chapter begins with calls used to
-create \mpiterm{generalized requests},
+create \mpitermdef{generalized requests},
which allow users
to create new nonblocking
operations with an interface similar to what is present in \mpi/.
@@ -31,6 +31,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Generalized Requests}
+\mpitermtitleindex{generalized requests}
\label{sec:ei-gr}
The goal of
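
A skeleton of the mechanism in C, under the assumption that the actual operation runs elsewhere (e.g. in a helper thread) and signals completion; the callback bodies are minimal placeholders:

    int query_fn(void *extra_state, MPI_Status *status)
    {
        /* Fill in the fields a later MPI_WAIT/MPI_TEST will report. */
        MPI_Status_set_elements(status, MPI_BYTE, 0);
        MPI_Status_set_cancelled(status, 0);
        status->MPI_SOURCE = MPI_UNDEFINED;
        status->MPI_TAG    = MPI_UNDEFINED;
        return MPI_SUCCESS;
    }
    int free_fn(void *extra_state)                 { return MPI_SUCCESS; }
    int cancel_fn(void *extra_state, int complete) { return MPI_SUCCESS; }

    void start_my_op(MPI_Request *req)
    {
        MPI_Grequest_start(query_fn, free_fn, cancel_fn, NULL, req);
        /* ... hand the real work to a helper thread ... */
    }

    void helper_done(MPI_Request req)
    {
        MPI_Grequest_complete(req); /* MPI_WAIT/MPI_TEST can now succeed */
    }
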
@@ -464,6 +465,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Associating Information with Status}
+\mpitermtitleindex{status!associating information}
\label{sec:ei-status}
\MPI/ supports several different types of requests besides those for
@@ -613,24 +615,24 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{\texorpdfstring{\MPI/}{MPI} and Threads}
+\mpitermtitleindex{threads}
\label{sec:ei-threads}
This section specifies the interaction between \MPI/ calls and threads.
The section lists minimal requirements for
-\mpiterm{thread compliant} \MPI/ implementations
+\mpitermdef{thread compliant} \MPI/ implementations
and defines functions that can
be used for initializing the thread environment.
\MPI/ may be implemented in environments where threads
are not supported or perform poorly. Therefore, \MPI/ implementations are not required to be thread compliant as defined in this section.
+Regardless of whether or not the \MPI/ implementation is thread compliant,
\mpifunc{MPI\_INITIALIZED}, \mpifunc{MPI\_FINALIZED},
\mpifunc{MPI\_QUERY\_THREAD}, \mpifunc{MPI\_IS\_THREAD\_MAIN},
\mpifunc{MPI\_GET\_VERSION} and \mpifunc{MPI\_GET\_LIBRARY\_VERSION}
-are exceptions to this rule and must always be thread-safe. When a
+must always be thread-safe. When a
thread is executing one of these routines, if another concurrently
running thread also makes an \MPI/ call, the outcome will be as if the
calls executed in some order.
-Implementations that do not support threads are not required to support
-the calling of these functions from threads.
This section
generally assumes a thread package similar to
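
The usual initialization pattern in C (argc and argv as passed to main); the provided level may be lower than the requested one, so portable code must check it:

    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    if (provided < MPI_THREAD_MULTIPLE) {
        /* e.g. funnel all MPI calls through a single thread */
    }
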
@@ -732,7 +734,7 @@
The call to \mpifunc{MPI\_FINALIZE} should occur on the same thread
that
-initialized \MPI/. We call this thread the \mpiterm{main
+initialized \MPI/. We call this thread the \mpitermdef{main
thread}. The call should occur only after all process threads
have completed their \MPI/ calls, and have no pending communications
or I/O operations.
@@ -935,7 +937,7 @@
in \mpiarg{provided} the highest supported level.
-A \mpiterm{thread compliant} \MPI/ implementation will be able to return
+A \mpitermdef{thread compliant} \MPI/ implementation will be able to return
\mpiarg{provided}\flushline
\mpiarg{ = MPI\_THREAD\_MULTIPLE}.
Such an implementation may always return
Index: chap-datatypes/datatypes.tex
===================================================================
--- chap-datatypes/datatypes.tex (revision 2030)
+++ chap-datatypes/datatypes.tex (working copy)
@@ -1,4 +1,5 @@
\chapter{Datatypes}
+\mpitermtitleindex{datatypes}
\label{chap:datatypes}
Basic datatypes were introduced
@@ -15,6 +16,7 @@
messages.
\section{Derived Datatypes}
+\mpitermtitleindex{derived datatype}
\label{sec:pt2pt-datatype}
Up to here, all point to point communications have involved only
@@ -53,7 +55,7 @@
in this section. These methods of constructing derived datatypes can
be applied recursively.
-A \mpiterm{general datatype} is an opaque object that specifies two
+A \mpitermdef{general datatype} is an opaque object that specifies two
things:
\begin{itemize}
\item
@@ -66,8 +68,8 @@
in increasing order. Therefore, the order of items need not
coincide with their order in store, and an item may appear more than
once.
-We call such a pair of sequences (or sequence of pairs) a \mpiterm{type map}.
-The sequence of basic datatypes (displacements ignored) is the \mpiterm{type
+We call such a pair of sequences (or sequence of pairs) a \mpitermdef{type map}.
+The sequence of basic datatypes (displacements ignored) is the \mpitermdef{type
signature} of the datatype.
Let
@@ -116,7 +118,7 @@
map $\{ (\ctype{int}, 0) \}$, with one entry of type \ctype{int} and
displacement zero. The other basic datatypes are similar.
-The \mpiterm{extent} of a datatype is defined to
+The \mpitermdefni{extent}\mpitermdefindex{extent of datatypes} of a datatype is defined to
be the span from the first byte to the last byte occupied by entries in this
datatype, rounded up to satisfy alignment requirements.
That is, if
@@ -141,7 +143,7 @@
used by the compiler in common blocks, \ftype{SEQUENCE} derived types,
\ftype{BIND(C)} derived types,
or derived types that are neither \ftype{SEQUENCE} nor \ftype{BIND(C)}.
-The complete definition of \mpiterm{extent} is given
+The complete definition of \mpitermdefni{extent}\mpitermdefindex{extent of datatypes} is given
by Equation~\ref{soft-lb-ub-definition} \sectionref{sec:pt2pt-datatype}.
@@ -239,12 +241,14 @@
the datatype returned by \mpiarg{newtype} is
\begin{displaymath}
\{ (\ctype{double}, 0), (\ctype{char}, 8), (\ctype{double}, 16),
- (\ctype{char}, 24), (\ctype{double}, 32), (\ctype{char}, 40) \} ;
+ (\ctype{char}, 24), (\ctype{double}, 32), (\ctype{char}, 40) \};
\end{displaymath}
i.e., alternating \ctype{double} and \ctype{char} elements, with displacements
$0, 8, 16, 24, 32, 40$.
\end{example}
+\medskip%ALLOWLATEX%
+
In general,
assume that the type map of \mpiarg{oldtype} is
\begin{displaymath}
@@ -254,11 +258,13 @@
Then \mpiarg{newtype} has a type map with $\mpicode{count} \cdot \mpicode{n}$
entries defined by:
\begin{displaymath}
-\{ (type_0, disp_0), \ldots , (type_{n-1}, disp_{n-1}), (type_0, disp_0
-+ex), \ldots ,(type_{n-1}, disp_{n-1} + ex) ,
+% The \hskip is to keep the equation within the text margins
+\hskip-1em\{ (type_0, disp_0), \ldots , (type_{n-1}, disp_{n-1}), (type_0, disp_0
++ex), \ldots ,(type_{n-1}, disp_{n-1} + ex),\\
\end{displaymath}
+% This is *not* the way to display a multiline equation - FIXME
\begin{displaymath}
-\ldots,(type_0, disp_0 +ex \cdot(\mpicode{count}-1) ), \ldots ,
+\hskip-1em\ldots,(type_0, disp_0 +ex \cdot(\mpicode{count}-1) ), \ldots ,
(type_{n-1} , disp_{n-1} + ex \cdot (\mpicode{count}-1)) \} .
\end{displaymath}
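
The earlier {(double,0),(char,8)} example corresponds to this C fragment, assuming oldtype has that type map and extent 16:

    MPI_Datatype newtype;
    MPI_Type_contiguous(3, oldtype, &newtype);
    MPI_Type_commit(&newtype);
    /* newtype: {(double,0),(char,8),(double,16),(char,24),
                 (double,32),(char,40)}, extent 48 */
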
@@ -483,7 +489,7 @@
\mpibind{MPI\_Type\_indexed(int~count, const~int~array\_of\_blocklengths[], const~int~array\_of\_displacements[], MPI\_Datatype~oldtype, MPI\_Datatype~*newtype)}
-\mpifnewbind{MPI\_Type\_indexed(count, array\_of\_blocklengths, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, array\_of\_blocklengths(count), array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifnewbind{MPI\_Type\_indexed(count, array\_of\_blocklengths, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, array\_of\_blocklengths(count),\\\ \ \ \ array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_TYPE\_INDEXED(COUNT, ARRAY\_OF\_BLOCKLENGTHS, ARRAY\_OF\_DISPLACEMENTS, OLDTYPE, NEWTYPE, IERROR)\fargs INTEGER COUNT, ARRAY\_OF\_BLOCKLENGTHS(*), ARRAY\_OF\_DISPLACEMENTS(*),\\\ \ \ \ OLDTYPE, NEWTYPE, IERROR}
\mpicppemptybind{MPI::Datatype::Create\_indexed(int~count, const~int~array\_of\_blocklengths[], const~int~array\_of\_displacements[]) const}{MPI::Datatype}
@@ -584,7 +590,7 @@
\cdeclindex{MPI\_Aint}%
\mpibind{MPI\_Type\_create\_hindexed(int~count, const~int~array\_of\_blocklengths[], const~MPI\_Aint~array\_of\_displacements[], MPI\_Datatype~oldtype, MPI\_Datatype~*newtype)}
-\mpifnewbind{MPI\_Type\_create\_hindexed(count, array\_of\_blocklengths, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, array\_of\_blocklengths(count) \\ INTEGER(KIND=MPI\_ADDRESS\_KIND), INTENT(IN) :: array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifnewbind{MPI\_Type\_create\_hindexed(count, array\_of\_blocklengths, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, array\_of\_blocklengths(count) \\ INTEGER(KIND=MPI\_ADDRESS\_KIND), INTENT(IN) :: \\\ \ \ \ array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_TYPE\_CREATE\_HINDEXED(COUNT, ARRAY\_OF\_BLOCKLENGTHS, ARRAY\_OF\_DISPLACEMENTS, OLDTYPE, NEWTYPE, IERROR)\fargs INTEGER COUNT, ARRAY\_OF\_BLOCKLENGTHS(*), OLDTYPE, NEWTYPE, IERROR\\INTEGER(KIND=MPI\_ADDRESS\_KIND) ARRAY\_OF\_DISPLACEMENTS(*)}
\mpicppemptybind{MPI::Datatype::Create\_hindexed(int count, const~int~array\_of\_blocklengths[], const~MPI::Aint~array\_of\_displacements[]) const}{MPI::Datatype}
@@ -650,13 +656,9 @@
\mpibind{MPI\_Type\_create\_indexed\_block(int~count, int~blocklength, const~int~array\_of\_displacements[], MPI\_Datatype~oldtype, MPI\_Datatype~*newtype)}
-\mpifnewbind{MPI\_Type\_create\_indexed\_block(count, blocklength, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, blocklength, array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifnewbind{MPI\_Type\_create\_indexed\_block(count, blocklength, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, blocklength,\\\ \ \ \ array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_TYPE\_CREATE\_INDEXED\_BLOCK(COUNT, BLOCKLENGTH, ARRAY\_OF\_DISPLACEMENTS, OLDTYPE, NEWTYPE, IERROR) \fargs INTEGER COUNT, BLOCKLENGTH, ARRAY\_OF\_DISPLACEMENTS(*), OLDTYPE,\\\ \ \ \ NEWTYPE, IERROR}
-
-
-
-
% \mpicppemptybind{MPI::Datatype::Create\_indexed\_block( int~count, int~blocklength, const~int~array\_of\_displacements[])~const}{MPI::Datatype}
\mpicppemptybind{MPI::Datatype::Create\_indexed\_block(int~count, int~blocklength, const~int~array\_of\_displacements[])~const}{MPI::Datatype}
@@ -677,7 +679,7 @@
\mpibind{MPI\_Type\_create\_hindexed\_block(int~count, int~blocklength, const~MPI\_Aint~array\_of\_displacements[], MPI\_Datatype~oldtype, MPI\_Datatype~*newtype)}
-\mpifnewbind{MPI\_Type\_create\_hindexed\_block(count, blocklength, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, blocklength \\ INTEGER(KIND=MPI\_ADDRESS\_KIND), INTENT(IN) :: array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifnewbind{MPI\_Type\_create\_hindexed\_block(count, blocklength, array\_of\_displacements, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, blocklength \\ INTEGER(KIND=MPI\_ADDRESS\_KIND), INTENT(IN) :: \\\ \ \ \ array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_TYPE\_CREATE\_HINDEXED\_BLOCK(COUNT, BLOCKLENGTH, ARRAY\_OF\_DISPLACEMENTS, OLDTYPE, NEWTYPE, IERROR)\fargs INTEGER COUNT, BLOCKLENGTH, OLDTYPE, NEWTYPE, IERROR\\INTEGER(KIND=MPI\_ADDRESS\_KIND) ARRAY\_OF\_DISPLACEMENTS(*)}
\paragraph*{Struct}
@@ -707,7 +709,7 @@
\cdeclindex{MPI\_Aint}%
\mpibind{MPI\_Type\_create\_struct(int~count, const~int~array\_of\_blocklengths[], const~MPI\_Aint~array\_of\_displacements[], const~MPI\_Datatype~array\_of\_types[], MPI\_Datatype~*newtype)}
-\mpifnewbind{MPI\_Type\_create\_struct(count, array\_of\_blocklengths, array\_of\_displacements, array\_of\_types, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, array\_of\_blocklengths(count) \\ INTEGER(KIND=MPI\_ADDRESS\_KIND), INTENT(IN) :: array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: array\_of\_types(count) \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifnewbind{MPI\_Type\_create\_struct(count, array\_of\_blocklengths, array\_of\_displacements, array\_of\_types, newtype, ierror) \fargs INTEGER, INTENT(IN) :: count, array\_of\_blocklengths(count) \\ INTEGER(KIND=MPI\_ADDRESS\_KIND), INTENT(IN) :: \\\ \ \ \ array\_of\_displacements(count) \\ TYPE(MPI\_Datatype), INTENT(IN) :: array\_of\_types(count) \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
\mpifbind{MPI\_TYPE\_CREATE\_STRUCT(COUNT, ARRAY\_OF\_BLOCKLENGTHS, ARRAY\_OF\_DISPLACEMENTS, ARRAY\_OF\_TYPES, NEWTYPE, IERROR)\fargs INTEGER COUNT, ARRAY\_OF\_BLOCKLENGTHS(*), ARRAY\_OF\_TYPES(*), NEWTYPE,\\\ \ \ \ IERROR\\ INTEGER(KIND=MPI\_ADDRESS\_KIND) ARRAY\_OF\_DISPLACEMENTS(*)}
\mpicppemptybind{MPI::Datatype::Create\_struct(int count, const~int~array\_of\_blocklengths[], const~MPI::Aint array\_of\_displacements[], const~MPI::Datatype~array\_of\_types[])}{static MPI::Datatype}
@@ -813,8 +815,8 @@
% JMS-apr
\mpibind{MPI\_Type\_create\_subarray(int~ndims, const~int~array\_of\_sizes[], const~int~array\_of\_subsizes[], const~int~array\_of\_starts[], int~order, MPI\_Datatype~oldtype, MPI\_Datatype~*newtype)}
% JMS-apr
-\mpifnewbind{MPI\_Type\_create\_subarray(ndims, array\_of\_sizes, array\_of\_subsizes, array\_of\_starts, order, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: ndims, array\_of\_sizes(ndims), array\_of\_subsizes(ndims), array\_of\_starts(ndims), order \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
-\mpifbind{MPI\_TYPE\_CREATE\_SUBARRAY(NDIMS, ARRAY\_OF\_SIZES, ARRAY\_OF\_SUBSIZES,\\\ \ \ \ ARRAY\_OF\_STARTS, ORDER, OLDTYPE, NEWTYPE, IERROR)\fargs INTEGER NDIMS, ARRAY\_OF\_SIZES(*), ARRAY\_OF\_SUBSIZES(*), ARRAY\_OF\_STARTS(*), ORDER, OLDTYPE, NEWTYPE, IERROR}
+\mpifnewbind{MPI\_Type\_create\_subarray(ndims, array\_of\_sizes, array\_of\_subsizes, array\_of\_starts, order, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: ndims, array\_of\_sizes(ndims), \\\ \ \ \ array\_of\_subsizes(ndims), array\_of\_starts(ndims), order \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifbind{MPI\_TYPE\_CREATE\_SUBARRAY(NDIMS, ARRAY\_OF\_SIZES, ARRAY\_OF\_SUBSIZES,\\\ \ \ \ ARRAY\_OF\_STARTS, ORDER, OLDTYPE, NEWTYPE, IERROR)\fargs INTEGER NDIMS, ARRAY\_OF\_SIZES(*), ARRAY\_OF\_SUBSIZES(*),\\\ \ \ \ ARRAY\_OF\_STARTS(*), ORDER, OLDTYPE, NEWTYPE, IERROR}
% JMS-apr
\mpicppemptybind{MPI::Datatype::Create\_subarray(int~ndims, const~int~array\_of\_sizes[], const~int~array\_of\_subsizes[], const~int~array\_of\_starts[], int~order) const}{MPI::Datatype}
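
For instance, a minimal C sketch describing the interior 2x2 block of a 4x4 C-order array of doubles:

    int sizes[2]    = {4, 4};   /* full array */
    int subsizes[2] = {2, 2};   /* subarray   */
    int starts[2]   = {1, 1};   /* its origin */
    MPI_Datatype sub;
    MPI_Type_create_subarray(2, sizes, subsizes, starts,
                             MPI_ORDER_C, MPI_DOUBLE, &sub);
    MPI_Type_commit(&sub);
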
@@ -906,14 +908,14 @@
\mpiarg{order} = \const{MPI\_ORDER\_FORTRAN}, and
Equation~\ref{eq:subarray-c} defines the recursion step when
\mpiarg{order} = \const{MPI\_ORDER\_C}.
-These equations use the conceptual datatypes \mpiterm{lb\_marker}
-and \mpiterm{ub\_marker}, see \sectionref{subsec:pt2pt-markers} for details.
+These equations use the conceptual datatypes \mpiublb{lb\_marker}
+and \mpiublb{ub\_marker}, see \sectionref{subsec:pt2pt-markers} for details.
\begin{eqnarray}
\lefteqn{\mbox{Subarray}(1,\{size_0\},\{subsize_0\},\{start_0\},}
\label{eq:subarray-base} \\
& & \quad \{(type_0,disp_0),(type_1,disp_1),\ldots,(type_{n-1},disp_{n-1})\}) \nonumber \\
-& = & \{(\mpiterm{lb\_marker},0), \nonumber \\
+& = & \{(\mpiublb{lb\_marker},0), \nonumber \\
& & (type_0,disp_0+start_0 \times ex),\ldots,(type_{n-1},
disp_{n-1} + start_0 \times ex), \nonumber \\
& & (type_0,disp_0+(start_0 + 1)\times ex),\ldots,(type_{n-1},
@@ -923,7 +925,7 @@
\nonumber \\
& & \hspace{.5in}(type_{n-1},disp_{n-1} + (start_0+subsize_0 - 1) \times ex),
\nonumber \\
-& & (\mpiterm{ub\_marker}, size_0 \times ex) \} \nonumber \\
+& & (\mpiublb{ub\_marker}, size_0 \times ex) \} \nonumber \\
& & \nonumber \\
\lefteqn{\mbox{Subarray}( ndims,
\{size_0, size_1,\ldots,size_{ndims-1}\},} \label{eq:subarray-fortran} \\
@@ -1001,8 +1003,8 @@
% JMS-apr
\mpibind{MPI\_Type\_create\_darray(int~size, int~rank, int~ndims, const~int~array\_of\_gsizes[], const~int~array\_of\_distribs[], const~int~array\_of\_dargs[], const~int~array\_of\_psizes[], int~order, MPI\_Datatype~oldtype, MPI\_Datatype~*newtype)}
% JMS-apr
-\mpifnewbind{MPI\_Type\_create\_darray(size, rank, ndims, array\_of\_gsizes, array\_of\_distribs, array\_of\_dargs, array\_of\_psizes, order, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: size, rank, ndims, array\_of\_gsizes(ndims), array\_of\_distribs(ndims), array\_of\_dargs(ndims), array\_of\_psizes(ndims), order \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
-\mpifbind{MPI\_TYPE\_CREATE\_DARRAY(SIZE, RANK, NDIMS, ARRAY\_OF\_GSIZES, ARRAY\_OF\_DISTRIBS, ARRAY\_OF\_DARGS, ARRAY\_OF\_PSIZES, ORDER, OLDTYPE, NEWTYPE, IERROR)\fargs INTEGER SIZE, RANK, NDIMS, ARRAY\_OF\_GSIZES(*), ARRAY\_OF\_DISTRIBS(*),\\\ \ \ \ ARRAY\_OF\_DARGS(*), ARRAY\_OF\_PSIZES(*), ORDER, OLDTYPE, NEWTYPE, IERROR}
+\mpifnewbind{MPI\_Type\_create\_darray(size, rank, ndims, array\_of\_gsizes, array\_of\_distribs, array\_of\_dargs, array\_of\_psizes, order, oldtype, newtype, ierror) \fargs INTEGER, INTENT(IN) :: size, rank, ndims, array\_of\_gsizes(ndims),\\\ \ \ \ array\_of\_distribs(ndims), array\_of\_dargs(ndims),\\\ \ \ \ array\_of\_psizes(ndims), order \\ TYPE(MPI\_Datatype), INTENT(IN) :: oldtype \\ TYPE(MPI\_Datatype), INTENT(OUT) :: newtype \\ INTEGER, OPTIONAL, INTENT(OUT) :: ierror}
+\mpifbind{MPI\_TYPE\_CREATE\_DARRAY(SIZE, RANK, NDIMS, ARRAY\_OF\_GSIZES, ARRAY\_OF\_DISTRIBS, ARRAY\_OF\_DARGS, ARRAY\_OF\_PSIZES, ORDER, OLDTYPE, NEWTYPE, IERROR)\fargs INTEGER SIZE, RANK, NDIMS, ARRAY\_OF\_GSIZES(*), ARRAY\_OF\_DISTRIBS(*),\\\ \ \ \ ARRAY\_OF\_DARGS(*), ARRAY\_OF\_PSIZES(*), ORDER, OLDTYPE, NEWTYPE,\\\ \ \ \ IERROR}
% JMS-apr
\mpicppemptybind{MPI::Datatype::Create\_darray(int~size, int~rank, int~ndims, const~int~array\_of\_gsizes[], const~int~array\_of\_distribs[], const~int~array\_of\_dargs[], const~int~array\_of\_psizes[], int~order) const}{MPI::Datatype}
@@ -1175,13 +1177,13 @@
\end{displaymath}
where $type_i$ is a predefined \MPI/ datatype, and let $ex$ be the
extent of \mpiarg{oldtype}.
-The following function uses the conceptual datatypes \mpiterm{lb\_marker}
-and \mpiterm{ub\_marker}, see \sectionref{subsec:pt2pt-markers} for details.
+The following function uses the conceptual datatypes \mpiublb{lb\_marker}
+and \mpiublb{ub\_marker}, see \sectionref{subsec:pt2pt-markers} for details.
Given the above, the function cyclic() is defined as follows:
\begin{eqnarray*}
\lefteqn{\mbox{cyclic}(darg, gsize, r, psize, \mpiarg{oldtype})} \\
-&=& \{ (\mpiterm{lb\_marker}, 0), \\
+&=& \{ (\mpiublb{lb\_marker}, 0), \\
& & (type_0, disp_0 + r \times darg \times ex), \ldots , \\
& & \hspace{.5in} (type_{n-1}, disp_{n-1} + r \times darg \times ex), \\
& & (type_0, disp_0 + (r \times darg + 1) \times ex), \ldots , \\
@@ -1223,7 +1225,7 @@
& & \hspace{.5in} (type_{n-1}, disp_{n-1} + (r \times darg + darg_{last} - 1)
\times ex \\
& & \hspace{1in} + psize \times darg \times ex \times (count - 1)), \\
-& & (\mpiterm{ub\_marker}, gsize * ex) \}
+& & (\mpiublb{ub\_marker}, gsize * ex) \}
\end{eqnarray*}
where $count$ is defined by this code fragment:
%%HEADER
@@ -1305,7 +1307,9 @@
\label{subsec:pt2pt-addfunc}
The displacements in a general datatype are relative to some initial buffer
-address. \mpiterm{Absolute addresses} can be substituted for these
+address.
+\mpitermdefni{Absolute addresses}\mpitermdefindex{absolute addresses}\mpitermdefindex{addresses!absolute}
+can be substituted for these
displacements: we treat them as displacements relative to ``address
zero,'' the start of the address space. This initial address zero is
indicated by the constant \const{MPI\_BOTTOM}. Thus, a datatype can
@@ -1318,7 +1322,8 @@
The address of a location in memory can be found by invoking the
function\flushline
\mpifunc{MPI\_GET\_ADDRESS}.
-The relative displacement between two absolute addresses
+The \mpitermdef{relative displacement}\mpitermdefindex{addresses!relative displacement}
+between two absolute addresses
can be calculated with the function \mpifunc{MPI\_AINT\_DIFF}. A new absolute
address as sum of an absolute base address and a relative displacement can be
calculated with the function \mpifunc{MPI\_AINT\_ADD}. To ensure portability,
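
A sketch of the portable idiom in C, using a particle structure similar to the one in the examples later in this chapter (its exact layout is an assumption here):

    struct particle { int type; double x[3]; };
    struct particle p;
    MPI_Aint base, disp;

    MPI_Get_address(&p,   &base);
    MPI_Get_address(&p.x, &disp);
    /* Portable displacement of x within p; raw pointer
       subtraction is not portable for this purpose. */
    disp = MPI_Aint_diff(disp, base);
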
@@ -1511,6 +1516,8 @@
value), it is set to \const{MPI\_UNDEFINED}.
\subsection{Lower-Bound and Upper-Bound Markers}
+\mpitermtitleindex{lower-bound markers}
+\mpitermtitleindex{upper-bound markers}
\label{subsec:pt2pt-markers}
It is often convenient to define explicitly the lower bound and upper
@@ -1529,10 +1536,11 @@
that match these structures.
To achieve this, we add two additional conceptual datatypes,
-\mpiterm{lb\_marker} and \mpiterm{ub\_marker}, that represent the lower
+\mpitermdef{lb\_marker} and\flushline
+\mpitermdef{ub\_marker}, that represent the lower
bound and upper
bound of a datatype. These conceptual datatypes occupy no space
-($extent(\mpiterm{lb\_marker}) = extent(\mpiterm{ub\_marker}) =0$)
+($extent(\mpiublb{lb\_marker}) = extent(\mpiublb{ub\_marker}) =0$)
. They do not
affect the size or count of a datatype, and do not affect the
content of a message created with this datatype. However, they do
@@ -1549,43 +1557,42 @@
creates a new datatype that has an
extent of 9 (from -3 to 5, 5 included), and contains an integer at
displacement 0. This is the datatype defined by the typemap
-\{(\mpiterm{lb\_marker}, -3), (int, 0), (\mpiterm{ub\_marker}, 6)\}.
+\{(\mpiublb{lb\_marker}, -3), (int, 0), (\mpiublb{ub\_marker}, 6)\}.
If this type is replicated twice by a call to
\mpifunc{MPI\_TYPE\_CONTIGUOUS(2, type1, type2)} then the newly created
type can
be described by the typemap
-\{(\mpiterm{lb\_marker}, -3), (int, 0), (int,9), (\mpiterm{ub\_marker}, 15)\}.
+\{(\mpiublb{lb\_marker}, -3), (int, 0), (int,9), (\mpiublb{ub\_marker}, 15)\}.
(An entry of type
-\mpiterm{ub\_marker}
-can be deleted if there is another entry of type \mpiterm{ub\_marker} with a
+\mpiublb{ub\_marker}
+can be deleted if there is another entry of type \mpiublb{ub\_marker} with a
higher
-displacement; an entry of type \mpiterm{lb\_marker} can be deleted if there
+displacement; an entry of type \mpiublb{lb\_marker} can be deleted if there
is another
-entry of type \mpiterm{lb\_marker} with a lower displacement.)
+entry of type \mpiublb{lb\_marker} with a lower displacement.)
\end{example}
In general, if
\begin{displaymath}
Typemap = \{ (type_0 , disp_0 ) , \ldots , (type_{n-1} , disp_{n-1}) \} ,
\end{displaymath}
-then the \mpiterm{lower bound} of $Typemap$ is defined to be
+then the \mpitermdef{lower bound} of $Typemap$ is defined to be
\[
lb(Typemap) = \left\{ \begin{array}{ll}
\min_j disp_j & \parbox{1.5in}{\raggedright if no entry has type
-\mpiterm{lb\_marker}} \\
-\min_j \{ disp_j \ \mbox{such that}\ type_j = \mpiterm{lb\_marker} \} & \mbox{otherwise}
+\mpiublb{lb\_marker}} \\
+\min_j \{ disp_j \ \mbox{such that}\ type_j = \mpiublb{lb\_marker} \} & \mbox{otherwise}
\end{array}
\right. \]
Similarly,
-the \mpiterm{upper bound} of $Typemap$ is defined to be
+the \mpitermdef{upper bound} of $Typemap$ is defined to be
\[
ub(Typemap) = \left\{ \begin{array}{ll}
\max_j(disp_j + sizeof(type_j)) + \epsilon & \parbox{1.4in}{\raggedright if no entry has type
-\mpiterm{ub\_marker}}
-\\ \max_j \{ disp_j \ \mbox{such that}\ type_j = \mpiterm{ub\_marker} \} & \mbox{otherwise}
+\mpiublb{ub\_marker}}
+\\ \max_j \{ disp_j \ \mbox{such that}\ type_j = \mpiublb{ub\_marker} \} & \mbox{otherwise}
\end{array}
\right. \]
-
Then
\label{eq:pt2pt-extent}
\[
@@ -1599,7 +1606,7 @@
or derived types that are neither \ftype{SEQUENCE} nor \ftype{BIND(C)}.
The formal definitions given for the various datatype constructors
-apply now, with the amended definition of \mpiterm{extent}.
+apply now, with the amended definition of \mpitermdefni{extent}\mpitermdefindex{extent of datatypes}.
\begin{rationale}
Before Fortran 2003, \mpifunc{MPI\_TYPE\_CREATE\_STRUCT} could be applied to Fortran common blocks and
@@ -1677,6 +1684,8 @@
\end{users}
\subsection{Extent and Bounds of Datatypes}
+\mpitermtitleindex{extent of datatypes}
+\mpitermtitleindex{bounds of datatypes}
\label{subsec:pt2pt-extent}
\begin{funcdef}{MPI\_TYPE\_GET\_EXTENT(datatype, lb, extent)}
@@ -1740,7 +1749,8 @@
identical to \mpiarg{oldtype}, except that the lower bound of this new
datatype is set to be \mpiarg{lb}, and its upper bound is set to be
\mpiarg{lb $+$ extent}.
-Any previous \mpiterm{lb} and \mpiterm{ub} markers are erased,
+Any previous \mpitermdefni{lb}\mpitermdefindex{lb\_marker!erased}
+and \mpitermdefni{ub}\mpitermdefindex{ub\_marker!erased} markers are erased,
and a new pair of lower bound and upper bound markers are put in the
positions indicated by the \mpiarg{lb} and \mpiarg{extent} arguments.
This affects the behavior of the datatype when used in communication
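
In C, for example (oldtype is assumed to be a previously constructed datatype; the values 0 and 24 are arbitrary):

    MPI_Datatype resized;
    /* New lower bound 0 and extent 24, regardless of any markers
       oldtype carried before. */
    MPI_Type_create_resized(oldtype, 0, 24, &resized);
    MPI_Type_commit(&resized);
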
@@ -1748,6 +1758,8 @@
construction of new derived datatypes.
\subsection{True Extent of Datatypes}
+\mpitermtitleindex{true extent of datatypes}
+\mpitermtitleindex{extent of datatypes!true extent}
\label{subsec:pt2pt-true-extent}
Suppose we implement gather
@@ -1804,11 +1816,11 @@
\]
Then
\[
-true\_lb(Typemap) = min_j \{ disp_j ~:~ type_j \ne \mpiterm{lb\_marker}, \mpiterm{ub\_marker} \},
+true\_lb(Typemap) = min_j \{ disp_j ~:~ type_j \ne \mpiublb{lb\_marker}, \mpiublb{ub\_marker} \},
\]
\[
true\_ub (Typemap) = max_j \{disp_j + sizeof(type_j) ~:~ type_j \ne
-\mpiterm{lb\_marker}, \mpiterm{ub\_marker}\} ,
+\mpiublb{lb\_marker}, \mpiublb{ub\_marker}\} ,
\]
and
\[
@@ -1830,7 +1842,7 @@
\subsection{Commit and Free}
\label{subsec:pt2pt-comfree}
-A datatype object has to be \mpiterm{committed} before it can be used in a
+A datatype object has to be \mpitermdefni{committed}\mpitermdefindex{commit} before it can be used in a
communication.
As an argument in datatype constructors, uncommitted and also
committed datatypes can be used.
@@ -2016,7 +2028,8 @@
elements, then we must have $k \le n \cdot \mpicode{count}$; the $i \cdot n +
j$-th element of the message should have a type that matches $type_j$.
-Type matching is defined according to the type signature of
+\mpitermdefni{Type matching}\mpitermdefindex{type matching}\mpitermdefindex{matching!type}
+is defined according to the type signature of
the corresponding datatypes, that is, the sequence of basic type
components. Type matching does not depend on some aspects of the
datatype definition, such as the displacements (layout in memory) or the
@@ -2194,6 +2207,7 @@
\end{implementors}
\subsection{Correct Use of Addresses}
+\mpitermtitleindex{addresses!correct use}
\label{subsec:pt2pt-segmented}
Successively declared variables in C or Fortran are not necessarily
@@ -2201,12 +2215,12 @@
that displacements do not cross from one variable
to another. Also, in machines with a segmented address space,
addresses are not unique and address arithmetic has some peculiar
-properties. Thus, the use of \mpiterm{addresses},
+properties. Thus, the use of \mpitermdef{addresses},
that is, displacements relative to the
start address \const{MPI\_BOTTOM}, has to be restricted.
Variables belong
-to the same \mpiterm{sequential storage} if they belong to the same
+to the same \mpitermdef{sequential storage} if they belong to the same
array,
to the same \code{COMMON} block in Fortran, or to the same structure in C.
Valid addresses are defined recursively as follows:
@@ -2548,7 +2562,8 @@
If combiner is \const{MPI\_COMBINER\_DUP} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2556,13 +2571,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 0, na = 0, nd = 1.
If combiner is \const{MPI\_COMBINER\_CONTIGUOUS} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2571,13 +2587,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 1, na = 0, nd = 1.
If combiner is \const{MPI\_COMBINER\_VECTOR} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2588,13 +2605,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 3, na = 0, nd = 1.
If combiner is \const{MPI\_COMBINER\_HVECTOR} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2605,13 +2623,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 2, na = 1, nd = 1.
If combiner is \const{MPI\_COMBINER\_INDEXED} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2622,13 +2641,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 2*count+1, na = 0, nd = 1.
If combiner is \const{MPI\_COMBINER\_HINDEXED} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2639,13 +2659,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = count+1, na = count, nd = 1.
If combiner is \const{MPI\_COMBINER\_INDEXED\_BLOCK} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2656,13 +2677,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = count+2, na = 0, nd = 1.
If combiner is \const{MPI\_COMBINER\_HINDEXED\_BLOCK} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2673,13 +2695,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 2, na = count, nd = 1.
If combiner is \const{MPI\_COMBINER\_STRUCT} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2690,13 +2713,14 @@
array\_of\_types & d[0] to d[i[0]-1] & D(1) to D(I(1)) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = count+1, na = count, nd = count.
If combiner is \const{MPI\_COMBINER\_SUBARRAY} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2709,13 +2733,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 3*ndims+2, na = 0, nd = 1.
If combiner is \const{MPI\_COMBINER\_DARRAY} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2731,13 +2756,14 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 4*ndims+4, na = 0, nd = 1.
If combiner is \const{MPI\_COMBINER\_F90\_REAL} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2746,13 +2772,14 @@
r & i[1] & I(2) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 2, na = 0, nd = 0.
If combiner is \const{MPI\_COMBINER\_F90\_COMPLEX} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2761,13 +2788,14 @@
r & i[1] & I(2) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 2, na = 0, nd = 0.
If combiner is \const{MPI\_COMBINER\_F90\_INTEGER} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2775,13 +2803,14 @@
r & i[0] & I(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 1, na = 0, nd = 0.
If combiner is \const{MPI\_COMBINER\_RESIZED} then
-\begin{centering}
+\smallskip%ALLOWLATEX%
+\begin{center}
\begin{tabular}{l c c}
\hline
Constructor argument & C & Fortran location \\
@@ -2791,7 +2820,7 @@
oldtype & d[0] & D(1) \\
\hline
\end{tabular}
-\end{centering}
+\end{center}
\par\noindent
and ni = 0, na = 2, nd = 1.
@@ -3084,7 +3113,7 @@
for (i=0; i < 1000; i++)
if (particle[i].type == 0)
{
- for (k=i+1; (k < 1000)&&(particle[k].type == 0) ; k++);
+ for (k=i+1; (k < 1000)&&(particle[k].type == 0); k++);
zdisp[j] = i;
zblock[j] = k-i;
j++;
@@ -3191,7 +3220,7 @@
for (i=0; i < 1000; i++)
if (particle[i].type == 0)
{
- for (k=i+1; (k < 1000)&&(particle[k].type == 0) ; k++);
+ for (k=i+1; (k < 1000)&&(particle[k].type == 0); k++);
zdisp[j] = i;
zblock[j] = k-i;
j++;
@@ -3347,6 +3376,8 @@
\section{Pack and Unpack}
+\mpitermtitleindex{pack}
+\mpitermtitleindex{unpack}
\label{sec:pt2pt-packing}
Some existing communication libraries provide pack/unpack functions for sending
@@ -3469,9 +3500,9 @@
message from that buffer. (It is helpful to think of internal Fortran files or
\code{sscanf} in C, for a similar function.)
-Several messages can be successively packed into one \mpiterm{packing unit}.
+Several messages can be successively packed into one \mpitermdef{packing unit}.
This
-is effected by several successive \mpiterm{related} calls to \mpiarg{MPI\_PACK},
+is effected by several successive \mpitermdef{related} calls to \mpifunc{MPI\_PACK},
where the first
call provides \mpiarg{position = 0}, and each successive call inputs the value
of \mpiarg{position} that was output by the previous call, and the same values
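
A minimal C sketch of two related calls building one packing unit (dest, tag, and comm are assumed to be defined; error checking elided):

    int    i = 7;
    double d = 3.14;
    char   buf[64];
    int    position = 0;

    /* position carries forward from one related call to the next. */
    MPI_Pack(&i, 1, MPI_INT,    buf, (int)sizeof(buf), &position, comm);
    MPI_Pack(&d, 1, MPI_DOUBLE, buf, (int)sizeof(buf), &position, comm);
    MPI_Send(buf, position, MPI_PACKED, dest, tag, comm);
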
@@ -3745,6 +3776,9 @@
\end{example}
\section{Canonical \texorpdfstring{\mpifunc{MPI\_PACK}}{MPI\_PACK} and \texorpdfstring{\mpifunc{MPI\_UNPACK}}{MPI\_UNPACK}}
+\mpitermtitleindex{canonical pack and unpack}
+\mpitermtitleindex{pack!canonical}
+\mpitermtitleindex{unpack!canonical}
\label{canonical_pack}
These functions read/write data to/from the buffer in the ``external32'' data
Index: chap-deprecated/deprecated.tex
===================================================================
--- chap-deprecated/deprecated.tex (revision 2030)
+++ chap-deprecated/deprecated.tex (working copy)
@@ -1,4 +1,5 @@
\chapter{Deprecated Functions}
+\mpitermtitleindex{deprecated functions}
\label{chap:deprecated}
\section{Deprecated since \texorpdfstring{\mpiiidoto/}{MPI-2.0}}
Index: chap-tools/tools-3.tex
===================================================================
--- chap-tools/tools-3.tex (revision 2030)
+++ chap-tools/tools-3.tex (working copy)
@@ -3,6 +3,7 @@
%% BRONIS: Let's use a more general chapter title that is consistent
%% BRONIS: with other chapter titles like ``Process Creation and Management''
\chapter{Tool Support}
+\mpitermtitleindex{tool support}
\label{sec:tools}
\label{chap:tools}
Index: chap-tools/prof.tex
===================================================================
--- chap-tools/prof.tex (revision 2030)
+++ chap-tools/prof.tex (working copy)
@@ -1,4 +1,5 @@
\section{Profiling Interface}
+\mpitermtitleindex{profiling interface}
\label{sec:prof}
\label{chap:prof}
@@ -15,6 +16,7 @@
those allowed as macros (See Section~\ref{sec:macros}), may be accessed with a name shift. This requires,
in C and Fortran, an alternate entry point name, with the prefix
\code{PMPI\_} for each \mpi/
+\mpitermdefindex{PMPI\_}%
function in each provided language binding and language support method.
For routines implemented
as macros, it is still required that the \mpifunc{PMPI\_} version be
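
The canonical use of the name shift, sketched in C: a tool provides its own MPI_Send, and the real implementation remains reachable as PMPI_Send.

    static long send_count = 0;  /* tool-side bookkeeping */

    int MPI_Send(const void *buf, int count, MPI_Datatype datatype,
                 int dest, int tag, MPI_Comm comm)
    {
        send_count++;
        return PMPI_Send(buf, count, datatype, dest, tag, comm);
    }
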
@@ -134,7 +136,6 @@
\item
Adding user events to a trace file.
\end{itemize}
-
These requirements are met by use of \mpifunc{MPI\_PCONTROL}.
\begin{funcdef}{MPI\_PCONTROL(level, \ldots)}
Index: chap-tools/mpit.tex
===================================================================
--- chap-tools/mpit.tex (revision 2030)
+++ chap-tools/mpit.tex (working copy)
@@ -1,6 +1,7 @@
% LLNL-MI-422102-DRAFT
\section{The \MPI/ Tool Information Interface}
+\mpitermtitleindex{tool information interface}
\label{sec:mpit}
\MPI/ implementations often use internal variables to control their
@@ -92,6 +93,7 @@
\subsection{Verbosity Levels}
+\mpitermtitleindex{verbosity levels -- tools interface}
\label{sec:mpit:verbose}
The \MPI/ tool information interface provides access to internal configuration
@@ -460,6 +462,7 @@
\subsection{Control Variables}
+\mpitermtitleindex{control variables -- tools interface}
\label{sec:mpit:cvar}
The routines described in this section of the \MPI/ tool information interface
@@ -736,7 +739,7 @@
that stores the object's handle.
The argument \mpiarg{obj\_handle} is ignored if the \mpifunc{MPI\_T\_CVAR\_GET\_INFO} call for this control variable returned \const{MPI\_T\_BIND\_NO\_OBJECT} in the argument \mpishortarg{bind}.
The handle allocated to reference the variable is returned
-in the argument \mpiarg{handle}. Upon successful return,
+in the argument \mpishortarg{handle}. Upon successful return,
\mpishortarg{count} contains the number of elements (of the datatype
returned by a previous \mpifunc{MPI\_T\_CVAR\_GET\_INFO} call)
used to represent this variable.
@@ -844,23 +847,23 @@
%%ENDHEADER
\begin{verbatim}
int getValue_int_comm(int index, MPI_Comm comm, int *val) {
- int err,count;
- MPI_T_cvar_handle handle;
+ int err,count;
+ MPI_T_cvar_handle handle;
- /* This example assumes that the variable index */
- /* can be bound to a communicator */
+ /* This example assumes that the variable index */
+ /* can be bound to a communicator */
- err=MPI_T_cvar_handle_alloc(index,&comm,&handle,&count);
- if (err!=MPI_SUCCESS) return err;
+ err=MPI_T_cvar_handle_alloc(index,&comm,&handle,&count);
+ if (err!=MPI_SUCCESS) return err;
- /* The following assumes that the variable is */
- /* represented by a single integer */
+ /* The following assumes that the variable is */
+ /* represented by a single integer */
- err=MPI_T_cvar_read(handle,val);
- if (err!=MPI_SUCCESS) return err;
+ err=MPI_T_cvar_read(handle,val);
+ if (err!=MPI_SUCCESS) return err;
- err=MPI_T_cvar_handle_free(&handle);
- return err;
+ err=MPI_T_cvar_handle_free(&handle);
+ return err;
}
\end{verbatim}
@@ -868,6 +871,7 @@
\subsection{Performance Variables}
+\mpitermtitleindex{performance variables -- tools interface}
\label{sec:mpit:pvar}
The following section focuses on the ability to list and to query
@@ -1165,7 +1169,7 @@
This routine returns \const{MPI\_SUCCESS} on success and returns
\const{MPI\_T\_ERR\_INVALID\_NAME} if \mpiarg{name} does not match the
-name of any performance variable provided by the implementation
+name of any performance variable of the specified \mpiarg{var\_class} provided by the implementation
at the time of the call.
\begin{rationale}
@@ -1235,7 +1239,7 @@
This routine binds the performance variable specified by the argument
\mpiarg{index} to an \MPI/ object in the session identified
-by the parameter \mpiarg{session}. The object is passed in the
+by the parameter \mpishortarg{session}. The object is passed in the
argument \mpiarg{obj\_handle} as an address to a local variable
that stores the object's handle.
The argument \mpiarg{obj\_handle} is ignored if the \mpifunc{MPI\_T\_PVAR\_GET\_INFO} call for this performance variable returned \const{MPI\_T\_BIND\_NO\_OBJECT} in the argument \mpiarg{bind}.
@@ -1313,7 +1317,7 @@
If the constant \const{MPI\_T\_PVAR\_ALL\_HANDLES} is passed in
\mpiarg{handle}, the \MPI/ implementation attempts to start all variables
-within the session identified by the parameter \mpiarg{session} for
+within the session identified by the parameter \mpishortarg{session} for
which handles have been allocated. In this case, the routine returns
\const{MPI\_SUCCESS} if all variables are started successfully
(even if there are no non-continuous variables to be started),
@@ -1339,7 +1343,7 @@
If the constant \const{MPI\_T\_PVAR\_ALL\_HANDLES} is passed in
\mpiarg{handle}, the \MPI/ implementation attempts to stop all
variables within the session identified by the parameter
-\mpiarg{session} for which handles have been allocated. In this case,
+\mpishortarg{session} for which handles have been allocated. In this case,
the routine returns \const{MPI\_SUCCESS} if all variables are stopped
successfully
(even if there are no non-continuous variables to be stopped),
@@ -1452,7 +1456,7 @@
\begin{implementors}
Sampling-based tools rely on the ability to call the \MPI/
tool information interface, in particular routines to start, stop,
-read, write and reset performance variables, from any program
+read, write, and reset performance variables, from any program
context, including asynchronous contexts such as signal handlers.
\MPI/ implementations should strive, if possible in their particular
environment, to enable these usage scenarios for all or a subset of the
@@ -1513,67 +1517,67 @@
#include <stdlib.h>
#include <string.h>
#include <assert.h>
-#include <mpi.h>
+#include <mpi.h>
/* Global variables for the tool */
static MPI_T_pvar_session session;
static MPI_T_pvar_handle handle;
int MPI_Init(int *argc, char ***argv ) {
- int err, num, i, index, namelen, verbosity;
- int var_class, bind, threadsup;
- int readonly, continuous, atomic, count;
- char name[18];
- MPI_Comm comm;
- MPI_Datatype datatype;
- MPI_T_enum enumtype;
+ int err, num, i, index, namelen, verbosity;
+ int var_class, bind, threadsup;
+ int readonly, continuous, atomic, count;
+ char name[18];
+ MPI_Comm comm;
+ MPI_Datatype datatype;
+ MPI_T_enum enumtype;
- err=PMPI_Init(argc,argv);
- if (err!=MPI_SUCCESS) return err;
+ err=PMPI_Init(argc,argv);
+ if (err!=MPI_SUCCESS) return err;
- err=PMPI_T_init_thread(MPI_THREAD_SINGLE,&threadsup);
- if (err!=MPI_SUCCESS) return err;
+ err=PMPI_T_init_thread(MPI_THREAD_SINGLE,&threadsup);
+ if (err!=MPI_SUCCESS) return err;
- err=PMPI_T_pvar_get_num(&num);
- if (err!=MPI_SUCCESS) return err;
- index=-1;
- i=0;
- while ((i<num) && (index<0) && (err==MPI_SUCCESS)) {
- /* Pass a buffer that is at least one character longer than */
- /* the name of the variable being searched for to avoid */
- /* finding variables that have a name that has a prefix */
- /* equal to the name of the variable being searched. */
- namelen=18;
- err=PMPI_T_pvar_get_info(i, name, &namelen, &verbosity,
- &var_class, &datatype, &enumtype, NULL, NULL, &bind,
- &readonly, &continuous, &atomic);
- if (strcmp(name,"MPI_T_UMQ_LENGTH")==0) index=i;
- i++; }
- if (err!=MPI_SUCCESS) return err;
+ err=PMPI_T_pvar_get_num(&num);
+ if (err!=MPI_SUCCESS) return err;
+ index=-1;
+ i=0;
+ while ((i<num) && (index<0) && (err==MPI_SUCCESS)) {
+ /* Pass a buffer that is at least one character longer than */
+ /* the name of the variable being searched for to avoid */
+ /* finding variables that have a name that has a prefix */
+ /* equal to the name of the variable being searched. */
+ namelen=18;
+ err=PMPI_T_pvar_get_info(i, name, &namelen, &verbosity,
+ &var_class, &datatype, &enumtype, NULL, NULL, &bind,
+ &readonly, &continuous, &atomic);
+ if (strcmp(name,"MPI_T_UMQ_LENGTH")==0) index=i;
+ i++; }
+ if (err!=MPI_SUCCESS) return err;
- /* this could be handled in a more flexible way for a generic tool */
- assert(index>=0);
- assert(var_class==MPI_T_PVAR_CLASS_LEVEL);
- assert(datatype==MPI_INT);
- assert(bind==MPI_T_BIND_MPI_COMM);
+ /* this could be handled in a more flexible way for a generic tool */
+ assert(index>=0);
+ assert(var_class==MPI_T_PVAR_CLASS_LEVEL);
+ assert(datatype==MPI_INT);
+ assert(bind==MPI_T_BIND_MPI_COMM);
- /* Create a session */
- err=PMPI_T_pvar_session_create(&session);
- if (err!=MPI_SUCCESS) return err;
+ /* Create a session */
+ err=PMPI_T_pvar_session_create(&session);
+ if (err!=MPI_SUCCESS) return err;
- /* Get a handle and bind to MPI_COMM_WORLD */
- comm=MPI_COMM_WORLD;
- err=PMPI_T_pvar_handle_alloc(session, index, &comm, &handle, &count);
- if (err!=MPI_SUCCESS) return err;
+ /* Get a handle and bind to MPI_COMM_WORLD */
+ comm=MPI_COMM_WORLD;
+ err=PMPI_T_pvar_handle_alloc(session, index, &comm, &handle, &count);
+ if (err!=MPI_SUCCESS) return err;
- /* this could be handled in a more flexible way for a generic tool */
- assert(count==1);
+ /* this could be handled in a more flexible way for a generic tool */
+ assert(count==1);
- /* Start variable */
- err=PMPI_T_pvar_start(session, handle);
- if (err!=MPI_SUCCESS) return err;
+ /* Start variable */
+ err=PMPI_T_pvar_start(session, handle);
+ if (err!=MPI_SUCCESS) return err;
- return MPI_SUCCESS;
+ return MPI_SUCCESS;
}
\end{verbatim}
@@ -1593,20 +1597,20 @@
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source,
int tag, MPI_Comm comm, MPI_Status *status)
{
- int value, err;
+ int value, err;
- if (comm==MPI_COMM_WORLD) {
- err=PMPI_T_pvar_read(session, handle, &value);
- if ((err==MPI_SUCCESS) && (value>THRESHOLD))
- {
+ if (comm==MPI_COMM_WORLD) {
+ err=PMPI_T_pvar_read(session, handle, &value);
+ if ((err==MPI_SUCCESS) && (value>THRESHOLD))
+ {
/* tool identified receive called with long UMQ */
- /* execute tool functionality, */
- /* e.g., gather and print call stack */
- }
- }
+ /* execute tool functionality, */
+ /* e.g., gather and print call stack */
+ }
+ }
- return PMPI_Recv(buf, count, datatype, source, tag, comm, status);
-}
+ return PMPI_Recv(buf, count, datatype, source, tag, comm, status);
+}
\end{verbatim}
\paragraph{Part 3 --- Termination:}
@@ -1621,11 +1625,11 @@
\begin{verbatim}
int MPI_Finalize(void)
{
- int err;
- err=PMPI_T_pvar_handle_free(session, &handle);
- err=PMPI_T_pvar_session_free(&session);
- err=PMPI_T_finalize();
- return PMPI_Finalize();
+ int err;
+ err=PMPI_T_pvar_handle_free(session, &handle);
+ err=PMPI_T_pvar_session_free(&session);
+ err=PMPI_T_finalize();
+ return PMPI_Finalize();
}
\end{verbatim}
@@ -1877,7 +1881,8 @@
\multicolumn{2}{|l|}{Return Codes for All Functions in the \MPI/ Tool Information Interface}\\
\hline
\const{MPI\_SUCCESS} & Call completed successfully\\
-\const{MPI\_T\_ERR\_INVALID} & Invalid use of the interface or bad parameter value(s) \\
+\const{MPI\_T\_ERR\_INVALID} & Invalid use of the interface or bad parameter \\
+ & value(s)\\
\const{MPI\_T\_ERR\_MEMORY} & Out of memory\\
\const{MPI\_T\_ERR\_NOT\_INITIALIZED} & Interface not initialized\\
\const{MPI\_T\_ERR\_CANNOT\_INIT} & Interface not in the state to be initialized\\
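
The clarified MPI_T_PVAR_GET_INDEX semantics above (a name matches only within the requested variable class) suggest a simpler way to find the unexpected-message-queue variable than the index scan in the Part 1 example; a minimal sketch, reusing the implementation-specific name "MPI_T_UMQ_LENGTH" assumed by that example:

#include <mpi.h>

/* Sketch: locate the level-class pvar "MPI_T_UMQ_LENGTH" directly with
   MPI_T_pvar_get_index (new in MPI 3.1) instead of iterating over all
   variable indices. */
int find_umq_index(int *index)
{
    /* Returns MPI_T_ERR_INVALID_NAME if no variable of class
       MPI_T_PVAR_CLASS_LEVEL with this name exists at the time of
       the call. */
    return MPI_T_pvar_get_index("MPI_T_UMQ_LENGTH",
                                MPI_T_PVAR_CLASS_LEVEL, index);
}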
Index: chap-removed/removed.tex
===================================================================
--- chap-removed/removed.tex (revision 2030)
+++ chap-removed/removed.tex (working copy)
@@ -1,4 +1,5 @@
\chapter{Removed Interfaces}
+\mpitermtitleindex{removed interfaces}
\label{chap:removed}
\label{sec:removed}
Index: chap-one-side/one-side-2.tex
===================================================================
--- chap-one-side/one-side-2.tex (revision 2030)
+++ chap-one-side/one-side-2.tex (working copy)
@@ -13,12 +13,15 @@
\chapter{One-Sided Communications}
+\mpitermtitleindexsubmain{one-sided}{communication}
\label{chap:one-side-2}
\label{sec:one-side-2}
\section{Introduction}
-Remote Memory Access (\RMA/) extends the communication mechanisms of \MPI/ by
+\mpitermdefni{Remote Memory Access}\mpitermdefindex{Remote Memory Access|see{RMA}}
+\mpitermdefni{(\RMA/)}\mpitermdefindex{RMA}\mpitermdefindex{communication!RMA}
+extends the communication mechanisms of \MPI/ by
allowing one process to specify all communication parameters, both for
the sending side and for the receiving side.
This mode of communication facilitates the coding of some applications
@@ -52,9 +55,9 @@
in the same manner.
Message-passing communication achieves two effects:
-\emph{communication} of data from sender to
+\mpiterm{communication} of data from sender to
receiver and
-\emph{synchronization} of sender
+\mpiterm{synchronization} of sender
with receiver.
The \RMA/ design separates these two functions.
The following communication calls are provided:
@@ -70,9 +73,10 @@
remote read and update, and remote atomic swap operations as
``accumulate'' operations.
-\MPI/ supports two fundamentally different memory models: separate
-and unified. The
-separate model makes no assumption about memory consistency and is
+\MPI/ supports two fundamentally different \mpitermni{memory models}\mpitermindex{memory model}:
+\mpitermni{separate}\mpitermindex{separate memory model}\mpitermindex{memory model!separate}
+and \mpitermni{unified}\mpitermindex{unified memory model}\mpitermindex{memory model!unified}.
+The separate model makes no assumption about memory consistency and is
highly portable. This model is similar to that of weakly coherent memory
systems: the user must impose correct ordering of memory accesses
through synchronization calls. The
@@ -94,8 +98,8 @@
\RMA/ functions might need support for asynchronous communication agents in
software (handlers, threads, etc.) in a distributed memory environment.
-We shall denote by \mpiterm{origin} the process that performs the call,
-and by \mpiterm{target} the process in which the memory is accessed.
+We shall denote by \mpitermdef{origin} the process that performs the call,
+and by \mpitermdef{target} the process in which the memory is accessed.
Thus, in a put
operation, source=origin and destination=target; in a get operation, source=target and destination=origin.
@@ -129,6 +133,7 @@
user to dynamically control which memory is exposed by the window.
\subsection{Window Creation}
+\mpitermtitleindex{window!creation}
\label{chap:one-side-2:win_create}
\begin{funcdef}{MPI\_WIN\_CREATE(base, size, disp\_unit, info, comm, win)}
@@ -173,17 +178,33 @@
process, at window creation.
\begin{rationale}
-The window size is specified using an address-sized integer, to allow windows that span
-more than 4~GB of
-address space. (Even if the physical memory size is less than 4~GB, the
-address range may be larger than 4~GB, if addresses are not contiguous.)
+The window size is specified using an address-sized integer%
+%% B3.1
+\color{red}%
+%% 3.1Note: Nothing in C says that an int is 4 bytes. This text is just wrong.
+%, to allow windows that span
+%more than 4~GB of
+%address space. (Even if the physical memory size is less than 4~GB, the
+%address range may be larger than 4~GB, if addresses are not contiguous.)
+, rather than a basic integer type, to allow windows that span more memory than
+can be described with a basic integer type.
+%% E3.1
+\color{black}%
\end{rationale}
\begin{users}
Common choices for \mpiarg{disp\_unit}
are 1 (no scaling), and (in C syntax) \code{sizeof(type)}, for a
window that consists of an array of elements of type \code{type}. The
-later choice will allow one to use array indices in \RMA/ calls, and have those scaled correctly to byte displacements, even in a heterogeneous environment.
+%% B3.1
+\color{red}%
+%later
+latter
+%% E3.1
+\color{black}%
+choice will allow one to use array indices in \RMA/ calls,
+and have those scaled correctly to byte displacements, even in a
+heterogeneous environment.
\end{users}
The \mpiarg{info} argument provides
@@ -221,7 +242,13 @@
\begin{users}
The info query mechanism described in Section~\ref{subsec:window-info}
-can be used to query the specified info arguments windows that have been
+can be used to query the specified info arguments
+%% B3.1
+\color{red}%
+for
+%% E3.1
+\color{black}%
+windows that have been
passed to a library. It is recommended that libraries check attached
info keys for each passed window.
\end{users}
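
As an illustration of that advice, a minimal sketch of a library inspecting a window handed to it; "no_locks" is one of the predefined window info keys:

#include <mpi.h>

/* Sketch: query the info attached to a passed window, as recommended
   above.  Only predefined keys are assumed here. */
void check_window_info(MPI_Win win)
{
    MPI_Info info;
    char value[MPI_MAX_INFO_VAL + 1];
    int flag;

    MPI_Win_get_info(win, &info);
    MPI_Info_get(info, "no_locks", MPI_MAX_INFO_VAL, value, &flag);
    if (flag) {
        /* value is "true" or "false"; adapt library behavior here */
    }
    MPI_Info_free(&info);
}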
@@ -284,6 +311,7 @@
\end{implementors}
\subsection{Window That Allocates Memory}
+\mpitermtitleindex{window!allocation}
\label{sec:winalloc}
%% Alloc_mem uses baseptr, which distinguishes this from the base in win_create
@@ -341,13 +369,13 @@
\begin{verbatim}
INTERFACE MPI_WIN_ALLOCATE
SUBROUTINE MPI_WIN_ALLOCATE(SIZE, DISP_UNIT, INFO, COMM, BASEPTR, &
- WIN, IERROR)
+ WIN, IERROR)
IMPORT :: MPI_ADDRESS_KIND
INTEGER DISP_UNIT, INFO, COMM, WIN, IERROR
INTEGER(KIND=MPI_ADDRESS_KIND) SIZE, BASEPTR
END SUBROUTINE
SUBROUTINE MPI_WIN_ALLOCATE_CPTR(SIZE, DISP_UNIT, INFO, COMM, BASEPTR, &
- WIN, IERROR)
+ WIN, IERROR)
USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR
IMPORT :: MPI_ADDRESS_KIND
INTEGER :: DISP_UNIT, INFO, COMM, WIN, IERROR
@@ -380,6 +408,7 @@
%The following info key is predefined:
\subsection{Window That Allocates Shared Memory}
+\mpitermtitleindexmainsub{window}{shared memory allocation}
\label{sec:winallocshared}
%% Alloc_mem uses baseptr, which distinguishes this from the base in win_create
@@ -407,7 +436,11 @@
This is a collective call executed by all processes in the group of
-\mpiarg{comm}. On each process $i$, it allocates memory of at least
+\mpiarg{comm}. On each process%
+%% B3.1
+% $i$
+%% E3.1
+, it allocates memory of at least
\mpiarg{size} bytes that is shared among all processes in \mpiarg{comm},
and returns a pointer to
the locally allocated segment in \mpiarg{baseptr} that can be used for
@@ -447,13 +480,13 @@
\begin{verbatim}
INTERFACE MPI_WIN_ALLOCATE_SHARED
SUBROUTINE MPI_WIN_ALLOCATE_SHARED(SIZE, DISP_UNIT, INFO, COMM, &
- BASEPTR, WIN, IERROR)
+ BASEPTR, WIN, IERROR)
IMPORT :: MPI_ADDRESS_KIND
INTEGER DISP_UNIT, INFO, COMM, WIN, IERROR
INTEGER(KIND=MPI_ADDRESS_KIND) SIZE, BASEPTR
END SUBROUTINE
SUBROUTINE MPI_WIN_ALLOCATE_SHARED_CPTR(SIZE, DISP_UNIT, INFO, COMM, &
- BASEPTR, WIN, IERROR)
+ BASEPTR, WIN, IERROR)
USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR
IMPORT :: MPI_ADDRESS_KIND
INTEGER :: DISP_UNIT, INFO, COMM, WIN, IERROR
@@ -491,12 +524,12 @@
The consistency of load/store accesses from/to the shared memory as
observed by the user program depends on the architecture. A consistent
-view can be created in the unified memory model (see
+view can be created in the \mpiterm{unified memory model}\mpitermindex{memory model!unified} (see
Section~\ref{sec:1sided-memmodel}) by utilizing the window
synchronization functions (see Section~\ref{sec:1sided-sync}) or
explicitly completing outstanding store accesses (e.g., by calling
\mpifunc{MPI\_WIN\_FLUSH}). \MPI/ does not define semantics for
-accessing shared memory windows in the separate memory model.
+accessing shared memory windows in the \mpiterm{separate memory model}\mpitermindex{memory model!separate}.
\begin{funcdef}{MPI\_WIN\_SHARED\_QUERY(win, rank, size, disp\_unit, baseptr)}
\funcarg{\IN}{win}{shared memory window object (handle)}
@@ -519,7 +552,13 @@
different process-local addresses for the same physical memory on different
processes. The returned memory can be used for load/store accesses subject to
the constraints defined in Section~\ref{sec:1sided-semantics}. This function
-can only be called with windows of type
+can only be called with windows of
+%% B3.1
+\color{red}%
+% type
+flavor
+%% E3.1
+\color{black}%
\mpiarg{MPI\_WIN\_FLAVOR\_SHARED}. If the passed window is not of flavor
\mpiarg{MPI\_WIN\_FLAVOR\_SHARED}, the error
\error{MPI\_ERR\_RMA\_FLAVOR} is raised.
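
For illustration, a minimal sketch of the allocate-and-query pattern; it assumes comm spans a single shared-memory node, e.g., the result of MPI_COMM_SPLIT_TYPE with MPI_COMM_TYPE_SHARED:

#include <mpi.h>

/* Sketch: allocate a shared-memory window and obtain the base address
   of rank 0's segment.  Valid only for windows of flavor
   MPI_WIN_FLAVOR_SHARED. */
int shared_window_sketch(MPI_Comm comm)
{
    double *mybase, *base0;
    MPI_Aint size;
    int disp_unit;
    MPI_Win win;

    MPI_Win_allocate_shared(100 * sizeof(double), sizeof(double),
                            MPI_INFO_NULL, comm, &mybase, &win);
    MPI_Win_shared_query(win, 0, &size, &disp_unit, &base0);
    /* base0 may now be used for load/store access, subject to the
       synchronization rules for shared memory windows */
    MPI_Win_free(&win);
    return MPI_SUCCESS;
}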
@@ -545,13 +584,13 @@
\begin{verbatim}
INTERFACE MPI_WIN_SHARED_QUERY
SUBROUTINE MPI_WIN_SHARED_QUERY(WIN, RANK, SIZE, DISP_UNIT, &
- BASEPTR, IERROR)
+ BASEPTR, IERROR)
IMPORT :: MPI_ADDRESS_KIND
INTEGER WIN, RANK, DISP_UNIT, IERROR
INTEGER (KIND=MPI_ADDRESS_KIND) SIZE, BASEPTR
END SUBROUTINE
SUBROUTINE MPI_WIN_SHARED_QUERY_CPTR(WIN, RANK, SIZE, DISP_UNIT, &
- BASEPTR, IERROR)
+ BASEPTR, IERROR)
USE, INTRINSIC :: ISO_C_BINDING, ONLY : C_PTR
IMPORT :: MPI_ADDRESS_KIND
INTEGER :: WIN, RANK, DISP_UNIT, IERROR
@@ -562,11 +601,12 @@
\end{verbatim}
The base procedure name of this overloaded function is
-\mpifunc{MPI\_WIN\_SHARED\_QUERY\_CPTR}. The implied specific
+\flushline\mpifunc{MPI\_WIN\_SHARED\_QUERY\_CPTR}. The implied specific
procedure names
are described in \sectionref{sec:f90:linker-names}.
\subsection{Window of Dynamically Attached Memory}
+\mpitermtitleindexmainsub{window}{dynamically attached memory}
\label{sec:rma-create-dynamic}
The \MPIII/ \RMA/ model requires the user to identify the local memory
@@ -631,7 +671,15 @@
\begin{users}
Users are cautioned that displacement arithmetic can overflow in
variables of type \type{MPI\_Aint} and result in unexpected values on some
-platforms. This issue may be addressed in a future version of \MPI/.
+platforms.
+%% B3.1
+\color{red}%
+The \mpifunc{MPI\_AINT\_ADD} and \mpifunc{MPI\_AINT\_DIFF}
+functions can be used to safely perform address arithmetic with \type{MPI\_Aint}
+displacements.
+%This issue may be addressed in a future version of \MPI/.
+%% E3.1
+\color{black}%
\end{users}
\begin{implementors}
@@ -645,9 +693,16 @@
from any process.
\end{implementors}
-Memory in this window may not be used as the target of one-sided
-accesses in this window until it is attached using the function
-\mpifunc{MPI\_WIN\_ATTACH}.
+%% B3.1
+\color{red}%
+Memory at the target cannot be accessed with this window until that memory
+has been
+attached using the function \mpifunc{MPI\_WIN\_ATTACH}.
+%Memory in this window may not be used as the target of one-sided
+%accesses in this window until it is attached using the function
+%\mpifunc{MPI\_WIN\_ATTACH}.
+%% E3.1
+\color{black}%
That is, in addition to using \mpifunc{MPI\_WIN\_CREATE\_DYNAMIC} to
create an \MPI/ window, the user must use \mpifunc{MPI\_WIN\_ATTACH}
before any local memory may be the target of an \MPI/ \RMA/ operation.
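
A minimal sketch combining the two points above: memory must be attached before it can be the target of RMA operations, and displacement arithmetic should use the new MPI_AINT_ADD/MPI_AINT_DIFF routines; the buffer and window arguments are illustrative only:

#include <mpi.h>

/* Sketch: attach a local buffer to a dynamic window and compute the
   displacement of element i portably with MPI_Aint_diff (new in
   MPI 3.1), avoiding raw arithmetic on MPI_Aint values, which can
   overflow on some platforms. */
MPI_Aint attach_and_locate(MPI_Win win, double *buf, MPI_Aint nelems, int i)
{
    MPI_Aint base, elem;

    MPI_Win_attach(win, buf, nelems * sizeof(double));
    MPI_Get_address(buf, &base);
    MPI_Get_address(&buf[i], &elem);
    return MPI_Aint_diff(elem, base);   /* displacement of buf[i] */
}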
@@ -683,8 +738,12 @@
regions may be attached to the same window.
\begin{rationale}
-Requiring that memory be explicitly attached before it is exposed to
-one-sided access by other processes can significantly simplify
+Requiring that memory be explicitly attached before it is exposed to
+one-sided access by other processes can
+%% B3.1
+% significantly
+%% E3.1
+simplify
implementations and improve performance. The ability to make memory
available for \RMA/ operations without requiring a collective
\mpifunc{MPI\_WIN\_CREATE} call is needed for some one-sided programming
@@ -1017,6 +1076,7 @@
\end{users}
\section{Communication Calls}
+\mpitermtitleindex{RMA!communication calls}
\label{sec:onesided-putget}
\MPI/ supports the following \RMA/ communication calls: \mpifunc{MPI\_PUT}
@@ -1031,10 +1091,10 @@
before the accumulate operation; and
\mpifunc{MPI\_COMPARE\_AND\_SWAP} performs a remote atomic compare and swap
operation.
-These operations are \emph{nonblocking}: the call initiates
+These operations are \mpiterm{nonblocking}: the call initiates
the transfer, but the transfer may continue after the call returns.
The transfer is completed, at the origin or both the origin and the target, when
-a subsequent \emph{synchronization} call is issued by the caller on
+a subsequent \mpiterm{synchronization} call is issued by the caller on
the involved window object. These synchronization calls are described in
\sectionref{sec:1sided-sync}.
Transfers can also be completed with calls to flush routines; see
@@ -1177,8 +1237,15 @@
is as if the target datatype object was defined at the target process
by the same sequence of calls used to define it at the origin process.
The target datatype must
-contain only relative displacements, not absolute addresses. The same
-holds for get and accumulate.
+contain only relative displacements, not absolute addresses.
+The same
+holds for get and accumulate
+%% B3.1
+\color{red}%
+operations%
+%% E3.1
+\color{black}%
+.
\begin{users}
The \mpiarg{target\_datatype} argument is a handle to a datatype
@@ -1205,11 +1272,22 @@
A high-quality
implementation will attempt to
prevent remote accesses to memory outside the
-window that was exposed by the process. This, both for debugging
-purposes, and for protection with client-server codes that use \RMA/.
-I.e., a high-quality implementation will check, if possible,
+window that was exposed by the process.
+%% B3.1
+\color{red}%
+This is important both for debugging
+purposes and for protection with client-server codes that use \RMA/.
+%% E3.1
+\color{black}%
+%% B3.1
+\color{red}%
+%I.e.,
+That is,
+%% E3.1
+\color{black}%
+a high-quality implementation will check, if possible,
window bounds on each \RMA/ call,
-and raise an \MPI/ exception at the origin call if an out-of-bound
+and raise an \MPI/ exception at the origin call if an out-of-bound
situation occurs.
Note that the condition can be checked at the origin.
Of course, the added safety achieved by such checks has to be weighed
@@ -1422,11 +1500,17 @@
\label{sec:1sided-accumulate}
It is often useful in a put operation to combine the data moved to the
-target process with the data that resides at that process, rather
-then replacing the data there. This will allow, for example, the
-accumulation of
+target process with the data that resides at that process, rather
+%% B3.1
+\color{red}%
+%then replacing the data there.
+than replacing it.
+%% E3.1
+\color{black}%
+This will allow, for example, the
+accumulation of
a sum by having all involved processes add their
-contributions to the
+contributions to the
sum variable in the memory of one process.
The accumulate functions have slightly different
semantics with respect to overlapping data accesses than
@@ -1749,6 +1833,7 @@
\subsection{Request-based RMA Communication Operations}
+\mpitermtitleindex{RMA!communication calls!request-based}
\label{sec:1sided-req}
Request-based \RMA/ communication operations allow the user to
@@ -1929,10 +2014,13 @@
the operation has been completed at the target window.
\section{Memory Model}
+\mpitermtitleindex{RMA!memory model}
\label{sec:1sided-memmodel}
-The memory semantics of \RMA/ are best understood by using the concept of public
-and private window copies. We assume that systems have a public
+The memory semantics of \RMA/ are best understood by using the concept of
+\mpitermni{public}\mpitermindex{public window copy}
+and \mpitermni{private}\mpitermindex{private window copy} window copies.
+We assume that systems have a public
memory region that is addressable by all processes (e.g., the shared memory
in shared memory machines or the exposed main memory in distributed
memory machines). In addition, most machines have fast private
@@ -1948,12 +2036,14 @@
private memory. Thus, in coherent memory, the public and the private
window are identical while they remain logically separate in the
non-coherent case.
-\MPI/ thus differentiates between two memory models called \emph{RMA
-unified}, if public and private window are logically identical, and \emph{RMA
-separate}, otherwise.
+\MPI/ thus differentiates between two
+\mpitermdefni{memory models}\mpitermdefindex{memory model} called
+\mpitermdefni{\RMA/ unified}\mpitermdefindex{unified memory model},
+if public and private window are logically identical, and
+\mpitermdefni{\RMA/ separate}\mpitermdefindex{separate memory model}, otherwise.
In the \RMA/ separate model, there is only one instance of each variable
-in process memory, but a distinct \emph{public} copy of the variable for
+in process memory, but a distinct \mpitermni{public} copy of the variable for
each window that contains it. A load accesses the instance in process
memory (this includes \MPI/ sends). A local store accesses and updates the
instance in process memory (this includes \MPI/ receives), but the
@@ -2002,18 +2092,20 @@
\const{MPI\_WIN\_SEPARATE}.
\section{Synchronization Calls}
+\mpitermtitleindex{RMA!synchronization calls}
+\mpitermtitleindex{synchronization calls -- RMA}
\label{sec:1sided-sync}
\RMA/ communications fall in two categories:
\begin{itemize}
\item
-\mpiterm{active target} communication, where data is moved from the memory of one
+\mpitermdef{active target communication}, where data is moved from the memory of one
process to the memory of another, and both are explicitly involved in the
communication. This communication pattern is similar to message
passing, except that all the data transfer arguments are provided by
one process, and the second process only participates in the synchronization.
\item
-\mpiterm{passive target} communication, where data is moved from the memory of one
+\mpitermdef{passive target communication}, where data is moved from the memory of one
process to the memory of another, and only the origin process is
explicitly involved
in
@@ -2028,7 +2120,7 @@
\end{itemize}
\RMA/ communication calls with argument \mpiarg{win} must occur at a process
-only within an \mpiterm{access epoch} for \mpiarg{win}. Such an epoch
+only within an \mpitermdef{access epoch} for \mpiarg{win}. Such an epoch
starts with an \RMA/ synchronization
call on \mpiarg{win}; it proceeds with zero or more \RMA/
communication calls (e.g., \mpifunc{MPI\_PUT}, \mpifunc{MPI\_GET} or
@@ -2045,7 +2137,7 @@
an epoch.
In active target communication, a target window can be accessed by \RMA/
-operations only within an \mpiterm{exposure epoch}. Such an epoch is
+operations only within an \mpitermdef{exposure epoch}. Such an epoch is
started and completed by \RMA/ synchronization calls executed by the
target process. Distinct exposure epochs at a process
on the same window must be disjoint, but such an exposure epoch
@@ -2155,10 +2247,10 @@
temporal order implied by the synchronizations: the \texttt{post}
occurs before the matching \texttt{start}, and \texttt{complete} occurs before
the
-matching \texttt{wait}. However, such \mpiterm{strong synchronization} is more
+matching \texttt{wait}. However, such \mpitermdef{strong synchronization} is more
than
needed for correct ordering of window accesses. The semantics of
-\MPI/ calls allow \mpiterm{weak synchronization},
+\MPI/ calls allow \mpitermdef{weak synchronization},
as illustrated in Figure~\ref{fig:1sided-sync14}.
\begin{figure}[t]
\centerline{\includegraphics[width=3.0in]{figures/sync14}}
@@ -2473,7 +2565,7 @@
\begin{description}
\item[\mpifunc{MPI\_WIN\_POST(group,0,win)}]
-initiate a nonblocking send with tag \mpiarg{tag0} to each
+initiates a nonblocking send with tag \mpiarg{tag0} to each
process in \mpiarg{group}, using \mpiarg{wincomm}.
There is no need to wait for the
completion of these sends.
@@ -2483,11 +2575,11 @@
a window in target process \mpiarg{i} is delayed until the receive
from \mpiarg{i} is completed.
\item[\mpifunc{MPI\_WIN\_COMPLETE(win)}]
-initiate a nonblocking send with tag \mpiarg{tag1} to each process in
+initiates a nonblocking send with tag \mpiarg{tag1} to each process in
the group of the preceding start call. No need to wait for the
completion of these sends.
\item[\mpifunc{MPI\_WIN\_WAIT(win)}]
-initiate a nonblocking receive with tag \mpiarg{tag1} from each
+initiates a nonblocking receive with tag \mpiarg{tag1} from each
process in the group of the preceding post call. Wait for the
completion of all receives.
\end{description}
@@ -2550,10 +2642,23 @@
\mpicppemptybind{MPI::Win::Lock(int lock\_type, int rank, int assert) const}{void}
-Starts an \RMA/ access epoch. Only the window at the
+Starts an \RMA/ access epoch.
+%% B3.1
+\color{red}%
+The
+%% E3.1
+\color{black}%
+window at the
process with rank \mpiarg{rank} can be accessed by \RMA/ operations
on \mpiarg{win} during that
-epoch.
+epoch.
+%% B3.1
+\color{red}%
+Multiple \RMA/ access epochs (with calls to \mpifunc{MPI\_WIN\_LOCK})
+can occur simultaneously; however, each access epoch must target a
+different process.
+%% E3.1
+\color{black}%
\begin{funcdef}{MPI\_WIN\_LOCK\_ALL(assert, win)}
\funcarg{\IN}{assert}{program assertion (integer)}
@@ -2597,8 +2702,13 @@
\mpicppemptybind{MPI::Win::Unlock(int rank) const}{void}
-Completes an \RMA/ access epoch started by a call to
-\mpifunc{MPI\_WIN\_LOCK($\ldots$,win)}. \RMA/ operations issued during this
+Completes an \RMA/ access epoch started by a call to
+%% B3.1
+\color{red}%
+\mpifunc{MPI\_WIN\_LOCK} on window \mpiarg{win}.
+%% E3.1
+\color{black}%
+\RMA/ operations issued during this
period will have completed both at the origin and at the target when the call returns.
\begin{funcdef}{MPI\_WIN\_UNLOCK\_ALL(win)}
@@ -2614,9 +2724,16 @@
%\mpicppemptybind{MPI::Win::Unlock\_all() const}{void}
Completes a shared \RMA/ access epoch started by a call to
-\mpifunc{MPI\_WIN\_LOCK\_ALL(assert, win)}. \RMA/ operations issued during this
+%% B3.1
+\color{red}%
+\mpifunc{MPI\_WIN\_LOCK\_ALL} on window \mpiarg{win}.
+%% E3.1
+\color{black}%
+\RMA/ operations issued during this
epoch will have completed both at the origin and at the target when the call returns.
+\bigskip%%ALLOWLATEX%%
+
Locks are used to protect accesses to the locked target
window effected by \RMA/ calls issued between the lock and unlock
calls, and to protect
@@ -2663,7 +2780,7 @@
\mpifunc{MPI\_ALLOC\_MEM}
(\sectionref{sec:misc-memalloc}),
\mpifunc{MPI\_WIN\_ALLOCATE} (\sectionref{sec:winalloc}), or attached with
-\mpifunc{MPI\_WIN\_ATTACH} (\sectionref{sec:rma-create-dynamic}).
+\mpifunc{MPI\_WIN\_ATTACH} \gb(\sectionref{sec:rma-create-dynamic}).
Locks can be used portably only in such memory.
\begin{rationale}
@@ -2675,11 +2792,13 @@
impose restrictions that allows one to use shared memory for third
party communication in shared memory machines.
-The downside of this decision is that passive target communication cannot be
-used without taking advantage of nonstandard Fortran features: namely,
-the availability of C-like pointers; these are not supported by some
-Fortran
-compilers.
+%% B3.1
+%The downside of this decision is that passive target communication cannot be
+%used without taking advantage of nonstandard Fortran features: namely,
+%the availability of C-like pointers; these are not supported by some
+%Fortran
+%compilers.
+%% E3.1
\end{rationale}
Consider the sequence of calls in the example below.
@@ -2812,6 +2931,7 @@
actually end an epoch or complete any pending \MPI/ \RMA/ operations).
\subsection{Assertions}
+\mpitermtitleindex{assertions}
\label{sec:1sided-assert}
The \mpiarg{assert} argument in the calls
@@ -2853,16 +2973,19 @@
\begin{users}
C/C++ users can use bit vector or ($\mid$) to combine these constants;
Fortran 90 users
-can use the bit-vector \code{IOR} intrinsic.
-Fortran 77 users can use (nonportably)
-bit
-vector \code{IOR} on systems that support it. Alternatively, Fortran users can
+can use the bit-vector \code{IOR} intrinsic.
+%% B3.1
+%Fortran 77 users can use (nonportably)
+%bit
+%vector \code{IOR} on systems that support it.
+%% E3.1
+Alternatively, Fortran users can
portably use integer addition to OR the constants (each constant should
appear at most once in the addition!).
\end{users}
\begin{description}
-\item[\mpifunc{MPI\_WIN\_START}:]
+\item[\mpifunc{MPI\_WIN\_START}:]\quad
\begin{description}
\item{\const{MPI\_MODE\_NOCHECK}}
--- the matching calls to \mpifunc{MPI\_WIN\_POST}
@@ -2878,7 +3001,7 @@
(However, ready-send is matched by a regular receive, whereas
both start and post must specify the nocheck option.)
\end{description}
-\item[\mpifunc{MPI\_WIN\_POST}:]
+\item[\mpifunc{MPI\_WIN\_POST}:]\quad
\begin{description}
\item{\const{MPI\_MODE\_NOCHECK}}
--- the matching calls to \mpifunc{MPI\_WIN\_START}
@@ -2898,7 +3021,7 @@
calls after the post call, until the ensuing (wait) synchronization.
This may avoid the need for cache synchronization at the wait call.
\end{description}
-\item[\mpifunc{MPI\_WIN\_FENCE}:]
+\item[\mpifunc{MPI\_WIN\_FENCE}:]\quad
\begin{description}
\item{\const{MPI\_MODE\_NOSTORE}}
--- the local window was not updated by
@@ -2917,7 +3040,7 @@
of locally issued \RMA/ calls. If the assertion is given by any process
in the window group, then it must be given by all processes in the group.
\end{description}
-\item[\mpifunc{MPI\_WIN\_LOCK}, \mpifunc{MPI\_WIN\_LOCK\_ALL}:]
+\item[\mpifunc{MPI\_WIN\_LOCK}, \mpifunc{MPI\_WIN\_LOCK\_ALL}:]\quad
\begin{description}
\item{\const{MPI\_MODE\_NOCHECK}}
--- no other process holds, or will attempt
@@ -2949,6 +3072,7 @@
be used in \RMA/ communication.
\section{Error Handling}
+\mpitermtitleindex{error handling!one-sided communication}
\label{sec:1sided-errhandlers}
\subsection{Error Handlers}
@@ -3006,6 +3130,7 @@
\section{Semantics and Correctness}
+\mpitermtitleindex{semantics and correctness!one-sided communication}
\label{sec:1sided-semantics}
@@ -3097,9 +3222,17 @@
window copy can be delayed in both memory models until the window owner
executes a synchronization call.
When passive target
-synchronization (lock/unlock or even flush) is used, it is necessary to update the public window
-copy in the \RMA/ separate model, or the private window copy in the \RMA/
-unified model, even if the window owner does not execute any related
+synchronization
+%% B3.1
+% (lock/unlock or even flush)
+%% E3.1
+is used, it is necessary to update the public window
+copy
+%% B3.1
+% in the \RMA/ separate model, or the private window copy in the \RMA/
+%unified model,
+%% E3.1
+even if the window owner does not execute any related
synchronization call.
The rules above also define, by implication, when an update to a
@@ -3150,13 +3283,16 @@
must obey the following rules.
\begin{enumerate}
-\item
+%% B3.1
+\def\makelabel#1{\hss\llap{S#1}}%ALLOWLATEX%
+%% E3.1
+\item\label{rule:s1}
A location in a window must not be accessed
with load/store operations once an update to
that location has started, until the update becomes visible in the
private window copy in process
-memory.
-\item
+memory.
+\item\label{rule:s2}
A location in a window must not be accessed as a target of an \RMA/
operation once an update to that location has started, until the
update becomes visible in the public window copy. There is one
@@ -3165,7 +3301,7 @@
predefined datatype, on the same window. Additional restrictions on the
operation apply, see the info key \mpiarg{accumulate\_ops} in
Section~\ref{chap:one-side-2:win_create}.
-\item
+\item\label{rule:s3}
A put or accumulate must not access a target window once a
%load/
store % update
@@ -3200,18 +3336,25 @@
(that is, updates to one are made visible to the other).
In the \const{MPI\_WIN\_UNIFIED} memory model, the rules are
-much simpler because the public and private windows are the same.
+%% B3.1
+%much
+%% E3.1
+simpler because the public and private windows are the same.
However, there are restrictions to avoid concurrent access to
the same memory locations by different processes.
The rules that a program with a well-defined outcome must obey in this case are:
\begin{enumerate}
-\item
+%% B3.1
+\def\makelabel#1{\hss\llap{U#1}}%ALLOWLATEX%
+%% E3.1
+\item\label{rule:u1}
A location in a window must not be accessed
with load/store operations once an update to
that location has started, until the update is complete,
subject to the following special case.
-\item Accessing a location in the
+\item\label{rule:u2}
+Accessing a location in the
window that is also the target of a remote update is valid (not
erroneous) but the precise result will depend on the behavior of the
implementation. Updates from a remote process will appear in the memory of
@@ -3236,7 +3379,8 @@
may produce unexpected results.
\end{users}
-\item Updating a location in the
+\item\label{rule:u3}
+Updating a location in the
window with a store operation
that is also the target of a remote read (but not update) is valid
(not erroneous) but the precise result will depend on the behavior
@@ -3253,7 +3397,7 @@
behavior only if the other rules given here and
elsewhere in this chapter
are followed.
-\item
+\item\label{rule:u4}
A location in a window must not be accessed as a
target of an \RMA/
operation once an update to that location has started and until the
@@ -3263,7 +3407,7 @@
predefined datatype on the same window. Additional restrictions on the
operation apply; see the info key \mpiarg{accumulate\_ops} in
Section~\ref{chap:one-side-2:win_create}.
-\item
+\item\label{rule:u5}
A put or accumulate must not access a target
window once a store, put, or
accumulate update to another (overlapping) target window
@@ -3289,13 +3433,14 @@
Example~\ref{ex:shmem-sync}.
\end{users}
-Note that \mpifunc{MPI\_WIN\_FLUSH} and \mpifunc{MPI\_WIN\_FLUSH\_ALL}
-may be used within a passive target epoch to complete \RMA/
-operations at the target process.
+%% B3.1
+%Note that \mpifunc{MPI\_WIN\_FLUSH} and \mpifunc{MPI\_WIN\_FLUSH\_ALL}
+%may be used within a passive target epoch to complete \RMA/
+%operations at the target process.
+%% E3.1
A program that violates these rules has undefined behavior.
-
\begin{users}
A user can write correct programs by following the following rules:
\begin{description}
@@ -3326,7 +3471,7 @@
they may conflict. Nonconflicting accesses (such as read-only accesses
or accumulate accesses) are protected by shared locks,
both for load/store accesses and for \RMA/ accesses.
-\item[changing window or synchronization mode:]
+\item[changing window or synchronization mode:]\quad
%\hskip 0pt plus 2em minus 0em
One can change synchronization mode, or change the window used to
access a location that belongs to two overlapping windows, when the
@@ -3404,11 +3549,11 @@
%%SKIP
%%ENDHEADER
\begin{verbatim}
-Process A: Process B:
- window location X
-
- store X /* update to private&public copy of B */
-MPI_Barrier MPI_Barrier
+Process A: Process B:
+ window location X
+
+ store X /* update to private & public copy of B */
+MPI_Barrier MPI_Barrier
MPI_Win_lock_all
MPI_Get(X) /* ok, read from window */
MPI_Win_flush_local(B)
@@ -3684,7 +3829,14 @@
\mpifunc{MPI\_ACCUMULATE}) are executed and committed in program order.
Ordering only applies to operations originating at the same origin that
access overlapping target memory regions. \MPI/ does not provide any
-guarantees for accesses or updates from different origins to overlapping
+guarantees for accesses or updates from different
+%% B3.1
+\color{red}%
+% origins
+origin processes
+%% E3.1
+\color{black}%
+to overlapping
target memory regions.
The default strict ordering may incur a significant performance penalty.
@@ -3703,7 +3855,13 @@
whether operations of the specified type complete in the order they were
issued.
For example, \infoval{raw} means that any writes must complete at the target
-before any reads. These ordering requirements apply only to operations issued
+%% B3.1
+\color{red}%
+%before any reads.
+before subsequent reads.
+%% E3.1
+\color{black}%
+These ordering requirements apply only to operations issued
by the same origin process and targeting the same target process.
The default value for \infokey{accumulate\_ordering} is
\constskip{rar,raw,war,waw}, which implies that writes complete at the target
@@ -3932,7 +4090,7 @@
MPI_Put(&frombuf[i], 1, fromtype[i], toneighbor[i],
todisp[i], 1, totype[i], win);
MPI_Win_fence((MPI_MODE_NOSTORE | MPI_MODE_NOSUCCEED), win);
- }
+}
\end{verbatim}
The same code could be written with get rather than put. Note that,
during the communication phase, each
@@ -3972,7 +4130,7 @@
fromdisp[i], 1, fromtype[i], win);
update_core(A);
MPI_Win_fence(MPI_MODE_NOSUCCEED, win);
- }
+}
\end{verbatim}
The get communication can be concurrent with the core update, since
they do not access the same locations, and the local update of the
@@ -4013,7 +4171,7 @@
todisp[i], 1, totype[i], win);
MPI_Win_complete(win);
MPI_Win_wait(win);
- }
+}
\end{verbatim}
\end{example}
@@ -4045,7 +4203,7 @@
update_core(A);
MPI_Win_complete(win);
MPI_Win_wait(win);
- }
+}
\end{verbatim}
\end{example}
@@ -4103,7 +4261,7 @@
MPI_Win_post(neighbors, (MPI_MODE_NOCHECK | MPI_MODE_NOPUT), win0);
MPI_Win_complete(win1);
MPI_Win_wait(win1);
- }
+}
\end{verbatim}
A process posts the local window associated with
@@ -4277,8 +4435,10 @@
must be supplemented with a memory synchronization through calls to
\mpifunc{MPI\_WIN\_SYNC}, which act locally as a processor-memory barrier. In
Fortran, if \const{MPI\_ASYNC\_PROTECTS\_NONBLOCKING} is
+\exindex{MPI\_ASYNC\_PROTECTS\_NONBLOCKING}%
\code{.FALSE.}
or the variable \code{X} is not declared as \code{ASYNCHRONOUS},
+\exindex{ASYNCHRONOUS}%
reordering of the accesses to the
variable \code{X} must be prevented with \mpifunc{MPI\_F\_SYNC\_REG}
operations. (No equivalent function is needed in C.)
@@ -4290,6 +4450,10 @@
operation performed by process B ensures that process A's updates to \code{X}
are visible to process B.
+\exindex{MPI\_Win\_lock\_all}%
+\exindex{MPI\_Win\_sync!shared memory windows}%
+\exindex{Shared memory windows!MPI\_Win\_sync}%
+\exindex{MPI\_F\_sync\_reg}%
%%HEADER
%%SKIP
%%ENDHEADER
@@ -4361,11 +4525,11 @@
else
MPI_Waitany(M, put_req, &j, MPI_STATUS_IGNORE);
- MPI_Rget(data[j], N, MPI_DOUBLE, target, i*N, N, MPI_DOUBLE, win,
+ MPI_Rget(data[j], N, MPI_DOUBLE, target, i*N, N, MPI_DOUBLE, win,
&get_req);
MPI_Wait(&get_req,MPI_STATUS_IGNORE);
compute(i, data[j], ...);
- MPI_Rput(data[j], N, MPI_DOUBLE, target, i*N, N, MPI_DOUBLE, win,
+ MPI_Rput(data[j], N, MPI_DOUBLE, target, i*N, N, MPI_DOUBLE, win,
&put_req[j]);
}
@@ -4448,7 +4612,7 @@
elem_ptr->next = nil;
MPI_Win_attach(win, elem_ptr, sizeof(llist_elem_t));
- /* Add the element to the list of local elements so we can free
+ /* Add the element to the list of local elements so we can free
it later. */
if (my_elems_size == my_elems_count) {
my_elems_size += 100;
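
The clarified MPI_WIN_LOCK text earlier in this file permits multiple simultaneous access epochs provided each targets a different process; a minimal sketch, with hypothetical window and buffers:

#include <mpi.h>

/* Sketch: two concurrent passive-target access epochs, each locking a
   different target rank.  'win', 'buf0', and 'buf1' are hypothetical. */
void update_two_targets(MPI_Win win, double *buf0, double *buf1)
{
    MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
    MPI_Win_lock(MPI_LOCK_SHARED, 1, 0, win);  /* different target: allowed */
    MPI_Put(buf0, 1, MPI_DOUBLE, 0, 0, 1, MPI_DOUBLE, win);
    MPI_Put(buf1, 1, MPI_DOUBLE, 1, 0, 1, MPI_DOUBLE, win);
    MPI_Win_unlock(0, win);   /* completes the epoch targeting rank 0 */
    MPI_Win_unlock(1, win);   /* completes the epoch targeting rank 1 */
}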
Index: getlatex
===================================================================
--- getlatex (revision 2030)
+++ getlatex (working copy)
@@ -48,6 +48,7 @@
'>' => 1,
','=> 1,
'=' => 1,
+ '~' => 1, # accent
'kill' => 1,
'linebreak' => 1,
'newline' => 1,
@@ -231,9 +232,17 @@
'constitemthree' => 1,
'constskipitem' => 1,
'code' => 1,
+ 'mpiublb' => 1,
'mpicode' => 1,
'mpiterm' => 1,
+ 'mpitermindex' => 1,
'mpitermni' => 1,
+ 'mpitermtitleindex' => 1,
+ 'mpitermdefni' => 1,
+ 'mpitermdef' => 1,
+ 'mpitermdefindex' => 1,
+ 'mpitermtitleindexsubmain' => 1,
+ 'mpitermtitleindexmainsub' => 1,
'MPIreplace' => 1,
'MPIupdate' => 1,
'MPIdelete' => 1,
@@ -395,9 +404,9 @@
}
$inline2 = $before . $after1 .$after2;
}
- # Look for " \ref{...}"
+ # Look for " \ref{...}" but not ", \ref{...}"
$inline2 = $inline;
- while ($inline2 =~ /^(.*) \\ref{([^}]+)}(.*)/) {
+ while ($inline2 =~ /^([^,]*) \\ref{([^}]+)}(.*)/) {
$before = $1;
$after1 = $2;
$after2 = $3;
@@ -414,6 +423,12 @@
print "$file:$linecount:double quote character: $_";
}
}
+ # Look for punctuation on the wrong side of a quote
+ if (/\'[\.,]/) {
+ # Note that quotes are sometimes used in the document
+ # incorrectly to mark an MPI term. Those should be fixed instead.
+ print "$file:$linecount: check punctunation in quote: $_";
+ }
# Look for commands
while ($inline =~ /([^\\]*)\\([A-Za-z]*)(.*)/) {
$before = $1;
Index: chap-frontm/history.tex
===================================================================
--- chap-frontm/history.tex (revision 2030)
+++ chap-frontm/history.tex (working copy)
@@ -3,10 +3,13 @@
% Provide only the highest level summary here.
\paragraph{Version 3.1: XX, XX, 2015.}
-This docuemnt contains mostly corrections and clarifications to the
+This document contains mostly corrections and clarifications to the
\mpiiiidoto/ document. The largest change is a correction to the Fortran
-binding introduced in \mpiiii/. A few routines were added but correct
-\mpiiii/ programs are correct \mpiiiidoti/ programs.
+bindings introduced in \mpiiiidoto/.
+Additionally, new functions added
+include routines to manipulate \code{MPI\_Aint} values in a portable manner, nonblocking
+collective I/O routines, and routines to get the index value by name for
+\mpiskipfunc{MPI\_T} performance and control variables.
% It would be a terrible mistake to list all changes to MPI-3 here. If
% this list is unsatisfactory, the next best option is to list *no*
@@ -23,7 +26,7 @@
considered a major update to the \MPI/ standard. As with previous
versions, new features have been adopted only when there were
compelling needs for the users. Some features, however, may have more
-than a minor impact on existing \MPI/ implementations.
+than a minor impact on existing \MPI/ implementations.
\paragraph{Version 2.2: September 4, 2009.}
This document contains mostly corrections and clarifications to the
@@ -50,7 +53,7 @@
\paragraph{Version 1.3: May 30, 2008.}
This document combines the previous
documents \mpiidoti/ (June 12, 1995) and the \mpiidotii/ Chapter in \mpiii/
-<(July 18, 1997). Additional errata collected by the \mpi/ Forum
+(July 18, 1997). Additional errata collected by the \mpi/ Forum
referring to \mpiidoti/ and \mpiidotii/ are also included in this
document.
Index: chap-frontm/abstract-cpy.tex
===================================================================
--- chap-frontm/abstract-cpy.tex (revision 2030)
+++ chap-frontm/abstract-cpy.tex (working copy)
@@ -10,7 +10,7 @@
% extended collective operations, external interfaces, I/O, and
% additional language bindings.
This document describes the Message-Passing Interface (\mpi/)
- standard, version 3.0.
+ standard, version 3.1.
The \mpi/ standard includes point-to-point message-passing,
collective communications, group and communicator concepts,
process topologies, environmental management,
@@ -20,7 +20,7 @@
Language bindings for C and Fortran are defined.
Historically, the evolution of the standards is from
- \mpiidoto/ (June 1994) to \mpiidoti/ (June 12, 1995) to
+ \mpiidoto/ (May 5, 1994) to \mpiidoti/ (June 12, 1995) to
\mpiidotii/ (July 18, 1997), with several clarifications and additions
and published as part of the \mpiii/ document, to
\mpiiidoto/ (July 18, 1997), with new functionality, to
@@ -31,9 +31,10 @@
to one combined document,
and to \mpiiidoti/ (June 23, 2008), combining the
previous documents.
- Version \mpiiidotii/ (September 2009) added
+ Version \mpiiidotii/ (September 4, 2009) added
additional clarifications and seven new routines.
- This version, \mpiiiidoto/, is an extension of \mpiiidotii/.
+ Version \mpiiiidoto/ (September 21, 2012) is an extension of \mpiiidotii/.
+ This version, \mpiiiidoti/, adds clarifications and minor extensions to \mpiiiidoto/.
\paragraph{Comments.}
Please send comments on \MPI/ to the \MPI/ Forum as follows:
@@ -78,7 +79,7 @@
\vfill%%ALLOWLATEX%
-\copyright 1993, 1994, 1995, 1996, 1997,
+\noindent \copyright 1993, 1994, 1995, 1996, 1997,
2008, 2009, 2012, 2015
University of Tennessee, Knoxville, Tennessee.
Permission to copy without fee all or part of this material is
Index: chap-frontm/credits.tex
===================================================================
--- chap-frontm/credits.tex (revision 2030)
+++ chap-frontm/credits.tex (working copy)
@@ -122,6 +122,8 @@
and by the Commission of the European Community through Esprit project
P6643 (PPPE).
+\medskip%ALLOWLATEX%
+
\subsection*{\MPIIDOTII/ and \MPIIIDOTO/:}
Those who served as primary coordinators in \mpiidotii/ and \mpiiidoto/ are:
@@ -282,6 +284,8 @@
The following institutions supported the \MPIII/ effort through
time and travel support for the people listed above.
+\medskip%ALLOWLATEX%
+
\begin{obeylines}
Argonne National Laboratory
Bolt, Beranek, and Newman
@@ -310,7 +314,7 @@
National Institute of Standards and Technology
National Oceanic and Atmospheric Adminstration
Oak Ridge National Laboratory
-Ohio State University
+The Ohio State University
PALLAS GmbH
Pacific Northwest National Laboratory
Pratt \& Whitney
@@ -337,6 +341,8 @@
University of Wisconsin
\end{obeylines}
+\medskip%ALLOWLATEX%
+
\MPIII/ operated on a very tight budget (in reality, it had no budget when
the first meeting was announced). Many institutions helped the
\MPIII/ effort by supporting the efforts and travel of the members of
@@ -344,6 +350,8 @@
contract CDA-9115428 for travel by U.S. academic participants and
Esprit under project HPC Standards (21111) for European participants.
+\medskip%ALLOWLATEX%
+
\subsection*{\MPIIDOTIII/ and \MPIIIDOTI/:}
The editors and organizers of the combined documents have been:
@@ -438,6 +446,8 @@
The following institutions supported the \MPIII/ effort through
time and travel support for the people listed above.
+\medskip%ALLOWLATEX%
+
\begin{obeylines}
Argonne National Laboratory
Bull
@@ -458,7 +468,7 @@
Myricom
NEC Laboratories Europe, NEC Europe Ltd.
Oak Ridge National Laboratory
-Ohio State University
+The Ohio State University
Pacific Northwest National Laboratory
QLogic Corporation
Sandia National Laboratories
@@ -473,11 +483,14 @@
University of Wisconsin
\end{obeylines}
+\medskip%ALLOWLATEX%
+
Funding for the \MPI/ Forum meetings was partially supported by award
\#CCF-0816909 from the National Science Foundation.
In addition, the HDF Group provided travel support for one
U.S. academic.
+\medskip%ALLOWLATEX%
\subsection*{\MPIIIDOTII/:}
All chapters have been revisited to achieve a consistent \MPIIIDOTII/ text.
@@ -593,6 +606,8 @@
The following institutions supported the \MPIIIDOTII/ effort through
time and travel support for the people listed above.
+\medskip%ALLOWLATEX%
+
\begin{obeylines}
Argonne National Laboratory
Auburn University
@@ -617,7 +632,7 @@
Myricom
NEC Corporation
Oak Ridge National Laboratory
-Ohio State University
+The Ohio State University
Pacific Northwest National Laboratory
QLogic Corporation
RunTime Computing Solutions, LLC
@@ -635,17 +650,19 @@
University of Wisconsin
\end{obeylines}
+\medskip%ALLOWLATEX%
+
Funding for the \MPI/ Forum meetings was partially supported by
awards
\#CCF-0816909 and \#CCF-1144042 from the National Science Foundation.
In addition, the HDF Group provided travel support for one
U.S. academic.
-\subsection*{\MPIIII/:}
-\MPIIII/ is a signficant effort to extend and modernize the \MPI/
+\subsection*{\mpiiiidoto/:}
+\mpiiiidoto/ is a significant effort to extend and modernize the \MPI/
Standard.
-\noindent The editors and organizers of the \MPIIII/ have been:
+\noindent The editors and organizers of the \mpiiiidoto/ have been:
%\emph{Taken from \MPIIIDOTII/ with minor corrections. Need to
% separate the working groups list (which is currently reviewers) from
% the primary authors . Also, did I miss active steering committee members?}
@@ -654,7 +671,7 @@
Groups, Contexts, and
Communicators, One-Sided Communications, and Bibliography
\item Richard Graham, Steering committee, Point-to-Point Communication, Meeting
-Convener, and \MPIIII/ chair
+Convener, and \mpiiiidoto/ chair
\item Torsten Hoefler, Collective Communication, One-Sided
Communications, and Process Topologies
\item George Bosilca, Datatypes and Environmental Management
@@ -662,7 +679,7 @@
\item Bronis R.~de Supinski, External Interfaces and Tool Support
\item Rajeev Thakur, I/O and One-Sided Communications
\item Darius Buntinas, Info Object
-\item Jeffrey M.~Squyres, Language Bindings and \MPIIII/ Secretary
+\item Jeffrey M.~Squyres, Language Bindings and \mpiiiidoto/ Secretary
\item Rolf Rabenseifner, Steering committee, Terms and Definitions, and
Fortran Bindings,
Deprecated Functions, Annex Change-Log, and Annex Language Bindings
@@ -802,6 +819,8 @@
The following institutions supported the \MPIIII/ effort through
time and travel support for the people listed above.
+\medskip%ALLOWLATEX%
+
\begin{obeylines}
Argonne National Laboratory
Bull
@@ -850,6 +869,8 @@
University of Tokyo
\end{obeylines}
+\medskip%ALLOWLATEX%
+
Funding for the \MPI/ Forum meetings was partially supported by
awards
\#CCF-0816909 and \#CCF-1144042 from the National
@@ -862,13 +883,15 @@
\noindent The editors and organizers of the \MPIIIIDOTI/ have been:
\begin{itemize}
-\item Martin Schulz, Meeting Convener, and \MPIIIIDOTI/ chair
+\item Martin Schulz, \MPIIIIDOTI/ chair
\item William Gropp, Steering committee, Front matter, Introduction,
One-Sided Communications, and Bibliography; Overall editor
\item Rolf Rabenseifner, Steering committee, Terms and Definitions, and
Fortran Bindings,
Deprecated Functions, Annex Change-Log, and Annex Language Bindings
-\item Dan Holmes, Point-to-Point Communication,
+\item Richard L.~Graham, Steering committee, Meeting Convener
+\item Jeffrey M.~Squyres, Language Bindings and \MPIIIIDOTI/ Secretary
+\item Daniel Holmes, Point-to-Point Communication
\item George Bosilca, Datatypes and Environmental Management
\item Torsten Hoefler, Collective Communication and Process Topologies
\item Pavan Balaji, Groups, Contexts, and Communicators, and External Interfaces
@@ -877,54 +900,59 @@
\item Quincey Koziol, I/O
\item Kathryn Mohror, Tool Support
\item Rajeev Thakur, One-Sided Communications
-\item Jeffrey M.~Squyres, Language Bindings and \MPIIIIDOTI/ Secretary
\end{itemize}
The following list includes some of the active participants who
-attended \MPIIII/ Forum meetings or participated in the e-mail
+attended \MPI/ Forum meetings or participated in the e-mail
discussions. % and who are not mentioned above.
\begin{center}
\begin{tabular}{lll}
Charles Archer&
Pavan Balaji&
-Purushotham Bangalore\\
+Purushotham V.~Bangalore\\
Brian Barrett&
Wesley Bland&
Michael Blocksome\\
George Bosilca&
Aurelien Bouteiller&
Devendar Bureddy\\
+Yohann Burette &
Mohamad Chaarawi&
-Alexey Cheptsov&
-James Dinan\\
-Thomas Francois&
-Balazs Gerofi&
-Paddy Gillies\\
+Alexey Cheptsov\\
+James Dinan&
+Dmitry Durnov&
+Thomas Francois\\
+Edgar Gabriel&
+Todd Gamblin&
+Balazs Gerofi\\
+Paddy Gillies&
David Goodell&
-Manjunath Gorentla&
-Richard Graham\\
-Ryan Grant&
-William Gropp&
-Khaled Hamidouche\\
+Manjunath Gorentla Venkata\\
+Richard L.~Graham&
+Ryan E.~Grant&
+William Gropp\\
+Khaled Hamidouche&
Jeff Hammond&
-Amin Hassani&
-Marc-Andre Hermanns\\
+Amin Hassani\\
+Marc-Andr\'e Hermanns&
Nathan Hjelm&
-Torsten Hoefler&
-Daniel Holmes\\
+Torsten Hoefler\\
+Daniel Holmes&
Atsushi Hori&
-Yutaka Ishikawa&
-Hideyuki Jitsumoto\\
+Yutaka Ishikawa\\
+Hideyuki Jitsumoto&
Jithin Jose&
-Krishna Kandalla&
-Christos Kavouklis\\
+Krishna Kandalla\\
+Christos Kavouklis&
Takahiro Kawashima&
-Chulho Kim&
-Alice Koniges\\
-Qunciey Kozoil&
+Chulho Kim\\
+Michael Knobloch&
+Alice Koniges&
+Quincey Koziol\\
Sameer Kumar&
-Joshua Ladd\\
+Joshua Ladd&
+Ignacio Laguna\\
Huiwei Lu&
Guillaume Mercier&
Kathryn Mohror\\
@@ -932,7 +960,7 @@
Tomotake Nakamura&
Takeshi Nanri\\
Steve Oyanagi&
-Antonio Pena&
+Antonio J.~P\~ena&
Sreeram Potluri\\
Howard Pritchard&
Rolf Rabenseifner&
@@ -948,21 +976,29 @@
Anthony Skjellum\\
Brian Smith&
David Solt&
-Jeff Squyres\\
+Jeffrey M.~Squyres\\
+\end{tabular}
+\end{center}
+\begin{center}
+\begin{tabular}{lll}
+Hari Subramoni&
Shinji Sumimoto&
-Alexander Supalov&
-Sayantan Sur\\
-Masamichi Takagi&
-Kieta Teranishi&
-Rajeev Thakur\\
-Fabian Tillier&
+Alexander Supalov\\
+Bronis R.~de Supinski&
+Sayantan Sur&
+Masamichi Takagi\\
+Keita Teranishi&
+Rajeev Thakur&
+Fabian Tillier\\
Yuichi Tsujita&
+Geoffroy Vall\'ee&
Rolf vandeVaart\\
Akshay Venkatesh&
Jerome Vienne&
Venkat Vishwanath\\
Anh Vo&
-Junchao Zhang&
+Huseyin S.~Yildiz&
+Junchao Zhang\\
Xin Zhao
\end{tabular}
\end{center}
@@ -977,17 +1013,19 @@
The following institutions supported the \MPIIIIDOTI/ effort through
time and travel support for the people listed above.
+\medskip%ALLOWLATEX%
+
\begin{obeylines}
+Argonne National Laboratory
Auburn University
Cisco Systems, Inc.
Cray
-EPCC
+EPCC, The University of Edinburgh
ETH Zurich
Forschungszentrum J\"ulich
Fujitsu
German Research School for Simulation Sciences
-HDF Group
-HLRS
+The HDF Group
International Business Machines
INRIA
Intel Corporation
@@ -997,18 +1035,21 @@
Lawrence Livermore National Laboratory
Lenovo
Los Alamos National Laboratory
-Mellanox
-Microsoft
-Nvidia
+Mellanox Technologies, Inc.
+Microsoft Corporation
+NEC Corporation
+NVIDIA Corporation
Oak Ridge National Laboratory
-Ohio State University
-Riken AICS
-Sandia National Laboratory
+The Ohio State University
+RIKEN AICS
+Sandia National Laboratories
Texas Advanced Computing Center
-Tokya Institute of Technology
-University of Alabama Birmingham
+Tokyo Institute of Technology
+University of Alabama at Birmingham
+University of Houston
University of Illinois at Urbana-Champaign
-University Oregon
-University of Tennessee Knoxville
+University of Oregon
+University of Stuttgart, High Performance Computing Center Stuttgart (HLRS)
+University of Tennessee, Knoxville
University of Tokyo
\end{obeylines}