diff --git a/doc/manual/unsup.tex b/doc/manual/unsup.tex index b71d4c45..bb6d6b7c 100644 --- a/doc/manual/unsup.tex +++ b/doc/manual/unsup.tex @@ -68,6 +68,11 @@ \section{Source code} scaling factors (linear or dB), and adding a DC level. For Unix/MSDOS. +\item[random.c:] + randomization tool for selecting items from a list or + drawing numbers from a range. Reuses the EID linear + congruential generator from eid.c. For Unix/MSDOS. + \item[sb.c:] swap bytes for word-oriented files. For VMS/Unix/MSDOS. @@ -119,3 +124,178 @@ \section{Test files} \end{verbatim}} \end{Descr} + + +%---------------------------------------------------------------------- +\section{The {\tt random} tool} +\label{sec:random} +%---------------------------------------------------------------------- + +%---------------------------------------------------------------------- +\subsection{Introduction} +%---------------------------------------------------------------------- + +The {\tt random} tool provides deterministic pseudo-random selection +capabilities for use in speech and audio codec evaluation scripts. It +was originally developed for the EVS (Enhanced Voice Services) codec +processing scripts as specified in 3GPP S4-121078. + +The tool supports two modes of operation: +\begin{itemize} +\item {\bf Subset selection:} randomly select one or more items from a + provided list, sampling without replacement. +\item {\bf Range mode:} draw random integers from a specified numeric + range. +\end{itemize} + +Deterministic behavior is achieved through a seed-based linear +congruential sequence (LCS) generator, ensuring reproducible results +across platforms when the same seed is used. + +%---------------------------------------------------------------------- +\subsection{Description of the algorithm} +%---------------------------------------------------------------------- + +The pseudo-random number generator is inherited from the EID tool +({\tt eid.c}) in the STL. 
It implements a linear congruential +generator as described by Knuth~\cite{Knuth}: + +\[ + \mathrm{seed}_{n+1} = (69069 \times \mathrm{seed}_n + 1) \bmod 2^{32} +\] + +The generator returns a uniformly distributed value in $[0, 1)$ by +scaling: +\[ + r = 2^{-32} \times \mathrm{seed} +\] + +The {\tt unsigned int} type (32 bits on all modern platforms) is used +to ensure identical behavior on Windows, Linux, and macOS. + +\subsubsection{Subset mode} + +In subset mode, the tool selects {\em n} items from a list of {\em N} +items without replacement. For each selection: +\begin{enumerate} +\item A random index $k = \lfloor N \cdot r \rfloor$ is computed. +\item The item at position $k$ is output. +\item The item is removed from the list (remaining items shift down). +\item $N$ is decremented for the next draw. +\end{enumerate} + +\subsubsection{Range mode} + +In range mode, the tool draws {\em n} random integers from the +inclusive range $[\mathrm{start}, \mathrm{stop}]$. Each value is computed +as: +\[ + v = \mathrm{start} + \lfloor (\mathrm{stop} - \mathrm{start} + 1) \cdot r \rfloor +\] + +Note that in range mode, values are drawn independently (with +replacement). + +\subsubsection{Dummy pre-runs} + +An optional number of dummy pre-runs can be specified. These advance +the RNG state before selection begins, effectively providing a +secondary diversification mechanism beyond the seed value. + +%---------------------------------------------------------------------- +\subsection{Implementation} +%---------------------------------------------------------------------- + +The tool is implemented as a single source file {\tt random.c} in the +{\tt src/unsup/} directory. + +\subsubsection{Usage} + +{\tt +\begin{verbatim} +random [OPTIONS] [ITEM_LIST] +\end{verbatim} +} + +\subsubsection{Options} + +\begin{Descr}{40mm} +\item[\tt -s SEED] + Seed for the random number generator. Any value between 0 and + $2^{32}-1$. Default: 3141592653. 
+ +\item[\tt -d PRERUNS] + Number of dummy pre-runs to advance the RNG state before + selection begins. Default: 0. + +\item[\tt -r START STOP] + Range mode: draw random integers from the inclusive range + $[\mathrm{START}, \mathrm{STOP}]$. When this option is specified, + no item list is expected. + +\item[\tt -n NUM\_ITEMS] + Number of items to select (subset mode) or numbers to draw + (range mode). Default: 1. + +\item[\tt -v] + Verbose output: shows the internal state of the item list + after each selection (subset mode only). + +\item[\tt -h] + Print usage information and exit. +\end{Descr} + +\subsubsection{Examples} + +\paragraph{Select one item from a list:} +{\tt +\begin{verbatim} +$ random -s 42 alpha bravo charlie delta echo +alpha +\end{verbatim} +} + +\paragraph{Select 3 items from a list with 3 dummy pre-runs:} +{\tt +\begin{verbatim} +$ random -s 42 -d 3 -n 3 alpha bravo charlie delta echo +bravo delta charlie +\end{verbatim} +} + +\paragraph{Draw 5 random integers from a range:} +{\tt +\begin{verbatim} +$ random -s 12345 -r 10 20 -n 5 +12 19 12 14 14 +\end{verbatim} +} + +\subsubsection{Limitations} + +\begin{itemize} +\item Maximum of 1000 items in subset mode. +\item The seed is parsed with {\tt atoi()}, so only seeds within the signed + {\tt int} range are converted portably. Larger seeds (up to $2^{32}-1$) + may wrap to the intended unsigned value, but this is not guaranteed. +\end{itemize} + +%---------------------------------------------------------------------- +\subsection{Tests and portability} +%---------------------------------------------------------------------- + +Three CTest test cases validate the tool: + +\begin{enumerate} +\item {\bf random1:} Range mode with seed 12345, range $[10, 20]$, 5 + draws. Expected output: ``12 19 12 14 14''. +\item {\bf random2:} Subset mode with seed 42, selecting 1 item from 5. + Expected output starts with ``alpha''. +\item {\bf random3:} Subset mode with seed 42, 3 dummy pre-runs, + selecting 3 items from 5. 
Expected output: ``bravo delta charlie''. +\end{enumerate} + +Tests use {\tt PASS\_REGULAR\_EXPRESSION} to verify stdout matches +expected values, ensuring cross-platform reproducibility of the RNG +sequence. + diff --git a/src/unsup/CMakeLists.txt b/src/unsup/CMakeLists.txt old mode 100644 new mode 100755 index 03944831..837adbcf --- a/src/unsup/CMakeLists.txt +++ b/src/unsup/CMakeLists.txt @@ -39,6 +39,9 @@ add_executable(sh2chr sh2chr.c) add_executable(crc getcrc32.c) +add_executable(random random.c) +target_link_libraries(random ${M_LIBRARY}) + add_test(astrip1 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/astrip -q -smooth -wlen 145 -sample test_data/cftest1.dat test_data/astrip.smp) add_test(astrip1-verify ${CMAKE_COMMAND} -E compare_files test_data/astrip.smp test_data/astrip.ref) @@ -71,3 +74,12 @@ add_test(fdelay5-verify ${CMAKE_COMMAND} -E compare_files test_data/delay-15.tst add_test(fdelay6 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/fdelay -hex 0xF test_data/litend.src test_data/delay-0f.tst) add_test(fdelay6-verify ${CMAKE_COMMAND} -E compare_files test_data/delay-0f.tst test_data/delay-15.ref) + +add_test(random1 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/random -s 12345 -r 10 20 -n 5) +set_tests_properties(random1 PROPERTIES PASS_REGULAR_EXPRESSION "12 19 12 14 14") + +add_test(random2 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/random -s 42 alpha bravo charlie delta echo) +set_tests_properties(random2 PROPERTIES PASS_REGULAR_EXPRESSION "^alpha ") + +add_test(random3 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/random -s 42 -d 3 -n 3 alpha bravo charlie delta echo) +set_tests_properties(random3 PROPERTIES PASS_REGULAR_EXPRESSION "bravo delta charlie") diff --git a/src/unsup/README.md b/src/unsup/README.md index 4a414471..5a58be27 100644 --- a/src/unsup/README.md +++ b/src/unsup/README.md @@ -56,6 +56,10 @@ oper.c: implement arithmetic operation on two files: add, subtract, multiply or divide two files applying scaling factors (linear or dB), and adding a DC level. 
+random.c: randomization tool for selecting items from a list or drawing + numbers from a range. Reuses the EID linear congruential + generator from eid.c. + sb.c swap bytes for word-oriented files. For VMS/Unix/MSDOS. sh2chr.c: convert short-oriented (16-bit words) files to char-oriented diff --git a/src/unsup/random.c b/src/unsup/random.c new file mode 100644 index 00000000..d15e626a --- /dev/null +++ b/src/unsup/random.c @@ -0,0 +1,225 @@ +/*---------------------------------------------------------------------------* + * Randomization tool, V1.1 * + * ------------------------------------------ * + * (C) 2012 Fraunhofer IIS. All rights reserved. * + * * + * =============================================================== * + * COPYRIGHT NOTE: This source code, and all of its derivations, * + * is subject to the "ITU-T General Public License". Please have * + * it read in the distribution disk, or in the ITU-T Recommendation * + * G.191 on "SOFTWARE TOOLS FOR SPEECH AND AUDIO CODING STANDARDS". * + * See LICENSE.md in the top-level directory for terms. * + * =============================================================== * + * * + * The LCS generator is copied from ITU-T G.191, file STL/eid.c. * + * * + * Fraunhofer IIS makes no representation nor warranty in regard to * + * the accuracy, completeness or sufficiency of The Software, nor * + * shall Fraunhofer IIS be held liable for any damages whatsoever * + * relating to use of said Software. 
* + *---------------------------------------------------------------------------*/ + +#include +#include +#include +#include +#include + +#define MAX_ITEMS 1000 +#define VERBOSE 0 +#define DEFAULT_SEED 3141592653U + +typedef enum { + SUBSET = 0, + FROM_RANGE = 1 +} MODE; + +/* +============================================================================ + double EID_random (unsigned int *seed); inherited from STL/eid.c, + ~~~~~~~~~~~~~~~~~ + + Description: + ~~~~~~~~~~~~ + Returns a new random number, generated by a linear congruential + sequence (LCS) generator. See: Knuth, D.E. 1981: "Seminumerical + Algorithms" vol.2 of The Art of Computer Programming; Reading, + Mass.; Addison-Wesley. + + Parameters: + ~~~~~~~~~~~ + seed: ... pointer to the unsigned int seed; updated on each call. + + Return value: + ~~~~~~~~~~~~~ + Returns a random number as double in the range 0..1. + + Author: + ~~~~~~~ + History: + ~~~~~~~~ + 28.Feb.92 v1.0 Release of 1st version + 20.Apr.92 v2.0 Change of polynomial generator to LCG + + 26.Jan.98 v3.0 Corrected bug for 64-bit operating systems (where + longs have 64, not 32 bits). 
Implemented by + , after bug reported + by + 12.Aug.12 v4.0 Changed unsigned long to unsigned int to have identical + behavior on at least Windows, Linux, OSX +============================================================================ */ + +static double EID_random(seed) +unsigned int *seed; +{ + /* Width of unsigned int in bits (size in bytes * 8), held as a double so it can be negated and passed to pow() */ + static double bits_in_int = sizeof(unsigned int) * 8; + /* Advance the LCG state: seed = 69069 * seed + 1; storing the result back into an unsigned int wraps it */ + *seed = ((unsigned int)69069L * (*seed) +1L); + /* Scale the new state by 2^-bits_in_int and return it as a double; if WAS is defined the exponent is fixed at -32 instead */ +#ifdef WAS + return(pow((double)2.0, (double)-32.0) * (double)(*seed)); +#else + return(pow((double)2.0, -bits_in_int) * (double)(*seed)); +#endif +} + +static void usage() /* Print the option summary to stdout, then terminate via exit(-1); never returns */ +{ + fprintf( stdout,"usage: random [OPTIONS] [ITEM_LIST]\n" ); + fprintf( stdout," OPTIONS:\n" ); + fprintf( stdout," -s SEED any number in between 0 and %u: as a seed for the RNG, default: %u\n", UINT_MAX, DEFAULT_SEED ); + fprintf( stdout," -d PRERUNS number of dummy pre-runs to alter the seed, default: 0\n" ); + fprintf( stdout," -r START STOP range mode, values define lowest and highest allowed value, not set by default\n" ); + fprintf( stdout," -n NUM_ITEMS select more than one item from the provided item list, default: 1\n" ); + exit(-1); +} + +int main( int argc, char** argv ) +{ + unsigned int seed = DEFAULT_SEED; + MODE mode = SUBSET; + unsigned int num_items = 1; + unsigned int num_dummyruns = 0; + unsigned int items_in_list = 0; + unsigned int selected_item = 0; + unsigned int verbose = 0; + unsigned int i = 0, j = 0; + unsigned int range_start = 0, range_stop = 1; + double rand_f = 0.0f; + char * item_list [MAX_ITEMS] = {0}; + + for(i=1; i= argc) usage(); + seed = atoi(argv[i]); + continue; + } + /* num items to select */ + if(!strcmp(argv[i],"-n")) { + if(++i >= argc) usage(); + num_items = atoi(argv[i]); + continue; + } + /* num dummy runs before the item selection is started - similar to different seed */ + if(!strcmp(argv[i],"-d")) { + if(++i >= argc) usage(); + 
num_dummyruns = atoi(argv[i]); + continue; + } + /* select item within given range instead of a subset */ + if(!strcmp(argv[i],"-r")) { + mode = FROM_RANGE; + if(++i >= argc) usage(); + range_start = atoi(argv[i]); + if(++i >= argc) usage(); + range_stop = atoi(argv[i]); + continue; + } + break; + } + + if(mode == SUBSET) { + /* list items are the last */ + if(i == (argc)) usage(); + items_in_list = argc - i; + if(items_in_list > MAX_ITEMS) + { + fprintf(stderr, "only up to %u items are supported by this tool\n", MAX_ITEMS); + exit(-2); + } + } + + for(i=0; i items_in_list) + { + fprintf(stderr, "requested %u items, but only %u in list\n", num_items, items_in_list); + exit (-1); + } + + for(i=0; iselected_item && i