Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d5a49c7
feat(p56): extend speech voltmeter to support arbitrary bit depths
Apr 28, 2026
5aa41cb
chore(build): update CMake minimum version and clean up .gitignore
Apr 28, 2026
447610c
🔧 chore(build): enforce 32-bit build configuration
May 11, 2026
9a466a0
🔧 chore: add /build to .gitignore
May 11, 2026
44a86e0
feat(sv56): enhance speech voltmeter for arbitrary bit depths
May 21, 2026
827065c
🧪 test(sv56): update test configurations and working directories
May 26, 2026
dafb0df
🛠️ chore(cmake): update minimum version and remove 32-bit enforcement
May 28, 2026
9782426
feat(sv56): add variable bit-width support and new utilities
May 28, 2026
0c7aa82
🔥 chore(sv56): remove test data files from .gitignore
May 29, 2026
353558a
feat(sv56): add reference and source files for 16, 24, and 32 bit audio
May 29, 2026
a454ec9
doc(sv56): update manual for arbitrary bit depth support
May 27, 2026
3351935
feat(sv56): re-integrate WAV I/O with multi-bit-depth support
Jun 4, 2026
1dd843d
fix(test): use CRLF-tolerant text comparison for log verify tests
Jun 4, 2026
cb2301d
doc(sv56): update README with WAV support and bit-depth comparison
Jun 4, 2026
c1bfec2
test(sv56): add 12-bit test cases for bitno backward compatibility
Jun 5, 2026
4a1c219
refactor(sv56): remove sv56-util.h, use wav_io.h in actlevel.c
Jun 8, 2026
3352078
Merge pull request #1 from ludomal/sv56_wavio
jr2804 Jun 9, 2026
f5e49bd
✨ feat(wav_io): add support for 8/16/24/32-bit PCM and 32-bit float WAV
Jun 9, 2026
1e1056a
🐛 fix(wav_io): correct bps check for 8-bit samples (not 1-bit!)
Jun 9, 2026
545c35e
feat(sv56): update actlev for 24/32-bit WAV support
Jun 9, 2026
b2cbeca
Merge pull request #2 from ludomal/sv56_wavio
jr2804 Jun 9, 2026
9b48cb6
Merge branch 'feature/p56-bitdepth-extension' of https://github.com/j…
Jun 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Comment thread
jr2804 marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ sweep.[aru]*
*.tst
!*voic*.tst

src/**/test_data/

src/g722/test_data/inpsp.bs
src/g722/test_data/outsp.e-d
src/g727/test_data/rn54_a.out
Expand Down Expand Up @@ -97,3 +99,8 @@ bld/
.vs/
*vcxproj*
*.sln*
.vscode/
/build
*.double
stderr.txt
stdout.txt
6 changes: 4 additions & 2 deletions CMakeLists.txt
Comment thread
jr2804 marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.10)

project(STL2023 VERSION 2023)
add_definitions( -DVERSION_STL="${CMAKE_PROJECT_NAME}" )

add_definitions( -DVERSION_STL="${CMAKE_PROJECT_NAME}" -DCMAKE_POLICY_VERSION_MINIMUM=3.5 )

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

Expand Down
79 changes: 52 additions & 27 deletions doc/manual/sv56.tex
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,11 @@ \section{Implementation}
{\em n}.}:
\begin{quote} \normalsize
{\em f} \hfill \parbox{100mm}{\SF Sampling frequency, in Hz }\\
{\em a[15]} \hfill \parbox{100mm}{\SF Activity count }\\
{\em c[15]} \hfill \parbox{100mm}{\SF Threshold level }\\
{\em hang[15]} \hfill \parbox{100mm}{\SF Hangover count }\\
{\em a[SVP56\_MAX\_THRESHOLDS]} \hfill \parbox{100mm}{\SF Activity count }\\
{\em c[SVP56\_MAX\_THRESHOLDS]} \hfill \parbox{100mm}{\SF Threshold level }\\
{\em hang[SVP56\_MAX\_THRESHOLDS]} \hfill \parbox{100mm}{\SF Hangover count }\\
{\em thres\_no} \hfill \parbox{100mm}{\SF Actual number of thresholds in use (bitno$-$1) }\\
{\em bitno} \hfill \parbox{100mm}{\SF Bit depth of input signal (default 16) }\\
{\em n} \hfill \parbox{100mm}{\SF Number of samples read since last
reset }\\
{\em s} \hfill \parbox{100mm}{\SF Sum of all samples since last
Expand Down Expand Up @@ -270,7 +272,7 @@ \subsection{{\tt init\_speech\_voltmeter}}
\#include "sv-p56.h"\\
void init\_speech\_voltmeter
\pbox{110mm}{
(SVP56\_state {\em *state}, double {\em f});
(SVP56\_state {\em *state}, double {\em f}, int {\em bitno});
}
}

Expand All @@ -280,13 +282,20 @@ \subsection{{\tt init\_speech\_voltmeter}}

{\tt init\_speech\_voltmeter} performs the initialization of the
speech voltmeter state variables in the structure pointed by {\em
state} to the appropriate initial values. The only value required from
the user is the sampling rate $f$ (in Hz) of the signal that the speech
voltmeter is supposed to measure. Note that when measuring new
state} to the appropriate initial values. The values required from
the user are the sampling rate $f$ (in Hz) of the signal that the speech
voltmeter is supposed to measure, and the bit depth {\em bitno} of the
input signal. Note that when measuring new
speech material, the state variable shall be re-initialized, otherwise
accumulation of previous measurements will happen and wrong measurements
will be reported.

The header file {\tt sv-p56.h} defines two constants that control the
maximum bit depth supported:
\begin{quote} \normalsize
{\tt SVP56\_MAX\_NO\_BITS} \hfill \parbox{100mm}{\SF Maximum bit depth supported (default: 32). }\\
{\tt SVP56\_MAX\_THRESHOLDS} \hfill \parbox{100mm}{\SF Maximum number of thresholds ($=$ SVP56\_MAX\_NO\_BITS $-$ 1). }
\end{quote}


{\bf Variables: }
Expand All @@ -298,6 +307,14 @@ \subsection{{\tt init\_speech\_voltmeter}}
Is the sampling rate (in Hz) of the signal to be measured in
the next calls of {\tt speech\_voltmeter}. If zero or
negative, the sampling rate is initialized to 16000 Hz.

\item[\pbox{20mm}{\em bitno}] %\rulex{1mm}\\
Is the bit depth (resolution) of the input signal. Typical
values are 16 (default for narrowband speech) and 24 or 32
for higher-resolution signals. This parameter sets the number
of thresholds used by the algorithm to {\em bitno $-$ 1}, and
initializes the peak detectors {\em maxP} and {\em maxN} to
$-2^{bitno-1}$ and $2^{bitno-1}-1$, respectively. Must not exceed {\tt SVP56\_MAX\_NO\_BITS}.
\end{Descr}

{\bf Return value: }
Expand Down Expand Up @@ -435,25 +452,31 @@ \section{Portability and compliance}
input file, which is saved in an output file. Levels are reported in
dBov.

In general, input files are in integer representation, 16-bit words,
2's complement (i.e., {\tt short} data). In UGST convention, this
data must be left-adjusted, {\em rather} than right-adjusted. Since
the speech voltmeter uses {\tt float} input data, it is necessary to
convert from {\tt short} (in the mentioned format) to {\tt float};
this is carried out by the function {\tt sh2fl()}. In addition, the
option to `normalize' the input data to the range -1..+1 is selected.
The example programs support variable bit-width input files from 8 to
32 bits per sample (controlled by the {\em bitno} parameter). Supported
storage widths are 8-bit (1 byte/sample), 16-bit (2 bytes/sample),
24-bit (3 bytes/sample), and 32-bit (4 bytes/sample). For bit depths
between 12 and 15, samples are stored in 2 bytes with the least
significant bits masked to zero. Files use native byte order.

Since the speech voltmeter uses {\tt float} input data normalized to
the range --1.0..+1.0, the helper functions in {\tt sv56-util.h} handle
the conversion between raw integer samples and normalized floats:
{\tt sv56\_raw2fl()} reads raw bytes and normalizes by dividing by
$2^{bitno-1}$, and {\tt sv56\_fl2raw()} performs the inverse operation
with truncation and hard-clipping.

After the equalization factor is found, results are reported on the
screen, which varies according to the program used and some of the
command-line options.
command-line options.

While program {\tt actlevel.c} stops at this point, program {\tt
sv56demo.c} proceeds calling the function {\tt scale()} to carry out
the (amplitude) equalization using single (rather than double) float
precision. After equalization, the samples are converted back to
integer (short, right-justified) with the routine {\tt fl2sh()} using
truncation, no zero-padding of the least significant bits,
left-justification of data, and hard-clipping of data above the
overload point. After that, data is saved to the user-specified file .
raw integer format with {\tt sv56\_fl2raw()} using
truncation and hard-clipping of data above the
overload point. After that, data is saved to the user-specified file.


%-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
Expand All @@ -471,7 +494,7 @@ \section{Portability and compliance}
#include <math.h>
#include "ugstdemo.h" /* ... UGST demonstration program defs ... */
#include "sv-p56.h" /* ... SV-P56 prototypes & defs ... */
#include "ugst-utl.h" /* ... UGST utilities ... */
#include "sv56-util.h" /* ... SV56 I/O utilities ... */
#define BLK_LEN 256

void main(argc, argv)
Expand All @@ -482,7 +505,8 @@ \section{Portability and compliance}
char FileIn[180]; /* input file name */
FILE *Fi; /* input file pointers */
long N=BLK_LEN, l;
short bitno, buffer[BLK_LEN];
int bitno, bps;
unsigned char raw_buf[BLK_LEN * 4];
float Buf[BLK_LEN];
double ActiveLeveldB, sf, satur;

Expand All @@ -493,20 +517,21 @@ \section{Portability and compliance}
FIND_PAR_D(2, "_Sampling Frequency: .. ", sf, 16000);
FIND_PAR_L(3, "_A/D resolution: ...... ", bitno, 16);

/* Calculate overload point in the non-normalized range */
/* Calculate bytes per sample and overload point */
bps = sv56_bytes_per_sample(bitno);
satur = pow ((double)2.0, (double)(bitno - 1));

/* Reset- variables for speech level measurements */
init_speech_voltmeter(&state, sf);
init_speech_voltmeter(&state, sf, bitno);

/* Opening input file */
Fi = fopen(FileIn, RB);

/* Read samples ... */
while ((l = fread(buffer, N, sizeof(short), Fi)) > 0)
while ((l = fread(raw_buf, bps, N, Fi)) > 0)
{
/* ... Convert samples to float, normalizing to +1..-1 */
sh2fl((long) l, buffer, Buf, (long) state.bitno, 1);
/* ... Convert raw samples to float, normalizing to +1..-1 */
sv56_raw2fl((long) l, raw_buf, Buf, bitno);

/* ... Get the active level */
ActiveLeveldB = speech_voltmeter(Buf, (long) l, &state);
Expand All @@ -524,7 +549,7 @@ \section{Portability and compliance}
printf("\n Maximum negative value: .. %7.0f [PCM]",
SVP56_get_neg_max(state) * satur);
printf("\n Long-term energy (rms): .. %7.3f [dBov]",
SVP56_get_rms_dB(state);
SVP56_get_rms_dB(state));
printf("\n Active speech level: ..... %7.3f [dBov]", ActiveLeveldB);
printf("\n Activity factor: ......... %7.3f [%%]",
SVP56_get_activity(state));
Expand Down
34 changes: 32 additions & 2 deletions src/sv56/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,40 @@ add_executable(actlev actlevel.c sv-p56.c ../utl/ugst-utl.c)
target_link_libraries(actlev ${M_LIBRARY})

add_test(sv56demo1 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/sv56demo -q test_data/voice.src test_data/voice.prc 256 1 0 -30)
add_test(sv56demo1-verify ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/cf -q test_data/voice.nrm test_data/voice.prc)
set_tests_properties(sv56demo1 PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56demo1-verify ${CMAKE_COMMAND} -E compare_files test_data/voice.nrm test_data/voice.prc)
set_tests_properties(sv56demo1-verify PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56demo2 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/sv56demo -q -rms test_data/voice.src test_data/voice.rms 256 1 0 -30)
add_test(sv56demo2-verify ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/cf -q test_data/voice.ltl test_data/voice.rms)
set_tests_properties(sv56demo2 PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56demo2-verify ${CMAKE_COMMAND} -E compare_files test_data/voice.ltl test_data/voice.rms)
set_tests_properties(sv56demo2-verify PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56demo3 ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/actlev -q test_data/voice.src test_data/voice.nrm test_data/voice.prc test_data/voice.ltl test_data/voice.rms)
set_tests_properties(sv56demo3 PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_executable(sv56test sv56test.c sv-p56.c)
target_link_libraries(sv56test ${M_LIBRARY})

add_executable(sv56-convert sv56-convert.c)
target_link_libraries(sv56-convert ${M_LIBRARY})

add_test(sv56test-16bit ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/sv56test 16 test_data/voice.src test_data/voice_16bit.out)
set_tests_properties(sv56test-16bit PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56test-16bit-verify ${CMAKE_COMMAND} -E compare_files test_data/voice_16bit.ref test_data/voice_16bit.out)
set_tests_properties(sv56test-16bit-verify PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56test-24bit ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/sv56test 24 test_data/voice_24.src test_data/voice_24bit.out)
set_tests_properties(sv56test-24bit PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56test-24bit-verify ${CMAKE_COMMAND} -E compare_files test_data/voice_24bit.ref test_data/voice_24bit.out)
set_tests_properties(sv56test-24bit-verify PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56test-32bit ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/sv56test 32 test_data/voice_32.src test_data/voice_32bit.out)
set_tests_properties(sv56test-32bit PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_test(sv56test-32bit-verify ${CMAKE_COMMAND} -E compare_files test_data/voice_32bit.ref test_data/voice_32bit.out)
set_tests_properties(sv56test-32bit-verify PROPERTIES WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
23 changes: 16 additions & 7 deletions src/sv56/actlevel.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@

/* ... Include of utilities ... */
#include "ugst-utl.h"
#include "sv56-util.h"

/* ... Local definitions ... */
#define DEF_BLK_LEN 256 /* samples per block */
Expand Down Expand Up @@ -430,8 +431,9 @@ int main (int argc, char *argv[]) {
#endif

/* Other variables */
short buffer[4096];
unsigned char raw_buf[4096 * 4];
float Buf[4096];
int bps;
long start_byte, bitno = 16;
double sf = 16000; /* Hz */
double ActiveLeveldB, level = 0, gain = 0;
Expand Down Expand Up @@ -538,9 +540,16 @@ int main (int argc, char *argv[]) {


/* ......... SOME INITIALIZATIONS ......... */
/* Validate bitno */
if (bitno < 8 || bitno > SVP56_MAX_NO_BITS) {
fprintf (stderr, "Error: bitno must be between 8 and %d\n", SVP56_MAX_NO_BITS);
exit (1);
}
bps = sv56_bytes_per_sample ((int) bitno);

/* funny_size = strlen(funny); */
start_byte = --N1;
start_byte *= N * sizeof (short);
start_byte *= N * bps;
N2_ori = N2;

/* Overflow (saturation) point */
Expand All @@ -555,13 +564,13 @@ int main (int argc, char *argv[]) {
argc--;

/* Reset variables for speech level measurements */
init_speech_voltmeter (&state, sf);
init_speech_voltmeter (&state, sf, (int)bitno);

/* ......... FILE PREPARATION ......... */

/* Opening input file; abort if there's any problem */
#ifdef VMS
sprintf (mrs, "mrs=%d", 2 * N);
sprintf (mrs, "mrs=%d", bps * N);
#endif
if ((Fi = fopen (FileIn, RB)) == NULL)
KILL (FileIn, 2);
Expand All @@ -573,7 +582,7 @@ int main (int argc, char *argv[]) {
if (N2 == 0) {
struct stat st;
stat (FileIn, &st);
N2 = ceil (st.st_size / (double) (N * sizeof (short)));
N2 = ceil (st.st_size / (double) (N * bps));
}

/* Move pointer to 1st block of interest */
Expand All @@ -587,9 +596,9 @@ int main (int argc, char *argv[]) {
if (!quiet)
fprintf (stderr, " Processing \r");
for (i = 0; i < N2; i++) {
if ((l = fread (buffer, sizeof (short), N, Fi)) > 0) {
if ((l = fread (raw_buf, bps, N, Fi)) > 0) {
/* ... Convert samples to float */
sh2fl ((long) l, buffer, Buf, bitno, 1);
sv56_raw2fl ((long) l, raw_buf, Buf, (int) bitno);

/* ... Get the active level */
ActiveLeveldB = speech_voltmeter (Buf, (long) l, &state);
Expand Down
22 changes: 12 additions & 10 deletions src/sv56/sv-p56.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,21 +223,23 @@ double bin_interp (double upcount, double lwcount, double upthr, double lwthr, d
#define M 15.9 /* in [dB] */
#define THRES_NO 15 /* number of thresholds in the speech voltmeter */

void init_speech_voltmeter (SVP56_state * state, double sampl_freq) {
void init_speech_voltmeter (SVP56_state * state, double sampl_freq, int bitno) {
double x;
long I, j;


/* First initializations */
state->f = sampl_freq;
state->bitno = bitno;
state->thres_no = bitno - 1;
I = floor (H * state->f + 0.5);

/* Inicialization of threshold vector */
for (x = 0.5, j = 1; j <= THRES_NO; j++, x /= 2.0)
state->c[THRES_NO - j] = x;
/* Inicialization of threshold vector: geometric progression from 0.5 down to 2^-(bitno-1) */
for (x = 0.5, j = 1; j <= state->thres_no; j++, x /= 2.0)
state->c[state->thres_no - j] = x;

/* Inicialization of activity and hangover count vectors */
for (j = 0; j < THRES_NO; j++) {
for (j = 0; j < state->thres_no; j++) {
state->a[j] = 0;
state->hang[j] = I;
}
Expand All @@ -247,8 +249,8 @@ void init_speech_voltmeter (SVP56_state * state, double sampl_freq) {

/* Inicialization of other quantities referring to state variables */
state->max = 0;
state->maxP = -32768.;
state->maxN = 32767.;
state->maxP = -pow(2.0, (double)(bitno - 1));
state->maxN = pow(2.0, (double)(bitno - 1)) - 1.0;

/* Defining the 0 dB reference level in terms of normalized values */
state->refdB = 0 /* dBov */ ;
Expand Down Expand Up @@ -350,7 +352,7 @@ double speech_voltmeter (float *buffer, long smpno, SVP56_state * state) {
int I, j;
long k;
double g, x, AdB, CdB, AmdB, CmdB, ActiveSpeechLevel;
double LongTermLevel, Delta[15];
double LongTermLevel, Delta[SVP56_MAX_THRESHOLDS];


/* Some initializations */
Expand Down Expand Up @@ -380,7 +382,7 @@ double speech_voltmeter (float *buffer, long smpno, SVP56_state * state) {
state->q = g * (state->q) + (1 - g) * (state->p);

/* Applies threshold to the envelope q */
for (j = 0; j < THRES_NO; j++) {
for (j = 0; j < state->thres_no; j++) {
if ((state->q) >= state->c[j]) {
state->a[j]++;
state->hang[j] = 0;
Expand Down Expand Up @@ -412,7 +414,7 @@ double speech_voltmeter (float *buffer, long smpno, SVP56_state * state) {
return (ActiveSpeechLevel);

/* Proceed serially for steps 2 and up -- this is the most common case */
for (j = 1; j < THRES_NO; j++) {
for (j = 1; j < state->thres_no; j++) {
if (state->a[j] != 0) {
AdB = 10 * log10 (((state->sq) / state->a[j]) + MIN_LOG_OFFSET);
CdB = 20 * log10 (((double) state->c[j]) + MIN_LOG_OFFSET);
Expand Down
Loading