Skip to content

Commit 6f8c964

Browse files
authored
gh-149342: _remote_debugging: Fix binary profile corruption when sampling a (temporarily) empty stack (#149343)
1 parent 9dca1ff commit 6f8c964

4 files changed

Lines changed: 181 additions & 20 deletions

File tree

Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py

Lines changed: 133 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,11 @@ def tearDown(self):
148148

149149
def create_binary_file(self, samples, interval=1000, compression="none"):
150150
"""Create a test binary file and track it for cleanup."""
151+
filename, _ = self.write_binary_file(samples, interval, compression)
152+
return filename
153+
154+
def write_binary_file(self, samples, interval=1000, compression="none"):
155+
"""Like create_binary_file but also returns the writer collector."""
151156
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
152157
filename = f.name
153158
self.temp_files.append(filename)
@@ -158,7 +163,7 @@ def create_binary_file(self, samples, interval=1000, compression="none"):
158163
for sample in samples:
159164
collector.collect(sample)
160165
collector.export(None)
161-
return filename
166+
return filename, collector
162167

163168
def roundtrip(self, samples, interval=1000, compression="none"):
164169
"""Write samples to binary and read back."""
@@ -805,6 +810,133 @@ def test_invalid_file_path(self):
805810
with BinaryReader("/nonexistent/path/file.bin") as reader:
806811
reader.replay_samples(RawCollector())
807812

813+
def test_writer_handles_empty_stack_first_sample(self):
814+
"""BinaryWriter.write_sample tolerates an empty stack on a fresh thread.
815+
816+
Regression test for the C-level RLE bug in process_thread_sample: a
817+
freshly-created ThreadEntry has prev_stack_depth == 0, so an empty
818+
curr_stack compares as STACK_REPEAT against the zero-initialized
819+
previous stack. Before the fix, this fell through the
820+
`&& !is_new_thread` guard into write_sample_with_encoding, which had
821+
no handler for STACK_REPEAT and raised
822+
RuntimeError("Invalid stack encoding type"). Goes through
823+
BinaryWriter.write_sample directly so the test cannot be masked by
824+
any Python-level filtering.
825+
"""
826+
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
827+
filename = f.name
828+
self.temp_files.append(filename)
829+
830+
writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
831+
empty_sample = [
832+
make_interpreter(
833+
0, [make_thread(99, [], status=THREAD_STATUS_UNKNOWN)]
834+
)
835+
]
836+
# First sample for a fresh thread has empty frame_info — the exact
837+
# scenario that exposes the bug.
838+
writer.write_sample(empty_sample, 1000)
839+
writer.write_sample(empty_sample, 2000)
840+
# Mix in a real sample to exercise the transition out of the
841+
# empty-stack RLE buffer.
842+
real_sample = [
843+
make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
844+
]
845+
writer.write_sample(real_sample, 3000)
846+
writer.finalize()
847+
848+
reader_collector = RawCollector()
849+
with BinaryReader(filename) as reader:
850+
count = reader.replay_samples(reader_collector)
851+
# Empty-stack samples are recorded as STACK_REPEAT records with
852+
# depth-0 stacks; the file must replay all three samples.
853+
self.assertEqual(count, 3)
854+
855+
def test_writer_handles_mixed_empty_and_real_first_sample(self):
856+
"""First sample with one empty + one real thread roundtrips through C."""
857+
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
858+
filename = f.name
859+
self.temp_files.append(filename)
860+
861+
writer = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
862+
sample = [
863+
make_interpreter(
864+
0,
865+
[
866+
make_thread(1, [make_frame("a.py", 1, "f")]),
867+
make_thread(99, [], status=THREAD_STATUS_UNKNOWN),
868+
],
869+
)
870+
]
871+
# Two samples so RLE state is exercised.
872+
writer.write_sample(sample, 1000)
873+
writer.write_sample(sample, 2000)
874+
writer.finalize()
875+
876+
# Replay must succeed without raising RuntimeError, and the real
877+
# thread's frames must round-trip.
878+
reader_collector = RawCollector()
879+
with BinaryReader(filename) as reader:
880+
reader.replay_samples(reader_collector)
881+
self.assertIn((0, 1), reader_collector.by_thread)
882+
self.assertEqual(len(reader_collector.by_thread[(0, 1)]), 2)
883+
884+
def test_writer_total_samples_after_finalize_matches_reader(self):
885+
"""BinaryWriter.total_samples after finalize() matches the reader's count."""
886+
# Five IDENTICAL samples force every sample beyond the first into the
887+
# per-thread RLE buffer. Regression for the cached_total_samples
888+
# ordering bug: capturing the cache BEFORE binary_writer_finalize()
889+
# missed the buffered samples that flush_pending_rle() counts. Keep
890+
# the samples identical to preserve coverage. See gh-149342.
891+
samples = [
892+
[make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])]
893+
] * 5
894+
filename, writer_collector = self.write_binary_file(samples)
895+
reader_collector = RawCollector()
896+
with BinaryReader(filename) as reader:
897+
replayed = reader.replay_samples(reader_collector)
898+
self.assertEqual(writer_collector.total_samples, len(samples))
899+
self.assertEqual(writer_collector.total_samples, replayed)
900+
901+
def test_writer_total_samples_after_context_manager_matches_reader(self):
902+
"""total_samples after `with BinaryWriter(...)` matches the reader's count.
903+
904+
Regression for the asymmetry between finalize() and __exit__ in
905+
module.c: __exit__ also calls binary_writer_finalize and must
906+
preserve cached_total_samples like finalize() does, otherwise the
907+
getter returns 0 once self->writer is NULL.
908+
"""
909+
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
910+
filename = f.name
911+
self.temp_files.append(filename)
912+
913+
sample = [
914+
make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
915+
]
916+
with _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0) as w:
917+
for i in range(5):
918+
w.write_sample(sample, i * 1000)
919+
self.assertEqual(w.total_samples, 5)
920+
921+
reader_collector = RawCollector()
922+
with BinaryReader(filename) as reader:
923+
self.assertEqual(reader.replay_samples(reader_collector), 5)
924+
925+
def test_writer_total_samples_after_close_returns_zero(self):
926+
"""close() discards data; total_samples reflects no cached count."""
927+
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
928+
filename = f.name
929+
self.temp_files.append(filename)
930+
931+
w = _remote_debugging.BinaryWriter(filename, 1000, 0, compression=0)
932+
sample = [
933+
make_interpreter(0, [make_thread(1, [make_frame("a.py", 1, "f")])])
934+
]
935+
for i in range(5):
936+
w.write_sample(sample, i * 1000)
937+
w.close()
938+
self.assertEqual(w.total_samples, 0)
939+
808940

809941
class TestBinaryEncodings(BinaryFormatTestBase):
810942
"""Tests specifically targeting different stack encodings."""
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fix :mod:`!_remote_debugging` binary writing so that sampling a thread
2+
whose Python frame stack is empty (for example while it is in a C call or
3+
mid-syscall) no longer raises ``RuntimeError("Invalid stack encoding
4+
type")``, and so that ``BinaryWriter.total_samples`` after :meth:`!finalize`
5+
or context-manager exit includes samples flushed from the RLE buffer.
6+
Patch by Maurycy Pawłowski-Wieroński.

Modules/_remote_debugging/binary_io_writer.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ writer_get_or_create_thread_entry(BinaryWriter *writer, uint64_t thread_id,
484484
entry->prev_stack_capacity = MAX_STACK_DEPTH;
485485
entry->pending_rle_capacity = INITIAL_RLE_CAPACITY;
486486

487-
entry->prev_stack = PyMem_Malloc(entry->prev_stack_capacity * sizeof(uint32_t));
487+
entry->prev_stack = PyMem_Calloc(entry->prev_stack_capacity, sizeof(uint32_t));
488488
if (!entry->prev_stack) {
489489
PyErr_NoMemory();
490490
return NULL;
@@ -938,9 +938,8 @@ process_thread_sample(BinaryWriter *writer, PyObject *thread_info,
938938
}
939939
uint8_t status = (uint8_t)status_long;
940940

941-
int is_new_thread = 0;
942941
ThreadEntry *entry = writer_get_or_create_thread_entry(
943-
writer, thread_id, interpreter_id, &is_new_thread);
942+
writer, thread_id, interpreter_id, NULL);
944943
if (!entry) {
945944
return -1;
946945
}
@@ -963,8 +962,15 @@ process_thread_sample(BinaryWriter *writer, PyObject *thread_info,
963962
curr_stack, curr_depth,
964963
&shared_count, &pop_count, &push_count);
965964

966-
if (encoding == STACK_REPEAT && !is_new_thread) {
967-
/* Buffer this sample for RLE */
965+
if (encoding == STACK_REPEAT) {
966+
/* Buffer this sample for RLE.
967+
*
968+
* STACK_REPEAT also covers the "first sample for a fresh thread,
969+
* empty stack" case: a new ThreadEntry has prev_stack_depth == 0
970+
* and a zero-initialized prev_stack, so compare_stacks() returns
971+
* STACK_REPEAT against an empty curr_stack (depth 0). Buffering
972+
* it here is correct; the RLE flush path emits it as a normal
973+
* STACK_REPEAT record. */
968974
if (GROW_ARRAY(entry->pending_rle, entry->pending_rle_count,
969975
entry->pending_rle_capacity, PendingRLESample) < 0) {
970976
return -1;

Modules/_remote_debugging/module.c

Lines changed: 31 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1544,6 +1544,24 @@ _remote_debugging_BinaryWriter_write_sample_impl(BinaryWriterObject *self,
15441544
Py_RETURN_NONE;
15451545
}
15461546

1547+
/* Finalize the writer, cache total_samples, and destroy it.
1548+
*
1549+
* The cache assignment must happen AFTER binary_writer_finalize(): finalize
1550+
* flushes pending RLE samples via flush_pending_rle(), which increments
1551+
* writer->total_samples for each one. Caching before finalize would lose
1552+
* those trailing samples. */
1553+
static int
1554+
binary_writer_finalize_and_cache(BinaryWriterObject *self)
1555+
{
1556+
if (binary_writer_finalize(self->writer) < 0) {
1557+
return -1;
1558+
}
1559+
self->cached_total_samples = self->writer->total_samples;
1560+
binary_writer_destroy(self->writer);
1561+
self->writer = NULL;
1562+
return 0;
1563+
}
1564+
15471565
/*[clinic input]
15481566
_remote_debugging.BinaryWriter.finalize
15491567
@@ -1561,16 +1579,10 @@ _remote_debugging_BinaryWriter_finalize_impl(BinaryWriterObject *self)
15611579
return NULL;
15621580
}
15631581

1564-
/* Save total_samples before finalizing */
1565-
self->cached_total_samples = self->writer->total_samples;
1566-
1567-
if (binary_writer_finalize(self->writer) < 0) {
1582+
if (binary_writer_finalize_and_cache(self) < 0) {
15681583
return NULL;
15691584
}
15701585

1571-
binary_writer_destroy(self->writer);
1572-
self->writer = NULL;
1573-
15741586
Py_RETURN_NONE;
15751587
}
15761588

@@ -1624,14 +1636,18 @@ _remote_debugging_BinaryWriter___exit___impl(BinaryWriterObject *self,
16241636
if (self->writer) {
16251637
/* Only finalize on normal exit (no exception) */
16261638
if (exc_type == Py_None) {
1627-
if (binary_writer_finalize(self->writer) < 0) {
1628-
binary_writer_destroy(self->writer);
1629-
self->writer = NULL;
1639+
if (binary_writer_finalize_and_cache(self) < 0) {
1640+
if (self->writer) {
1641+
binary_writer_destroy(self->writer);
1642+
self->writer = NULL;
1643+
}
16301644
return NULL;
16311645
}
16321646
}
1633-
binary_writer_destroy(self->writer);
1634-
self->writer = NULL;
1647+
else {
1648+
binary_writer_destroy(self->writer);
1649+
self->writer = NULL;
1650+
}
16351651
}
16361652
Py_RETURN_FALSE;
16371653
}
@@ -1658,8 +1674,9 @@ _remote_debugging_BinaryWriter_get_stats_impl(BinaryWriterObject *self)
16581674
}
16591675

16601676
static PyObject *
1661-
BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure)
1677+
BinaryWriter_get_total_samples(PyObject *op, void *closure)
16621678
{
1679+
BinaryWriterObject *self = BinaryWriter_CAST(op);
16631680
if (!self->writer) {
16641681
/* Use cached value after finalize/close */
16651682
return PyLong_FromUnsignedLong(self->cached_total_samples);
@@ -1668,7 +1685,7 @@ BinaryWriter_get_total_samples(BinaryWriterObject *self, void *closure)
16681685
}
16691686

16701687
static PyGetSetDef BinaryWriter_getset[] = {
1671-
{"total_samples", (getter)BinaryWriter_get_total_samples, NULL, "Total samples written", NULL},
1688+
{"total_samples", BinaryWriter_get_total_samples, NULL, "Total samples written", NULL},
16721689
{NULL}
16731690
};
16741691

0 commit comments

Comments
 (0)