Skip to content

Commit 04ce318

Browse files
authored
gh-146256: Add a --jsonl collector to profiling.sampling (#146257)
1 parent f025dba commit 04ce318

12 files changed

Lines changed: 1037 additions & 14 deletions

File tree

Lib/profiling/sampling/__init__.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,15 @@
99
from .stack_collector import CollapsedStackCollector
1010
from .heatmap_collector import HeatmapCollector
1111
from .gecko_collector import GeckoCollector
12+
from .jsonl_collector import JsonlCollector
1213
from .string_table import StringTable
1314

14-
__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "HeatmapCollector", "GeckoCollector", "StringTable")
15+
__all__ = (
16+
"Collector",
17+
"PstatsCollector",
18+
"CollapsedStackCollector",
19+
"HeatmapCollector",
20+
"GeckoCollector",
21+
"JsonlCollector",
22+
"StringTable",
23+
)

Lib/profiling/sampling/binary_reader.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from .gecko_collector import GeckoCollector
66
from .stack_collector import FlamegraphCollector, CollapsedStackCollector
7+
from .jsonl_collector import JsonlCollector
78
from .pstats_collector import PstatsCollector
89

910

@@ -117,6 +118,8 @@ def convert_binary_to_format(input_file, output_file, output_format,
117118
collector = PstatsCollector(interval)
118119
elif output_format == 'gecko':
119120
collector = GeckoCollector(interval)
121+
elif output_format == "jsonl":
122+
collector = JsonlCollector(interval)
120123
else:
121124
raise ValueError(f"Unknown output format: {output_format}")
122125

Lib/profiling/sampling/cli.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .stack_collector import CollapsedStackCollector, FlamegraphCollector, DiffFlamegraphCollector
2121
from .heatmap_collector import HeatmapCollector
2222
from .gecko_collector import GeckoCollector
23+
from .jsonl_collector import JsonlCollector
2324
from .binary_collector import BinaryCollector
2425
from .binary_reader import BinaryReader
2526
from .constants import (
@@ -101,6 +102,7 @@ def __call__(self, parser, namespace, values, option_string=None):
101102
"diff_flamegraph": "html",
102103
"gecko": "json",
103104
"heatmap": "html",
105+
"jsonl": "jsonl",
104106
"binary": "bin",
105107
}
106108

@@ -111,6 +113,7 @@ def __call__(self, parser, namespace, values, option_string=None):
111113
"diff_flamegraph": DiffFlamegraphCollector,
112114
"gecko": GeckoCollector,
113115
"heatmap": HeatmapCollector,
116+
"jsonl": JsonlCollector,
114117
"binary": BinaryCollector,
115118
}
116119

@@ -488,6 +491,13 @@ def _add_format_options(parser, include_compression=True, include_binary=True):
488491
action=DiffFlamegraphAction,
489492
help="Generate differential flamegraph comparing current profile to `BASELINE` binary file",
490493
)
494+
format_group.add_argument(
495+
"--jsonl",
496+
action="store_const",
497+
const="jsonl",
498+
dest="format",
499+
help="Generate newline-delimited JSON (JSONL) for programmatic consumers",
500+
)
491501
if include_binary:
492502
format_group.add_argument(
493503
"--binary",
@@ -611,15 +621,18 @@ def _sort_to_mode(sort_choice):
611621
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
612622

613623
def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=False,
614-
output_file=None, compression='auto', diff_baseline=None):
624+
mode=None, output_file=None, compression='auto',
625+
diff_baseline=None):
615626
"""Create the appropriate collector based on format type.
616627
617628
Args:
618-
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary', 'diff_flamegraph')
629+
format_type: The output format ('pstats', 'collapsed', 'flamegraph',
630+
'gecko', 'heatmap', 'jsonl', 'binary', 'diff_flamegraph')
619631
sample_interval_usec: Sampling interval in microseconds
620632
skip_idle: Whether to skip idle samples
621633
opcodes: Whether to collect opcode information (only used by gecko format
622634
for creating interval markers in Firefox Profiler)
635+
mode: Profiling mode for collectors that expose it in metadata
623636
output_file: Output file path (required for binary format)
624637
compression: Compression type for binary format ('auto', 'zstd', 'none')
625638
diff_baseline: Path to baseline binary file for differential flamegraph
@@ -655,6 +668,11 @@ def _create_collector(format_type, sample_interval_usec, skip_idle, opcodes=Fals
655668
skip_idle = False
656669
return collector_class(sample_interval_usec, skip_idle=skip_idle, opcodes=opcodes)
657670

671+
if format_type == "jsonl":
672+
return collector_class(
673+
sample_interval_usec, skip_idle=skip_idle, mode=mode
674+
)
675+
658676
return collector_class(sample_interval_usec, skip_idle=skip_idle)
659677

660678

@@ -1142,7 +1160,7 @@ def _handle_attach(args):
11421160

11431161
# Create the appropriate collector
11441162
collector = _create_collector(
1145-
args.format, args.sample_interval_usec, skip_idle, args.opcodes,
1163+
args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
11461164
output_file=output_file,
11471165
compression=getattr(args, 'compression', 'auto'),
11481166
diff_baseline=args.diff_baseline
@@ -1249,7 +1267,7 @@ def _handle_run(args):
12491267

12501268
# Create the appropriate collector
12511269
collector = _create_collector(
1252-
args.format, args.sample_interval_usec, skip_idle, args.opcodes,
1270+
args.format, args.sample_interval_usec, skip_idle, args.opcodes, mode,
12531271
output_file=output_file,
12541272
compression=getattr(args, 'compression', 'auto'),
12551273
diff_baseline=args.diff_baseline

Lib/profiling/sampling/collector.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,33 @@ def normalize_location(location):
2020
"""Normalize location to a 4-tuple format.
2121
2222
Args:
23-
location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None
23+
location: tuple (lineno, end_lineno, col_offset, end_col_offset),
24+
an integer line number, or None
2425
2526
Returns:
2627
tuple: (lineno, end_lineno, col_offset, end_col_offset)
2728
"""
2829
if location is None:
2930
return DEFAULT_LOCATION
31+
if isinstance(location, int):
32+
return (location, location, -1, -1)
3033
return location
3134

3235

3336
def extract_lineno(location):
3437
"""Extract lineno from location.
3538
3639
Args:
37-
location: tuple (lineno, end_lineno, col_offset, end_col_offset) or None
40+
location: tuple (lineno, end_lineno, col_offset, end_col_offset),
41+
an integer line number, or None
3842
3943
Returns:
4044
int: The line number (0 for synthetic frames)
4145
"""
4246
if location is None:
4347
return 0
48+
if isinstance(location, int):
49+
return location
4450
return location[0]
4551

4652
def _is_internal_frame(frame):

Lib/profiling/sampling/constants.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,14 @@
1111
PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks
1212
PROFILING_MODE_EXCEPTION = 4 # Only samples when thread has an active exception
1313

14+
PROFILING_MODE_NAMES = {
15+
PROFILING_MODE_WALL: "wall",
16+
PROFILING_MODE_CPU: "cpu",
17+
PROFILING_MODE_GIL: "gil",
18+
PROFILING_MODE_ALL: "all",
19+
PROFILING_MODE_EXCEPTION: "exception",
20+
}
21+
1422
# Sort mode constants
1523
SORT_MODE_NSAMPLES = 0
1624
SORT_MODE_TOTTIME = 1

0 commit comments

Comments
 (0)