@@ -22233,34 +22233,25 @@ pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, si
2223322233 .warn_mismatched_indentation = true
2223422234 };
2223522235
22236- // Pre-size the arenas based on input size to reduce the number of block
22237- // allocations (and the kernel page zeroing they trigger). The ratios were
22238- // measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
22239- // The reserve call is a no-op when the capacity is at or below the default
22240- // arena block size, so small inputs don't waste an extra allocation.
22236+ /* Pre-size the arenas based on input size to reduce the number of block
22237+ * allocations (and the kernel page zeroing they trigger). The ratios were
22238+ * measured empirically: AST arena ~3.3x input (reserved as 4x), metadata arena ~1.1x input (reserved as 1.25x).
22239+ * The reserve call is a no-op when the capacity is at or below the default
22240+ * arena block size, so small inputs don't waste an extra allocation. */
2224122241 if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
2224222242 if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
2224322243
22244- // Initialize the constant pool. We're going to completely guess as to the
22245- // number of constants that we'll need based on the size of the input. The
22246- // ratio we chose here is actually less arbitrary than you might think.
22247- //
22248- // We took ~50K Ruby files and measured the size of the file versus the
22249- // number of constants that were found in those files. Then we found the
22250- // average and standard deviation of the ratios of constants/bytesize. Then
22251- // we added 1.34 standard deviations to the average to get a ratio that
22252- // would fit 75% of the files (for a two-tailed distribution). This works
22253- // because there was about a 0.77 correlation and the distribution was
22254- // roughly normal.
22255- //
22256- // This ratio will need to change if we add more constants to the constant
22257- // pool for another node type.
22258- uint32_t constant_size = ((uint32_t) size) / 95;
22244+ /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
22245+ * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
22246+ * We use 120 as a balance between over-allocation waste and resize
22247+ * frequency. Resizes are cheap with arena allocation, so we lean toward
22248+ * under-estimating. */
22249+ uint32_t constant_size = ((uint32_t) size) / 120;
2225922250 pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
2226022251
22261- // Initialize the newline list. Similar to the constant pool, we're going to
22262- // guess at the number of newlines that we'll need based on the size of the
22263- // input.
22252+ /* Initialize the line offset list. Similar to the constant pool, we are
22253+ * going to estimate the number of newlines that we will need based on the
22254+ * size of the input. */
2226422255 size_t newline_size = size / 22;
2226522256 pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
2226622257
0 commit comments