Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
3f0b2bd
benchmark update
gshigin Aug 25, 2025
7ae736a
Added tokenizer traverse benchmark
gshigin Aug 25, 2025
fc28c5b
Merge branch 'pp' into scraper-opt
gshigin Aug 26, 2025
e49a92f
small ref
gshigin Aug 26, 2025
4656f71
initial encoding
gshigin Aug 29, 2025
0027219
read + passing all tests
gshigin Aug 30, 2025
9b62e6a
read/write optimization
gshigin Sep 1, 2025
aca4850
sample optimization
gshigin Sep 1, 2025
26d0af4
scraper read benchmark 2 shards
gshigin Sep 1, 2025
9f623c7
restore MarkupBuffer base class
gshigin Sep 1, 2025
2036d7e
change MetricParser into parse_metric method
gshigin Sep 1, 2025
1113d04
parse_metric refactoring
gshigin Sep 1, 2025
2455a51
benchmark update
gshigin Sep 2, 2025
9457484
allocated_memory fix
gshigin Sep 3, 2025
00c833e
BenchmarkScraperParse allocation speed fix
gshigin Sep 3, 2025
b8c7aa4
change codec 1234 to 0124
gshigin Sep 3, 2025
c6251c4
general opts
gshigin Sep 3, 2025
ce8ac34
add padding
gshigin Sep 3, 2025
ef7f541
likelyhood optimizations
gshigin Sep 3, 2025
4a8dac4
parse_metric reorganize
gshigin Sep 4, 2025
0f93e8f
1ms on read optimization
gshigin Sep 4, 2025
1d8fd63
somehow its faster
gshigin Sep 4, 2025
adc6a9f
read opts
gshigin Sep 5, 2025
8249e7f
LabelSet reserve to resize
gshigin Sep 5, 2025
8d1ec65
LabelSet reserve to resize
gshigin Sep 5, 2025
7795763
scraper fixes
gshigin Sep 9, 2025
95d4519
sample encoding + tests
gshigin Sep 10, 2025
c4acfaf
scraper fixes
gshigin Sep 10, 2025
86b5b04
LabelCodec + tests
gshigin Sep 10, 2025
d749eb9
fixes
gshigin Sep 10, 2025
cb9446a
refactoring
gshigin Sep 10, 2025
853e2fa
Merge branch 'pp' into scraper-opt
gshigin Sep 10, 2025
c54628e
tidy fix
gshigin Sep 10, 2025
2ac3cbf
x64-x86 double infinity fix
gshigin Sep 10, 2025
376aba8
small opts
gshigin Sep 11, 2025
d87a4d6
undo pop_back
gshigin Sep 11, 2025
8c416ee
review fixes
gshigin Sep 16, 2025
e2a0099
more review fixes
gshigin Sep 16, 2025
371ae93
more review fixes
gshigin Sep 17, 2025
212cedc
more review fixes
gshigin Sep 17, 2025
97d1839
tidy fix
gshigin Sep 17, 2025
f720a35
Merge branch 'pp' into scraper-opt
gshigin Sep 24, 2025
0251a2d
Merge remote-tracking branch 'origin/pp' into scraper-opt
gshigin Mar 25, 2026
65f4e2c
bug fixes
gshigin Mar 25, 2026
68c399d
Merge remote-tracking branch 'origin/pp' into scraper-opt
gshigin Apr 29, 2026
adf4202
docs
gshigin Apr 29, 2026
9328f35
Merge remote-tracking branch 'origin/pp' into scraper-opt
gshigin May 6, 2026
032fc41
scraper 3f0b2bd72
gshigin May 13, 2026
72bd60d
scraper 4656f7137
gshigin May 13, 2026
688d62a
scraper 0027219f0
gshigin May 13, 2026
04bcf6b
scraper 9b62e6ae
gshigin May 13, 2026
6bbe705
scraper 2036d7ea
gshigin May 13, 2026
df0808d
scraper final
gshigin May 13, 2026
f3e0c94
new final with opts
gshigin May 13, 2026
289f093
Merge remote-tracking branch 'origin/pp' into scraper-remastered
gshigin May 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 60 additions & 22 deletions pp/wal/hashdex/scraper/encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

#include <bit>
#include <cmath>
#include <cstring>
#include <string>
#include <string_view>

#include "bare_bones/bit.h"
#include "marked_common.h"
Expand Down Expand Up @@ -36,16 +39,16 @@ class SampleCodec {
using encoding::SampleValueType;

const double val = sample.value();
if (const auto type = layout.value_type(); type == SampleValueType::kUint32) [[likely]] {
out = write_value(out, static_cast<uint32_t>(val));
} else if (type == SampleValueType::kDouble) [[likely]] {
out = write_value(out, val);
} else if (type == SampleValueType::kUint8) [[unlikely]] {
if (const auto type = layout.value_type(); type == SampleValueType::kUint8) [[likely]] {
out = write_value(out, static_cast<uint8_t>(val));
} else if (type == SampleValueType::kUint16) [[unlikely]] {
} else if (type == SampleValueType::kUint16) [[likely]] {
out = write_value(out, static_cast<uint16_t>(val));
} else if (type == SampleValueType::kUint32) [[likely]] {
out = write_value(out, static_cast<uint32_t>(val));
} else if (type == SampleValueType::kFloat) [[unlikely]] {
out = write_value(out, static_cast<float>(val));
} else if (type == SampleValueType::kDouble) [[unlikely]] {
out = write_value(out, val);
}

if (layout.has_timestamp()) [[unlikely]] {
Expand All @@ -69,23 +72,23 @@ class SampleCodec {
uint64_t chunk;
std::memcpy(&chunk, in, sizeof(chunk));

if (const auto type = layout.value_type(); type == SampleValueType::kUint32) [[likely]] {
val = static_cast<double>(static_cast<uint32_t>(chunk));
in += sizeof(uint32_t);
} else if (type == SampleValueType::kDouble) [[likely]] {
val = std::bit_cast<double>(chunk);
in += sizeof(double);
} else if (type == SampleValueType::kUint8) [[unlikely]] {
if (const auto type = layout.value_type(); type == SampleValueType::kUint8) [[likely]] {
val = static_cast<double>(static_cast<uint8_t>(chunk));
in += sizeof(uint8_t);
} else if (type == SampleValueType::kUint16) [[unlikely]] {
} else if (type == SampleValueType::kUint16) [[likely]] {
val = static_cast<double>(static_cast<uint16_t>(chunk));
in += sizeof(uint16_t);
} else if (type == SampleValueType::kUint32) [[likely]] {
val = static_cast<double>(static_cast<uint32_t>(chunk));
in += sizeof(uint32_t);
} else if (type == SampleValueType::kFloat) [[unlikely]] {
val = static_cast<double>(std::bit_cast<float>(static_cast<uint32_t>(chunk)));
in += sizeof(float);
} else if (type == SampleValueType::kZero) [[unlikely]] {
val = 0.0;
} else if (type == SampleValueType::kDouble) [[unlikely]] {
val = std::bit_cast<double>(chunk);
in += sizeof(double);
} else {
val = Prometheus::kNormalNan;
}
Expand Down Expand Up @@ -158,23 +161,59 @@ class LabelCodec {
MarkedLabel label;
};

// Decodes the label stored at `in`, appends it to `label_set` and moves `in`
// to the first byte after the encoded label.
//
// LabelSet   any type exposing append(name, value).
// in         cursor into the encoded stream; advanced past the decoded label.
// base       start of the buffer that the decoded offsets are relative to.
// label_set  receives the decoded name/value pair.
//
// sizeof(uint64_t) bytes are always copied from `in`, whatever the real size
// of the encoded label, so that many bytes must be readable there.
template <class LabelSet>
static PROMPP_ALWAYS_INLINE void decode_and_append(const char*& in, const char* base, LabelSet& label_set) noexcept {
  uint64_t word;
  std::memcpy(&word, in, sizeof(word));
  const auto marker = static_cast<uint8_t>(word);

  // Fast path: layout byte followed by four one-byte fields
  // (name offset, name length, value offset, value length).
  // NOTE(review): unlike the generic path below, this branch never checks
  // is_reserved_name() - presumably the encoder never emits this layout for
  // the reserved name; confirm against the encoder.
  if (marker == 0b01010101) [[likely]] {
    const auto name_offset = static_cast<uint8_t>(word >> 8);
    const auto name_length = static_cast<uint8_t>(word >> 16);
    const auto value_offset = static_cast<uint8_t>(word >> 24);
    const auto value_length = static_cast<uint8_t>(word >> 32);

    label_set.append(std::string_view(base + name_offset, name_length), std::string_view(base + value_offset, value_length));
    in += 5;
    return;
  }

  // Fast path: layout byte followed by a one-byte value length. The value
  // starts at `base` and the name is the metric-name label.
  if (marker == 0b01000000) [[likely]] {
    const auto value_length = static_cast<uint8_t>(word >> 8);

    label_set.append(Prometheus::kMetricLabelName, std::string_view(base, value_length));
    in += 2;
    return;
  }

  // Every other layout goes through the shared decoder.
  const auto decoded = decode_from_chunk(in, word, marker);
  in = decoded.next;

  const auto& label = decoded.label;
  const std::string_view value(base + label.value.offset, label.value.length);
  if (!label.name.is_reserved_name()) [[likely]] {
    label_set.append(std::string_view(base + label.name.offset, label.name.length), value);
  } else {
    label_set.append(Prometheus::kMetricLabelName, value);
  }
}

// Decodes a single label starting at `in`.
//
// sizeof(uint64_t) bytes are copied from `in` up front; the lowest byte of
// that word is the layout byte handed to decode_from_chunk together with the
// whole word. Returns whatever decode_from_chunk produces for that layout.
static DecodeResult decode(const char* in) noexcept {
  uint64_t word;
  std::memcpy(&word, in, sizeof(word));

  return decode_from_chunk(in, word, static_cast<uint8_t>(word));
}

private:
// Selects the decoder that matches `layout`.
//
// in      pointer to the layout byte of the encoded label.
// chunk   the bytes at `in`, already loaded into a 64-bit word by the caller.
// layout  the layout byte (lowest byte of `chunk`).
//
// Returns the decoded label together with the pointer to the byte that
// follows it.
static PROMPP_ALWAYS_INLINE DecodeResult decode_from_chunk(const char* in, const uint64_t chunk, const uint8_t layout) noexcept {
  constexpr uint8_t kFourByteLabel = 0b01010101;
  constexpr uint8_t kValueLengthOnly = 0b01000000;
  constexpr uint8_t kLowNibbleMask = 0x0F;

  if (layout == kFourByteLabel) [[likely]] {
    return decode_4_bytes(in, chunk);
  }

  // Layout byte plus a one-byte value length: both name fields and the value
  // offset are zero, and the label occupies two bytes in total.
  if (layout == kValueLengthOnly) [[likely]] {
    const auto value_length = static_cast<uint8_t>(chunk >> 8);
    return DecodeResult{.next = in + 2, .label = {.name = {.offset = 0, .length = 0}, .value = {.offset = 0, .length = value_length}}};
  }

  // A zero low nibble selects the value-only decoder.
  if ((layout & kLowNibbleMask) == 0) [[likely]] {
    return decode_value_only(in, chunk, layout);
  }

  // The generic decoder is handed the byte right after the layout byte.
  return decode_generic(in + 1, layout);
}

private:
static PROMPP_ALWAYS_INLINE char* encode_value_only(char* out, const uint32_t label_value_offset, const uint32_t label_value_length) noexcept {
char* start = out++;

Expand Down Expand Up @@ -348,16 +387,15 @@ class LayoutCountCodec {
};

// Decodes the layout marker and the label count stored at `in`.
//
// The marker occupies the first sizeof(LayoutMarker) bytes. The label count
// follows it in layout.size_length_in_bytes() bytes and is copied into the
// low-address bytes of a zero-initialised uint32_t, i.e. it is read in host
// byte order.
//
// in  pointer to the encoded layout marker.
//
// Returns the pointer to the first byte after the count, the decoded marker
// and the label count.
static PROMPP_ALWAYS_INLINE DecodeResult decode(const char* in) noexcept {
  LayoutMarker layout{};
  std::memcpy(&layout, in, sizeof(layout));

  // A one-byte count is expected to be the common case: read it directly
  // instead of going through the variable-length copy below.
  if (layout.size_length_in_bytes() == sizeof(uint8_t)) [[likely]] {
    return {in + sizeof(layout) + sizeof(uint8_t), layout, static_cast<uint32_t>(static_cast<uint8_t>(in[sizeof(layout)]))};
  }

  // NOTE(review): assumes size_length_in_bytes() never exceeds
  // sizeof(uint32_t); a larger value would write past labels_count - confirm
  // against LayoutMarker.
  uint32_t labels_count = 0;
  std::memcpy(&labels_count, in + sizeof(layout), layout.size_length_in_bytes());

  return {in + sizeof(layout) + layout.size_length_in_bytes(), layout, labels_count};
}
Expand Down
15 changes: 3 additions & 12 deletions pp/wal/hashdex/scraper/marked.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,10 @@ class Metric {
const auto [next_ptr, layout, labels_count] = encoding::LayoutCountCodec::decode(ptr);
ptr = next_ptr;

ts.label_set().resize(labels_count);

auto label_iter = ts.label_set().begin();
ts.label_set().reserve(labels_count);
const auto buf_ptr = buffer_.data() + item_->base_offset;
for (uint32_t i = 0; i < labels_count; ++i) {
const auto [next_ptr, label] = encoding::LabelCodec::decode(ptr);
ptr = next_ptr;

if (const auto buf_ptr = buffer_.data() + item_->base_offset; label.name.is_reserved_name()) [[unlikely]] {
std::construct_at(label_iter++, Prometheus::kMetricLabelName, std::string_view(buf_ptr + label.value.offset, label.value.length));
} else {
std::construct_at(label_iter++, std::string_view(buf_ptr + label.name.offset, label.name.length),
std::string_view(buf_ptr + label.value.offset, label.value.length));
}
encoding::LabelCodec::decode_and_append(ptr, buf_ptr, ts.label_set());
}

auto [p, sample] = encoding::SampleCodec::decode(ptr, layout, default_timestamp_);
Expand Down
Loading