Skip to content

Commit 2b293a0

Browse files
WebAssembly 2025 queries (#4239)
* For the WebAssembly Report 2025, updated all the queries from previous years (2021 & 2022) to the new schema where applicable. Added queries for wasm usage counts by pages and by distinct wasm names (counts.sql & counts_distinct_name.sql). Added a query for the various compression methods (compression_methods.sql). Added queries for wasm component usage for domains and cross domains (domains.sql & cross_domain.sql). Added a query for the various instruction kinds (instruction_kinds.sql). Added a query for the usage of various languages in wasm (language_usage.sql). Added a query for the usage of various MIME types in wasm (mime_types.sql). Added a query for page rankings (page_rankings.sql). Added queries to understand usage by size (module_sizes.sql, section_sizes.sql, large_custom_sections.sql and total_sizes_by_percentile.sql). Info: dropped the statistics for size_total_br, size_total_strip, size_total_strip_br, size_total_opt, size_total_opt_br, br_savings, strip_br_savings, opt_br_savings and opt_savings because the dataset has null values for them. Added a query for wasm proposals (proposals.sql). Added a query to understand popular wasm libraries in use (popular_library_by_name.sql). All the queries are grouped by client, i.e. desktop and mobile. * Update sql/2025/webassembly/page_rankings.sql extended extra category Co-authored-by: Barry Pollard <barrypollard@google.com> * Updated queries with date '2025-07-01', removed the test folder and simplified JSON values without $ * Rephrased query for compression methods by clients. * Refactor wasm count query * Update sql/2025/webassembly/page_rankings.sql Co-authored-by: Barry Pollard <barrypollard@google.com> * Added new line for code lint pipeline and rephrased queries * Apply suggestion from @tunetheweb * Apply suggestion from @tunetheweb * Apply suggestion from @tunetheweb * Apply suggestion from @tunetheweb * Apply suggestion from @tunetheweb * Apply suggestion from @tunetheweb * Update sql/2025/webassembly/instruction_kinds.sql --------- Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent 355a1a7 commit 2b293a0

14 files changed

Lines changed: 319 additions & 0 deletions
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Query to list the compression methods (Content-Encoding values) used for wasm responses.
#
# Every wasm request is counted exactly once:
#   * header values are trimmed and lowercased, so `gzip`, `GZIP` and ` gzip` share a bucket;
#   * responses without a Content-Encoding header are reported as '(none)' instead of being
#     dropped, so `total_wasm_requests_by_client` is the number of wasm requests for the
#     client and `percentage` is a share of all of that client's wasm requests;
#   * a response that repeats the header contributes only its first value rather than one
#     row per header occurrence.

WITH wasm_encodings AS (
  SELECT
    client,
    COALESCE(
      (
        -- First Content-Encoding header of the response, NULL when there is none.
        SELECT
          LOWER(TRIM(header.value))
        FROM
          UNNEST(response_headers) AS header WITH OFFSET AS header_position
        WHERE
          LOWER(header.name) = 'content-encoding'
        ORDER BY
          header_position
        LIMIT 1
      ),
      '(none)'
    ) AS compression_method
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = '2025-07-01' AND
    type = 'wasm'
)

SELECT
  client,
  compression_method,
  COUNT(0) AS wasm_requests,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total_wasm_requests_by_client,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS percentage
FROM
  wasm_encodings
GROUP BY
  client,
  compression_method
ORDER BY
  percentage DESC,
  -- tie-breakers keep the output order deterministic
  client,
  compression_method

sql/2025/webassembly/counts.sql

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Per client: the number of wasm requests and the number of distinct wasm names.
#
# A request's name is derived from its URL:
#   * URLs matching the hyphenopoly/patterns dictionary pattern -> '(hyphenopoly dictionary)'
#   * URLs ending in `.unityweb`                                 -> '(unityweb app)'
#   * anything else -> the lowercased text after the last `/` up to the next `.` or `?`,
#     with a trailing `-<20 to 32 hex characters>` hash removed (`name-0abc43234[...]` -> `name`)

SELECT
  client,
  COUNT(0) AS total_wasm,
  COUNT(DISTINCT name) AS total_distinct_wasm
FROM (
  SELECT
    client,
    CASE
      WHEN REGEXP_CONTAINS(url, r'/(hyphenopoly|patterns).*/[a-z-]{2,5}\.wasm') THEN '(hyphenopoly dictionary)'
      WHEN ENDS_WITH(url, '.unityweb') THEN '(unityweb app)'
      ELSE REGEXP_REPLACE(REGEXP_EXTRACT(LOWER(url), r'.*/([^./?]*)'), r'-[0-9a-f]{20,32}$', '')
    END AS name
  FROM
    `httparchive.crawl.requests`
  WHERE
    type = 'wasm' AND
    date = '2025-07-01'
) AS named_wasm_requests
GROUP BY
  client
ORDER BY
  client
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Query for cross-domain wasm requests.
# Per client, the share of wasm requests whose registrable domain (NET.REG_DOMAIN) differs
# from the registrable domain of the page that requested them.
# Note: the comparison is on registrable domains, not on full origins, so a request to a
# sibling subdomain of the page is not counted. Requests where the comparison yields NULL
# are not counted as cross-domain but still count towards the denominator.

SELECT
  client,
  COUNTIF(is_cross_domain) / COUNT(0) AS cross_origin_percentage
FROM (
  SELECT
    client,
    NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) AS is_cross_domain
  FROM
    `httparchive.crawl.requests`
  WHERE
    type = 'wasm' AND
    date = '2025-07-01'
) AS wasm_requests
GROUP BY
  client
ORDER BY
  client

sql/2025/webassembly/domains.sql

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Share of registrable domains, per client, that made at least one wasm request.

WITH crawled_domains AS (
  -- every registrable domain seen in the crawl
  SELECT
    client,
    COUNT(DISTINCT NET.REG_DOMAIN(page)) AS all_domains
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
  GROUP BY
    client
),

wasm_domains AS (
  -- registrable domains of pages that issued a wasm request
  SELECT
    client,
    COUNT(DISTINCT NET.REG_DOMAIN(page)) AS domains_using_wasm
  FROM
    `httparchive.crawl.requests`
  WHERE
    type = 'wasm' AND
    date = '2025-07-01'
  GROUP BY
    client
)

SELECT
  client,
  all_domains,
  domains_using_wasm,
  domains_using_wasm / all_domains AS percentage
FROM
  crawled_domains
INNER JOIN
  wasm_domains
USING (client)
ORDER BY
  client
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Share of each wasm instruction category, per client.
# Each *_pct column is the summed instruction count of one category divided by the summed
# total instruction count, across all wasm requests that carry `_wasm_stats`.

WITH instruction_sums AS (
  -- one row per client holding the summed counters; values that fail the INT64 cast are ignored
  SELECT
    client,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.total) AS INT64)) AS total_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.other) AS INT64)) AS other_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.constants) AS INT64)) AS constants_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.wait_notify) AS INT64)) AS wait_notify_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.indirect_calls) AS INT64)) AS indirect_calls_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.direct_calls) AS INT64)) AS direct_calls_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.load_store) AS INT64)) AS load_store_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.memory) AS INT64)) AS memory_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.control_flow) AS INT64)) AS control_flow_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.table) AS INT64)) AS table_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.global_var) AS INT64)) AS global_var_instr,
    SUM(SAFE_CAST(JSON_VALUE(payload._wasm_stats.instr.categories.local_var) AS INT64)) AS local_var_instr
  FROM
    `httparchive.crawl.requests`
  WHERE
    payload._wasm_stats IS NOT NULL AND
    type = 'wasm' AND
    date = '2025-07-01'
  GROUP BY
    client
)

SELECT
  client,
  other_instr / total_instr AS other_pct,
  constants_instr / total_instr AS constants_pct,
  wait_notify_instr / total_instr AS wait_notify_pct,
  indirect_calls_instr / total_instr AS indirect_calls_pct,
  direct_calls_instr / total_instr AS direct_calls_pct,
  load_store_instr / total_instr AS load_store_pct,
  memory_instr / total_instr AS memory_pct,
  control_flow_instr / total_instr AS control_flow_pct,
  table_instr / total_instr AS table_pct,
  global_var_instr / total_instr AS global_var_pct,
  local_var_instr / total_instr AS local_var_pct
FROM
  instruction_sums
ORDER BY
  client
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Source-language breakdown of wasm requests, per client.
# `count` is the number of requests reporting a language, `total` is the number of wasm
# requests with `_wasm_stats` for the client, and `percentage` is count / total.

WITH wasm_languages AS (
  SELECT
    client,
    JSON_VALUE(payload._wasm_stats.language) AS language
  FROM
    `httparchive.crawl.requests`
  WHERE
    payload._wasm_stats IS NOT NULL AND
    type = 'wasm' AND
    date = '2025-07-01'
)

SELECT
  client,
  language,
  COUNT(0) AS count,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS percentage
FROM
  wasm_languages
GROUP BY
  client,
  language
ORDER BY
  count DESC
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Query to get the wasm request with the largest custom-section size, per client.
#
# `url` and `custom_sections` come from the same request that holds the maximum
# `custom_sections_size`. Separate ANY_VALUE() calls would return values from arbitrary,
# unrelated rows, so the reported URL would not be the module that owns the maximum.
# Ties on size are broken by `url` so the result is deterministic.

WITH custom_section_stats AS (
  SELECT
    client,
    url,
    JSON_VALUE(payload._wasm_stats.sections.custom) AS custom_sections,
    SAFE_CAST(JSON_VALUE(payload._wasm_stats.size.custom) AS INT64) AS custom_sections_size
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = '2025-07-01' AND
    type = 'wasm' AND
    payload._wasm_stats IS NOT NULL
)

SELECT
  client,
  largest.url,
  largest.custom_sections,
  largest.custom_sections_size
FROM (
  SELECT
    client,
    -- Keep only the top row per client; with DESC ordering NULL sizes sort last,
    -- so a NULL size is picked only when the client has no usable size at all.
    ARRAY_AGG(
      STRUCT(url, custom_sections, custom_sections_size)
      ORDER BY custom_sections_size DESC, url
      LIMIT 1
    )[OFFSET(0)] AS largest
  FROM
    custom_section_stats
  GROUP BY
    client
)
ORDER BY
  client
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Number of wasm requests per reported MIME type, per client, most frequent first.

SELECT
  client,
  mimeType,
  COUNT(0) AS count
FROM (
  SELECT
    client,
    JSON_VALUE(summary.mimeType) AS mimeType
  FROM
    `httparchive.crawl.requests`
  WHERE
    type = 'wasm' AND
    date = '2025-07-01'
) AS wasm_mime_types
GROUP BY
  client,
  mimeType
ORDER BY
  client,
  count DESC
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Wasm module sizes per client at the 0th, 10th, 25th, 50th, 75th, 90th and 100th percentile.
# `raw_size` is taken from `summary.respBodySize`, `size_total` from `_wasm_stats.size.total`.

WITH wasm_sizes AS (
  SELECT
    client,
    SAFE_CAST(JSON_VALUE(summary.respBodySize) AS INT64) AS resp_body_size,
    SAFE_CAST(JSON_VALUE(payload._wasm_stats.size.total) AS INT64) AS wasm_total_size
  FROM
    `httparchive.crawl.requests`
  WHERE
    payload._wasm_stats IS NOT NULL AND
    type = 'wasm' AND
    date = '2025-07-01'
)

SELECT
  client,
  percentile,
  -- 1000 buckets give 0.1% resolution, so percentile p sits at offset p * 10
  APPROX_QUANTILES(resp_body_size, 1000)[OFFSET(percentile * 10)] AS raw_size,
  APPROX_QUANTILES(wasm_total_size, 1000)[OFFSET(percentile * 10)] AS size_total
FROM
  wasm_sizes
CROSS JOIN
  UNNEST([0, 10, 25, 50, 75, 90, 100]) AS percentile
GROUP BY
  percentile,
  client
ORDER BY
  percentile,
  client
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Query to get the number of pages using wasm within each page-rank bucket, per client.
#
# Each request is fanned out to every bucket whose upper bound is >= the page's `rank`,
# so the buckets are cumulative (a top-1,000 page is also counted in top-10,000, and so on).
# The largest bucket (100,000,000) is labelled 'all'.
#
# Grouping and ordering are done on the bucket (`rank_grouping`), not on the table's own
# `rank` column. Grouping by `rank` leaves the selected bucket expressions ungrouped, which
# is invalid, and would otherwise split the counts per individual page rank.

SELECT
  client,
  _rank AS rank_grouping,
  CASE
    WHEN _rank = 100000000 THEN 'all'
    ELSE CAST(_rank AS STRING)
  END AS ranking,
  COUNT(DISTINCT page) AS pages
FROM
  `httparchive.crawl.requests`
CROSS JOIN
  UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank
WHERE
  date = '2025-07-01' AND
  type = 'wasm' AND
  rank <= _rank
GROUP BY
  client,
  rank_grouping,
  ranking
ORDER BY
  rank_grouping,
  client

0 commit comments

Comments
 (0)