Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions sql/2025/webassembly/counts.sql
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
# Query for wasm requests' count with distinct wasm name
# Query for wasm requests' count with distinct wasm origin name

WITH wasmRequests AS (
SELECT
client,
page,
CASE
WHEN REGEXP_CONTAINS(url, r'/(hyphenopoly|patterns).*/[a-z-]{2,5}\.wasm')
THEN '(hyphenopoly dictionary)'
WHEN ENDS_WITH(url, '.unityweb')
THEN '(unityweb app)'
ELSE
REGEXP_REPLACE(
  -- Lowercase the URL, then capture the file name: the greedy `.*/` skips to the
  -- last `/`, and `[^./?]*` captures everything up to the next `.`, `/` or `?`.
  -- Both `*` quantifiers are required; without them the pattern captures a single
  -- character right after the first `x/` pair instead of the file name.
  REGEXP_EXTRACT(LOWER(url), r'.*/([^./?]*)'), -- lowercase & extract filename between last `/` and `.` or `?`
  r'-[0-9a-f]{20,32}$', -- trim trailing hashes to transform `name-0abc43234[...]` to `name`
  ''
)
Expand All @@ -25,7 +26,7 @@ WITH wasmRequests AS (
SELECT
client,
COUNT(0) AS total_wasm,
COUNT(DISTINCT name) AS total_distinct_wasm
COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_origin
FROM
wasmRequests
GROUP BY
Expand Down
17 changes: 8 additions & 9 deletions sql/2025/webassembly/page_rankings.sql
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
# Query to get count of pages which use wasm at page ranking intervals.
#
# Every request row is paired with each rank threshold in the UNNEST list, and
# kept only when the row's `rank` is at or below that threshold. The buckets are
# therefore cumulative: a page counted under 1000 is also counted under every
# larger threshold. The largest threshold (100000000) is labelled 'all'.
#
# Output columns: client, rank_grouping (numeric threshold), ranking (display
# label), pages (distinct pages with at least one matching wasm request).

SELECT
  client,
  rank_grouping,
  CASE
    WHEN rank_grouping = 100000000 THEN 'all'
    ELSE CAST(rank_grouping AS STRING)
  END AS ranking,
  COUNT(DISTINCT page) AS pages
FROM
  `httparchive.crawl.requests`,
  UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
WHERE
  date = '2025-07-01' AND
  type = 'wasm' AND
  rank <= rank_grouping
GROUP BY
  -- Group by the threshold, not the raw `rank` column, so each client gets
  -- exactly one row per threshold.
  client,
  rank_grouping
ORDER BY
  client,
  rank_grouping