diff --git a/sql/2025/webassembly/counts.sql b/sql/2025/webassembly/counts.sql
index dc934e045b1..49890610094 100644
--- a/sql/2025/webassembly/counts.sql
+++ b/sql/2025/webassembly/counts.sql
@@ -1,8 +1,9 @@
-# Query for wasm requests' count with distinct wasm name
+# Query for wasm requests' count and the count of distinct registrable page domains (NET.REG_DOMAIN) requesting wasm
 
 WITH wasmRequests AS (
   SELECT
     client,
+    page,
     CASE
       WHEN REGEXP_CONTAINS(url, r'/(hyphenopoly|patterns).*/[a-z-]{2,5}\.wasm')
         THEN '(hyphenopoly dictionary)'
@@ -25,7 +26,7 @@ WITH wasmRequests AS (
 SELECT
   client,
   COUNT(0) AS total_wasm,
-  COUNT(DISTINCT name) AS total_distinct_wasm
+  COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_origin
 FROM
   wasmRequests
 GROUP BY
diff --git a/sql/2025/webassembly/page_rankings.sql b/sql/2025/webassembly/page_rankings.sql
index 050672449e2..1a7596bef35 100644
--- a/sql/2025/webassembly/page_rankings.sql
+++ b/sql/2025/webassembly/page_rankings.sql
@@ -1,22 +1,21 @@
-# Query to get count of pages which use wasm at page ranking intervals.
-
 SELECT
   client,
-  _rank AS rank_grouping,
+  rank_grouping,
   CASE
-    WHEN _rank = 100000000 THEN 'all'
-    ELSE CAST(_rank AS STRING)
+    WHEN rank_grouping = 100000000 THEN 'all'
+    ELSE CAST(rank_grouping AS STRING)
   END AS ranking,
   COUNT(DISTINCT page) AS pages
 FROM
   `httparchive.crawl.requests`,
-  UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS _rank
+  UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
 WHERE
   date = '2025-07-01' AND
   type = 'wasm' AND
-  rank <= _rank
+  rank <= rank_grouping
 GROUP BY
   client,
-  rank
+  rank_grouping
 ORDER BY
-  rank
+  client,
+  rank_grouping