|
1 | | -# Query for wasm requests' count with distinct wasm origin name |
| 1 | +# Query for wasm request counts and for the sites and registrable domains that load wasm |
2 | 2 |
|
3 | 3 | WITH wasmRequests AS (
4 | 4 | SELECT
| 5 | + date,
5 | 6 | client,
6 | 7 | page,
7 | | - CASE
8 | | - WHEN REGEXP_CONTAINS(url, r'/(hyphenopoly|patterns).*/[a-z-]{2,5}\.wasm')
9 | | - THEN '(hyphenopoly dictionary)'
10 | | - WHEN ENDS_WITH(url, '.unityweb')
11 | | - THEN '(unityweb app)'
12 | | - ELSE
13 | | - REGEXP_REPLACE(
14 | | - REGEXP_EXTRACT(LOWER(url), r'./([^./?])'), -- lowercase & extract filename between last `/` and `.` or `?`
15 | | - r'-[0-9a-f]{20,32}$', -- trim trailing hashes to transform `name-0abc43234[...]` to `name`
16 | | - ''
17 | | - )
18 | | - END AS name
| 8 | + root_page,
| 9 | + url,
| 10 | + REGEXP_EXTRACT(url, r'([^/]+)$') AS filename -- everything after the last `/` in the URL; case is preserved and any extension or query string is kept (NULL when the URL ends with `/`)
19 | 11 | FROM
20 | 12 | `httparchive.crawl.requests`
21 | 13 | WHERE
22 | | - date = '2025-07-01' AND
23 | | - type = 'wasm'
| 14 | + date IN ('2021-07-01', '2022-06-01', '2024-06-01', '2025-07-01') AND
| 15 | + (
| 16 | + (date IN ('2024-06-01', '2025-07-01') AND type = 'wasm') -- wasm type was added in Jan 2024
| 17 | + OR
| 18 | + (date IN ('2021-07-01', '2022-06-01') AND (JSON_VALUE(summary.mimeType) = 'application/wasm' OR JSON_VALUE(summary.ext) = 'wasm')) -- earlier crawls are matched on the MIME type or file extension recorded in `summary` instead of `type`
| 19 | + )
| 20 | +),
| 21 | + |
| 22 | +totals AS ( -- denominators: one row per (date, client) counting every site in the crawl, not only those with wasm requests
| 23 | + SELECT
| 24 | + date,
| 25 | + client,
| 26 | + COUNT(DISTINCT root_page) AS total_sites, -- distinct root pages seen in any request
| 27 | + COUNT(DISTINCT NET.REG_DOMAIN(page)) AS total_reg_domains -- distinct registrable domains of the requesting pages
| 28 | + FROM
| 29 | + `httparchive.crawl.requests` -- NOTE(review): presumably `httparchive.crawl.pages` yields the same totals while scanning far fewer rows; confirm before switching
| 30 | + WHERE
| 31 | + date IN ('2021-07-01', '2022-06-01', '2024-06-01', '2025-07-01') -- same crawl dates as the wasmRequests filter
| 32 | + GROUP BY
| 33 | + date,
| 34 | + client
24 | 35 | )
25 | 36 |
|
26 | 37 | SELECT
| 38 | + date,
27 | 39 | client,
28 | 40 | COUNT(0) AS total_wasm,
29 | | - COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_origin
| 41 | + COUNT(DISTINCT filename) AS unique_wasm, -- distinct URL tails; NOTE(review): filename keeps any query string, so one file requested with different query strings may be counted more than once; confirm this is intended
| 42 | + COUNT(DISTINCT root_page) AS sites, -- sites (root pages) with at least one wasm request
| 43 | + total_sites,
| 44 | + COUNT(DISTINCT root_page) / total_sites AS pct_sites, -- fraction of all sites (not multiplied by 100)
| 45 | + COUNT(DISTINCT NET.REG_DOMAIN(page)) AS reg_domains, -- registrable domains with at least one wasm request
| 46 | + total_reg_domains,
| 47 | + COUNT(DISTINCT NET.REG_DOMAIN(page)) / total_reg_domains AS pct_reg_domains -- fraction of all registrable domains (not multiplied by 100)
30 | 48 | FROM
31 | 49 | wasmRequests
| 50 | +INNER JOIN
| 51 | + totals
| 52 | +USING (date, client) -- totals has exactly one row per (date, client), so this join does not multiply wasm request rows
32 | 53 | GROUP BY
33 | | - client
| 54 | + date,
| 55 | + client,
| 56 | + total_sites, -- constant within each (date, client); listed here only so it can appear un-aggregated in SELECT
| 57 | + total_reg_domains -- same as total_sites: does not create additional groups
34 | 58 | ORDER BY
| 59 | + date DESC,
35 | 60 | client
0 commit comments