-
-
Notifications
You must be signed in to change notification settings - Fork 209
Expand file tree
/
Copy pathcounts.sql
More file actions
60 lines (57 loc) · 1.41 KB
/
counts.sql
File metadata and controls
60 lines (57 loc) · 1.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Query for wasm requests and sites counts
WITH wasmRequests AS (
SELECT
date,
client,
page,
root_page,
url,
REGEXP_EXTRACT(url, r'([^/]+)$') AS filename -- lowercase & extract filename between last `/` and `.` or `?`
FROM
`httparchive.crawl.requests`
WHERE
date IN ('2021-07-01', '2022-06-01', '2024-06-01', '2025-07-01') AND
(
(date IN ('2024-06-01', '2025-07-01') AND type = 'wasm') -- wasm type was added in Jan 2024
OR
(date IN ('2021-07-01', '2022-06-01') AND (JSON_VALUE(summary.mimeType) = 'application/wasm' OR JSON_VALUE(summary.ext) = 'wasm'))
)
),
totals AS (
SELECT
date,
client,
COUNT(DISTINCT root_page) AS total_sites,
COUNT(DISTINCT NET.REG_DOMAIN(page)) AS total_reg_domains
FROM
`httparchive.crawl.requests`
WHERE
date IN ('2021-07-01', '2022-06-01', '2024-06-01', '2025-07-01')
GROUP BY
date,
client
)
SELECT
date,
client,
COUNT(0) AS total_wasm,
COUNT(DISTINCT filename) AS unique_wasm,
COUNT(DISTINCT root_page) AS sites,
total_sites,
COUNT(DISTINCT root_page) / total_sites AS pct_sites,
COUNT(DISTINCT NET.REG_DOMAIN(page)) AS reg_domains,
total_reg_domains,
COUNT(DISTINCT NET.REG_DOMAIN(page)) / total_reg_domains AS pct_reg_domains
FROM
wasmRequests
INNER JOIN
totals
USING (date, client)
GROUP BY
date,
client,
total_sites,
total_reg_domains
ORDER BY
date DESC,
client