Skip to content

Commit f06f529

Browse files
Merge branch 'main' into privacy-markdown-2025
2 parents 160abc0 + 6acf7cf commit f06f529

31 files changed

Lines changed: 533 additions & 168 deletions

sql/2025/webassembly/counts.sql

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,60 @@
1-
# Query for wasm requests' count with distinct wasm origin name
1+
# Query for wasm requests and sites counts
22

33
WITH wasmRequests AS (
44
SELECT
5+
date,
56
client,
67
page,
7-
CASE
8-
WHEN REGEXP_CONTAINS(url, r'/(hyphenopoly|patterns).*/[a-z-]{2,5}\.wasm')
9-
THEN '(hyphenopoly dictionary)'
10-
WHEN ENDS_WITH(url, '.unityweb')
11-
THEN '(unityweb app)'
12-
ELSE
13-
REGEXP_REPLACE(
14-
REGEXP_EXTRACT(LOWER(url), r'./([^./?])'), -- lowercase & extract filename between last `/` and `.` or `?`
15-
r'-[0-9a-f]{20,32}$', -- trim trailing hashes to transform `name-0abc43234[...]` to `name`
16-
''
17-
)
18-
END AS name
8+
root_page,
9+
url,
10+
REGEXP_EXTRACT(url, r'([^/]+)$') AS filename -- trailing run of non-`/` characters at the end of the URL; case is preserved and the extension / query string are NOT stripped
1911
FROM
2012
`httparchive.crawl.requests`
2113
WHERE
22-
date = '2025-07-01' AND
23-
type = 'wasm'
14+
date IN ('2021-07-01', '2022-06-01', '2024-06-01', '2025-07-01') AND
15+
(
16+
(date IN ('2024-06-01', '2025-07-01') AND type = 'wasm') -- wasm type was added in Jan 2024
17+
OR
18+
(date IN ('2021-07-01', '2022-06-01') AND (JSON_VALUE(summary.mimeType) = 'application/wasm' OR JSON_VALUE(summary.ext) = 'wasm'))
19+
)
20+
),
21+
22+
totals AS (
23+
SELECT
24+
date,
25+
client,
26+
COUNT(DISTINCT root_page) AS total_sites,
27+
COUNT(DISTINCT NET.REG_DOMAIN(page)) AS total_reg_domains
28+
FROM
29+
`httparchive.crawl.requests`
30+
WHERE
31+
date IN ('2021-07-01', '2022-06-01', '2024-06-01', '2025-07-01')
32+
GROUP BY
33+
date,
34+
client
2435
)
2536

2637
SELECT
38+
date,
2739
client,
2840
COUNT(0) AS total_wasm,
29-
COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_origin
41+
COUNT(DISTINCT filename) AS unique_wasm,
42+
COUNT(DISTINCT root_page) AS sites,
43+
total_sites,
44+
COUNT(DISTINCT root_page) / total_sites AS pct_sites,
45+
COUNT(DISTINCT NET.REG_DOMAIN(page)) AS reg_domains,
46+
total_reg_domains,
47+
COUNT(DISTINCT NET.REG_DOMAIN(page)) / total_reg_domains AS pct_reg_domains
3048
FROM
3149
wasmRequests
50+
INNER JOIN
51+
totals
52+
USING (date, client)
3253
GROUP BY
33-
client
54+
date,
55+
client,
56+
total_sites,
57+
total_reg_domains
3458
ORDER BY
59+
date DESC,
3560
client

sql/2025/webassembly/page_rankings.sql

Lines changed: 0 additions & 21 deletions
This file was deleted.

sql/2025/webassembly/ranking.sql

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
# WASM usage by page ranking
#
# For each client and each cumulative rank bucket, reports how many distinct
# sites (root pages) made at least one request of type 'wasm' in the
# 2025-07-01 crawl, alongside the number of distinct sites in that bucket and
# the resulting share.
#
# Output columns: client, rank_grouping, ranking, sites, total_sites, pct_sites

WITH totals AS (
  -- Denominator: distinct sites per client and rank bucket, over all requests.
  -- `rank <= rank_grouping` makes the buckets cumulative, so a site counted in
  -- the 1,000 bucket is also counted in every larger bucket.
  -- No ORDER BY here: only the ORDER BY of the final SELECT decides row order.
  SELECT
    client,
    rank_grouping,
    COUNT(DISTINCT root_page) AS total_sites
  FROM
    `httparchive.crawl.requests`
  CROSS JOIN
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2025-07-01' AND
    rank <= rank_grouping
  GROUP BY
    client,
    rank_grouping
),

wasm_sites AS (
  -- Numerator: same grouping, restricted to requests of type 'wasm'.
  -- Aggregated before the join so that only two small result sets are joined,
  -- instead of joining every wasm request row to `totals`.
  SELECT
    client,
    rank_grouping,
    COUNT(DISTINCT root_page) AS sites
  FROM
    `httparchive.crawl.requests`
  CROSS JOIN
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2025-07-01' AND
    type = 'wasm' AND
    rank <= rank_grouping
  GROUP BY
    client,
    rank_grouping
)

SELECT
  client,
  rank_grouping,
  -- Human-readable bucket label; the largest bucket is labelled 'all'.
  CASE
    WHEN rank_grouping = 100000000 THEN 'all'
    ELSE FORMAT("%'d", rank_grouping)
  END AS ranking,
  sites,
  total_sites,
  sites / total_sites AS pct_sites
FROM
  wasm_sites
INNER JOIN
  -- Inner join: a bucket with no wasm sites produces no row.
  totals
USING (client, rank_grouping)
ORDER BY
  client,
  rank_grouping

src/config/2025.json

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@
2222
"chapter_number": "2",
2323
"title": "WebAssembly",
2424
"slug": "webassembly",
25-
"hero_dir": "2021",
26-
"todo": true
25+
"hero_dir": "2021"
2726
},
2827
{
2928
"part": "I",
@@ -79,14 +78,6 @@
7978
{
8079
"part": "II",
8180
"chapter_number": "10",
82-
"title": "Capabilities",
83-
"slug": "capabilities",
84-
"hero_dir": "2020",
85-
"todo": true
86-
},
87-
{
88-
"part": "II",
89-
"chapter_number": "11",
9081
"title": "PWA",
9182
"slug": "pwa"
9283
}
@@ -98,13 +89,13 @@
9889
"chapters": [
9990
{
10091
"part": "III",
101-
"chapter_number": "12",
92+
"chapter_number": "11",
10293
"title": "CMS",
10394
"slug": "cms"
10495
},
10596
{
10697
"part": "III",
107-
"chapter_number": "13",
98+
"chapter_number": "12",
10899
"title": "Ecommerce",
109100
"slug": "ecommerce",
110101
"todo": true
@@ -117,19 +108,19 @@
117108
"chapters": [
118109
{
119110
"part": "IV",
120-
"chapter_number": "14",
111+
"chapter_number": "13",
121112
"title": "Page Weight",
122113
"slug": "page-weight"
123114
},
124115
{
125116
"part": "IV",
126-
"chapter_number": "15",
117+
"chapter_number": "14",
127118
"title": "CDN",
128119
"slug": "cdn"
129120
},
130121
{
131122
"part": "IV",
132-
"chapter_number": "16",
123+
"chapter_number": "15",
133124
"title": "Cookies",
134125
"slug": "cookies",
135126
"hero_dir": "2024"

src/config/contributors.json

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,6 @@
5757
"twitter": "DesignrKnight",
5858
"website": "http://designrknight.com/"
5959
},
60-
"abhishektiwari": {
61-
"avatar_url": "363839",
62-
"github": "abhishektiwari",
63-
"name": "Abhishek Tiwari",
64-
"teams": {
65-
"2025": [
66-
"committee",
67-
"editors"
68-
]
69-
}
70-
},
7160
"kleinab": {
7261
"avatar_url": "1319324",
7362
"github": "kleinab",
@@ -3683,6 +3672,19 @@
36833672
]
36843673
}
36853674
},
3675+
"nimeshgit": {
3676+
"avatar_url": "38841604",
3677+
"github": "nimeshgit",
3678+
"linkedin": "ops-ml-architect",
3679+
"name": "Nimesh Vadgama - VN",
3680+
"teams": {
3681+
"2025": [
3682+
"analysts",
3683+
"authors"
3684+
]
3685+
},
3686+
"website": "https://ops-ml-architect.blogspot.com/"
3687+
},
36863688
"NishuGoel": {
36873689
"avatar_url": "26349046",
36883690
"github": "NishuGoel",
@@ -3772,9 +3774,10 @@
37723774
"reviewers"
37733775
],
37743776
"2025": [
3775-
"leads",
37763777
"committee",
3777-
"authors"
3778+
"leads",
3779+
"authors",
3780+
"reviewers"
37783781
]
37793782
},
37803783
"twitter": "nrllah",

src/content/en/2025/security.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ hero_alt: Hero image of Web Almanac characters padlocking a web page, while othe
66
authors: [vikvanderlinden, GJFR]
77
reviewers: [anirudhduggal, martinakraus, GJFR, clarkio, JannisBush, securient]
88
analysts: [vsdaan]
9-
editors: [abhishektiwari]
9+
editors: [tunetheweb]
1010
translators: []
1111
GJFR_bio: Gertjan Franken is a postdoctoral researcher with the <a hreflang="en" href="https://distrinet.cs.kuleuven.be/">DistriNet Research Group</a> at KU Leuven. His research spans various aspects of web security and privacy, with a primary focus on the automated analysis of browser security policies. As part of this research, he maintains the open-source tool <a hreflang="en" href="https://github.com/DistriNet/BugHog">BugHog</a> for pinpointing bug lifecycles.
1212
vikvanderlinden_bio: Vik Vanderlinden is a PhD candidate in Computer Science at the <a hreflang="en" href="https://distrinet.cs.kuleuven.be/">DistriNet Research Group</a> at KU Leuven. His research focuses on web and network security, primarily focusing on timing leaks in web applications and protocols.

0 commit comments

Comments
 (0)