-
-
Notifications
You must be signed in to change notification settings - Fork 209
Expand file tree
/
Copy pathcookies_top_first_party_names.sql
More file actions
37 lines (34 loc) · 951 Bytes
/
cookies_top_first_party_names.sql
File metadata and controls
37 lines (34 loc) · 951 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
-- Most common cookie names, by number of domains on which they appear. Goal is to identify common trackers that use first-party cookies across sites.
WITH pages AS (
SELECT
client,
root_page,
custom_metrics,
COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains
FROM `httparchive.crawl.pages`
WHERE date = '2025-07-01'
),
cookies AS (
SELECT
client,
cookie,
NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host,
NET.HOST(root_page) AS firstparty_host,
total_domains
FROM pages,
UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie
)
SELECT
client,
COUNT(DISTINCT firstparty_host) AS domain_count,
COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains,
SAFE.STRING(cookie.name) AS cookie_name
FROM cookies
WHERE firstparty_host LIKE '%' || cookie_host
GROUP BY
client,
cookie_name
ORDER BY
domain_count DESC,
client DESC
LIMIT 500