-
-
Notifications
You must be signed in to change notification settings - Fork 209
Expand file tree
/
Copy pathtop100_cookies_set_from_header.sql
More file actions
92 lines (87 loc) · 1.7 KB
/
top100_cookies_set_from_header.sql
File metadata and controls
92 lines (87 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#standardSQL
# Top100 popular cookies and their origins
CREATE TEMPORARY FUNCTION cookieNames(headers STRING)
RETURNS ARRAY<STRING> DETERMINISTIC
LANGUAGE js AS '''
try {
var headers = JSON.parse(headers);
let cookies = headers.filter(h => h.name.match(/^set-cookie$/i));
cookieNames = cookies.map(h => {
name = h.value.split('=')[0]
return name;
})
return cookieNames;
} catch (e) {
return null;
}
''';
WITH whotracksme AS (
SELECT
domain,
category,
tracker
FROM
`httparchive.almanac.whotracksme`
WHERE
date = '2022-06-01'
),
request_headers AS (
SELECT
client,
page,
NET.REG_DOMAIN(url) AS request,
cookieNames(response_headers) AS cookie_names,
COUNT(DISTINCT page) OVER (PARTITION BY client) AS websites_per_client
FROM
`httparchive.almanac.requests`
WHERE
date = '2022-06-01'
GROUP BY
client,
page,
url,
response_headers
),
cookies AS (
SELECT
client,
request,
cookie,
COUNT(DISTINCT page) AS websites_count,
websites_per_client,
COUNT(DISTINCT page) / websites_per_client AS pct_websites
FROM
request_headers,
UNNEST(cookie_names) AS cookie
WHERE
cookie IS NOT NULL AND
cookie != ''
GROUP BY
client,
request,
cookie,
websites_per_client
)
SELECT
*
FROM (
SELECT
client,
whotracksme.category,
request,
cookie,
cookie || ' - ' || request AS cookie_and_request,
websites_count,
websites_per_client,
pct_websites,
RANK() OVER (PARTITION BY client, category ORDER BY pct_websites DESC) AS rank
FROM
cookies
LEFT JOIN
whotracksme
ON NET.HOST(request) = domain
ORDER BY
pct_websites DESC,
client
)
WHERE rank <= 10