-
-
Notifications
You must be signed in to change notification settings - Fork 209
Expand file tree
/
Copy pathbounce_domains_top.sql
More file actions
78 lines (72 loc) · 2.13 KB
/
bounce_domains_top.sql
File metadata and controls
78 lines (72 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
-- noqa: disable=PRS
-- Detection logic explained:
-- https://github.com/privacycg/proposals/issues/6
-- https://github.com/privacycg/nav-tracking-mitigations/blob/main/bounce-tracking-explainer.md
WITH redirect_requests AS (
FROM `httparchive.crawl.requests`
|> WHERE
date = '2025-07-01' AND
--rank = 1000 AND
SAFE.INT64(summary.status) BETWEEN 300 AND 399 AND
index <= 2
|> JOIN UNNEST(response_headers) AS header
|> WHERE LOWER(header.name) = 'location'
|> SELECT
client,
url,
index,
NET.REG_DOMAIN(header.value) AS location_domain,
root_page
),
-- Find the first navigation redirect
navigation_redirect AS (
FROM redirect_requests
|> WHERE
index = 1 AND
NET.REG_DOMAIN(root_page) = NET.REG_DOMAIN(url) AND
NET.REG_DOMAIN(url) != location_domain
|> SELECT
client,
root_page,
location_domain AS bounce_domain
),
-- Find the second navigation redirect
bounce_redirect AS (
FROM redirect_requests
|> WHERE
index = 2 AND
NET.REG_DOMAIN(root_page) != NET.REG_DOMAIN(url) AND
NET.REG_DOMAIN(url) != location_domain
|> SELECT
client,
url,
root_page,
location_domain AS bounce_redirect_location_domain
),
-- Combine the first and second navigation redirects
bounce_sequences AS (
FROM navigation_redirect AS nav
|> JOIN bounce_redirect AS bounce
ON
nav.client = bounce.client AND
nav.root_page = bounce.root_page
|> AGGREGATE COUNT(DISTINCT nav.root_page) AS websites_count
GROUP BY nav.client, bounce_domain
),
websites_total AS (
FROM `httparchive.crawl.pages`
|> WHERE date = '2025-07-01' --AND rank = 1000
|> AGGREGATE COUNT(DISTINCT root_page) AS total_websites GROUP BY client
)
FROM bounce_sequences
|> JOIN websites_total USING (client)
|> EXTEND websites_count / total_websites AS websites_pct
|> DROP total_websites
|> PIVOT(
ANY_VALUE(websites_count) AS cnt,
ANY_VALUE(websites_pct) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop, cnt_mobile AS mobile_count, cnt_desktop AS desktop_count
|> ORDER BY COALESCE(mobile_count, 0) + COALESCE(desktop_count, 0) DESC
|> LIMIT 100