|
1 | 1 | #standardSQL |
| 2 | +# Median third-parties & green third-party requests per website by rank |
2 | 3 |
|
3 | 4 | WITH requests AS ( |
4 | 5 | SELECT |
@@ -34,109 +35,112 @@ pages AS ( |
34 | 35 |
|
35 | 36 | third_party AS ( |
36 | 37 | SELECT |
37 | | - domain, |
38 | | - COUNT(DISTINCT page) AS page_usage |
| 38 | + tp.domain, |
| 39 | + COUNT(DISTINCT r.page) AS page_usage |
39 | 40 | FROM |
40 | 41 | `httparchive.almanac.third_parties` AS tp |
41 | 42 | INNER JOIN |
42 | 43 | requests AS r |
43 | 44 | ON NET.HOST(r.url) = NET.HOST(tp.domain) |
44 | 45 | WHERE |
45 | | - date = '2025-06-01' AND |
46 | | - category NOT IN ('hosting') |
| 46 | + tp.date = '2025-06-01' AND |
| 47 | + tp.category NOT IN ('hosting') |
47 | 48 | GROUP BY |
48 | | - domain |
| 49 | + tp.domain |
49 | 50 | HAVING |
50 | 51 | page_usage >= 50 |
51 | 52 | ), |
52 | 53 |
|
53 | 54 | green_tp AS ( |
54 | | - SELECT domain |
| 55 | + SELECT tp.domain |
55 | 56 | FROM |
56 | 57 | `httparchive.almanac.third_parties` AS tp |
57 | 58 | INNER JOIN |
58 | 59 | green AS g |
59 | 60 | ON NET.HOST(g.host) = NET.HOST(tp.domain) |
60 | 61 | WHERE |
61 | | - date = '2025-06-01' AND |
62 | | - category NOT IN ('hosting') |
| 62 | + tp.date = '2025-06-01' AND |
| 63 | + tp.category NOT IN ('hosting') |
63 | 64 | GROUP BY |
64 | | - domain |
| 65 | + tp.domain |
65 | 66 | ), |
66 | 67 |
|
67 | 68 | base AS ( |
68 | 69 | SELECT |
69 | | - client, |
70 | | - page, |
71 | | - rank, |
72 | | - COUNT(domain) AS third_parties_per_page |
| 70 | + r.client, |
| 71 | + r.page, |
| 72 | + p.rank, |
| 73 | + COUNT(tp.domain) AS third_parties_per_page |
73 | 74 | FROM |
74 | | - requests |
| 75 | + requests AS r |
75 | 76 | LEFT JOIN |
76 | | - third_party |
| 77 | + third_party AS tp |
77 | 78 | ON |
78 | | - NET.HOST(requests.url) = NET.HOST(third_party.domain) |
| 79 | + NET.HOST(r.url) = NET.HOST(tp.domain) |
79 | 80 | INNER JOIN |
80 | | - pages |
81 | | - USING (client, page) |
| 81 | + pages AS p |
| 82 | + ON r.client = p.client AND r.page = p.page |
82 | 83 | GROUP BY |
83 | | - client, |
84 | | - page, |
85 | | - rank |
| 84 | + r.client, |
| 85 | + r.page, |
| 86 | + p.rank |
86 | 87 | ), |
87 | 88 |
|
88 | 89 | base_green AS ( |
89 | 90 | SELECT |
90 | | - client, |
91 | | - page, |
92 | | - rank, |
93 | | - COUNT(domain) AS green_third_parties_per_page |
| 91 | + r.client, |
| 92 | + r.page, |
| 93 | + p.rank, |
| 94 | + COUNT(gtp.domain) AS green_third_parties_per_page |
94 | 95 | FROM |
95 | | - requests |
| 96 | + requests AS r |
96 | 97 | LEFT JOIN |
97 | | - green_tp |
| 98 | + green_tp AS gtp |
98 | 99 | ON |
99 | | - NET.HOST(requests.url) = NET.HOST(green_tp.domain) |
| 100 | + NET.HOST(r.url) = NET.HOST(gtp.domain) |
100 | 101 | INNER JOIN |
101 | | - pages |
102 | | - USING (client, page) |
| 102 | + pages AS p |
| 103 | + ON r.client = p.client AND r.page = p.page |
103 | 104 | GROUP BY |
104 | | - client, |
105 | | - page, |
106 | | - rank |
| 105 | + r.client, |
| 106 | + r.page, |
| 107 | + p.rank |
107 | 108 | ) |
108 | 109 |
|
109 | 110 | SELECT |
110 | | - client, |
| 111 | + b.client, |
111 | 112 | rank_grouping, |
112 | 113 | CASE |
113 | 114 | WHEN rank_grouping = 0 THEN '' |
114 | 115 | WHEN rank_grouping = 100000000 THEN 'all' |
115 | 116 | ELSE FORMAT("%'d", rank_grouping) |
116 | 117 | END AS ranking, |
117 | 118 | APPROX_QUANTILES( |
118 | | - third_parties_per_page, 1000 |
| 119 | + b.third_parties_per_page, 1000 |
119 | 120 | ) [OFFSET(500)] AS p50_third_parties_per_page, |
120 | 121 | APPROX_QUANTILES( |
121 | | - green_third_parties_per_page, 1000 |
| 122 | + bg.green_third_parties_per_page, 1000 |
122 | 123 | ) [OFFSET(500)] AS p50_green_third_parties_per_page, |
123 | 124 | APPROX_QUANTILES( |
124 | | - SAFE_DIVIDE(green_third_parties_per_page, third_parties_per_page), 1000 |
| 125 | + SAFE_DIVIDE( |
| 126 | + bg.green_third_parties_per_page, |
| 127 | + b.third_parties_per_page |
| 128 | + ), 1000 |
125 | 129 | ) [OFFSET(500)] AS pct_green |
126 | 130 | FROM |
127 | | - base, |
| 131 | + base AS b, |
128 | 132 | UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping |
129 | 133 | INNER JOIN |
130 | | - base_green |
| 134 | + base_green AS bg |
131 | 135 | ON |
132 | | - base.client = base_green.client AND |
133 | | - base.page = base_green.page AND |
134 | | - base.rank = base_green.rank |
| 136 | + b.client = bg.client AND |
| 137 | + b.page = bg.page AND |
| 138 | + b.rank = bg.rank |
135 | 139 | WHERE |
136 | | - rank <= rank_grouping |
| 140 | + b.rank <= rank_grouping |
137 | 141 | GROUP BY |
138 | | - client, |
| 142 | + b.client, |
139 | 143 | rank_grouping |
140 | 144 | ORDER BY |
141 | | - client, |
| 145 | + b.client, |
142 | 146 | rank_grouping |
0 commit comments