Skip to content

Commit 188d17c

Browse files
committed
Fix queries for 2025
1 parent 89e880c commit 188d17c

7 files changed

Lines changed: 95 additions & 102 deletions

sql/2025/ecommerce/core_web_vitals_by_platform.sql

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -48,20 +48,23 @@ FROM
4848
`chrome-ux-report.materialized.device_summary`
4949
JOIN (
5050
SELECT
51-
_TABLE_SUFFIX AS client,
52-
url,
53-
app AS ecomm
51+
client,
52+
page,
53+
tech.technology AS ecomm
5454
FROM
55-
`httparchive.technologies.2025_06_01_*`
55+
`httparchive.crawl.pages`,
56+
UNNEST(technologies) AS tech,
57+
UNNEST(tech.categories) AS category
5658
WHERE
59+
date = '2025-07-01' AND
5760
category = 'Ecommerce' AND
5861
(
59-
app != 'Cart Functionality' AND
60-
app != 'Google Analytics Enhanced eCommerce'
62+
tech.technology != 'Cart Functionality' AND
63+
tech.technology != 'Google Analytics Enhanced eCommerce'
6164
)
6265
)
6366
ON
64-
CONCAT(origin, '/') = url AND
67+
CONCAT(origin, '/') = page AND
6568
IF(device = 'desktop', 'desktop', 'mobile') = client
6669
WHERE
6770
date = '2025-07-01'

sql/2025/ecommerce/median_lighthouse_score_ecommsites.sql

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,23 +7,23 @@ WITH technologies AS (
77
rank,
88
lighthouse,
99
COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites
10-
FROM `httparchive.all.pages`,
10+
FROM `httparchive.crawl.pages`,
1111
UNNEST(technologies) AS tech,
1212
UNNEST(categories) AS category
1313
WHERE
1414
date = '2025-07-01' AND
15-
is_root_page = TRUE
15+
is_root_page
1616
)
1717

1818
SELECT
1919
client,
2020
rank,
2121
technology,
2222
ARRAY_AGG(DISTINCT category) AS categories,
23-
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.performance.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_performance,
24-
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.accessibility.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_accessibility,
25-
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.seo.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_seo,
26-
APPROX_QUANTILES(CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.best-practices.score') AS NUMERIC), 1000)[OFFSET(500)] AS median_best_practices,
23+
APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse.categories.performance.score) AS NUMERIC), 1000)[OFFSET(500)] AS median_performance,
24+
APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse.categories.accessibility.score) AS NUMERIC), 1000)[OFFSET(500)] AS median_accessibility,
25+
APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse.categories.seo.score) AS NUMERIC), 1000)[OFFSET(500)] AS median_seo,
26+
APPROX_QUANTILES(CAST(JSON_VALUE(lighthouse.categories.`best-practices`.score) AS NUMERIC), 1000)[OFFSET(500)] AS median_best_practices,
2727
ANY_VALUE(total_websites) AS total_websites,
2828
COUNT(DISTINCT page) AS number_of_websites,
2929
COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS percent_of_websites

sql/2025/ecommerce/top_ecommerce.sql

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,15 @@ SELECT
99
total,
1010
COUNT(DISTINCT page) / total AS pct
1111
FROM
12-
`httparchive.all.pages`,
12+
`httparchive.crawl.pages`,
1313
UNNEST(technologies) AS technologies,
1414
UNNEST(technologies.categories) AS cats
1515
JOIN (
1616
SELECT
1717
client,
1818
COUNT(0) AS total
1919
FROM
20-
`httparchive.all.pages`
20+
`httparchive.crawl.pages`
2121
WHERE
2222
date = '2025-07-01' AND
2323
is_root_page
@@ -43,15 +43,15 @@ SELECT
4343
total,
4444
COUNT(DISTINCT page) / total AS pct
4545
FROM
46-
`httparchive.all.pages`,
46+
`httparchive.crawl.pages`,
4747
UNNEST(technologies) AS technologies,
4848
UNNEST(technologies.categories) AS cats
4949
JOIN (
5050
SELECT
5151
client,
5252
COUNT(0) AS total
5353
FROM
54-
`httparchive.all.pages`
54+
`httparchive.crawl.pages`
5555
WHERE
5656
date = '2024-06-01' AND
5757
is_root_page
@@ -111,15 +111,15 @@ SELECT
111111
total,
112112
COUNT(DISTINCT page) / total AS pct
113113
FROM
114-
`httparchive.all.pages`,
114+
`httparchive.crawl.pages`,
115115
UNNEST(technologies) AS technologies,
116116
UNNEST(technologies.categories) AS cats
117117
JOIN (
118118
SELECT
119119
client,
120120
COUNT(0) AS total
121121
FROM
122-
`httparchive.all.pages`
122+
`httparchive.crawl.pages`
123123
WHERE
124124
date = '2022-08-01' AND -- noqa: CV09
125125
is_root_page
@@ -145,15 +145,15 @@ SELECT
145145
total,
146146
COUNT(DISTINCT page) / total AS pct
147147
FROM
148-
`httparchive.all.pages`,
148+
`httparchive.crawl.pages`,
149149
UNNEST(technologies) AS technologies,
150150
UNNEST(technologies.categories) AS cats
151151
JOIN (
152152
SELECT
153153
client,
154154
COUNT(0) AS total
155155
FROM
156-
`httparchive.all.pages`
156+
`httparchive.crawl.pages`
157157
WHERE
158158
date = '2021-07-01' AND
159159
is_root_page

sql/2025/ecommerce/top_payment_provider_by_geo.sql

Lines changed: 16 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5,22 +5,22 @@ WITH geo_summary AS (
55
SELECT
66
`chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS geo,
77
IF(device = 'desktop', 'desktop', 'mobile') AS client,
8-
origin,
8+
CONCAT(origin, '/') AS root_page,
99
COUNT(DISTINCT origin) OVER (PARTITION BY country_code, IF(device = 'desktop', 'desktop', 'mobile')) AS total
1010
FROM
1111
`chrome-ux-report.materialized.country_summary`
1212
WHERE
13-
yyyymm = 202506
13+
yyyymm = 202507
1414
UNION ALL
1515
SELECT
1616
'ALL' AS geo,
1717
IF(device = 'desktop', 'desktop', 'mobile') AS client,
18-
origin,
18+
CONCAT(origin, '/') AS root_page,
1919
COUNT(DISTINCT origin) OVER (PARTITION BY IF(device = 'desktop', 'desktop', 'mobile')) AS total
2020
FROM
2121
`chrome-ux-report.materialized.device_summary`
2222
WHERE
23-
yyyymm = 202506
23+
yyyymm = 202507
2424
)
2525

2626
SELECT
@@ -30,40 +30,34 @@ FROM (
3030
client,
3131
geo,
3232
payment,
33-
COUNT(0) AS pages,
33+
COUNT(DISTINCT root_page) AS sites,
3434
ANY_VALUE(total) AS total,
35-
COUNT(DISTINCT url) / ANY_VALUE(total) AS pct
36-
FROM (
37-
SELECT DISTINCT
38-
geo,
39-
client,
40-
CONCAT(origin, '/') AS url,
41-
total
42-
FROM
43-
geo_summary
44-
) JOIN (
35+
COUNT(DISTINCT root_page) / ANY_VALUE(total) AS pct
36+
FROM
37+
geo_summary
38+
JOIN (
4539
SELECT DISTINCT
4640
client,
4741
cats,
4842
technologies.technology AS payment,
49-
page AS url
43+
page,
44+
root_page
5045
FROM
51-
`httparchive.all.pages`,
46+
`httparchive.crawl.pages`,
5247
UNNEST(technologies) AS technologies,
5348
UNNEST(technologies.categories) AS cats
5449
WHERE
5550
technologies.technology IS NOT NULL AND
5651
cats = 'Payment processors' AND
5752
technologies.technology != '' AND
58-
date = '2025-07-01' AND
59-
is_root_page
60-
) USING (client, url)
53+
date = '2025-07-01'
54+
) USING (client, root_page)
6155
GROUP BY
6256
client,
6357
geo,
6458
payment
6559
)
6660
WHERE
67-
pages > 1000
61+
sites > 1000
6862
ORDER BY
69-
pages DESC
63+
sites DESC

sql/2025/ecommerce/top_payment_providers.sql

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,15 @@ SELECT
99
total,
1010
COUNT(DISTINCT page) / total AS pct
1111
FROM
12-
`httparchive.all.pages`,
12+
`httparchive.crawl.pages`,
1313
UNNEST(technologies) AS technologies,
1414
UNNEST(technologies.categories) AS cats
1515
JOIN (
1616
SELECT
1717
client,
1818
COUNT(0) AS total
1919
FROM
20-
`httparchive.all.pages`
20+
`httparchive.crawl.pages`
2121
WHERE
2222
date = '2025-07-01' AND
2323
is_root_page
@@ -42,15 +42,15 @@ SELECT
4242
total,
4343
COUNT(DISTINCT page) / total AS pct
4444
FROM
45-
`httparchive.all.pages`,
45+
`httparchive.crawl.pages`,
4646
UNNEST(technologies) AS technologies,
4747
UNNEST(technologies.categories) AS cats
4848
JOIN (
4949
SELECT
5050
client,
5151
COUNT(0) AS total
5252
FROM
53-
`httparchive.all.pages`
53+
`httparchive.crawl.pages`
5454
WHERE
5555
date = '2024-06-01' AND
5656
is_root_page
@@ -75,15 +75,15 @@ SELECT
7575
total,
7676
COUNT(DISTINCT page) / total AS pct
7777
FROM
78-
`httparchive.all.pages`,
78+
`httparchive.crawl.pages`,
7979
UNNEST(technologies) AS technologies,
8080
UNNEST(technologies.categories) AS cats
8181
JOIN (
8282
SELECT
8383
client,
8484
COUNT(0) AS total
8585
FROM
86-
`httparchive.all.pages`
86+
`httparchive.crawl.pages`
8787
WHERE
8888
date = '2023-06-01' AND
8989
is_root_page
@@ -108,25 +108,25 @@ SELECT
108108
total,
109109
COUNT(DISTINCT page) / total AS pct
110110
FROM
111-
`httparchive.all.pages`,
111+
`httparchive.crawl.pages`,
112112
UNNEST(technologies) AS technologies,
113113
UNNEST(technologies.categories) AS cats
114114
JOIN (
115115
SELECT
116116
client,
117117
COUNT(0) AS total
118118
FROM
119-
`httparchive.all.pages`
119+
`httparchive.crawl.pages`
120120
WHERE
121-
date = '2022-08-01' AND -- noqa: CV09
121+
date = '2022-06-01' AND
122122
is_root_page
123123
GROUP BY
124124
client
125125
)
126126
USING (client)
127127
WHERE
128128
cats = 'Payment processors' AND
129-
date = '2022-08-01' AND -- noqa: CV09
129+
date = '2022-06-01' AND
130130
is_root_page
131131
GROUP BY
132132
client,
@@ -141,15 +141,15 @@ SELECT
141141
total,
142142
COUNT(DISTINCT page) / total AS pct
143143
FROM
144-
`httparchive.all.pages`,
144+
`httparchive.crawl.pages`,
145145
UNNEST(technologies) AS technologies,
146146
UNNEST(technologies.categories) AS cats
147147
JOIN (
148148
SELECT
149149
client,
150150
COUNT(0) AS total
151151
FROM
152-
`httparchive.all.pages`
152+
`httparchive.crawl.pages`
153153
WHERE
154154
date = '2021-07-01' AND
155155
is_root_page

sql/2025/ecommerce/top_shopsystem_by_geo.sql

Lines changed: 16 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -5,22 +5,22 @@ WITH geo_summary AS (
55
SELECT
66
`chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS geo,
77
IF(device = 'desktop', 'desktop', 'mobile') AS client,
8-
origin,
8+
CONCAT(origin, '/') AS root_page,
99
COUNT(DISTINCT origin) OVER (PARTITION BY country_code, IF(device = 'desktop', 'desktop', 'mobile')) AS total
1010
FROM
1111
`chrome-ux-report.materialized.country_summary`
1212
WHERE
13-
yyyymm = 202506
13+
yyyymm = 202507
1414
UNION ALL
1515
SELECT
1616
'ALL' AS geo,
1717
IF(device = 'desktop', 'desktop', 'mobile') AS client,
18-
origin,
18+
CONCAT(origin, '/') AS root_page,
1919
COUNT(DISTINCT origin) OVER (PARTITION BY IF(device = 'desktop', 'desktop', 'mobile')) AS total
2020
FROM
2121
`chrome-ux-report.materialized.device_summary`
2222
WHERE
23-
yyyymm = 202506
23+
yyyymm = 202507
2424
)
2525

2626
SELECT
@@ -30,25 +30,20 @@ FROM (
3030
client,
3131
geo,
3232
app,
33-
COUNT(0) AS pages,
33+
COUNT(DISTINCT root_page) AS sites,
3434
ANY_VALUE(total) AS total,
35-
COUNT(DISTINCT url) / ANY_VALUE(total) AS pct
36-
FROM (
37-
SELECT DISTINCT
38-
geo,
39-
client,
40-
CONCAT(origin, '/') AS url,
41-
total
42-
FROM
43-
geo_summary
44-
) JOIN (
35+
COUNT(DISTINCT root_page) / ANY_VALUE(total) AS pct
36+
FROM
37+
geo_summary
38+
JOIN (
4539
SELECT DISTINCT
4640
client,
4741
cats,
4842
technologies.technology AS app,
49-
page AS url
43+
page,
44+
root_page
5045
FROM
51-
`httparchive.all.pages`,
46+
`httparchive.crawl.pages`,
5247
UNNEST(technologies) AS technologies,
5348
UNNEST(technologies.categories) AS cats
5449
WHERE
@@ -57,15 +52,14 @@ FROM (
5752
technologies.technology != 'Cart Functionality' AND
5853
technologies.technology != 'Google Analytics Enhanced eCommerce' AND
5954
technologies.technology != '' AND
60-
date = '2025-07-01' AND
61-
is_root_page
62-
) USING (client, url)
55+
date = '2025-07-01'
56+
) USING (client, root_page)
6357
GROUP BY
6458
client,
6559
geo,
6660
app
6761
)
6862
WHERE
69-
pages > 1000
63+
sites > 1000
7064
ORDER BY
71-
pages DESC
65+
sites DESC

0 commit comments

Comments
 (0)