Skip to content

Commit 95c46e1

Browse files
committed
Merge branch 'main' into capabilities-2025-chapter
2 parents c716422 + 993f77a commit 95c46e1

File tree

194 files changed

+6426
-812
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

194 files changed

+6426
-812
lines changed

.github/linters/.python-lint

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,6 @@ persistent=yes
3131
# Specify a configuration file.
3232
#rcfile=
3333

34-
# When enabled, pylint would attempt to guess common misconfiguration and emit
35-
# user-friendly hints instead of false-positive error messages
36-
suggestion-mode=yes
37-
3834
# Allow loading of arbitrary C extensions. Extensions are imported into the
3935
# active Python interpreter and may run arbitrary code.
4036
unsafe-load-any-extension=no
@@ -470,5 +466,6 @@ valid-metaclass-classmethod-first-arg=mcs
470466
[EXCEPTIONS]
471467

472468
# Exceptions that will emit a warning when being caught. Defaults to
473-
# "Exception"
474-
overgeneral-exceptions=Exception
469+
# ["builtins.BaseException", "builtins.Exception"]
470+
overgeneral-exceptions = builtins.BaseException,
471+
builtins.Exception

SECURITY.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Security Policy
2+
3+
## Reporting a Vulnerability
4+
5+
Please report any suspected security issues to team@httparchive.org. We currently do not participate in a bug bounty programme.

sql/2024/privacy/number_of_websites_with_related_origin_trials.sql

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,28 @@
11
# Pages that participate in the privacy-related origin trials
2-
CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS (
3-
SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70))
4-
);
5-
6-
CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING)
7-
RETURNS STRUCT<
2+
CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT<
83
token STRING,
94
origin STRING,
105
feature STRING,
116
expiry TIMESTAMP,
127
is_subdomain BOOL,
138
is_third_party BOOL
14-
> AS (
15-
STRUCT(
16-
DECODE_ORIGIN_TRIAL(token) AS token,
17-
JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin,
18-
JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature,
19-
TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') AS INT64)) AS expiry,
20-
JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain,
21-
JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party
9+
>
10+
DETERMINISTIC AS (
11+
(
12+
WITH decoded_token AS (
13+
SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded
14+
)
15+
16+
SELECT
17+
STRUCT(
18+
decoded AS token,
19+
JSON_VALUE(decoded, '$.origin') AS origin,
20+
JSON_VALUE(decoded, '$.feature') AS feature,
21+
TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry,
22+
JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain,
23+
JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party
24+
)
25+
FROM decoded_token
2226
)
2327
);
2428

sql/2025/cdn/distribution_of_compression_types_by_cdn.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ FROM (
1515
CASE
1616
WHEN a.value = 'gzip' THEN 'Gzip'
1717
WHEN a.value = 'br' THEN 'Brotli'
18+
WHEN a.value = 'zstd' THEN 'Zstandard'
1819
WHEN a.value = '' THEN 'no text compression'
1920
ELSE 'other'
2021
END AS compression_type

sql/2025/cdn/distribution_of_compression_types_cdn_vs_origin.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ FROM (
1515
CASE
1616
WHEN a.value = 'gzip' THEN 'Gzip'
1717
WHEN a.value = 'br' THEN 'Brotli'
18+
WHEN a.value = 'zstd' THEN 'Zstandard'
1819
WHEN a.value = '' THEN 'no text compression'
1920
ELSE 'other'
2021
END AS compression_type
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
#standardSQL
# CrUX Core Web Vitals performance of Ecommerce vendors by device
# (FID was replaced by INP, which is no longer optional).

# TRUE when the "good" bucket makes up at least 75% of all three buckets.
CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  good / (good + needs_improvement + poor) >= 0.75
);

# TRUE when the three buckets sum to more than zero, i.e. the metric has data.
CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  good + needs_improvement + poor > 0
);

SELECT
  crux.date,
  ecommerce_pages.client,
  ecommerce_pages.ecomm,
  COUNT(DISTINCT crux.origin) AS origins,
  # Origins with good LCP divided by origins with any LCP.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp), crux.origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp), crux.origin, NULL))
  ) AS pct_good_lcp,

  # Origins with good INP divided by origins with any INP.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(crux.fast_inp, crux.avg_inp, crux.slow_inp), crux.origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(crux.fast_inp, crux.avg_inp, crux.slow_inp), crux.origin, NULL))
  ) AS pct_good_inp,

  # Origins with good CLS divided by origins with any CLS.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(crux.small_cls, crux.medium_cls, crux.large_cls), crux.origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(crux.small_cls, crux.medium_cls, crux.large_cls), crux.origin, NULL))
  ) AS pct_good_cls,

  # Origins with good LCP, INP, and CLS divided by origins with any LCP, INP, and CLS.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp) AND
      IS_GOOD(crux.fast_inp, crux.avg_inp, crux.slow_inp) AND
      IS_GOOD(crux.small_cls, crux.medium_cls, crux.large_cls), crux.origin, NULL
    )),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp) AND
      IS_NON_ZERO(crux.fast_inp, crux.avg_inp, crux.slow_inp) AND
      IS_NON_ZERO(crux.small_cls, crux.medium_cls, crux.large_cls), crux.origin, NULL
    ))
  ) AS pct_good_cwv
FROM
  `chrome-ux-report.materialized.device_summary` AS crux
JOIN (
  # One row per (date, client, root page, Ecommerce technology) from the crawl.
  SELECT DISTINCT
    date,
    client,
    root_page,
    tech.technology AS ecomm
  FROM
    `httparchive.crawl.pages`,
    UNNEST(technologies) AS tech,
    UNNEST(tech.categories) AS category
  WHERE
    date = '2025-07-01' AND
    category = 'Ecommerce' AND
    (
      # Excluded from the vendor breakdown.
      tech.technology != 'Cart Functionality' AND
      tech.technology != 'Google Analytics Enhanced eCommerce'
    )
) AS ecommerce_pages
ON
  # CrUX origins have no trailing slash; root_page does.
  CONCAT(crux.origin, '/') = ecommerce_pages.root_page AND
  # Every non-desktop CrUX device is matched to the 'mobile' crawl client.
  IF(crux.device = 'desktop', 'desktop', 'mobile') = ecommerce_pages.client AND
  # The original condition was a bare `date`; both sides must be from the same month.
  crux.date = ecommerce_pages.date
WHERE
  crux.date = '2025-07-01'
GROUP BY
  # `date` is selected, so it must be grouped as well.
  crux.date,
  ecommerce_pages.client,
  ecommerce_pages.ecomm
ORDER BY
  origins DESC
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
#standardSQL
# CrUX Core Web Vitals performance of Ecommerce vendors by device
# (FID was replaced by INP, which is no longer optional).

# TRUE when the "good" bucket makes up at least 75% of all three buckets.
CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  good / (good + needs_improvement + poor) >= 0.75
);

# TRUE when the three buckets sum to more than zero, i.e. the metric has data.
CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  good + needs_improvement + poor > 0
);

WITH ecommerce_pages AS (
  # One row per (client, root page, Ecommerce technology) from the crawl.
  SELECT DISTINCT
    client,
    root_page,
    tech.technology AS ecomm
  FROM
    `httparchive.crawl.pages`,
    UNNEST(technologies) AS tech,
    UNNEST(tech.categories) AS category
  WHERE
    date = '2025-07-01' AND
    category = 'Ecommerce' AND
    # Excluded from the vendor breakdown.
    tech.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce')
)

SELECT
  ecommerce_pages.client,
  ecommerce_pages.ecomm,
  COUNT(DISTINCT crux.origin) AS origins,

  # Share of origins with good LCP among origins reporting any LCP.
  SAFE_DIVIDE(
    COUNT(DISTINCT CASE WHEN IS_GOOD(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp) THEN crux.origin END),
    COUNT(DISTINCT CASE WHEN IS_NON_ZERO(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp) THEN crux.origin END)
  ) AS pct_good_lcp,

  # Share of origins with good INP among origins reporting any INP.
  SAFE_DIVIDE(
    COUNT(DISTINCT CASE WHEN IS_GOOD(crux.fast_inp, crux.avg_inp, crux.slow_inp) THEN crux.origin END),
    COUNT(DISTINCT CASE WHEN IS_NON_ZERO(crux.fast_inp, crux.avg_inp, crux.slow_inp) THEN crux.origin END)
  ) AS pct_good_inp,

  # Share of origins with good CLS among origins reporting any CLS.
  SAFE_DIVIDE(
    COUNT(DISTINCT CASE WHEN IS_GOOD(crux.small_cls, crux.medium_cls, crux.large_cls) THEN crux.origin END),
    COUNT(DISTINCT CASE WHEN IS_NON_ZERO(crux.small_cls, crux.medium_cls, crux.large_cls) THEN crux.origin END)
  ) AS pct_good_cls,

  # Share of origins good on all three metrics among origins reporting all three.
  SAFE_DIVIDE(
    COUNT(DISTINCT CASE
      WHEN
        IS_GOOD(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp) AND
        IS_GOOD(crux.fast_inp, crux.avg_inp, crux.slow_inp) AND
        IS_GOOD(crux.small_cls, crux.medium_cls, crux.large_cls)
        THEN crux.origin
    END),
    COUNT(DISTINCT CASE
      WHEN
        IS_NON_ZERO(crux.fast_lcp, crux.avg_lcp, crux.slow_lcp) AND
        IS_NON_ZERO(crux.fast_inp, crux.avg_inp, crux.slow_inp) AND
        IS_NON_ZERO(crux.small_cls, crux.medium_cls, crux.large_cls)
        THEN crux.origin
    END)
  ) AS pct_good_cwv
FROM
  `chrome-ux-report.materialized.device_summary` AS crux
JOIN
  ecommerce_pages
ON
  # CrUX origins have no trailing slash; root_page does.
  ecommerce_pages.root_page = CONCAT(crux.origin, '/') AND
  # Every non-desktop CrUX device is matched to the 'mobile' crawl client.
  ecommerce_pages.client = IF(crux.device = 'desktop', 'desktop', 'mobile')
WHERE
  crux.date = '2025-07-01'
GROUP BY
  ecommerce_pages.client,
  ecommerce_pages.ecomm
ORDER BY
  origins DESC
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
#standardSQL
# CrUX Core Web Vitals performance of Ecommerce vendors by device
# (FID was replaced by INP, which is no longer optional).

# TRUE when the "good" bucket makes up at least 75% of all three buckets.
CREATE TEMP FUNCTION IS_GOOD(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  good / (good + needs_improvement + poor) >= 0.75
);

# TRUE when the three buckets sum to more than zero, i.e. the metric has data.
CREATE TEMP FUNCTION IS_NON_ZERO(good FLOAT64, needs_improvement FLOAT64, poor FLOAT64) RETURNS BOOL AS (
  good + needs_improvement + poor > 0
);

SELECT
  client,
  ecomm,
  COUNT(DISTINCT origin) AS origins,
  # Origins with good LCP divided by origins with any LCP.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(fast_lcp, avg_lcp, slow_lcp), origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp), origin, NULL))
  ) AS pct_good_lcp,

  # Origins with good INP divided by origins with any INP.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(fast_inp, avg_inp, slow_inp), origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(fast_inp, avg_inp, slow_inp), origin, NULL))
  ) AS pct_good_inp,

  # Origins with good CLS divided by origins with any CLS.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL)),
    COUNT(DISTINCT IF(IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL))
  ) AS pct_good_cls,

  # Origins with good LCP, INP, and CLS divided by origins with any LCP, INP, and CLS.
  SAFE_DIVIDE(
    COUNT(DISTINCT IF(
      IS_GOOD(fast_lcp, avg_lcp, slow_lcp) AND
      IS_GOOD(fast_inp, avg_inp, slow_inp) AND
      IS_GOOD(small_cls, medium_cls, large_cls), origin, NULL
    )),
    COUNT(DISTINCT IF(
      IS_NON_ZERO(fast_lcp, avg_lcp, slow_lcp) AND
      IS_NON_ZERO(fast_inp, avg_inp, slow_inp) AND
      IS_NON_ZERO(small_cls, medium_cls, large_cls), origin, NULL
    ))
  ) AS pct_good_cwv
FROM
  `chrome-ux-report.materialized.device_summary`
JOIN (
  # One row per (client, root page, Ecommerce technology) from the crawl.
  SELECT DISTINCT
    client,
    root_page,
    # The technology name lives on the unnested `tech` element.
    tech.technology AS ecomm
  FROM
    `httparchive.crawl.pages`,
    UNNEST(technologies) AS tech,
    # Categories are nested inside each technology, so unnest via `tech`.
    UNNEST(tech.categories) AS category
  WHERE
    date = '2025-07-01' AND
    category = 'Ecommerce' AND
    (
      # Excluded from the vendor breakdown.
      tech.technology != 'Cart Functionality' AND
      tech.technology != 'Google Analytics Enhanced eCommerce'
    )
)
ON
  # CrUX origins have no trailing slash; root_page does.
  CONCAT(origin, '/') = root_page AND
  # Every non-desktop CrUX device is matched to the 'mobile' crawl client.
  IF(device = 'desktop', 'desktop', 'mobile') = client
WHERE
  date = '2025-07-01'
GROUP BY
  client,
  ecomm
ORDER BY
  origins DESC

sql/2025/ecommerce/counts.sql

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
# Number and share of root pages detected with an Ecommerce technology,
# per crawl date, client and rank bucket.
SELECT
  client,
  date,
  EXTRACT(YEAR FROM date) AS year,
  rank_grouping,
  CASE
    # The largest bucket is labelled 'all'.
    WHEN rank_grouping = 100000000 THEN 'all'
    ELSE FORMAT("%'d", rank_grouping)
  END AS ranking,
  COUNT(DISTINCT root_page) AS ecommerce_sites,
  total,
  COUNT(DISTINCT root_page) / total AS pct_ecommerce
FROM
  `httparchive.crawl.pages`,
  UNNEST(technologies) AS tech,
  UNNEST(tech.categories) AS category,
  # Each page is repeated once per rank bucket; `rank <= rank_grouping` below keeps the ones it belongs to.
  UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
JOIN (
  # Denominator: all root pages per date, client and rank bucket.
  SELECT
    date,
    client,
    rank_grouping,
    COUNT(DISTINCT root_page) AS total
  FROM
    `httparchive.crawl.pages`,
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    # NOTE(review): the original list repeated '2025-07-01'; if a fifth crawl date was intended, add it here.
    date IN ('2025-07-01', '2024-06-01', '2023-07-01', '2022-06-01') AND
    rank <= rank_grouping
  GROUP BY
    date,
    client,
    rank_grouping
)
USING (date, client, rank_grouping)
WHERE
  # Keep in sync with the date list in the subquery above.
  date IN ('2025-07-01', '2024-06-01', '2023-07-01', '2022-06-01') AND
  rank <= rank_grouping AND
  category = 'Ecommerce' AND
  # Excluded from the Ecommerce site count.
  tech.technology NOT IN ('Cart Functionality', 'Google Analytics Enhanced eCommerce')
GROUP BY
  date,
  client,
  rank_grouping,
  total
ORDER BY
  date DESC,
  client,
  rank_grouping

0 commit comments

Comments
 (0)