Skip to content

Commit c5a6a4e

Browse files
committed
fix linter issues
1 parent e0dd613 commit c5a6a4e

27 files changed

Lines changed: 2888 additions & 1540 deletions

sql/2025/sustainability/cache_header_usage.sql

Lines changed: 81 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -2,49 +2,94 @@
22
# The distribution of cache header adoption on websites by client.
33

44
SELECT
5-
client,
6-
COUNT(0) AS total_requests,
7-
8-
COUNTIF(uses_cache_control) AS total_using_cache_control,
9-
COUNTIF(uses_max_age) AS total_using_max_age,
10-
COUNTIF(uses_expires) AS total_using_expires,
11-
COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
12-
COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires,
13-
COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires,
14-
COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control,
15-
COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires,
16-
17-
COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control,
18-
COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
19-
COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
20-
COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires,
21-
COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires,
22-
COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires,
23-
COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control,
24-
COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires
5+
client,
6+
COUNT(*) AS total_requests,
7+
8+
COUNTIF(uses_cache_control) AS total_using_cache_control,
9+
COUNTIF(uses_max_age) AS total_using_max_age,
10+
COUNTIF(uses_expires) AS total_using_expires,
11+
COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
12+
COUNTIF(
13+
uses_cache_control AND uses_expires
14+
) AS total_using_both_cc_and_expires,
15+
COUNTIF(
16+
NOT uses_cache_control AND NOT uses_expires
17+
) AS total_using_neither_cc_and_expires,
18+
COUNTIF(
19+
uses_cache_control AND NOT uses_expires
20+
) AS total_using_only_cache_control,
21+
COUNTIF(
22+
NOT uses_cache_control AND uses_expires
23+
) AS total_using_only_expires,
24+
25+
COUNTIF(uses_cache_control) / COUNT(*) AS pct_cache_control,
26+
COUNTIF(uses_max_age) / COUNT(*) AS pct_using_max_age,
27+
COUNTIF(uses_expires) / COUNT(*) AS pct_using_expires,
28+
COUNTIF(
29+
uses_max_age AND uses_expires
30+
) / COUNT(*) AS pct_using_max_age_and_expires,
31+
COUNTIF(
32+
uses_cache_control AND uses_expires
33+
) / COUNT(*) AS pct_using_both_cc_and_expires,
34+
COUNTIF(
35+
NOT uses_cache_control AND NOT uses_expires
36+
) / COUNT(*) AS pct_using_neither_cc_nor_expires,
37+
COUNTIF(
38+
uses_cache_control AND NOT uses_expires
39+
) / COUNT(*) AS pct_using_only_cache_control,
40+
COUNTIF(
41+
NOT uses_cache_control AND uses_expires
42+
) / COUNT(*) AS pct_using_only_expires
2543

2644
FROM (
27-
SELECT
28-
client,
45+
SELECT
46+
client,
2947

30-
JSON_EXTRACT_SCALAR(summary, '$.resp_expires') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_expires')) != '' AS uses_expires,
31-
JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control')) != '' AS uses_cache_control,
32-
REGEXP_CONTAINS(JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), r'(?i)max-age\s*=\s*[0-9]+') AS uses_max_age,
48+
JSON_EXTRACT_SCALAR(
49+
summary, '$.resp_expires'
50+
) IS NOT NULL AND TRIM(
51+
JSON_EXTRACT_SCALAR(summary, '$.resp_expires')
52+
) != '' AS uses_expires,
53+
JSON_EXTRACT_SCALAR(
54+
summary, '$.resp_cache_control'
55+
) IS NOT NULL AND TRIM(
56+
JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control')
57+
) != '' AS uses_cache_control,
58+
REGEXP_CONTAINS(
59+
JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'),
60+
r'(?i)max-age\s*=\s*[0-9]+'
61+
) AS uses_max_age,
3362

34-
JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NULL OR TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) = '' AS uses_no_etag,
35-
JSON_EXTRACT_SCALAR(summary, '$.resp_etag') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')) != '' AS uses_etag,
36-
JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') IS NOT NULL AND TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified')) != '' AS uses_last_modified,
63+
JSON_EXTRACT_SCALAR(
64+
summary, '$.resp_etag'
65+
) IS NULL OR TRIM(
66+
JSON_EXTRACT_SCALAR(summary, '$.resp_etag')
67+
) = '' AS uses_no_etag,
68+
JSON_EXTRACT_SCALAR(
69+
summary, '$.resp_etag'
70+
) IS NOT NULL AND TRIM(
71+
JSON_EXTRACT_SCALAR(summary, '$.resp_etag')
72+
) != '' AS uses_etag,
73+
JSON_EXTRACT_SCALAR(
74+
summary, '$.resp_last_modified'
75+
) IS NOT NULL AND TRIM(
76+
JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified')
77+
) != '' AS uses_last_modified,
3778

38-
REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"') AS uses_weak_etag,
39-
REGEXP_CONTAINS(TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"') AS uses_strong_etag
79+
REGEXP_CONTAINS(
80+
TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"'
81+
) AS uses_weak_etag,
82+
REGEXP_CONTAINS(
83+
TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"'
84+
) AS uses_strong_etag
4085

41-
FROM
42-
`httparchive.crawl.requests`
43-
WHERE
44-
date = '2025-06-01'
86+
FROM
87+
`httparchive.crawl.requests`
88+
WHERE
89+
date = '2025-06-01'
4590
)
4691

4792
GROUP BY
48-
client
93+
client
4994
ORDER BY
50-
client;
95+
client;

sql/2025/sustainability/cdn_adoption.sql

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -2,30 +2,32 @@
22
# The distribution of CDN adoption on websites by client.
33

44
SELECT
5-
client,
6-
IF(cdn = '', 'No CDN', cdn) AS cdn,
7-
COUNT(0) AS freq,
8-
total,
9-
COUNT(0) / total AS pct
10-
FROM (
11-
SELECT
125
client,
13-
COUNT(0) AS total,
14-
ARRAY_CONCAT_AGG(SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ')) AS cdn_list
15-
FROM
16-
`httparchive.crawl.pages`
17-
WHERE
18-
date = '2025-06-01' AND
19-
is_root_page = TRUE
20-
GROUP BY
21-
client
6+
total,
7+
IF(cdn = '', 'No CDN', cdn) AS cdn,
8+
COUNT(*) AS freq,
9+
COUNT(*) / total AS pct
10+
FROM (
11+
SELECT
12+
client,
13+
COUNT(*) AS total,
14+
ARRAY_CONCAT_AGG(
15+
SPLIT(JSON_EXTRACT_SCALAR(summary, '$.cdn'), ', ')
16+
) AS cdn_list
17+
FROM
18+
`httparchive.crawl.pages`
19+
WHERE
20+
date = '2025-06-01' AND
21+
is_root_page = TRUE
22+
GROUP BY
23+
client
2224
),
23-
UNNEST(cdn_list) AS cdn
25+
UNNEST(cdn_list) AS cdn
2426
GROUP BY
25-
client,
26-
cdn,
27-
total
27+
client,
28+
cdn,
29+
total
2830
ORDER BY
29-
pct DESC,
30-
client,
31-
cdn;
31+
pct DESC,
32+
client ASC,
33+
cdn ASC;

0 commit comments

Comments
 (0)