|
9 | 9 | COUNTIF(uses_max_age) AS total_using_max_age, |
10 | 10 | COUNTIF(uses_expires) AS total_using_expires, |
11 | 11 | COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires, |
12 | | - COUNTIF( |
13 | | - uses_cache_control AND uses_expires |
14 | | - ) AS total_using_both_cc_and_expires, |
15 | | - COUNTIF( |
16 | | - NOT uses_cache_control AND NOT uses_expires |
17 | | - ) AS total_using_neither_cc_and_expires, |
18 | | - COUNTIF( |
19 | | - uses_cache_control AND NOT uses_expires |
20 | | - ) AS total_using_only_cache_control, |
21 | | - COUNTIF( |
22 | | - NOT uses_cache_control AND uses_expires |
23 | | - ) AS total_using_only_expires, |
| 12 | + COUNTIF(uses_cache_control AND uses_expires) AS total_using_both_cc_and_expires, |
| 13 | + COUNTIF(NOT uses_cache_control AND NOT uses_expires) AS total_using_neither_cc_and_expires, |
| 14 | + COUNTIF(uses_cache_control AND NOT uses_expires) AS total_using_only_cache_control, |
| 15 | + COUNTIF(NOT uses_cache_control AND uses_expires) AS total_using_only_expires, |
24 | 16 |
|
25 | 17 | COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control, |
26 | 18 | COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age, |
27 | 19 | COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires, |
28 | | - COUNTIF( |
29 | | - uses_max_age AND uses_expires |
30 | | - ) / COUNT(0) AS pct_using_max_age_and_expires, |
31 | | - COUNTIF( |
32 | | - uses_cache_control AND uses_expires |
33 | | - ) / COUNT(0) AS pct_using_both_cc_and_expires, |
34 | | - COUNTIF( |
35 | | - NOT uses_cache_control AND NOT uses_expires |
36 | | - ) / COUNT(0) AS pct_using_neither_cc_nor_expires, |
37 | | - COUNTIF( |
38 | | - uses_cache_control AND NOT uses_expires |
39 | | - ) / COUNT(0) AS pct_using_only_cache_control, |
40 | | - COUNTIF( |
41 | | - NOT uses_cache_control AND uses_expires |
42 | | - ) / COUNT(0) AS pct_using_only_expires |
| 20 | + COUNTIF(uses_max_age AND uses_expires) / COUNT(0) AS pct_using_max_age_and_expires, |
| 21 | + COUNTIF(uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_both_cc_and_expires, |
| 22 | + COUNTIF(NOT uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_neither_cc_nor_expires, |
| 23 | + COUNTIF(uses_cache_control AND NOT uses_expires) / COUNT(0) AS pct_using_only_cache_control, |
| 24 | + COUNTIF(NOT uses_cache_control AND uses_expires) / COUNT(0) AS pct_using_only_expires |
43 | 25 |
|
44 | 26 | FROM ( |
45 | 27 | SELECT |
46 | 28 | client, |
| 29 | + url, |
| 30 | + LOGICAL_OR(header.name = 'expires' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_expires, |
| 31 | + LOGICAL_OR(header.name = 'cache-control' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_cache_control, |
| 32 | + LOGICAL_OR(header.name = 'cache-control' AND REGEXP_CONTAINS(header.value, r'(?i)max-age\s*=\s*[0-9]+')) AS uses_max_age, |
47 | 33 |
|
48 | | - JSON_EXTRACT_SCALAR( |
49 | | - summary, '$.resp_expires' |
50 | | - ) IS NOT NULL AND TRIM( |
51 | | - JSON_EXTRACT_SCALAR(summary, '$.resp_expires') |
52 | | - ) != '' AS uses_expires, |
53 | | - JSON_EXTRACT_SCALAR( |
54 | | - summary, '$.resp_cache_control' |
55 | | - ) IS NOT NULL AND TRIM( |
56 | | - JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control') |
57 | | - ) != '' AS uses_cache_control, |
58 | | - REGEXP_CONTAINS( |
59 | | - JSON_EXTRACT_SCALAR(summary, '$.resp_cache_control'), |
60 | | - r'(?i)max-age\s*=\s*[0-9]+' |
61 | | - ) AS uses_max_age, |
| 34 | + NOT LOGICAL_OR(header.name = 'etag' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_no_etag, -- TRUE when no non-empty etag header exists, so a missing header counts as "no etag" (same meaning as the removed JSON_EXTRACT_SCALAR version) |
| 35 | + LOGICAL_OR(header.name = 'etag' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_etag, |
| 36 | + LOGICAL_OR(header.name = 'last-modified' AND header.value IS NOT NULL AND TRIM(header.value) != '') AS uses_last_modified, |
62 | 37 |
|
63 | | - JSON_EXTRACT_SCALAR( |
64 | | - summary, '$.resp_etag' |
65 | | - ) IS NULL OR TRIM( |
66 | | - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') |
67 | | - ) = '' AS uses_no_etag, |
68 | | - JSON_EXTRACT_SCALAR( |
69 | | - summary, '$.resp_etag' |
70 | | - ) IS NOT NULL AND TRIM( |
71 | | - JSON_EXTRACT_SCALAR(summary, '$.resp_etag') |
72 | | - ) != '' AS uses_etag, |
73 | | - JSON_EXTRACT_SCALAR( |
74 | | - summary, '$.resp_last_modified' |
75 | | - ) IS NOT NULL AND TRIM( |
76 | | - JSON_EXTRACT_SCALAR(summary, '$.resp_last_modified') |
77 | | - ) != '' AS uses_last_modified, |
78 | | - |
79 | | - REGEXP_CONTAINS( |
80 | | - TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^W/".*"' |
81 | | - ) AS uses_weak_etag, |
82 | | - REGEXP_CONTAINS( |
83 | | - TRIM(JSON_EXTRACT_SCALAR(summary, '$.resp_etag')), '^".*"' |
84 | | - ) AS uses_strong_etag |
| 38 | + LOGICAL_OR(header.name = 'etag' AND REGEXP_CONTAINS(TRIM(header.value), '^W/".*"')) AS uses_weak_etag, |
| 39 | + LOGICAL_OR(header.name = 'etag' AND REGEXP_CONTAINS(TRIM(header.value), '^".*"')) AS uses_strong_etag |
85 | 40 |
|
86 | 41 | FROM |
87 | | - `httparchive.crawl.requests` |
| 42 | + `httparchive.crawl.requests` |
| 43 | + LEFT JOIN UNNEST(response_headers) AS header -- LEFT JOIN (not a comma cross join) keeps requests whose response_headers array is empty; their flags aggregate to FALSE/NULL instead of the row disappearing from COUNT(0) |
88 | 44 | WHERE |
89 | | - date = '2025-06-01' |
| 45 | + date = '2025-07-01' |
| 46 | + GROUP BY |
| 47 | + client, |
| 48 | + url |
90 | 49 | ) |
91 | 50 |
|
92 | 51 | GROUP BY |
|
0 commit comments