#standardSQL
# The distribution of cache header adoption on websites by client.
#
# Output: one row per client. For the distinct request URLs seen for that
# client on the queried date, reports how many (total_*) and what share (pct_*)
# had a response carrying Cache-Control, a Cache-Control max-age directive,
# and/or Expires.

WITH url_cache_headers AS (
  # One row per (client, url). A flag is TRUE when at least one response header
  # recorded for that URL matches; it is FALSE (never NULL) otherwise.
  SELECT
    requests.client,
    requests.url,

    # Expires header present with a non-blank value.
    COALESCE(
      LOGICAL_OR(
        LOWER(header.name) = 'expires' AND
        header.value IS NOT NULL AND
        TRIM(header.value) != ''
      ),
      FALSE
    ) AS uses_expires,

    # Cache-Control header present with a non-blank value.
    COALESCE(
      LOGICAL_OR(
        LOWER(header.name) = 'cache-control' AND
        header.value IS NOT NULL AND
        TRIM(header.value) != ''
      ),
      FALSE
    ) AS uses_cache_control,

    # Cache-Control header containing a numeric max-age directive
    # (directive name matched case-insensitively via the (?i) flag).
    COALESCE(
      LOGICAL_OR(
        LOWER(header.name) = 'cache-control' AND
        REGEXP_CONTAINS(header.value, r'(?i)max-age\s*=\s*[0-9]+')
      ),
      FALSE
    ) AS uses_max_age

  FROM
    `httparchive.crawl.requests` AS requests
  # LEFT JOIN (rather than a comma/cross join) keeps requests whose
  # response_headers array is empty or NULL. Such requests yield a single row
  # with a NULL header, so every LOGICAL_OR above is NULL and is coalesced to
  # FALSE, i.e. they are counted as using neither header.
  LEFT JOIN
    UNNEST(requests.response_headers) AS header
  WHERE
    requests.date = '2025-07-01'
  GROUP BY
    requests.client,
    requests.url
)

SELECT
  client,
  # Number of distinct URLs per client (the CTE is grouped by client and url).
  COUNT(0) AS total_requests,

  # Absolute counts.
  COUNTIF(uses_cache_control) AS total_using_cache_control,
  COUNTIF(uses_max_age) AS total_using_max_age,
  COUNTIF(uses_expires) AS total_using_expires,
  COUNTIF(uses_max_age AND uses_expires) AS total_using_max_age_and_expires,
  COUNTIF(
    uses_cache_control AND uses_expires
  ) AS total_using_both_cc_and_expires,
  # Alias kept as-is ("_and_", unlike its pct_ counterpart's "_nor_") so the
  # result schema stays unchanged for existing consumers.
  COUNTIF(
    NOT uses_cache_control AND NOT uses_expires
  ) AS total_using_neither_cc_and_expires,
  COUNTIF(
    uses_cache_control AND NOT uses_expires
  ) AS total_using_only_cache_control,
  COUNTIF(
    NOT uses_cache_control AND uses_expires
  ) AS total_using_only_expires,

  # Shares of the client's URLs (fractions in [0, 1], not percentages x100).
  COUNTIF(uses_cache_control) / COUNT(0) AS pct_cache_control,
  COUNTIF(uses_max_age) / COUNT(0) AS pct_using_max_age,
  COUNTIF(uses_expires) / COUNT(0) AS pct_using_expires,
  COUNTIF(
    uses_max_age AND uses_expires
  ) / COUNT(0) AS pct_using_max_age_and_expires,
  COUNTIF(
    uses_cache_control AND uses_expires
  ) / COUNT(0) AS pct_using_both_cc_and_expires,
  COUNTIF(
    NOT uses_cache_control AND NOT uses_expires
  ) / COUNT(0) AS pct_using_neither_cc_nor_expires,
  COUNTIF(
    uses_cache_control AND NOT uses_expires
  ) / COUNT(0) AS pct_using_only_cache_control,
  COUNTIF(
    NOT uses_cache_control AND uses_expires
  ) / COUNT(0) AS pct_using_only_expires

FROM
  url_cache_headers
GROUP BY
  client
ORDER BY
  client;
0 commit comments