@@ -21,9 +21,9 @@ WITH base AS (
2121 SAFE_CAST(JSON_VALUE(custom_metrics .robots_txt , ' $.status' ) AS INT64) AS status,
2222 JSON_QUERY(custom_metrics .robots_txt , ' $.record_counts.by_useragent' ) AS byua
2323 FROM ` httparchive.crawl.pages`
24- WHERE date IN (' 2019-07-01' ,' 2020-08-01' ,' 2021-07-01' ,' 2022-06-01' ,' 2024-06-01' ,' 2025-07-01' )
25- AND client = ' mobile'
26- AND is_root_page
24+ WHERE date IN (' 2019-07-01' , ' 2020-08-01' , ' 2021-07-01' , ' 2022-06-01' , ' 2024-06-01' , ' 2025-07-01' ) AND
25+ client = ' mobile' AND
26+ is_root_page
2727),
2828
2929-- Extract UA keys present on each site
@@ -32,7 +32,7 @@ ua_keys AS (
3232 b .date , b .client , b .rank , b .root_page ,
3333 LOWER (agent) AS agent
3434 FROM base AS b,
35- UNNEST(REGEXP_EXTRACT_ALL(TO_JSON_STRING(b .byua ), r' "([^"]+)":\{ ' )) AS agent
35+ UNNEST(REGEXP_EXTRACT_ALL(TO_JSON_STRING(b .byua ), r' "([^"]+)":\{ ' )) AS agent
3636),
3737
3838-- Look up that agent’s counts on that site
@@ -49,11 +49,11 @@ ua_presence AS (
4949ua_scored AS (
5050 SELECT
5151 date , client, rank, root_page, agent, status,
52- COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.allow' ) AS INT64), 0 ) +
53- COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.disallow' ) AS INT64), 0 ) +
52+ COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.allow' ) AS INT64), 0 ) +
53+ COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.disallow' ) AS INT64), 0 ) +
5454 COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.crawl_delay' ) AS INT64), 0 ) +
55- COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.noindex' ) AS INT64), 0 ) +
56- COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.other' ) AS INT64), 0 ) AS rules_sum
55+ COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.noindex' ) AS INT64), 0 ) +
56+ COALESCE(SAFE_CAST(JSON_VALUE(agent_obj, ' $.other' ) AS INT64), 0 ) AS rules_sum
5757 FROM ua_presence
5858),
5959
@@ -63,6 +63,7 @@ totals_all AS (
6363 FROM base
6464 GROUP BY date , client, rank
6565),
66+
6667totals_200 AS (
6768 SELECT date , client, rank, COUNT (DISTINCT root_page) AS total_sites_200
6869 FROM base
@@ -89,10 +90,10 @@ SELECT
8990 t2 .total_sites_200 ,
9091 n .sites_with_agent ,
9192 n .sites_with_agent_among_200 ,
92- SAFE_DIVIDE(n .sites_with_agent , t .total_sites ) AS pct_of_all_sites,
93+ SAFE_DIVIDE(n .sites_with_agent , t .total_sites ) AS pct_of_all_sites,
9394 SAFE_DIVIDE(n .sites_with_agent_among_200 , t2 .total_sites_200 ) AS pct_of_sites_with_200
9495FROM numerators n
95- JOIN totals_all t USING (date , client, rank)
96+ JOIN totals_all t USING (date , client, rank)
9697JOIN totals_200 t2 USING (date , client, rank)
9798WHERE n .sites_with_agent >= 100
9899ORDER BY n .date , n .rank , pct_of_all_sites DESC ;
0 commit comments