@@ -923,9 +923,17 @@ pages AS (
923923
-- Keep only rows that carry at least one score, then collapse them to a
-- single row per (page, host) so downstream CTEs handle fewer rows.
--   host         : lower-cased NET.HOST() of the page URL
--   is_root_page : an arbitrary value from the group (ANY_VALUE)
--   perf/a11y/bp/seo : the highest value seen for that page, per metric
pages_scored AS (
  SELECT
    page,
    LOWER(NET.HOST(page))   AS host,
    ANY_VALUE(is_root_page) AS is_root_page,
    MAX(perf)               AS perf,
    MAX(a11y)               AS a11y,
    MAX(bp)                 AS bp,
    MAX(seo)                AS seo
  FROM pages
  -- Drop a row only when every score column is NULL.
  WHERE NOT (
    perf IS NULL
    AND a11y IS NULL
    AND bp IS NULL
    AND seo IS NULL
  )
  GROUP BY
    page,
    host
),
930938
931939-- 4) Exact ENDS_WITH matches against curated suffix list
@@ -973,6 +981,42 @@ ranked AS (
973981 FROM all_matches
974982),
975983
-- Canadian province/territory lookup (two-letter code → display name).
-- Codes are lower-case with no surrounding whitespace so they compare equal
-- to the lower-cased token extracted from a hostname.
-- Yukon is listed under both 'yt' and 'yk' so either token resolves.
ca_prov_map AS (
  SELECT *
  FROM UNNEST([
    STRUCT('ab' AS code, 'Alberta' AS province),
    ('bc', 'British Columbia'),
    ('mb', 'Manitoba'),
    ('nb', 'New Brunswick'),
    ('nl', 'Newfoundland and Labrador'),
    ('ns', 'Nova Scotia'),
    ('nt', 'Northwest Territories'),
    ('nu', 'Nunavut'),
    ('on', 'Ontario'),
    ('pe', 'Prince Edward Island'),
    ('qc', 'Quebec'),
    ('sk', 'Saskatchewan'),
    ('yt', 'Yukon'),
    ('yk', 'Yukon')  -- accept either token
  ])
),
995+
-- For hosts ending in ".gc.ca", pull out the first province/territory token
-- that is delimited by a dot or hyphen (or the start of the host) on the left
-- and by a dot or hyphen on the right.
--   prov_code : the lower-cased two-letter token, or NULL when the host
--               contains no such token (REGEXP_EXTRACT returns NULL).
-- The pattern has exactly one capturing group (the token itself); the
-- delimiters are non-capturing so REGEXP_EXTRACT returns only the code.
-- Neither the pattern nor the suffix literal may contain whitespace, because
-- a hostname never does and the predicate would then match nothing.
ca_prov_from_gc AS (
  SELECT
    p.page,
    p.host,
    LOWER(
      REGEXP_EXTRACT(
        p.host,
        r'(?i)(?:^|[.-])(ab|bc|mb|nb|nl|ns|nt|nu|on|pe|qc|sk|yt|yk)(?:[.-])'
      )
    ) AS prov_code
  FROM pages_scored AS p
  WHERE ENDS_WITH(p.host, '.gc.ca')
),
1009+
-- Resolve each extracted province code to its province name.
-- Every row of ca_prov_from_gc is kept (LEFT JOIN); ca_province is NULL when
-- no code was extracted or the code has no entry in ca_prov_map.
ca_province_classified AS (
  SELECT
    gc_host.page,
    gc_host.host,
    prov.province AS ca_province
  FROM ca_prov_from_gc AS gc_host
  LEFT JOIN ca_prov_map AS prov
    ON prov.code = gc_host.prov_code
),
1019+
9761020-- US state code lookup (code → state name)
9771021us_code_map AS (
9781022 SELECT * FROM UNNEST([
@@ -1070,6 +1114,25 @@ domain_scores AS (
10701114 FROM final_best
10711115)
10721116
-- Final result: one distinct row per scored page with its country bucket and,
-- where one was classified, a sub-national region.
--   subnational : the US state if present, otherwise the Canadian province;
--                 NULL when neither classification matched (both LEFT JOINs).
SELECT DISTINCT
  scores.bucket               AS country,
  COALESCE(
    us_cls.us_state,
    ca_cls.ca_province
  )                           AS subnational,  -- single column
  scores.gov_domain,
  scores.page,
  scores.is_root_page,
  scores.performance_score,
  scores.accessibility_score,
  scores.best_practices_score,
  scores.seo_score
FROM domain_scores AS scores
LEFT JOIN us_state_classified AS us_cls
  ON  us_cls.page = scores.page
  AND us_cls.host = scores.host
LEFT JOIN ca_province_classified AS ca_cls
  ON  ca_cls.page = scores.page
  AND ca_cls.host = scores.host
ORDER BY
  country,
  subnational,
  gov_domain,
  page;
1134+
1135+ /*
10731136SELECT
10741137 ds.bucket AS country,
10751138 usc.us_state,
@@ -1086,3 +1149,4 @@ LEFT JOIN us_state_classified usc
10861149-- optional
10871150-- WHERE ds.bucket = 'United States (USA)'
10881151ORDER BY country, us_state, gov_domain, page;
1152+ */
0 commit comments