Skip to content

Commit e83f37f

Browse files
authored
Update lighthouse_score_by_government_with_urls.sql - adding provinces
1 parent 4e9971e commit e83f37f

1 file changed

Lines changed: 65 additions & 1 deletion

File tree

sql/2025/accessibility/lighthouse_score_by_government_with_urls.sql

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -923,9 +923,17 @@ pages AS (
923923

924924
-- Keep only rows that carry at least one non-NULL score (to shrink downstream work).
-- The added lines then collapse the result to one row per (page, host), keeping the
-- highest value seen for each of the four scores.
925925
pages_scored AS (
926-
SELECT *
926+
SELECT
927+
page,
928+
-- Lower-cased host part of the page URL; used as a join key further down.
LOWER(NET.HOST(page)) AS host,
929+
-- ANY_VALUE returns an arbitrary is_root_page from the group.
-- NOTE(review): presumably the flag is constant per page - confirm.
ANY_VALUE(is_root_page) AS is_root_page,
930+
MAX(perf) AS perf,
931+
MAX(a11y) AS a11y,
932+
MAX(bp) AS bp,
933+
MAX(seo) AS seo
927934
FROM pages
928935
-- A row survives when any one of the four scores is present.
WHERE perf IS NOT NULL OR a11y IS NOT NULL OR bp IS NOT NULL OR seo IS NOT NULL
936+
-- NOTE(review): `host` is meant to be the SELECT-list alias defined above;
-- confirm `pages` has no column of the same name that would shadow it.
GROUP BY page, host
929937
),
930938

931939
-- 4) Exact ENDS_WITH matches against curated suffix list
@@ -973,6 +981,42 @@ ranked AS (
973981
FROM all_matches
974982
),
975983

984+
-- Lookup table: two-letter Canadian province/territory code -> full name.
-- Only the first row names the fields; the remaining tuples take the same shape.
-- Yukon appears under both 'yt' and 'yk' so either token resolves.
ca_prov_map AS (
  SELECT
    code,
    province
  FROM UNNEST([
    STRUCT('ab' AS code, 'Alberta' AS province),
    ('bc', 'British Columbia'),
    ('mb', 'Manitoba'),
    ('nb', 'New Brunswick'),
    ('nl', 'Newfoundland and Labrador'),
    ('ns', 'Nova Scotia'),
    ('nt', 'Northwest Territories'),
    ('nu', 'Nunavut'),
    ('on', 'Ontario'),
    ('pe', 'Prince Edward Island'),
    ('qc', 'Quebec'),
    ('sk', 'Saskatchewan'),
    ('yt', 'Yukon'),
    ('yk', 'Yukon')
  ])
),
995+
996+
-- For every scored page whose host ends in '.gc.ca', pull out the first
-- province/territory token found in the host. A token only counts when it is
-- preceded by the start of the host, a dot or a hyphen, and followed by a dot
-- or a hyphen. prov_code is NULL when the host contains no such token.
ca_prov_from_gc AS (
  SELECT
    scored.page,
    scored.host,
    LOWER(
      REGEXP_EXTRACT(
        scored.host,
        r'(?i)(?:^|[.-])(ab|bc|mb|nb|nl|ns|nt|nu|on|pe|qc|sk|yt|yk)(?:[.-])'
      )
    ) AS prov_code
  FROM pages_scored AS scored
  WHERE ENDS_WITH(scored.host, '.gc.ca')
),
1009+
1010+
-- Translate each extracted province code into its province name.
-- The LEFT JOIN keeps every row of ca_prov_from_gc; ca_province is NULL when
-- no code was extracted or the code is absent from the lookup.
ca_province_classified AS (
  SELECT
    gc.page,
    gc.host,
    prov.province AS ca_province
  FROM ca_prov_from_gc AS gc
  LEFT JOIN ca_prov_map AS prov
    ON prov.code = gc.prov_code
),
1019+
9761020
-- US state code lookup (code → state name)
9771021
us_code_map AS (
9781022
SELECT * FROM UNNEST([
@@ -1070,6 +1114,25 @@ domain_scores AS (
10701114
FROM final_best
10711115
)
10721116

1117+
-- Final output: distinct score rows from domain_scores, labelled with the
-- country bucket and a single `subnational` column holding the US state when
-- one was matched, otherwise the Canadian province (NULL when neither matched).
SELECT DISTINCT
  scores.bucket AS country,
  COALESCE(us.us_state, ca.ca_province) AS subnational,
  scores.gov_domain,
  scores.page,
  scores.is_root_page,
  scores.performance_score,
  scores.accessibility_score,
  scores.best_practices_score,
  scores.seo_score
FROM domain_scores AS scores
LEFT JOIN us_state_classified AS us
  ON us.page = scores.page
  AND us.host = scores.host
LEFT JOIN ca_province_classified AS ca
  ON ca.page = scores.page
  AND ca.host = scores.host
ORDER BY country, subnational, gov_domain, page;
1134+
1135+
/*
10731136
SELECT
10741137
ds.bucket AS country,
10751138
usc.us_state,
@@ -1086,3 +1149,4 @@ LEFT JOIN us_state_classified usc
10861149
-- optional
10871150
-- WHERE ds.bucket = 'United States (USA)'
10881151
ORDER BY country, us_state, gov_domain, page;
1152+
*/

0 commit comments

Comments
 (0)