Skip to content

Commit dcd4f25

Browse files
authored
Update lighthouse_score_by_government_with_urls.sql
more province / state data
1 parent e83f37f commit dcd4f25

1 file changed

Lines changed: 91 additions & 27 deletions

File tree

sql/2025/accessibility/lighthouse_score_by_government_with_urls.sql

Lines changed: 91 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,83 @@ ranked AS (
981981
FROM all_matches
982982
),
983983

984-
-- Canadian province code lookup (code → province name)
984+
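-- UK: map known nation-specific domains (*.gov.scot, *.nhs.scot, parliament.scot, police.scot, *.gov.wales, *.llyw.cymru, *.nhs.wales, senedd.wales/.cymru, nidirect.gov.uk) to Scotland, Wales, or Northern Ireland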
985+
uk_nation_from_domain AS (
986+
SELECT
987+
p.page, p.host,
988+
CASE
989+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)[a-z0-9-]+\.gov\.scot$') THEN 'Scotland'
990+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)[a-z0-9-]+\.nhs\.scot$') THEN 'Scotland'
991+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)parliament\.scot$') THEN 'Scotland'
992+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)police\.scot$') THEN 'Scotland'
993+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)[a-z0-9-]+\.gov\.wales$') THEN 'Wales'
994+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)[a-z0-9-]+\.llyw\.cymru$') THEN 'Wales'
995+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)[a-z0-9-]+\.nhs\.wales$') THEN 'Wales'
996+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)senedd\.(wales|cymru)$') THEN 'Wales'
997+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)nidirect\.gov\.uk$') THEN 'Northern Ireland'
998+
ELSE NULL
999+
END AS uk_nation
1000+
FROM pages_scored p
1001+
),
1002+
1003+
-- Australia: state/territory code lookup (code → state name), matched against the dot-delimited code directly before .gov.au (e.g. nsw.gov.au, *.vic.gov.au)
1004+
au_state_map AS (
1005+
SELECT * FROM UNNEST([
1006+
STRUCT('nsw' AS code, 'New South Wales' AS state),
1007+
('vic','Victoria'), ('qld','Queensland'), ('sa','South Australia'),
1008+
('wa','Western Australia'), ('tas','Tasmania'),
1009+
('act','Australian Capital Territory'), ('nt','Northern Territory')
1010+
])
1011+
),
1012+
au_state_from_gov_au AS (
1013+
SELECT
1014+
p.page, p.host,
1015+
m.state AS au_state
1016+
FROM pages_scored p
1017+
JOIN au_state_map m
1018+
ON LOWER(REGEXP_EXTRACT(p.host, r'(?i)(?:^|\.)(nsw|vic|qld|sa|wa|tas|act|nt)\.gov\.au$')) = m.code
1019+
),
1020+
1021+
-- Brazil: state code lookup (code → state name), matched against the dot-delimited two-letter code directly before .gov.br (e.g. sp.gov.br, *.rj.gov.br)
1022+
br_state_map AS (
1023+
SELECT * FROM UNNEST([
1024+
STRUCT('ac' AS code, 'Acre' AS state),
1025+
('al','Alagoas'), ('ap','Amapá'), ('am','Amazonas'), ('ba','Bahia'),
1026+
('ce','Ceará'), ('df','Distrito Federal'), ('es','Espírito Santo'),
1027+
('go','Goiás'), ('ma','Maranhão'), ('mt','Mato Grosso'),
1028+
('ms','Mato Grosso do Sul'), ('mg','Minas Gerais'), ('pa','Pará'),
1029+
('pb','Paraíba'), ('pr','Paraná'), ('pe','Pernambuco'),
1030+
('pi','Piauí'), ('rj','Rio de Janeiro'), ('rn','Rio Grande do Norte'),
1031+
('rs','Rio Grande do Sul'), ('ro','Rondônia'), ('rr','Roraima'),
1032+
('sc','Santa Catarina'), ('sp','São Paulo'), ('se','Sergipe'), ('to','Tocantins')
1033+
])
1034+
),
1035+
br_state_from_gov_br AS (
1036+
SELECT
1037+
p.page, p.host,
1038+
m.state AS br_state
1039+
FROM pages_scored p
1040+
JOIN br_state_map m
1041+
ON LOWER(REGEXP_EXTRACT(
1042+
p.host,
1043+
'(?i)(?:^|\\.)(ac|al|ap|am|ba|ce|df|es|go|ma|mt|ms|mg|pa|pb|pr|pe|pi|rj|rn|rs|ro|rr|sc|sp|se|to)\\.gov\\.br$'
1044+
)) = m.code
1045+
),
1046+
1047+
-- Spain: map known regional government domains (gencat.cat, euskadi.eus, xunta.gal, comunidad.madrid) to their region; .es hosts are not matched here
1048+
es_region_from_known AS (
1049+
SELECT p.page, p.host,
1050+
CASE
1051+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)gencat\.cat$') THEN 'Catalonia'
1052+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)euskadi\.eus$') THEN 'Basque Country'
1053+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)xunta\.gal$') THEN 'Galicia'
1054+
WHEN REGEXP_CONTAINS(p.host, r'(?i)(^|\.)comunidad\.madrid$') THEN 'Community of Madrid'
1055+
ELSE NULL
1056+
END AS es_region
1057+
FROM pages_scored p
1058+
),
1059+
1060+
-- Canada: province code lookup (code → province name)
9851061
ca_prov_map AS (
9861062
SELECT * FROM UNNEST([
9871063
STRUCT('ab' AS code, 'Alberta' AS province),
@@ -992,8 +1068,6 @@ ca_prov_map AS (
9921068
('yt','Yukon'), ('yk','Yukon') -- accept either token
9931069
])
9941070
),
995-
996-
-- Look for dot-or-hyphen delimited province tokens inside *.gc.ca hosts
9971071
ca_prov_from_gc AS (
9981072
SELECT
9991073
p.page, p.host,
@@ -1117,7 +1191,14 @@ domain_scores AS (
11171191
-- Final SELECT
11181192
SELECT DISTINCT
11191193
ds.bucket AS country,
1120-
COALESCE(usc.us_state, cpc.ca_province) AS subnational, -- single column
1194+
COALESCE(
1195+
uk.uk_nation,
1196+
cpc.ca_province,
1197+
usc.us_state,
1198+
au.au_state,
1199+
br.br_state,
1200+
es.es_region
1201+
) AS subnational,
11211202
ds.gov_domain,
11221203
ds.page,
11231204
ds.is_root_page,
@@ -1126,27 +1207,10 @@ SELECT DISTINCT
11261207
ds.best_practices_score,
11271208
ds.seo_score
11281209
FROM domain_scores ds
1129-
LEFT JOIN us_state_classified usc
1130-
ON usc.page = ds.page AND usc.host = ds.host
1131-
LEFT JOIN ca_province_classified cpc
1132-
ON cpc.page = ds.page AND cpc.host = ds.host
1210+
LEFT JOIN uk_nation_from_domain uk ON uk.page = ds.page AND uk.host = ds.host
1211+
LEFT JOIN ca_province_classified cpc ON cpc.page = ds.page AND cpc.host = ds.host
1212+
LEFT JOIN us_state_classified usc ON usc.page = ds.page AND usc.host = ds.host
1213+
LEFT JOIN au_state_from_gov_au au ON au.page = ds.page AND au.host = ds.host
1214+
LEFT JOIN br_state_from_gov_br br ON br.page = ds.page AND br.host = ds.host
1215+
LEFT JOIN es_region_from_known es ON es.page = ds.page AND es.host = ds.host
11331216
ORDER BY country, subnational, gov_domain, page;
1134-
1135-
/*
1136-
SELECT
1137-
ds.bucket AS country,
1138-
usc.us_state,
1139-
ds.gov_domain,
1140-
ds.page,
1141-
ds.is_root_page,
1142-
ds.performance_score,
1143-
ds.accessibility_score,
1144-
ds.best_practices_score,
1145-
ds.seo_score
1146-
FROM domain_scores ds
1147-
LEFT JOIN us_state_classified usc
1148-
ON usc.page = ds.page AND usc.host = ds.host
1149-
-- optional
1150-
-- WHERE ds.bucket = 'United States (USA)'
1151-
ORDER BY country, us_state, gov_domain, page;
1152-
*/

0 commit comments

Comments
 (0)