Skip to content

Commit 4deb219

Browse files
mgiffordtunetheweb
andauthored
Accessibility queries 2025 (#4211)
* Create a11y_frontend_technology.sql Adding updated sql. * Create a11y_overall_tech_usage_by_domain_rank.sql * Create a11y_technology_usage.sql upgrading crawl * Update a11y_technology_usage.sql removing sampling * Create a11y_technology_usage_by_domain_rank.sql * Update a11y_technology_usage_by_domain_rank.sql Updating to scan whole domain * Update a11y_technology_usage_by_domain_rank.sql - update * Create alt_ending_in_image_extension.sql * Update alt_ending_in_image_extension.sql - update * Update alt_ending_in_image_extension.sql - FULL RUN * Create anchors_with_role_button.sql * Update anchors_with_role_button.sql - update to avoid blanks 0 and null values were returned incorrectly. * pulling apart root and non-root values * dividing up the is_root_page and not * Create audio_track_usage.sql - update * Create button_name_sources.sql - update * Update button_name_sources.sql - update for better % * Create captcha_usage.sql - update * Update captcha_usage.sql fixing percentages * Create color_contrast.sql * Create common_alt_text_length.sql * Update common_alt_text_length.sql updating to get better data * Update common_alt_text_length.sql noting error with false element * Create common_aria_role.sql Probably broken * Create common_element_attributes.sql * Update common_element_attributes.sql - updating for consistency this wasn't in the same format as the other 2024 query * Update common_aria_role.sql Update to resolve broken results * Update common_alt_text_length.sql correcting for formatting and the object / array issue. 
* Create focus_outline_0.sql * Update focus_outline_0.sql update * bringing over basic SQL with new date and with crawl.pages * Update focus_visible.sql - update * Update form_input_name_sources.sql * Update form_required_controls.sql - update * Update landmark_elements_and_roles.sql - update * Update lighthouse_a11y_audits.sql - update * Update lighthouse_a11y_audits_by_cms.sql - Update * Update lighthouse_a11y_audits_by_cms.sql removing cap on responses from cms * Update lighthouse_a11y_audits_by_cms.sql - update there were more null values than were needed * Update lighthouse_a11y_score.sql - update * Update lighthouse_a11y_score.sql - missed 2024 reference * Update lighthouse_score_by_cms.sql - update * Update lighthouse_score_by_country.sql - update * Update lighthouse_score_by_frontend.sql - update * Update lighthouse_score_by_tld.sql - update * Update lighthouse_score_by_tld.sql - excluding port * Update media_query_features.sql - update * Update media_query_features.sql - simplifying output * Update page_title.sql - update * Update pages_with_search_input.sql - update * Update pages_with_search_input.sql - header for docs * Update placeholder_but_no_label.sql - update * Update sites_using_role.sql - update * Update skip_links.sql - update * Update sr_only_classes.sql - update * Update table_stats.sql - updated * Update units_properties.sql - update * Update units_properties.sql - update for missing column * Update valid_html_lang.sql - updated * Update video_track_usage.sql - update * Update viewport_zoom_scale.sql - updated * Update viewport_zoom_scale_by_domain_rank.sql updated * Update tabindex_usage_and_values.sql - updated * Update page_title.sql - updated * Update lighthouse_a11y_score.sql catching non-root pages * Update lighthouse_a11y_score.sql percentages * Update anchors_with_role_button.sql - percentage * Update a11y_frontend_technology.sql - updating for percentages * Update lighthouse_score_by_cms.sql - percentages * Update 
lighthouse_score_by_government.sql - initial start * Update lighthouse_score_by_government.sql updates from github * Update lighthouse_score_by_government.sql - more updates * Update lighthouse_score_by_government.sql - reorder regex * Update lighthouse_score_by_government.sql - leveraging tld * Update lighthouse_score_by_government.sql - reorganized * Update lighthouse_score_by_government.sql - fixing redundancy * Update lighthouse_score_by_government.sql - including states * Update lighthouse_score_by_government.sql - level setting * Update lighthouse_score_by_government_with_urls.sql - updating urls * Update lighthouse_score_by_government_with_urls.sql - adding provinces * Update lighthouse_score_by_government_with_urls.sql more province / state data * Update lighthouse_score_by_government_with_urls.sql avoiding *canada.ca and allowing for *.canada.ca * Update lighthouse_score_by_government_with_urls.sql avoid gapcanada.ca * Update lighthouse_score_by_government_with_urls.sql triming the code and reducing replication * Update lighthouse_score_by_government_with_urls.sql Update with more countries and city/state codes * Update lighthouse_score_by_government.sql Making it clearer to see * Update lighthouse_score_by_government_with_urls.sql Aligning with parallel version * Update lighthouse_score_by_government_with_urls.sql Updates thanks to Ralf Koller! 
* Update lighthouse_score_by_government_with_urls.sql Updating Germany * Update lighthouse_score_by_government_with_urls.sql updates for .lu thanks to Alain * Update lighthouse_score_by_government_with_urls.sql Updating sweden * Update lighthouse_score_by_government_with_urls.sql Updating Netherlands data * Update lighthouse_score_by_government_with_urls.sql Missing comma * Update lighthouse_score_by_government_with_urls.sql Updating France and Canada * Update lighthouse_score_by_government_with_urls.sql Updating UK domains to better catch sub-queries * Update lighthouse_score_by_government_with_urls.sql Improving the consistentcy of the regex expressions * Update lighthouse_score_by_government_with_urls.sql Fixing characters like Turkey. * Update lighthouse_score_by_government_with_urls.sql adding more flexibility * Update lighthouse_score_by_government_with_urls.sql Adding limiter for the size of the query. * Update lighthouse_score_by_government_with_urls.sql More tweaking Getting rid of "-" before gov domains. * Update lighthouse_score_by_government_with_urls.sql Fixing issue with: Cannot parse regular expression: bad repetition operator: *+? * Update lighthouse_score_by_government_with_urls.sql Updating France's cities * Update lighthouse_score_by_government_with_urls.sql adding more regex. * Update lighthouse_score_by_government_with_urls.sql Updating for Sweden * Update lighthouse_score_by_government_with_urls.sql Updated EU domains * Update lighthouse_score_by_government_with_urls.sql extending canada * Update lighthouse_score_by_government_with_urls.sql Missing comma * Update lighthouse_score_by_government_with_urls.sql excluding zoom.us and others * Update lighthouse_score_by_government_with_urls.sql fixing sql * Update lighthouse_score_by_government_with_urls.sql Adding exclusion of .eu from generic fallback * Update a11y_overall_tech_usage_by_domain_rank.sql Update to get percentages correct. 
* Update a11y_technology_usage.sql Updating SQL for more consistent output * Update lighthouse_score_by_government_with_urls.sql Excluding some NL sites which weren't government. * Update a11y_technology_usage_by_domain_rank.sql Updating after discussion with @tunetheweb * Update a11y_technology_usage_by_domain_rank.sql Reverting to 2024's code * Update alt_ending_in_image_extension.sql align with 2024 Update to align to 2024, thanks @tunetheweb * Update lighthouse_score_by_government_with_urls.sql Luxembourg update * Update audio_track_usage.sql - resetting to 2024 structure Thanks @tunetheweb for the pointers here. * Update a11y_frontend_technology.sql Update to more closely align * Update a11y_overall_tech_usage_by_domain_rank.sql standardizing * Update a11y_technology_usage.sql Update to align with 2024 * Update a11y_technology_usage_by_domain_rank.sql adjusting comments * Update alt_ending_in_image_extension.sql Update to 2024 * Update lighthouse_score_by_government_with_urls.sql - Germany Updating values for Germany. 
* Update anchors_with_role_button.sql optimizing for 2024 data * Update audio_track_usage.sql documentation * Update button_name_sources.sql - 2024 UPdate to align to 2024 * Update captcha_usage.sql - 2024 better alinging with 2024 * Update color_contrast.sql 2024 standardization * Update common_alt_text_length.sql 2024 standardization * Update common_aria_role.sql - 2024 standardization * Update common_element_attributes.sql 2024 standardization * Update focus_outline_0.sql 2004 No JS UDF version * Update focus_visible.sql 2024 standardizing format * Update form_input_name_sources.sql - 2024 alignment Review * Update form_required_controls.sql 2024 standardization * Update form_input_name_sources.sql docs * Update landmark_elements_and_roles.sql 2024 update * Update landmark_elements_and_roles.sql element_pct issue * Update lighthouse_a11y_audits.sql - update * Update lighthouse_a11y_audits.sql 2024 update * Uploading changes thanks to Barry Pollard Thanks @tunetheweb * More files updated thanks to Barry Thanks for this @tunetheweb for updating these. * Update sites_using_role.sql fixing typo * Update video_track_usage.sql fixing typo * Update lighthouse_score_by_government.sql * Update lighthouse_score_by_government_with_urls.sql adding the cbvs.sr central bank * Update lighthouse_score_by_government.sql update I wanted to skip running this, but needed it for the report. * Linting * Linting --------- Co-authored-by: Barry Pollard <barrypollard@google.com>
1 parent dc87982 commit 4deb219

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+5709
-1
lines changed

sql/.sqlfluff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ exclude_rules = AL01,AL04,AL07,AL09,AM03,AM05,AM08,CP02,CP03,CV02,CV12,LT05,LT09
2828
# ST08 - Sometimes clearer to include brackets for complex COUNT(DISTINCT) cases
2929
# ST11 - Doesn't consider wildcards in SELECT. Issue: https://github.com/sqlfluff/sqlfluff/issues/6511
3030

31-
large_file_skip_byte_limit = 40000
31+
large_file_skip_byte_limit = 1120000
3232
# CPU processes to use while linting.
3333
# If positive, just implies number of processes.
3434
# If negative or zero, implies number_of_cpus - specified_number.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
lighthouse_score_by_government.sql
2+
lighthouse_score_by_government_with_urls.sql
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# 2024 Accessibility queries
2+
3+
<!--
4+
This directory contains all of the 2024 Accessibility chapter queries.
5+
6+
Each query should have a corresponding `metric_name.sql` file.
7+
Note that readers are linked to this directory, so try to make the SQL file names descriptive for easy browsing.
8+
9+
Analysts: if helpful, you can use this README to give additional info about the queries.
10+
-->
11+
12+
## Resources
13+
14+
- [📄 Planning doc][~google-doc]
15+
- [📊 Results sheet][~google-sheets]
16+
- [📝 Markdown file][~chapter-markdown]
17+
18+
[~google-doc]: https://docs.google.com/document/d/1anCSQk9g_YDfZP6GtjqdC-vCfnCNZAUEQwjSr8AzqTw/edit
19+
[~google-sheets]: https://docs.google.com/spreadsheets/d/1btB1r9QpdgTyToPhn7glcGAdMFs7eq4UcQSVIHBqiYQ/edit#gid=1778117656
20+
[~chapter-markdown]: https://github.com/HTTPArchive/almanac.httparchive.org/tree/main/src/content/en/2024/accessibility.md
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#standardSQL
2+
-- Web Almanac — Lighthouse category scores by framework (2025-07-01)
3+
-- Google Sheet: a11y_frontend_technology
4+
--
5+
-- Purpose
6+
-- • Extract Lighthouse category scores (performance, accessibility,
7+
-- best-practices, SEO) from JSON in the crawl dataset.
8+
-- • Associate each crawled page with detected frontend frameworks or JS libraries.
9+
-- • Limit to root pages only for consistency.
10+
-- • De-duplicate multiple {page, framework} rows caused by UNNEST, by averaging
11+
-- scores per page before computing framework-level averages.
12+
--
13+
-- Method
14+
-- 1. Extract scores with JSON_EXTRACT_SCALAR, cast to FLOAT64.
15+
-- 2. Filter to categories: Web frameworks, JavaScript libraries,
16+
-- Frontend frameworks, JavaScript frameworks.
17+
-- 3. Aggregate in two steps:
18+
-- a. Per {client, page, framework}, average scores to remove duplicates.
19+
-- b. Global averages per {client, framework}.
20+
--
21+
-- Output columns
22+
-- client — "desktop" | "mobile"
23+
-- framework — detected framework or JS library
24+
-- avg_performance_score — average Lighthouse performance score (0–1)
25+
-- avg_accessibility_score — average Lighthouse accessibility score (0–1)
26+
-- avg_best_practices_score — average Lighthouse best-practices score (0–1)
27+
-- avg_seo_score — average Lighthouse SEO score (0–1)
28+
-- total_pages — distinct page count per {client, framework}
29+
--
30+
-- Notes
31+
-- • Scores remain in 0–1 float scale (not percentages).
32+
-- • `is_root_page = TRUE` ensures only root URLs are included.
33+
-- • Optional: enable TABLESAMPLE for faster smoke testing.
34+
-- One row per {client, root page, detected framework/library}, carrying that
-- page's four Lighthouse category scores (0–1 floats).
WITH framework_page_scores AS (
  SELECT
    client,
    page,
    tech.technology AS framework,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.performance.score') AS FLOAT64) AS performance_score,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.accessibility.score') AS FLOAT64) AS accessibility_score,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.best-practices.score') AS FLOAT64) AS best_practices_score,
    CAST(JSON_EXTRACT_SCALAR(lighthouse, '$.categories.seo.score') AS FLOAT64) AS seo_score
  FROM
    `httparchive.crawl.pages`
  -- For a cheap smoke test, add `TABLESAMPLE SYSTEM (0.1 PERCENT)` directly after the table name.
  CROSS JOIN
    UNNEST(technologies) AS tech
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    lighthouse IS NOT NULL AND
    tech.technology IS NOT NULL AND
    -- Keep only technologies tagged with at least one framework/library category.
    EXISTS (
      SELECT 1
      FROM UNNEST(tech.categories) AS category
      WHERE category IN ('Web frameworks', 'JavaScript libraries', 'Frontend frameworks', 'JavaScript frameworks')
    )
),

-- Collapse to exactly one row per {client, page, framework}. Every row of a
-- given page carries the same scores (they all come from that page's
-- Lighthouse report), so AVG here only removes duplicates.
page_level_scores AS (
  SELECT
    client,
    page,
    framework,
    AVG(performance_score) AS performance_score,
    AVG(accessibility_score) AS accessibility_score,
    AVG(best_practices_score) AS best_practices_score,
    AVG(seo_score) AS seo_score
  FROM
    framework_page_scores
  GROUP BY
    client,
    page,
    framework
)

-- Framework-level averages and page counts, largest frameworks first.
SELECT
  client,
  framework,
  AVG(performance_score) AS avg_performance_score,
  AVG(accessibility_score) AS avg_accessibility_score,
  AVG(best_practices_score) AS avg_best_practices_score,
  AVG(seo_score) AS avg_seo_score,
  COUNT(DISTINCT page) AS total_pages
FROM
  page_level_scores
GROUP BY
  client,
  framework
ORDER BY
  total_pages DESC;
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#standardSQL
-- Accessibility Technology (A11y) Usage by Domain Rank (2025-07-01)
-- Google Sheet: a11y_overall_tech_usage_by_domain_rank
--
-- Purpose
-- • Quantify adoption of accessibility-related technologies (e.g., overlays)
-- across websites, segmented by domain rank tiers.
-- • Provide both absolute counts of pages using A11y tech and their share
-- within each {client, is_root_page, rank grouping}.
--
-- Dataset
-- • Source: `httparchive.crawl.pages`
-- • Crawl date: 2025-07-01
-- • Technologies: extracted from the `technologies` array and each
-- technology's `categories` array.
-- • Rank groupings: [1K, 10K, 100K, 1M, 10M, 100M], cumulative
-- (a page is counted in every grouping whose threshold is >= its rank).
--
-- Method
-- 1. a11y_pages:
-- – Expand rank thresholds with UNNEST.
-- – Select distinct {client, is_root_page, page, rank_grouping}
-- where a technology category = 'Accessibility'.
-- 2. rank_totals:
-- – Count all pages per {client, is_root_page, rank_grouping} as
-- denominators, so root and non-root numerators are each divided by
-- the matching root / non-root population.
-- 3. Join a11y_pages with rank_totals on {client, is_root_page, rank_grouping}.
-- 4. Aggregate results to compute distinct page counts and percentages.
--
-- Output columns
-- client — "desktop" | "mobile"
-- is_root_page — TRUE if page is a root URL
-- rank_grouping — maximum rank threshold (e.g., 1000, 10000, …)
-- total_in_rank — number of pages in {client, is_root_page, rank_grouping}
-- sites_with_a11y_tech — count of distinct pages using A11y technology
-- pct_sites_with_a11y_tech — sites_with_a11y_tech / total_in_rank (0–1 float)
--
-- Notes
-- • Percentages are relative to the pages of the same root / non-root kind
-- in each rank grouping.
-- • Multiple rank thresholds allow trend analysis across different scales
-- of the web (top 1K → top 100M).
-- • Groups with no A11y technology at all produce no output row (inner join).
WITH a11y_pages AS (
  -- Pages with at least one technology in the 'Accessibility' category,
  -- repeated once per rank threshold the page falls under.
  SELECT DISTINCT
    client,
    is_root_page,
    page,
    rank_grouping
  FROM
    `httparchive.crawl.pages`,
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping,
    UNNEST(technologies) AS tech,
    UNNEST(tech.categories) AS category
  WHERE
    date = '2025-07-01' AND
    category = 'Accessibility' AND
    rank <= rank_grouping
),

rank_totals AS (
  -- Denominator: all pages per {client, is_root_page, rank_grouping}.
  -- is_root_page is part of the key so it matches the numerator's grouping.
  SELECT
    client,
    is_root_page,
    rank_grouping,
    COUNT(0) AS total_in_rank
  FROM
    `httparchive.crawl.pages`,
    UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping
  WHERE
    date = '2025-07-01' AND
    rank <= rank_grouping
  GROUP BY
    client,
    is_root_page,
    rank_grouping
)

SELECT
  client,
  is_root_page,
  rank_grouping,
  total_in_rank,
  COUNT(DISTINCT page) AS sites_with_a11y_tech,
  COUNT(DISTINCT page) / total_in_rank AS pct_sites_with_a11y_tech
FROM
  a11y_pages
INNER JOIN
  rank_totals
USING (client, is_root_page, rank_grouping)
GROUP BY
  client,
  is_root_page,
  rank_grouping,
  total_in_rank
ORDER BY
  client,
  is_root_page,
  rank_grouping;
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#standardSQL
2+
-- Accessibility Technology (A11y) Usage by Client (2025-07-01)
3+
-- Google Sheet: a11y_technology_usage
4+
--
5+
-- Purpose
6+
-- • Measure the adoption of accessibility-related technologies (e.g., overlays)
7+
-- across websites, segmented by client type (desktop vs mobile).
8+
-- • Provide absolute counts of sites with A11y tech and their percentage share
9+
-- relative to all sites.
10+
--
11+
-- Dataset
12+
-- • Source: `httparchive.crawl.pages`
13+
-- • Crawl date: 2025-07-01
14+
-- • Technologies: extracted via `UNNEST(technologies)` and `UNNEST(categories)`.
15+
--
16+
-- Method
17+
-- 1. Count distinct sites (pages) per {client, is_root_page}.
18+
-- 2. Count distinct sites where `category = 'Accessibility'`.
19+
-- 3. Compute percentage as (# sites with A11y tech / total sites).
20+
--
21+
-- Output columns
22+
-- client — "desktop" | "mobile"
23+
-- is_root_page — TRUE if page is a root URL
24+
-- total_sites — number of distinct sites per client
25+
-- sites_with_a11y_tech — number of distinct sites with Accessibility technology
26+
-- pct_sites_with_a11y_tech — fraction of sites using A11y tech (0–1 float)
27+
--
28+
-- Notes
29+
-- • `DISTINCT page` prevents double-counting when a site has multiple technologies.
30+
-- • Percentages are per client (desktop/mobile) and root-page grouping.
31+
-- • Useful for high-level comparison of A11y tech adoption across clients.
32+
-- Flag every crawled page once with whether any of its detected technologies
-- is in the 'Accessibility' category. Using EXISTS instead of cross-joining
-- the UNNESTed arrays keeps pages with no detected technology in the result,
-- so they still count towards total_sites (the denominator).
WITH page_flags AS (
  SELECT
    client, -- "desktop" | "mobile"
    is_root_page,
    page,
    EXISTS (
      SELECT 1
      FROM
        UNNEST(technologies) AS tech,
        UNNEST(tech.categories) AS category
      WHERE
        category = 'Accessibility'
    ) AS has_a11y_tech
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' -- Crawl date under analysis
)

SELECT
  client,
  is_root_page,
  COUNT(DISTINCT page) AS total_sites, -- All distinct pages in the group
  COUNT(DISTINCT IF(has_a11y_tech, page, NULL)) AS sites_with_a11y_tech, -- Distinct pages using accessibility technology
  COUNT(DISTINCT IF(has_a11y_tech, page, NULL)) / COUNT(DISTINCT page) AS pct_sites_with_a11y_tech -- Fraction (0–1) of pages using accessibility technology
FROM
  page_flags
GROUP BY
  client,
  is_root_page
ORDER BY
  client,
  is_root_page;
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#standardSQL
2+
# Purpose
3+
# Measure adoption of specific Accessibility-related technologies (apps/overlays)
4+
# across domain rank buckets in the 2025-07-01 HTTP Archive crawl.
5+
# Google Sheet: a11y_technology_usage_by_domain_rank
6+
#
7+
# Output columns
8+
# • client = desktop or mobile
9+
# • is_root_page = TRUE if page is the root of the site
10+
# • rank_grouping = domain rank bucket (1k, 10k, …, 100M)
11+
# • total_in_rank = total number of unique pages in the rank bucket
12+
# • app = specific Accessibility technology detected (Wappalyzer name)
13+
# • sites_with_app = number of unique pages using that technology
14+
# • pct_sites_with_app = share of pages in the rank bucket using that technology
15+
#
16+
# Method
17+
# 1. Assign each page to a rank_grouping based on its domain rank.
18+
# 2. Compute totals per client / root flag / rank grouping (denominator).
19+
# 3. Expand technologies and categories, keeping only category = 'Accessibility'.
20+
# 4. Count distinct pages per technology and divide by the rank total.
21+
#
22+
# Notes
23+
# • Unit of analysis = page URL, not host/site.
24+
# • Percentages are returned as numeric fractions (0–1). Use FORMAT() if a
25+
# human-readable percent string is needed.
26+
# • Rank groupings are aligned with prior reporting thresholds (1k → 100M).
27+
-- Every page of the crawl, tagged with the smallest rank threshold that is
-- >= its rank. Buckets are therefore exclusive (a rank-1000 page is only in
-- the 1000 bucket). The CASE has no ELSE, so a NULL rank or a rank above
-- 100,000,000 yields a NULL rank_grouping.
WITH pages_by_bucket AS (
  SELECT
    client,
    is_root_page,
    page,
    technologies,
    CASE
      WHEN rank <= 1000 THEN 1000
      WHEN rank <= 10000 THEN 10000
      WHEN rank <= 100000 THEN 100000
      WHEN rank <= 1000000 THEN 1000000
      WHEN rank <= 10000000 THEN 10000000
      WHEN rank <= 100000000 THEN 100000000
    END AS rank_grouping
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01'
),

-- Denominator: distinct pages per {client, is_root_page, rank_grouping}.
bucket_totals AS (
  SELECT
    client,
    is_root_page,
    rank_grouping,
    COUNT(DISTINCT page) AS total_in_rank
  FROM
    pages_by_bucket
  GROUP BY
    client,
    is_root_page,
    rank_grouping
),

-- Numerator rows: one row per {page, technology, matching category entry}
-- where the technology is in the 'Accessibility' category.
a11y_app_pages AS (
  SELECT
    bucketed.client,
    bucketed.is_root_page,
    bucketed.rank_grouping,
    bucketed.page,
    tech.technology AS app
  FROM
    pages_by_bucket AS bucketed
  CROSS JOIN
    UNNEST(bucketed.technologies) AS tech
  CROSS JOIN
    UNNEST(tech.categories) AS category
  WHERE
    category = 'Accessibility'
)

SELECT
  apps.client,
  apps.is_root_page,
  apps.rank_grouping,
  totals.total_in_rank,
  apps.app,
  COUNT(DISTINCT apps.page) AS sites_with_app,
  SAFE_DIVIDE(COUNT(DISTINCT apps.page), totals.total_in_rank) AS pct_sites_with_app
FROM
  a11y_app_pages AS apps
INNER JOIN
  bucket_totals AS totals
  -- Equality on rank_grouping drops rows whose bucket is NULL.
  ON
    apps.client = totals.client AND
    apps.is_root_page = totals.is_root_page AND
    apps.rank_grouping = totals.rank_grouping
GROUP BY
  apps.client,
  apps.is_root_page,
  apps.rank_grouping,
  totals.total_in_rank,
  apps.app
ORDER BY
  apps.app,
  apps.rank_grouping,
  apps.client,
  apps.is_root_page;

0 commit comments

Comments
 (0)