diff --git a/sql/2025/fonts/.gitignore b/sql/2025/fonts/.gitignore new file mode 100644 index 00000000000..afed0735dc9 --- /dev/null +++ b/sql/2025/fonts/.gitignore @@ -0,0 +1 @@ +*.csv diff --git a/sql/2025/fonts/README.md b/sql/2025/fonts/README.md index 39847fa76e3..d7464af7c6c 100644 --- a/sql/2025/fonts/README.md +++ b/sql/2025/fonts/README.md @@ -1,20 +1,84 @@ -# 2025 Fonts queries +# Fonts - +## Structure -## Resources +The queries are split by the section where they are used: + +* `design/` is about foundries and families, +* `development/` is about tools and technologies, and +* `performance/` is about hosting and serving. + +Each file name starts with one of the following prefixes indicating the primary subject of the corresponding analysis: + +* `fonts_` is about font files, +* `pages_` is about HTML pages, +* `scripts_` is about JavaScript scripts, and +* `styles_` is about CSS style sheets. + +The prefix is followed by the property studied given in the singular, potentially extended with one or several suffixes narrowing down the scope, as in `fonts_size_by_table.sql` and `pages_link_relation.sql`. + +## Content + +Each query starts with a preamble indicating the section, question, and normalization type, as illustrated below: + +```sql +-- Section: Performance +-- Question: What is the distribution of the file size broken down by table? +-- Normalization: Pages +``` + +Many queries rely on temporary functions for convenience and clarity. The functions that appear in several queries are extracted into a common file called `common.sql`. Whenever any of the functions defined in `common.sql` is used by a query, the query has the following pseudo-directive at the top: + +```sql +-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql +``` + +The pseudo-directive has to be replaced with the content of `common.sql` prior to executing the query in question. 
+ +In addition, queries generally have parameters, as in `@date`, so as to be able to run them for different configurations. The values for the parameters will have to be supplied upon execution. + +All of the above is taken care of automatically if the queries are executed using `execute.py`, which we discuss next. + +## Execution + +The queries can be executed using the `execute.py` script. The results are first saved in local CSV files sitting next to the SQL files and then uploaded to the spreadsheet. In the spreadsheet, for each query, a separate sheet is created and named after the question the query answers, which is given in its preamble. If the CSV file already exists, the corresponding query is not executed. If cell A1 is already populated, the corresponding sheet is not updated. + +First, ensure that the Application Default Credentials authorization strategy is configured, and that the HTTP Archive project is used as the quota project: + +```shell +gcloud auth application-default login \ + --scopes https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/spreadsheets +gcloud auth application-default set-quota-project httparchive +``` + +Second, install the Python prerequisites for the script: + +```shell +pip install -r requirements.txt +``` + +The script can be run for all or a subset of the queries as illustrated below: + +```shell +python execute.py +python execute.py design/*.sql +python execute.py development/fonts_*.sql +``` + +By default, it operates in a dry-run mode: it does not run the queries but prints an estimate of the amount of data that would be processed by each query. 
+To actually run the queries, pass the `--no-dry-run` option as follows:
-- [📄 Planning doc][~google-doc]
-- [📊 Results sheet][~google-sheets]
-- [📝 Markdown file][~chapter-markdown]
+```shell
+python execute.py --no-dry-run
+python execute.py --no-dry-run design/*.sql
+python execute.py --no-dry-run development/fonts_*.sql
+```
-[~google-doc]: https://docs.google.com/document/d/1jVc0vgmAY_lBxryItRBguXxEq77mvbaQ3UpbTweUoSI/
-[~google-sheets]: https://docs.google.com/spreadsheets/d/1otdu4p_CCI70B4FVzw6k02frStsPMrQoFu7jUim_0Bg/edit
-[~chapter-markdown]: https://github.com/HTTPArchive/almanac.httparchive.org/tree/main/src/content/en/2025/fonts.md
+[Planning document]: https://docs.google.com/document/d/1jVc0vgmAY_lBxryItRBguXxEq77mvbaQ3UpbTweUoSI
+[Results sheet]: https://docs.google.com/spreadsheets/d/1otdu4p_CCI70B4FVzw6k02frStsPMrQoFu7jUim_0Bg
+[Chapter content]: https://github.com/HTTPArchive/almanac.httparchive.org/tree/main/src/content/en/2025/fonts.md
diff --git a/sql/2025/fonts/common.sql b/sql/2025/fonts/common.sql
new file mode 100644
index 00000000000..6f71b504040
--- /dev/null
+++ b/sql/2025/fonts/common.sql
@@ -0,0 +1,149 @@
+-- Canonicalize an already style-stripped family name. Used in FAMILY_INNER.
+--
+-- * Any spelling of "Font Awesome" (case-insensitive, optional whitespace
+--   between the two words) collapses into the single label 'Font Awesome'.
+-- * Names shorter than three characters after trimming become NULL.
+-- * Everything else is returned trimmed; a NULL input yields NULL.
+CREATE TEMPORARY FUNCTION FAMILY_INNER_INNER(name STRING) AS (
+  CASE
+    WHEN REGEXP_CONTAINS(name, r'(?i)font\s?awesome') THEN 'Font Awesome'
+    -- Covers the empty string too, so no separate empty check is needed below.
+    WHEN LENGTH(TRIM(name)) < 3 THEN NULL
+    ELSE TRIM(name)
+  END
+);
+
+-- Strip trailing style words from a family name and canonicalize what is left.
+-- Used in FAMILY.
+--
+-- The pattern removes one or more style suffixes anchored at the end of the
+-- name (weights, widths, slopes, `\d00` weight numbers, and `\d+pt` optical
+-- sizes), each optionally preceded by whitespace or a hyphen.
+CREATE TEMPORARY FUNCTION FAMILY_INNER(name STRING) AS (
+  FAMILY_INNER_INNER(
+    REGEXP_REPLACE(name, r'(?i)([\s-]?(black|bold|book|cond(ensed)?|demi|ex(tra)?|heavy|italic|light|medium|narrow|regular|semi|thin|ultra|wide|\d00|\d+pt))+$', '')
+  )
+);
+
+-- Extract the family name from a payload.
+CREATE TEMPORARY FUNCTION FAMILY(payload JSON) AS (
+  -- Prefer entry 16 of the name records and fall back to entry 1.
+  -- NOTE(review): presumably these are OpenType name IDs (16 = typographic
+  -- family, 1 = family) -- confirm against the crawler's payload schema.
+  FAMILY_INNER(
+    COALESCE(
+      STRING(payload._font_details.names[16]),
+      STRING(payload._font_details.names[1])
+    )
+  )
+);
+
+-- Extract the file format from an extension and a MIME type.
+--
+-- The format is whatever follows the slash in the MIME type, ignoring an
+-- optional `x-` and/or `font-` prefix. The extension is used only when no
+-- format can be extracted from the type. The result is lowercased.
+CREATE TEMPORARY FUNCTION FILE_FORMAT(extension STRING, type STRING) AS (
+  LOWER(IFNULL(REGEXP_EXTRACT(type, '/(?:x-)?(?:font-)?(.*)'), extension))
+);
+
+-- Normalize a foundry name. Used in FOUNDRY.
+--
+-- Two known identifiers are remapped (compared case-insensitively); any other
+-- value has control characters removed and is trimmed, and an empty result
+-- becomes NULL.
+CREATE TEMPORARY FUNCTION FOUNDRY_INNER(name STRING) AS (
+  CASE UPPER(name)
+    WHEN 'ADBO' THEN 'ADBE'
+    WHEN 'PFED' THEN 'AWSM'
+    ELSE NULLIF(TRIM(REGEXP_REPLACE(name, r'[[:cntrl:]]+', '')), '')
+  END
+);
+
+-- Extract the foundry name from a payload, read from the OS/2 vendor field.
+CREATE TEMPORARY FUNCTION FOUNDRY(payload JSON) AS (
+  FOUNDRY_INNER(STRING(payload._font_details.OS2.achVendID))
+);
+
+-- Infer scripts from codepoints. Used in SCRIPTS.
+--
+-- Returns an empty array when the list of codepoints is missing or empty.
+-- `detectWritingScript` comes from the text-utils.js library.
+-- NOTE(review): the second argument (0.05) is presumably a coverage
+-- threshold -- confirm in text-utils.js.
+CREATE TEMPORARY FUNCTION SCRIPTS_INNER(codepoints JSON)
+RETURNS ARRAY<STRING>
+LANGUAGE js
+OPTIONS (library = ["gs://httparchive/lib/text-utils.js"])
+AS r"""
+if (codepoints && codepoints.length) {
+  return detectWritingScript(codepoints.map((character) => parseInt(character, 10)), 0.05);
+} else {
+  return [];
+}
+""";
+
+-- Infer scripts from a payload, based on the codepoints listed under `cmap`.
+CREATE TEMPORARY FUNCTION SCRIPTS(payload JSON) AS (
+  SCRIPTS_INNER(payload._font_details.cmap.codepoints)
+);
+
+-- Infer the service from a URL.
+CREATE TEMPORARY FUNCTION SERVICE(url STRING) AS (
+  -- Branches are tried top to bottom and the first matching pattern wins. A URL
+  -- that matches none of them (including a NULL URL) is reported as
+  -- 'self-hosted'. Every literal dot is escaped so that it cannot match an
+  -- arbitrary character.
+  CASE
+    WHEN REGEXP_CONTAINS(url, r'(fonts|use)\.typekit\.(net|com)') THEN 'Adobe'
+    WHEN REGEXP_CONTAINS(url, r'cloud\.typenetwork\.com') THEN 'typenetwork.com'
+    WHEN REGEXP_CONTAINS(url, r'cloud\.typography\.com') THEN 'typography.com'
+    WHEN REGEXP_CONTAINS(url, r'cloud\.webtype\.com') THEN 'webtype.com'
+    WHEN REGEXP_CONTAINS(url, r'f\.fontdeck\.com') THEN 'fontdeck.com'
+    WHEN REGEXP_CONTAINS(url, r'fast\.fonts\.(com|net)\/(jsapi|cssapi)') THEN 'fonts.com'
+    WHEN REGEXP_CONTAINS(url, r'fnt\.webink\.com') THEN 'webink.com'
+    WHEN REGEXP_CONTAINS(url, r'fontawesome\.com') THEN 'fontawesome.com'
+    WHEN REGEXP_CONTAINS(url, r'fonts\.(gstatic|googleapis)\.com|themes\.googleusercontent\.com/static/fonts|ssl\.gstatic\.com/fonts') THEN 'Google'
+    WHEN REGEXP_CONTAINS(url, r'fonts\.typonine\.com') THEN 'typonine.com'
+    WHEN REGEXP_CONTAINS(url, r'fonts\.typotheque\.com') THEN 'typotheque.com'
+    WHEN REGEXP_CONTAINS(url, r'kernest\.com') THEN 'kernest.com'
+    WHEN REGEXP_CONTAINS(url, r'typefront\.com') THEN 'typefront.com'
+    WHEN REGEXP_CONTAINS(url, r'typesquare\.com') THEN 'typesquare.com'
+    WHEN REGEXP_CONTAINS(url, r'use\.edgefonts\.net|webfonts\.creativecloud\.com') THEN 'edgefonts.net'
+    WHEN REGEXP_CONTAINS(url, r'webfont\.fontplus\.jp') THEN 'fontplus.jp'
+    WHEN REGEXP_CONTAINS(url, r'webfonts\.fontslive\.com') THEN 'fontslive.com'
+    WHEN REGEXP_CONTAINS(url, r'webfonts\.fontstand\.com') THEN 'fontstand.com'
+    WHEN REGEXP_CONTAINS(url, r'webfonts\.justanotherfoundry\.com') THEN 'justanotherfoundry.com'
+    ELSE 'self-hosted'
+  END
+);
+
+-- Extract the color formats from a formats payload and remove spurious entries
+-- via a table-sizes payload.
+--
+-- When nonempty, it is expected that
+--
+-- * `CBDT` is larger than 2 + 2 bytes,
+-- * `COLR` is larger than 2 + 2 + 4 + 4 + 2 (+ 4 + 4 + 4 + 4 + 4) bytes,
+-- * `SVG ` is larger than 2 + 4 + 4 + 2 bytes, and
+-- * `sbix` is larger than 2 + 2 + 4 + 4 bytes.
+--
+-- For simplicity, the threshold is set to 50 bytes.
+--
+-- Any failure while filtering (for example, missing formats or table sizes)
+-- results in an empty array.
+CREATE TEMPORARY FUNCTION COLOR_FORMATS_INNER(formats JSON, table_sizes JSON)
+RETURNS ARRAY<STRING>
+LANGUAGE js AS '''
+try {
+  return formats.filter((format) => {
+    // Table tags are four characters long; pad shorter format names with a
+    // trailing space (as in `SVG `) before looking up the table size.
+    const table = `${format} `.slice(0, 4);
+    return table_sizes[table] > 50;
+  });
+} catch (e) {
+  return [];
+}
+''';
+
+-- Extract the color formats from a payload.
+CREATE TEMPORARY FUNCTION COLOR_FORMATS(payload JSON) AS (
+  COLOR_FORMATS_INNER(
+    payload._font_details.color.formats,
+    payload._font_details.table_sizes
+  )
+);
+
+-- Check if the font is a color font given its payload, that is, if at least one
+-- color format survives the filtering done by COLOR_FORMATS.
+CREATE TEMPORARY FUNCTION IS_COLOR(payload JSON) AS (
+  ARRAY_LENGTH(COLOR_FORMATS(payload)) > 0
+);
+
+-- Check if the font was successfully parsed given its payload. The presence of
+-- table sizes is used as the indicator.
+CREATE TEMPORARY FUNCTION IS_PARSED(payload JSON) AS (
+  payload._font_details.table_sizes IS NOT NULL
+);
+
+-- Check if the font is a variable font given its payload. The serialized table
+-- sizes are searched, case-insensitively, for `gvar` or `CFF2`.
+CREATE TEMPORARY FUNCTION IS_VARIABLE(payload JSON) AS (
+  REGEXP_CONTAINS(
+    TO_JSON_STRING(payload._font_details.table_sizes),
+    '(?i)gvar|CFF2'
+  )
+);
+
+-- Extract the variable formats from a payload. Every case-insensitive occurrence
+-- of `glyf` or `CFF2` in the serialized table sizes is returned.
+CREATE TEMPORARY FUNCTION VARIABLE_FORMATS(payload JSON) AS (
+  REGEXP_EXTRACT_ALL(
+    TO_JSON_STRING(payload._font_details.table_sizes),
+    '(?i)glyf|CFF2'
+  )
+);
diff --git a/sql/2025/fonts/design/fonts_designer.sql b/sql/2025/fonts/design/fonts_designer.sql
new file mode 100644
index 00000000000..242d59955d3
--- /dev/null
+++ b/sql/2025/fonts/design/fonts_designer.sql
@@ -0,0 +1,54 @@
+-- Section: Design
+-- Question: Which designers are popular?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- Denominator: the number of distinct root pages per client, regardless of
+-- whether the pages use fonts.
+pages AS (
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client
+),
+
+-- Numerator: per client, the 100 designers appearing on the most distinct
+-- pages among parsed fonts. Blank designer names collapse into NULL.
+-- NOTE(review): entry 9 of the name records is presumably the OpenType
+-- designer name -- confirm against the payload schema.
+designers AS (
+  SELECT
+    client,
+    NULLIF(TRIM(STRING(payload._font_details.names[9])), '') AS designer,
+    COUNT(DISTINCT page) AS count,
+    ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(DISTINCT page) DESC) AS rank
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    designer
+  QUALIFY
+    rank <= 100
+)
+
+SELECT
+  client,
+  designer,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  designers
+INNER JOIN
+  pages
+USING (client)
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/design/fonts_family_by_foundry.sql b/sql/2025/fonts/design/fonts_family_by_foundry.sql
new file mode 100644
index 00000000000..48cb2390f90
--- /dev/null
+++ b/sql/2025/fonts/design/fonts_family_by_foundry.sql
@@ -0,0 +1,42 @@
+-- Section: Design
+-- Question: Which families are used broken down by foundry?
+-- Normalization: Requests (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per parsed font request on a root page. `total` is the number of
+-- such requests per client and serves as the denominator below.
+requests AS (
+  SELECT
+    client,
+    FOUNDRY(payload) AS foundry,
+    FAMILY(payload) AS family,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+)
+
+-- Per client, the 100 most requested foundry-family pairs. `total` is constant
+-- within a client, so grouping by it does not split any group.
+SELECT
+  client,
+  foundry,
+  family,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion,
+  ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(0) DESC) AS rank
+FROM
+  requests
+GROUP BY
+  client,
+  foundry,
+  family,
+  total
+QUALIFY
+  rank <= 100
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/design/fonts_family_by_script.sql b/sql/2025/fonts/design/fonts_family_by_script.sql
new file mode 100644
index 00000000000..c9076c5586e
--- /dev/null
+++ b/sql/2025/fonts/design/fonts_family_by_script.sql
@@ -0,0 +1,46 @@
+-- Section: Design
+-- Question: Which families are used broken down by script?
+-- Normalization: Requests (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per parsed font request on a root page, with the scripts inferred
+-- for the font. `total` counts these requests per client before any filtering
+-- or unnesting takes place.
+requests AS (
+  SELECT
+    client,
+    SCRIPTS(payload) AS scripts,
+    FAMILY(payload) AS family,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+)
+
+-- Per client and script, the 10 most requested families. A request contributes
+-- one row for each of its scripts.
+SELECT
+  client,
+  script,
+  family,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion,
+  ROW_NUMBER() OVER (PARTITION BY client, script ORDER BY COUNT(0) DESC) AS rank
+FROM
+  requests
+CROSS JOIN
+  UNNEST(scripts) AS script
+WHERE
+  -- The comparison is not true for a NULL family, so requests without a
+  -- family name are excluded here as well.
+  family <> 'Adobe Blank'
+GROUP BY
+  client,
+  script,
+  family,
+  total
+QUALIFY
+  rank <= 10
+ORDER BY
+  client,
+  script,
+  count DESC
diff --git a/sql/2025/fonts/design/fonts_foundry.sql b/sql/2025/fonts/design/fonts_foundry.sql
new file mode 100644
index 00000000000..04c4dd342ed
--- /dev/null
+++ b/sql/2025/fonts/design/fonts_foundry.sql
@@ -0,0 +1,54 @@
+-- Section: Design
+-- Question: Which foundries are popular?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- Denominator: the number of distinct root pages per client, regardless of
+-- whether the pages use fonts.
+pages AS (
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client
+),
+
+-- Numerator: per client, the 100 foundries appearing on the most distinct
+-- pages among parsed fonts.
+foundries AS (
+  SELECT
+    client,
+    FOUNDRY(payload) AS foundry,
+    COUNT(DISTINCT page) AS count,
+    ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(DISTINCT page) DESC) AS rank
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    foundry
+  QUALIFY
+    rank <= 100
+)
+
+SELECT
+  client,
+  foundry,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  foundries
+INNER JOIN
+  pages
+USING (client)
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/design/fonts_license.sql b/sql/2025/fonts/design/fonts_license.sql
new file mode 100644
index 00000000000..c5b307d8e94
--- /dev/null
+++ b/sql/2025/fonts/design/fonts_license.sql
@@ -0,0 +1,57 @@
+-- Section: Design
+-- Question: Which licenses are used?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Map a raw license value onto a well-known license label.
+--
+-- All patterns are matched case-insensitively so that differently capitalized
+-- spellings of the same vendor or license fall into the same bucket. A value
+-- matching no pattern is returned trimmed; empty values and a lone hyphen
+-- become NULL.
+CREATE TEMPORARY FUNCTION LICENSE(value STRING) AS (
+  CASE
+    WHEN REGEXP_CONTAINS(value, r'(?i)adobe|typekit') THEN 'Adobe'
+    WHEN REGEXP_CONTAINS(value, r'(?i)apache') THEN 'Apache'
+    WHEN REGEXP_CONTAINS(value, r'(?i)fontawesome') THEN 'Font Awesome'
+    WHEN REGEXP_CONTAINS(value, r'(?i)linotype|monotype|myfonts') THEN 'Monotype'
+    WHEN REGEXP_CONTAINS(value, r'(?i)(ofl|open\s?font\s?license|sil\.org)') THEN 'OFL'
+    ELSE NULLIF(NULLIF(TRIM(value), ''), '-')
+  END
+);
+
+WITH
+-- Denominator: the number of distinct root pages per client, regardless of
+-- whether the pages use fonts.
+pages AS (
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client
+)
+
+-- Per client, the 100 licenses appearing on the most distinct pages among
+-- parsed fonts.
+-- NOTE(review): entry 14 of the name records is presumably the OpenType
+-- license URL -- confirm against the payload schema.
+SELECT
+  client,
+  LICENSE(STRING(payload._font_details.names[14])) AS license,
+  COUNT(DISTINCT page) AS count,
+  total,
+  ROUND(COUNT(DISTINCT page) / total, @precision) AS proportion,
+  ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(DISTINCT page) DESC) AS rank
+FROM
+  `httparchive.crawl.requests`
+INNER JOIN
+  pages
+USING (client)
+WHERE
+  date = @date AND
+  type = 'font' AND
+  is_root_page AND
+  IS_PARSED(payload)
+GROUP BY
+  client,
+  license,
+  total
+QUALIFY
+  rank <= 100
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/design/fonts_metric.sql b/sql/2025/fonts/design/fonts_metric.sql
new file mode 100644
index 00000000000..5598c09c52a
--- /dev/null
+++ b/sql/2025/fonts/design/fonts_metric.sql
@@ -0,0 +1,86 @@
+-- Section: Design
+-- Question: What is the distribution of metrics?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per client and font URL, carrying the units per em (`granularity`)
+-- and an array of named metrics taken from an arbitrary request for the font.
+fonts AS (
+  SELECT
+    client,
+    url,
+    INT64(ANY_VALUE(payload)._font_details.head.unitsPerEm) AS granularity,
+    [
+      STRUCT(
+        'granularity' AS name,
+        INT64(ANY_VALUE(payload)._font_details.head.unitsPerEm) AS value
+      ),
+      STRUCT(
+        'clipping_ascender' AS name,
+        INT64(ANY_VALUE(payload)._font_details.OS2.usWinAscent) AS value
+      ),
+      STRUCT(
+        'ascender' AS name,
+        INT64(ANY_VALUE(payload)._font_details.OS2.sTypoAscender) AS value
+      ),
+      STRUCT(
+        'cap_height' AS name,
+        INT64(ANY_VALUE(payload)._font_details.OS2.sCapHeight) AS value
+      ),
+      STRUCT(
+        'x_height' AS name,
+        INT64(ANY_VALUE(payload)._font_details.OS2.sxHeight) AS value
+      ),
+      STRUCT(
+        'descender' AS name,
+        INT64(ANY_VALUE(payload)._font_details.OS2.sTypoDescender) AS value
+      ),
+      STRUCT(
+        'clipping_descender' AS name,
+        -- Negated so that it has the same sign convention as `descender`.
+        -INT64(ANY_VALUE(payload)._font_details.OS2.usWinDescent) AS value
+      ),
+      STRUCT(
+        'line_gap' AS name,
+        INT64(ANY_VALUE(payload)._font_details.OS2.sTypoLineGap) AS value
+      ),
+      STRUCT(
+        'use_typographic_metrics' AS name,
+        -- Bit 7 (mask 128) of fsSelection, reduced to 0 or 1. The parentheses
+        -- are required since `>>` binds tighter than `&`.
+        (INT64(ANY_VALUE(payload)._font_details.OS2.fsSelection) & 128) >> 7 AS value
+      )
+    ] AS metrics
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- Percentiles of each metric per client. Length metrics are expressed relative
+-- to the units per em; the units per em itself and the 0/1 flag are not
+-- lengths and are therefore reported as is.
+SELECT
+  client,
+  name AS metric,
+  percentile,
+  COUNT(0) AS count,
+  ROUND(
+    APPROX_QUANTILES(
+      IF(
+        name IN ('granularity', 'use_typographic_metrics'),
+        value,
+        SAFE_DIVIDE(value, granularity)
+      ),
+      1000
+    -- 1000 buckets, so that percentile p is found at offset p * 10.
+    )[OFFSET(percentile * 10)],
+    @precision
+  ) AS value
+FROM
+  fonts,
+  UNNEST(metrics) AS metric,
+  UNNEST([10, 25, 50, 75, 90, 99]) AS percentile
+GROUP BY
+  client,
+  name,
+  percentile
+ORDER BY
+  client,
+  name,
+  percentile
diff --git a/sql/2025/fonts/design/fonts_script.sql b/sql/2025/fonts/design/fonts_script.sql
new file mode 100644
index 00000000000..2ef2f2eb521
--- /dev/null
+++ 
b/sql/2025/fonts/design/fonts_script.sql
@@ -0,0 +1,41 @@
+-- Section: Design
+-- Question: Which scripts does one design for?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per client and font URL with the scripts inferred from an arbitrary
+-- request for that font. The window runs over the grouped rows, so `total` is
+-- the number of distinct fonts per client.
+fonts AS (
+  SELECT
+    client,
+    url,
+    SCRIPTS(ANY_VALUE(payload)) AS scripts,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- The number of fonts supporting each script. A font contributes one row for
+-- each of its scripts.
+SELECT
+  client,
+  script,
+  COUNT(DISTINCT url) AS count,
+  total,
+  ROUND(COUNT(DISTINCT url) / total, @precision) AS proportion
+FROM
+  fonts
+CROSS JOIN
+  UNNEST(scripts) AS script
+GROUP BY
+  client,
+  script,
+  total
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/design/styles_family.sql b/sql/2025/fonts/design/styles_family.sql
new file mode 100644
index 00000000000..0d6addf1631
--- /dev/null
+++ b/sql/2025/fonts/design/styles_family.sql
@@ -0,0 +1,72 @@
+-- Section: Design
+-- Question: Which families are popular in CSS?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Collect the first family name of every `font-family` declaration found in
+-- `font-face` rules of a parsed style sheet. Any failure while walking the
+-- style sheet results in an empty array. `walkDeclarations` and
+-- `parseFontFamilyProperty` come from the referenced libraries.
+CREATE TEMPORARY FUNCTION FAMILIES(css JSON)
+RETURNS ARRAY<STRING>
+LANGUAGE js
+OPTIONS (library = ["gs://httparchive/lib/css-font-parser.js", "gs://httparchive/lib/css-utils.js"])
+AS '''
+try {
+  const result = [];
+  walkDeclarations(css, (declaration) => {
+    result.push(parseFontFamilyProperty(declaration.value)[0]);
+  }, {
+    properties: 'font-family',
+    rules: (rule) => rule.type.toLowerCase() === 'font-face'
+  });
+  return result;
+} catch (e) {
+  return [];
+}
+''';
+
+WITH
+-- Per client, the 100 normalized families declared on the most distinct pages.
+-- The unnested value has its own name (`declared_family`) so that `family` in
+-- GROUP BY can only refer to the normalized name.
+families AS (
+  SELECT
+    client,
+    FAMILY_INNER(declared_family) AS family,
+    COUNT(DISTINCT page) AS count,
+    ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(DISTINCT page) DESC) AS rank
+  FROM
+    `httparchive.crawl.parsed_css`,
+    UNNEST(FAMILIES(css)) AS declared_family
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client,
+    family
+  QUALIFY
+    rank <= 100
+),
+
+-- Denominator: the number of distinct root pages per client.
+pages AS (
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client
+)
+
+SELECT
+  client,
+  family,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  families
+JOIN
+  pages
+USING (client)
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_color.sql b/sql/2025/fonts/development/fonts_color.sql
new file mode 100644
index 00000000000..5b59b410439
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color.sql
@@ -0,0 +1,53 @@
+-- Section: Development
+-- Question: How popular are color fonts?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- Denominator: the number of distinct root pages per date and client.
+pages AS (
+  SELECT
+    date,
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date IN UNNEST(@dates) AND
+    is_root_page
+  GROUP BY
+    date,
+    client
+),
+
+-- Numerator: the number of distinct root pages requesting at least one color
+-- font, per date and client.
+fonts AS (
+  SELECT
+    date,
+    client,
+    COUNT(DISTINCT page) AS count
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date IN UNNEST(@dates) AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+  GROUP BY
+    date,
+    client
+)
+
+SELECT
+  date,
+  client,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  fonts
+INNER JOIN
+  pages
+USING (date, client)
+ORDER BY
+  date,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_color_color.sql b/sql/2025/fonts/development/fonts_color_color.sql
new file mode 100644
index 00000000000..7dd32e2a116
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color_color.sql
@@ -0,0 +1,62 @@
+-- Section: Development
+-- Question: What is the distribution of color palettes?
+-- Normalization: Fonts (color only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Collect the distinct colors used across all palettes of a font as
+-- `#rrggbbaa` strings. Each palette entry is read as a
+-- [blue, green, red, alpha] quadruple, and each component is reduced to its
+-- lowest byte. Missing or malformed palettes result in an empty array.
+CREATE TEMPORARY FUNCTION COLORS(palettes JSON)
+RETURNS ARRAY<STRING>
+LANGUAGE js
+AS '''
+// Format the lowest byte of a value as two hexadecimal digits.
+function toHex(value) {
+  return ('0' + (value & 0xFF).toString(16)).slice(-2);
+}
+
+try {
+  const result = new Set();
+  for (const palette of palettes) {
+    for (const [blue, green, red, alpha] of palette) {
+      result.add(`#${toHex(red)}${toHex(green)}${toHex(blue)}${toHex(alpha)}`);
+    }
+  }
+  return Array.from(result);
+} catch (e) {
+  return [];
+}
+''';
+
+WITH
+-- One row per client and color-font URL with its distinct colors. The window
+-- runs over the grouped rows, so `total` is the number of distinct color fonts
+-- per client.
+fonts AS (
+  SELECT
+    client,
+    url,
+    COLORS(ANY_VALUE(payload)._font_details.color.palettes) AS colors,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- The number of color fonts using each color.
+SELECT
+  client,
+  color,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion
+FROM
+  fonts,
+  UNNEST(colors) AS color
+GROUP BY
+  client,
+  color,
+  total
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_color_emoji.sql b/sql/2025/fonts/development/fonts_color_emoji.sql
new file mode 100644
index 00000000000..9f38f4c37b2
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color_emoji.sql
@@ -0,0 +1,65 @@
+-- Section: Development
+-- Question: Are color fonts used for the sake of emojis?
+-- Normalization: Requests (color only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Check if any emoji-related script is detected among the codepoints. A missing
+-- or empty list of codepoints yields FALSE. `detectWritingScript` comes from
+-- the text-utils.js library.
+-- NOTE(review): the second argument (0.1) is presumably a coverage
+-- threshold -- confirm in text-utils.js.
+CREATE TEMPORARY FUNCTION HAS_EMOJI(codepoints JSON)
+RETURNS BOOL
+LANGUAGE js
+OPTIONS (library = ["gs://httparchive/lib/text-utils.js"])
+AS r"""
+if (!codepoints || !codepoints.length) {
+  return false;
+}
+const detected = detectWritingScript(codepoints.map((character) => parseInt(character, 10)), 0.1);
+const scripts = [
+  'Emoji',
+  'Emoji_Component',
+  'Emoji_Modifier',
+  'Emoji_Modifier_Base',
+  'Emoji_Presentation'
+];
+// The callback parameter is local, so nothing leaks into the global scope.
+return scripts.some((script) => detected.includes(script));
+""";
+
+WITH
+-- One row per color-font request on a root page, flagged by whether the font
+-- covers emojis. `total` counts these requests per date and client.
+requests AS (
+  SELECT
+    date,
+    client,
+    HAS_EMOJI(payload._font_details.cmap.codepoints) AS emoji,
+    COUNT(0) OVER (PARTITION BY date, client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date IN UNNEST(@dates) AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+)
+
+SELECT
+  date,
+  client,
+  emoji,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion
+FROM
+  requests
+GROUP BY
+  date,
+  client,
+  emoji,
+  total
+ORDER BY
+  date,
+  client,
+  emoji
diff --git a/sql/2025/fonts/development/fonts_color_entry.sql b/sql/2025/fonts/development/fonts_color_entry.sql
new file mode 100644
index 00000000000..f2f588061a9
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color_entry.sql
@@ -0,0 +1,40 @@
+-- Section: Development
+-- Question: How many entries are there in color palettes?
+-- Normalization: Fonts (color only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per client and color-font URL with the number of palette entries
+-- taken from an arbitrary request for that font. The window runs over the
+-- grouped rows, so `total` is the number of distinct color fonts per client.
+fonts AS (
+  SELECT
+    client,
+    url,
+    INT64(ANY_VALUE(payload)._font_details.color.numPaletteEntries) AS entries,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- The number of color fonts for each palette size.
+SELECT
+  client,
+  entries,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion
+FROM
+  fonts
+GROUP BY
+  client,
+  entries,
+  total
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_color_family.sql b/sql/2025/fonts/development/fonts_color_family.sql
new file mode 100644
index 00000000000..652cfc356d1
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color_family.sql
@@ -0,0 +1,39 @@
+-- Section: Development
+-- Question: Which color families are used?
+-- Normalization: Requests (color only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per color-font request on a root page. `total` counts these requests
+-- per client.
+requests AS (
+  SELECT
+    client,
+    FAMILY(payload) AS family,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+)
+
+-- Per client, the 100 most requested color families.
+SELECT
+  client,
+  family,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion,
+  ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(0) DESC) AS rank
+FROM
+  requests
+GROUP BY
+  client,
+  family,
+  total
+QUALIFY
+  rank <= 100
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_color_format.sql b/sql/2025/fonts/development/fonts_color_format.sql
new file mode 100644
index 00000000000..ab1b88d3e7f
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color_format.sql
@@ -0,0 +1,46 @@
+-- Section: Development
+-- Question: Which color formats are used?
+-- Normalization: Fonts (color only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per date, client, and color-font URL with the color formats taken
+-- from an arbitrary request for that font. The window runs over the grouped
+-- rows, so `total` is the number of distinct color fonts per date and client.
+fonts AS (
+  SELECT
+    date,
+    client,
+    url,
+    COLOR_FORMATS(ANY_VALUE(payload)) AS formats,
+    COUNT(0) OVER (PARTITION BY date, client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date IN UNNEST(@dates) AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+  GROUP BY
+    date,
+    client,
+    url
+)
+
+-- The number of color fonts using each format. A font contributes one row for
+-- each of its formats.
+SELECT
+  date,
+  client,
+  format,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion
+FROM
+  fonts
+CROSS JOIN
+  UNNEST(formats) AS format
+GROUP BY
+  date,
+  client,
+  format,
+  total
+ORDER BY
+  date,
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_color_format_by_family.sql b/sql/2025/fonts/development/fonts_color_format_by_family.sql
new file mode 100644
index 00000000000..2fb7b8d59a3
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color_format_by_family.sql
@@ -0,0 +1,46 @@
+-- Section: Development
+-- Question: Which color formats are used broken down by family?
+-- Normalization: Fonts (color only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per client and color-font URL with the color formats and the family
+-- taken from an arbitrary request for that font. `total` is the number of
+-- distinct color-font URLs per client.
+fonts AS (
+  SELECT
+    client,
+    url,
+    COLOR_FORMATS(ANY_VALUE(payload)) AS formats,
+    FAMILY(ANY_VALUE(payload)) AS family,
+    COUNT(DISTINCT url) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- The number of color fonts for each format-family pair, along with one URL
+-- serving as an example. A font contributes one row for each of its formats.
+SELECT
+  client,
+  format,
+  family,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion,
+  ANY_VALUE(url) AS example
+FROM
+  fonts
+CROSS JOIN
+  UNNEST(formats) AS format
+GROUP BY
+  client,
+  format,
+  family,
+  total
+ORDER BY
+  client,
+  format,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_color_palette.sql b/sql/2025/fonts/development/fonts_color_palette.sql
new file mode 100644
index 00000000000..d2fe06e2eb3
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_color_palette.sql
@@ -0,0 +1,40 @@
+-- Section: Development
+-- Question: How many palettes are there in color fonts?
+-- Normalization: Fonts (color only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+-- One row per client and color-font URL with the number of palettes taken from
+-- an arbitrary request for that font. Note that the output column is called
+-- `entries` even though it holds `numPalettes`. The window runs over the
+-- grouped rows, so `total` is the number of distinct color fonts per client.
+fonts AS (
+  SELECT
+    client,
+    url,
+    INT64(ANY_VALUE(payload)._font_details.color.numPalettes) AS entries,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_COLOR(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- The number of color fonts for each palette count.
+SELECT
+  client,
+  entries,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion
+FROM
+  fonts
+GROUP BY
+  client,
+  entries,
+  total
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_compiler.sql b/sql/2025/fonts/development/fonts_compiler.sql
new file mode 100644
index 00000000000..eae30f791da
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_compiler.sql
@@ -0,0 +1,54 @@
+-- Section: Development
+-- Question: Which compilers are used?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Infer the compiler from a version string. Known tools are recognized by
+-- case-insensitive markers; otherwise, whatever follows the first semicolon is
+-- returned trimmed, which is NULL when there is no semicolon.
+CREATE TEMPORARY FUNCTION COMPILER(version STRING) AS (
+  CASE
+    WHEN REGEXP_CONTAINS(version, r'(?i)(Core \d|PS \d|hotconv|makeotf)') THEN 'makeotf'
+    WHEN REGEXP_CONTAINS(version, r'(?i)FontCreator') THEN 'FontCreator'
+    WHEN REGEXP_CONTAINS(version, r'(?i)Fontself') THEN 'Fontself Maker'
+    WHEN REGEXP_CONTAINS(version, r'(?i)(FEAKit|Glyphs)') THEN 'Glyphs.app'
+    WHEN REGEXP_CONTAINS(version, r'(?i)gftools') THEN 'fontmake'
+    ELSE TRIM(REGEXP_EXTRACT(version, ';(.*)'))
+  END
+);
+
+WITH
+-- One row per client and font URL with the compiler inferred from an arbitrary
+-- request for that font. The window runs over the grouped rows, so `total` is
+-- the number of distinct fonts per client.
+-- NOTE(review): entry 5 of the name records is presumably the OpenType version
+-- string -- confirm against the payload schema.
+fonts AS (
+  SELECT
+    client,
+    url,
+    COMPILER(STRING(ANY_VALUE(payload)._font_details.names[5])) AS compiler,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- Per client, the 100 compilers used by the most fonts.
+SELECT
+  client,
+  compiler,
+  COUNT(DISTINCT url) AS count,
+  total,
+  ROUND(COUNT(DISTINCT url) / total, @precision) AS proportion,
+  ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(DISTINCT url) DESC) AS rank
+FROM
+  fonts
+GROUP BY
+  client,
+  compiler,
+  total
+QUALIFY
+  rank <= 100
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_feature.sql b/sql/2025/fonts/development/fonts_feature.sql
new file mode 100644
index 00000000000..74560db7117
--- /dev/null
+++ b/sql/2025/fonts/development/fonts_feature.sql
@@ -0,0 +1,63 @@
+-- Section: Development
+-- Question: Which features are used in fonts?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Collect the distinct feature tags of a font. The input is walked as a
+-- table -> script -> language -> list-of-features hierarchy. Missing or
+-- malformed input results in an empty array.
+CREATE TEMPORARY FUNCTION FEATURES(features JSON)
+RETURNS ARRAY<STRING>
+LANGUAGE js
+AS '''
+try {
+  const result = new Set();
+  for (const [table, scripts] of Object.entries(features)) {
+    for (const [script, languages] of Object.entries(scripts)) {
+      for (const [language, features] of Object.entries(languages)) {
+        features.forEach(feature => result.add(feature));
+      }
+    }
+  }
+  return Array.from(result);
+} catch (e) {
+  return [];
+}
+''';
+
+WITH
+-- One row per client and font URL with the distinct features taken from an
+-- arbitrary request for that font. The window runs over the grouped rows, so
+-- `total` is the number of distinct fonts per client.
+fonts AS (
+  SELECT
+    client,
+    url,
+    FEATURES(ANY_VALUE(payload)._font_details.features) AS features,
+    COUNT(0) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- Per client, the 100 features supported by the most fonts. A font contributes
+-- one row for each of its features.
+SELECT
+  client,
+  feature,
+  COUNT(0) AS count,
+  total,
+  ROUND(COUNT(0) / total, @precision) AS proportion,
+  ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(0) DESC) AS rank
+FROM
+  fonts,
+  UNNEST(features) AS feature
+GROUP BY
+  client,
+  feature,
+  total
+QUALIFY
+  rank <= 100
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/development/fonts_feature_kerning.sql b/sql/2025/fonts/development/fonts_feature_kerning.sql
new file mode 100644
index 00000000000..a76426e0c2c
--- /dev/null
+++ 
-- Section: Development
-- Question: How prevalent is kerning support?
-- Normalization: Fonts (parsed only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

-- Tells whether the tag 'kern' occurs anywhere in the three-level features
-- object. Any input that cannot be walked yields FALSE.
CREATE TEMPORARY FUNCTION HAS_KERNING(features JSON)
RETURNS BOOL
LANGUAGE js
AS '''
try {
  const tags = new Set();
  Object.values(features).forEach((scripts) => {
    Object.values(scripts).forEach((languages) => {
      Object.values(languages).forEach((list) => {
        list.forEach((tag) => tags.add(tag));
      });
    });
  });
  return tags.has('kern');
} catch (e) {
  return false;
}
''';

WITH
parsed_fonts AS (
  -- One row per date, client, and font URL. A font supports kerning when the
  -- features object lists 'kern' or the serialized table_sizes mentions kern
  -- in any letter case.
  SELECT
    date,
    client,
    url,
    (
      HAS_KERNING(ANY_VALUE(payload)._font_details.features)
      OR IFNULL(
        REGEXP_CONTAINS(
          TO_JSON_STRING(ANY_VALUE(payload)._font_details.table_sizes),
          '(?i)kern'
        ),
        FALSE
      )
    ) AS support,
    COUNT(*) OVER (PARTITION BY date, client) AS total
  FROM `httparchive.crawl.requests`
  WHERE
    date IN UNNEST(@dates)
    AND type = 'font'
    AND is_root_page
    AND IS_PARSED(payload)
  GROUP BY date, client, url
)

SELECT
  date,
  client,
  support,
  COUNT(DISTINCT url) AS count,
  total,
  ROUND(COUNT(DISTINCT url) / total, @precision) AS proportion
FROM parsed_fonts
GROUP BY date, client, support, total
ORDER BY date, client, support
-- Section: Development
-- Question: How prevalent is OpenType support?
-- Normalization: Fonts (parsed only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

WITH
parsed_fonts AS (
  -- One row per date, client, and font URL. A font is flagged when the
  -- serialized table_sizes object mentions GPOS or GSUB in any letter case.
  SELECT
    date,
    client,
    url,
    REGEXP_CONTAINS(
      TO_JSON_STRING(ANY_VALUE(payload)._font_details.table_sizes),
      '(?i)GPOS|GSUB'
    ) AS support,
    COUNT(*) OVER (PARTITION BY date, client) AS total
  FROM `httparchive.crawl.requests`
  WHERE
    date IN UNNEST(@dates)
    AND type = 'font'
    AND is_root_page
    AND IS_PARSED(payload)
  GROUP BY date, client, url
)

-- Splits the fonts of every crawl and client by the support flag.
SELECT
  date,
  client,
  support,
  COUNT(*) AS count,
  total,
  ROUND(COUNT(*) / total, @precision) AS proportion
FROM parsed_fonts
GROUP BY date, client, support, total
ORDER BY date, client, support
-- Section: Development
-- Question: Which outline formats are used?
-- Normalization: Fonts (parsed only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

WITH
parsed_fonts AS (
  -- One row per date, client, and font URL. The formats are whatever the
  -- pattern finds in the serialized table_sizes object, in the letter case
  -- found there.
  SELECT
    date,
    client,
    url,
    REGEXP_EXTRACT_ALL(
      TO_JSON_STRING(ANY_VALUE(payload)._font_details.table_sizes),
      '(?i)(CFF |glyf|SVG|CFF2)'
    ) AS formats,
    COUNT(*) OVER (PARTITION BY date, client) AS total
  FROM `httparchive.crawl.requests`
  WHERE
    date IN UNNEST(@dates)
    AND type = 'font'
    AND is_root_page
    AND IS_PARSED(payload)
  GROUP BY date, client, url
)

-- A font with several matches is counted once under each of them, so the
-- proportions of a crawl need not add up to one.
SELECT
  date,
  client,
  format,
  COUNT(*) AS count,
  total,
  ROUND(COUNT(*) / total, @precision) AS proportion
FROM parsed_fonts
CROSS JOIN UNNEST(formats) AS format
GROUP BY date, client, format, total
ORDER BY date, client, count DESC
-- Section: Development
-- Question: How prevalent is autohinting?
-- Normalization: Fonts (parsed only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

-- True when the serialized table_sizes object mentions fpgm or prep in any
-- letter case.
CREATE TEMPORARY FUNCTION IS_HINTED(payload JSON) AS (
  REGEXP_CONTAINS(TO_JSON_STRING(payload._font_details.table_sizes), '(?i)fpgm|prep')
);

-- True when the name entry at index 5 contains the text autohint.
CREATE TEMPORARY FUNCTION IS_AUTOHINTED(payload JSON) AS (
  REGEXP_CONTAINS(
    STRING(payload._font_details.names[5]),
    'autohint'
  )
);

-- NOTE(review) the header announces a normalization by fonts, yet both the
-- count and the total below are numbers of distinct pages; please confirm.
WITH
page_totals AS (
  -- All root pages of the crawl per client, with or without fonts.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM `httparchive.crawl.requests`
  WHERE
    date = @date
    AND is_root_page
  GROUP BY client
)

-- Only hinted fonts pass the filter; they are then split by autohinting.
SELECT
  client,
  IS_AUTOHINTED(payload) AS autohinted,
  COUNT(DISTINCT page) AS count,
  total,
  ROUND(COUNT(DISTINCT page) / total, @precision) AS proportion
FROM `httparchive.crawl.requests`
INNER JOIN page_totals USING (client)
WHERE
  date = @date
  AND type = 'font'
  AND is_root_page
  AND IS_PARSED(payload)
  AND IS_HINTED(payload)
GROUP BY client, autohinted, total
ORDER BY client, autohinted
-- Section: Development
-- Question: How popular are variable fonts?
-- Normalization: Pages

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

WITH
variable_pages AS (
  -- Root pages with at least one font request accepted by IS_VARIABLE, which
  -- is defined in common.sql and not visible here.
  SELECT
    date,
    client,
    COUNT(DISTINCT page) AS count
  FROM `httparchive.crawl.requests`
  WHERE
    date IN UNNEST(@dates)
    AND type = 'font'
    AND is_root_page
    AND IS_VARIABLE(payload)
  GROUP BY date, client
),

all_pages AS (
  -- Every root page of the same crawls, used as the denominator.
  SELECT
    date,
    client,
    COUNT(DISTINCT page) AS total
  FROM `httparchive.crawl.requests`
  WHERE
    date IN UNNEST(@dates)
    AND is_root_page
  GROUP BY date, client
)

SELECT
  date,
  client,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM variable_pages
INNER JOIN all_pages USING (date, client)
ORDER BY date, client, count DESC
-- Section: Development
-- Question: Which axes are used in variable fonts?
-- Normalization: Fonts (variable only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

-- Lists the keys of the fvar object, one per axis, or nothing when the value
-- is not an object.
CREATE TEMPORARY FUNCTION AXES(fvar JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
AS '''
try {
  return Object.keys(fvar);
} catch (e) {
  return [];
}
''';

WITH
fonts AS (
  -- One row per client and font URL. The window runs after the grouping, so
  -- total is the number of distinct variable font URLs per client.
  -- IS_VARIABLE comes from common.sql and is not visible here.
  SELECT
    client,
    url,
    AXES(ANY_VALUE(payload)._font_details.fvar) AS axes,
    COUNT(0) OVER (PARTITION BY client) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    type = 'font' AND
    is_root_page AND
    IS_VARIABLE(payload)
  GROUP BY
    client,
    url
)

-- A font with several axes is counted once under each of them.
SELECT
  client,
  axis,
  COUNT(0) AS count,
  total,
  ROUND(COUNT(0) / total, @precision) AS proportion
FROM
  fonts,
  UNNEST(axes) AS axis
GROUP BY
  client,
  axis,
  total
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: Which variable families are used?
-- Normalization: Requests (variable only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

WITH
variable_requests AS (
  -- One row per font request accepted by IS_VARIABLE; total is the number of
  -- such requests per client. FAMILY and IS_VARIABLE come from common.sql.
  SELECT
    client,
    FAMILY(payload) AS family,
    COUNT(*) OVER (PARTITION BY client) AS total
  FROM `httparchive.crawl.requests`
  WHERE
    date = @date
    AND type = 'font'
    AND is_root_page
    AND IS_VARIABLE(payload)
)

-- Keeps the 100 most requested families per client.
SELECT
  client,
  family,
  COUNT(*) AS count,
  total,
  ROUND(COUNT(*) / total, @precision) AS proportion,
  ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(*) DESC) AS rank
FROM variable_requests
GROUP BY client, family, total
QUALIFY rank <= 100
ORDER BY client, count DESC
-- Section: Development
-- Question: Which outline formats are used in variable fonts?
-- Normalization: Fonts (variable only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

WITH
variable_fonts AS (
  -- One row per date, client, and font URL; total is the number of such rows
  -- per date and client. VARIABLE_FORMATS and IS_VARIABLE come from
  -- common.sql and are not visible here.
  SELECT
    date,
    client,
    url,
    VARIABLE_FORMATS(ANY_VALUE(payload)) AS formats,
    COUNT(*) OVER (PARTITION BY date, client) AS total
  FROM `httparchive.crawl.requests`
  WHERE
    date IN UNNEST(@dates)
    AND type = 'font'
    AND is_root_page
    AND IS_VARIABLE(payload)
  GROUP BY date, client, url
)

-- Every element of formats adds one to the count of that format.
SELECT
  date,
  client,
  format,
  COUNT(*) AS count,
  total,
  ROUND(COUNT(*) / total, @precision) AS proportion
FROM variable_fonts
CROSS JOIN UNNEST(formats) AS format
GROUP BY date, client, format, total
ORDER BY date, client, count DESC
-- Section: Development
-- Question: What are the distributions of axes?
-- Normalization: Fonts (variable only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

-- Turns the fvar object into one record per axis holding the min, default,
-- and max entries of that axis. The three values are declared as FLOAT64
-- because the query feeds them to APPROX_QUANTILES and ROUND.
CREATE TEMPORARY FUNCTION AXES(fvar JSON)
RETURNS ARRAY<STRUCT<name STRING, minimum FLOAT64, medium FLOAT64, maximum FLOAT64>>
LANGUAGE js
AS '''
try {
  return Object.keys(fvar).map((name) => {
    return {
      name: name,
      minimum: fvar[name].min,
      medium: fvar[name].default,
      maximum: fvar[name].max
    };
  });
} catch (e) {
  return [];
}
''';

WITH
fonts AS (
  -- One row per client, font URL, and axis name; repeated requests for the
  -- same font collapse into a single row.
  SELECT
    client,
    url,
    axis.name,
    ANY_VALUE(axis.minimum) AS minimum,
    ANY_VALUE(axis.medium) AS medium,
    ANY_VALUE(axis.maximum) AS maximum
  FROM
    `httparchive.crawl.requests`,
    UNNEST(AXES(payload._font_details.fvar)) AS axis
  WHERE
    date = @date AND
    type = 'font' AND
    is_root_page AND
    IS_VARIABLE(payload)
  GROUP BY
    client,
    url,
    name
)

-- APPROX_QUANTILES with 1000 buckets returns 1001 boundaries, so percentile
-- p sits at offset p * 10.
SELECT
  client,
  name,
  percentile,
  COUNT(0) AS count,
  ROUND(APPROX_QUANTILES(minimum, 1000)[OFFSET(percentile * 10)], @precision) AS minimum,
  ROUND(APPROX_QUANTILES(medium, 1000)[OFFSET(percentile * 10)], @precision) AS medium,
  ROUND(APPROX_QUANTILES(maximum, 1000)[OFFSET(percentile * 10)], @precision) AS maximum
FROM
  fonts,
  UNNEST([10, 25, 50, 75, 90, 99]) AS percentile
GROUP BY
  client,
  name,
  percentile
ORDER BY
  client,
  name,
  percentile
-- Section: Development
-- Question: Who is serving variable fonts?
-- Normalization: Requests (variable only) and fonts (variable only)

-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql

WITH
variable_requests AS (
  -- One row per font request accepted by IS_VARIABLE. The primary total is
  -- the number of requests and the secondary one the number of distinct font
  -- URLs, both per date and client. SERVICE and IS_VARIABLE come from
  -- common.sql and are not visible here.
  SELECT
    date,
    client,
    url,
    SERVICE(url) AS service,
    COUNT(*) OVER (PARTITION BY date, client) AS total,
    COUNT(DISTINCT url) OVER (PARTITION BY date, client) AS total_secondary
  FROM `httparchive.crawl.requests`
  WHERE
    date IN UNNEST(@dates)
    AND type = 'font'
    AND is_root_page
    AND IS_VARIABLE(payload)
)

SELECT
  date,
  client,
  service,
  COUNT(*) AS count,
  COUNT(DISTINCT url) AS count_secondary,
  total,
  total_secondary,
  ROUND(COUNT(*) / total, @precision) AS proportion,
  ROUND(COUNT(DISTINCT url) / total_secondary, @precision) AS proportion_secondary
FROM variable_requests
GROUP BY date, client, service, total, total_secondary
ORDER BY date, client, count DESC
-- Section: Development
-- Question: Which system families are popular?
-- Normalization: Pages

-- Collects the families named by font-family and font declarations outside
-- font-face rules and keeps those that appear in the list of generic names
-- below. The comparison is exact, including letter case.
CREATE TEMPORARY FUNCTION FAMILIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = ["gs://httparchive/lib/css-font-parser.js", "gs://httparchive/lib/css-utils.js"])
AS '''
const system = [
  'cursive',
  'emoji',
  'fangsong',
  'fantasy',
  'math',
  'monospace',
  'sans-serif',
  'serif',
  'system-ui',
  'ui-monospace',
  'ui-rounded',
  'ui-sans-serif',
  'ui-serif'
];

try {
  const result = [];
  walkDeclarations(css, (declaration) => {
    if (declaration.property.toLowerCase() === 'font-family') {
      const fonts = parseFontFamilyProperty(declaration.value);
      if (fonts) {
        fonts.forEach(font => result.push(font));
      }
    } else if (declaration.property.toLowerCase() === 'font') {
      const value = parseFontProperty(declaration.value);
      if (value) {
        value['font-family'].forEach((font) => result.push(font));
      }
    }
  }, {
    properties: ['font-family', 'font'],
    rules: (rule) => rule.type.toLowerCase() !== 'font-face'
  });
  return result.filter((font) => system.includes(font));
} catch (e) {
  return [];
}
''';

WITH
families AS (
  -- Pages per client and family; a page counts once per family however often
  -- the family occurs in its style sheets.
  SELECT
    client,
    family,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(FAMILIES(css)) AS family
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    family
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  family,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  families
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: How are features used in CSS?
-- Normalization: Pages

-- Reports which of the two mechanisms a style sheet uses, namely any
-- font-variant-* property with a value other than none or normal, and
-- font-feature-settings with a value other than normal.
CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
function compute(tree) {
  const result = {};
  walkDeclarations(tree, ({ property, value }) => {
    const name = property.toLowerCase();
    if (
      name.startsWith('font-variant-') &&
      value.toLowerCase() !== 'none' &&
      value.toLowerCase() !== 'normal'
    ) {
      incrementByKey(result, 'font-variant');
    } else if (
      name === 'font-feature-settings' &&
      value.toLowerCase() !== 'normal'
    ) {
      incrementByKey(result, 'font-feature-settings');
    }
  });
  return sortObject(result);
}

try {
  // The query counts distinct pages, so one entry per name is enough; there
  // is no need to repeat a name once per occurrence.
  return Object.keys(compute(css));
} catch (e) {
  return [];
}
''';

WITH
properties AS (
  -- Pages per client and mechanism.
  SELECT
    client,
    property,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(PROPERTIES(css)) AS property
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    property
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  property,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  properties
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: Which features are used via font-feature-settings in CSS?
-- Normalization: Pages

-- Extracts the feature tags named by font-feature-settings declarations
-- outside font-face rules.
CREATE TEMPORARY FUNCTION FEATURES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
// Splits the value on commas and keeps the tag of every item that looks like
// a quoted tag of one to four printable ASCII characters, optionally followed
// by a number, on, or off.
// NOTE(review) only double-quoted tags match; single-quoted ones are skipped.
// Please confirm that this is intended.
function parseFontFeatureSettings(value) {
  const features = (value || '').split(/\\s*,\\s*/);
  const result = []
  for (let i = 0; i < features.length; i++) {
    const match = /^"([\u0020-\u007e]{1,4})"(?:\\s+(\\d+|on|off))?$/i.exec(features[i]);
    if (match) {
      result.push(match[1]);
    }
  }
  return result;
}

try {
  const result = [];
  walkDeclarations(css, (declaration) => {
    const tags = parseFontFeatureSettings(declaration.value);
    if (tags && tags.length) {
      tags.forEach((tag) => result.push(tag));
    }
  }, {
    properties: 'font-feature-settings',
    rules: (rule) => rule.type.toLowerCase() !== 'font-face'
  });
  return result;
} catch (e) {
  return [];
}
''';

WITH
features AS (
  -- Pages per client and feature tag; repeated tags on a page count once.
  SELECT
    client,
    feature,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(FEATURES(css)) AS feature
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    feature
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  feature,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  features
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: Which axes are used in CSS?
-- Normalization: Pages (variable only)

-- Returns the raw value of every font-variation-settings declaration found
-- in the style sheet, descending into nested rules.
CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
AS '''
// Reducer over rules; rules that contain other rules are walked recursively
// and rules without declarations contribute nothing.
function compute(values, rule) {
  if ('rules' in rule) {
    return rule.rules.reduce(compute, values);
  }
  if (!('declarations' in rule)) {
    return values;
  }
  return values.concat(
    rule.declarations
      .filter((declaration) => declaration.property.toLowerCase() === 'font-variation-settings')
      .map((declaration) => declaration.value)
  );
}

try {
  return css.stylesheet.rules.reduce(compute, []);
} catch (e) {
  return [];
}
''';

WITH
pages AS (
  -- One row per client, page, and axis. Each value is split on commas and the
  -- first quoted run of four word characters in a piece is taken as the axis.
  -- The window runs after GROUP BY and HAVING, so total is the number of
  -- pages per client that name at least one axis.
  SELECT
    client,
    page,
    REGEXP_EXTRACT(chunk, r'''['"]([\w]{4})['"]''') AS axis,
    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(PROPERTIES(css)) AS property,
    UNNEST(SPLIT(property, ',')) AS chunk
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    page,
    axis
  HAVING
    axis IS NOT NULL
)

SELECT
  client,
  axis,
  COUNT(0) AS count,
  total,
  ROUND(COUNT(0) / total, @precision) AS proportion
FROM
  pages
GROUP BY
  client,
  axis,
  total
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: Which features are used via font-variant in CSS?
-- Normalization: Pages

-- Lists the distinct font-variant and font-variant-* declarations of a style
-- sheet, each written as the property followed by its value.
CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
function compute(tree) {
  const result = {};
  walkDeclarations(tree, ({ property, value }) => {
    const name = property.toLowerCase();
    if (name === 'font-variant') {
      incrementByKey(result, 'font-variant: ' + value)
    } else if (name.startsWith('font-variant-')) {
      incrementByKey(result, name + ': ' + value);
    }
  });
  return sortObject(result);
}

try {
  // The query counts distinct pages, so one entry per name is enough; there
  // is no need to repeat a name once per occurrence.
  return Object.keys(compute(css));
} catch (e) {
  return [];
}
''';

WITH
properties AS (
  -- Pages per client and declaration.
  SELECT
    client,
    property,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(PROPERTIES(css)) AS property
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    property
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  property,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  properties
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: Which hyphenation settings are used in CSS?
-- Normalization: Pages

-- Lists the distinct hyphens declarations of a style sheet, each written as
-- the property followed by its value.
CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
function compute(tree) {
  const result = {};
  walkDeclarations(tree, ({ property, value }) => {
    const name = property.toLowerCase();
    if (name === 'hyphens') {
      incrementByKey(result, 'hyphens: ' + value)
    }
  });
  return sortObject(result);
}

try {
  // The query counts distinct pages, so one entry per name is enough; there
  // is no need to repeat a name once per occurrence.
  return Object.keys(compute(css));
} catch (e) {
  return [];
}
''';

WITH
properties AS (
  -- Pages per client and declaration.
  SELECT
    client,
    property,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(PROPERTIES(css)) AS property
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    property
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  property,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  properties
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: How and how often is metrics override used in CSS?
-- Normalization: Pages

-- Lists the metric-override descriptors (size-adjust, ascent-override,
-- descent-override, line-gap-override) found inside font-face rules, one
-- entry per occurrence and spelled as in the style sheet.
CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  const result = [];
  walkDeclarations(css, (declaration) => {
    result.push(declaration.property);
  }, {
    properties: ['size-adjust', 'ascent-override', 'descent-override', 'line-gap-override'],
    rules: (rule) => rule.type.toLowerCase() === 'font-face'
  });
  return result;
} catch (e) {
  return [];
}
''';

WITH
properties AS (
  -- Pages per client and descriptor; repeated use on a page counts once.
  SELECT
    client,
    property,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(PROPERTIES(css)) AS property
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    property
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  property,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  properties
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: How and how often is smoothing used in CSS?
-- Normalization: Pages

-- Lists the smoothing declarations (-webkit-font-smoothing,
-- -moz-osx-font-smoothing, font-smooth) of a style sheet, each written as the
-- property followed by its value, one entry per occurrence.
CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  const result = [];
  walkDeclarations(css, (declaration) => {
    result.push(`${declaration.property}: ${declaration.value}`);
  }, {
    properties: ['-webkit-font-smoothing', '-moz-osx-font-smoothing', 'font-smooth']
  });
  return result;
} catch (e) {
  return [];
}
''';

WITH
properties AS (
  -- Pages per client and declaration, limited to the 10 most widespread
  -- declarations of each client.
  SELECT
    client,
    property,
    COUNT(DISTINCT page) AS count,
    ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(DISTINCT page) DESC) AS rank
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(PROPERTIES(css)) AS property
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    property
  QUALIFY
    rank <= 10
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  property,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  properties
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: Which text-wrap settings are used in CSS?
-- Normalization: Pages

-- Lists the distinct text-wrap declarations of a style sheet, each written
-- as the property followed by its value.
CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
function compute(tree) {
  const result = {};
  walkDeclarations(tree, ({ property, value }) => {
    const name = property.toLowerCase();
    if (name === 'text-wrap') {
      incrementByKey(result, 'text-wrap: ' + value)
    }
  });
  return sortObject(result);
}

try {
  // The query counts distinct pages, so one entry per name is enough; there
  // is no need to repeat a name once per occurrence.
  return Object.keys(compute(css));
} catch (e) {
  return [];
}
''';

WITH
properties AS (
  -- Pages per client and declaration.
  SELECT
    client,
    property,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(PROPERTIES(css)) AS property
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client,
    property
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  property,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  properties
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
-- Section: Development
-- Question: How popular is variable-font animation in CSS?
-- Normalization: Pages

-- Tells whether a style sheet animates a font property, either by declaring
-- font-stretch, font-style, font-variation-settings, or font-weight inside a
-- keyframe, or by naming one of them in a transition declaration.
CREATE TEMPORARY FUNCTION HAS_ANIMATION(css JSON)
RETURNS BOOLEAN
LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
try {
  let count = 0;
  // Declarations inside the frames of keyframes rules.
  walkRules(css, (rule) => {
    rule.keyframes.forEach((frame) => {
      count += countDeclarations(
        frame,
        {
          properties: [
            'font-stretch',
            'font-style',
            'font-variation-settings',
            'font-weight'
          ]
        }
      );
    });
  }, {
    type: 'keyframes'
  });
  // Transition declarations whose value mentions one of the properties.
  count += countDeclarations(css.stylesheet.rules, {
    properties: 'transition',
    values: /font-stretch|font-style|font-variation-settings|font-weight/
  });
  return count > 0;
} catch (e) {
  return false;
}
''';

WITH
properties AS (
  -- Pages per client with at least one style sheet that animates a font
  -- property.
  SELECT
    client,
    COUNT(DISTINCT page) AS count
  FROM
    `httparchive.crawl.parsed_css`
  WHERE
    date = @date AND
    is_root_page AND
    HAS_ANIMATION(css)
  GROUP BY
    client
),

pages AS (
  -- All root pages of the crawl per client, used as the denominator.
  SELECT
    client,
    COUNT(DISTINCT page) AS total
  FROM
    `httparchive.crawl.requests`
  WHERE
    date = @date AND
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  count,
  total,
  ROUND(count / total, @precision) AS proportion
FROM
  properties
JOIN
  pages
USING (client)
ORDER BY
  client,
  count DESC
QUERY_PARAMETERS = [
    # The date of the crawl used for the analysis.
    bigquery.ScalarQueryParameter(
        "date",
        "DATE",
        "2025-07-01",
    ),
    # A set of dates for queries studying trends over time.
    bigquery.ArrayQueryParameter(
        "dates",
        "DATE",
        ["2022-07-01", "2023-07-01", "2024-07-01", "2025-07-01"],
    ),
    # The number of digits after the decimal point for formatting proportions.
    bigquery.ScalarQueryParameter(
        "precision",
        "INT64",
        4,
    ),
]

# The spreadsheet with results for the corresponding edition.
SPREADSHEET_ID = "1otdu4p_CCI70B4FVzw6k02frStsPMrQoFu7jUim_0Bg"


def main():
    """Process the selected SQL files in parallel and print one status each.

    Positional arguments are glob patterns relative to the current directory
    (default ``*/*.sql``); only files with the ``.sql`` suffix are kept. Queries
    are merely estimated unless ``--no-dry-run`` is given. ``--workers`` sets the
    size of the process pool and defaults to the number of CPUs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path", nargs="*", default=["*/*.sql"])
    parser.add_argument("--no-dry-run", action="store_true")
    parser.add_argument("--workers", type=int, default=multiprocessing.cpu_count())
    arguments = parser.parse_args()

    paths = [
        path
        for pattern in arguments.path
        for path in Path(".").glob(pattern)
        if path.suffix == ".sql"
    ]
    if not paths:
        # Without this guard, max() below raises ValueError on an empty list.
        print("no queries matched")
        return
    # Width used to align the status column of single-message lines.
    width = max(len(str(path)) for path in paths) + 1

    tasks = [{"path": path, "dry_run": not arguments.no_dry_run} for path in paths]
    with multiprocessing.Pool(arguments.workers) as pool:
        # Results are reported as they finish, not in submission order.
        for result in pool.imap_unordered(_process, tasks):
            path = result["task"]["path"]
            if result["failures"]:
                # Only the first line of each failure is shown. An empty
                # failure text has no lines at all, hence the fallback.
                messages = [
                    next(iter(failure.splitlines()), "unknown error")
                    for failure in result["failures"]
                ]
            elif result["successes"]:
                messages = result["successes"]
            else:
                messages = []
            if not messages:
                print(f"{str(path).ljust(width)}: skipped")
            elif len(messages) == 1:
                print(f"{str(path).ljust(width)}: {messages[0]}")
            else:
                print(f"{path}:")
                for message in messages:
                    print(f" - {message}")
def _process(task: dict) -> dict:
    """Run one query end to end and report what happened.

    Args:
        task: A dictionary with ``path`` (the SQL file) and ``dry_run``.

    Returns:
        A dictionary with the original ``task`` plus ``failures`` and
        ``successes``, two lists of human-readable messages.
    """
    result = {"task": task, "failures": [], "successes": []}

    query = _query_read(task["path"])

    # If there is a CSV file, just use it without questions.
    path = task["path"].with_suffix(".csv")
    if not path.exists():
        try:
            if task["dry_run"]:
                size = _bigquery_estimate(query["content"])
                result["successes"].append(f"estimated {size:5.2f} TB")
                return result
            data = _bigquery_read(query["content"])
            data.to_csv(path, index=False)
            result["successes"].append(f"wrote {path}")
        except Exception as error:
            # Some exceptions have an empty message; fall back to their repr so
            # that the caller always has something to print.
            result["failures"].append(str(error) or repr(error))
            return result

    # A dry run never touches the spreadsheet, so the CSV need not be loaded.
    if task["dry_run"]:
        return result

    try:
        # The CSV is re-read as strings even right after being written, so both
        # paths upload the same representation. Reading happens inside the try
        # so that an unreadable file is reported instead of killing the worker.
        data = pd.read_csv(path, dtype=str)
        sheet = _sheets_prepare(query["metadata"])
        # If A1 is populated, skip writing.
        if not _sheets_exists(sheet):
            _sheets_write(data, query["metadata"], sheet)
            result["successes"].append(f"updated “{sheet}”")
    except Exception as error:
        result["failures"].append(str(error) or repr(error))

    return result


def _bigquery_estimate(query: str) -> float:
    """Return how much data the query would process, without running it.

    The byte count of a dry-run job is divided by 1024 four times, so the unit
    is binary terabytes.
    """
    credentials, _ = google.auth.default()
    client = bigquery.Client(credentials=credentials, project=PROJECT_ID)
    config = bigquery.QueryJobConfig(
        query_parameters=QUERY_PARAMETERS,
        use_query_cache=False,
        dry_run=True,
    )
    job = client.query(query, job_config=config)
    return job.total_bytes_processed / 1024 / 1024 / 1024 / 1024


def _bigquery_read(query: str) -> pd.DataFrame:
    """Run the query with the shared parameters and return the rows."""
    credentials, _ = google.auth.default()
    client = bigquery.Client(
        credentials=credentials,
        project=PROJECT_ID,
    )
    config = bigquery.QueryJobConfig(
        query_parameters=QUERY_PARAMETERS,
        use_query_cache=True,
    )
    job = client.query(query, job_config=config)
    return job.to_dataframe()


def _sheets_exists(name: str) -> bool:
    """Tell whether cell A1 of the named sheet holds a non-empty value."""
    credentials, _ = google.auth.default()
    service = build("sheets", "v4", credentials=credentials)
    result = (
        service.spreadsheets()
        .values()
        .get(spreadsheetId=SPREADSHEET_ID, range=f"'{name}'!A1")
        .execute()
    )
    # Guard against a missing key, an empty row, and an empty cell alike, and
    # return a real boolean rather than the cell content.
    rows = result.get("values") or []
    return bool(rows and rows[0] and rows[0][0])
build("sheets", "v4", credentials=credentials) + name = metadata["Section"] + ": " + metadata["Question"] + _ = _sheets_find(service, name) or _sheets_create(service, name) + return name + + +def _sheets_write(data: pd.DataFrame, metadata: dict, name: str) -> dict: + credentials, _ = google.auth.default() + service = build("sheets", "v4", credentials=credentials) + data = data.where(pd.notnull(data), None) + values = [ + ["Section", metadata["Section"]], + ["Question", metadata["Question"]], + ["Normalization", metadata["Normalization"]], + [], + data.columns.tolist(), + *data.values.tolist(), + ] + service.spreadsheets().values().update( + spreadsheetId=SPREADSHEET_ID, + range=f"'{name}'!A1", + valueInputOption="RAW", + body={"values": values}, + ).execute() + + +def _sheets_create( + service: object, + name: str, + column_count: int = 20, + row_count: int = 1000, +) -> str: + request = { + "addSheet": { + "properties": { + "title": name, + "gridProperties": { + "columnCount": column_count, + "rowCount": row_count, + }, + } + } + } + response = ( + service.spreadsheets() + .batchUpdate( + spreadsheetId=SPREADSHEET_ID, + body={"requests": [request]}, + ) + .execute() + ) + return response["replies"][0]["addSheet"]["properties"]["sheetId"] + + +def _sheets_find(service: object, name: str) -> Optional[str]: + spreadsheet = service.spreadsheets().get(spreadsheetId=SPREADSHEET_ID).execute() + return next( + ( + sheet["properties"]["sheetId"] + for sheet in spreadsheet["sheets"] + if sheet["properties"]["title"] == name + ), + None, + ) + + +def _query_read(path: Path) -> dict: + lines = [] + metadata = {} + for line in path.read_text().splitlines(): + match = re.search(r"^-- (.+): (.+)$", line) + if match: + name, value = match.groups() + metadata[name] = value + match = re.search(r"^-- INCLUDE .*/([^/]+)\.sql$", line) + if match: + (name,) = match.groups() + lines.extend(Path(f"{name}.sql").read_text().splitlines()) + else: + lines.append(line) + return { + "content": 
"\n".join(lines), + "metadata": metadata, + } + + +if __name__ == "__main__": + main() diff --git a/sql/2025/fonts/performance/fonts.sql b/sql/2025/fonts/performance/fonts.sql new file mode 100644 index 00000000000..e781b233add --- /dev/null +++ b/sql/2025/fonts/performance/fonts.sql @@ -0,0 +1,21 @@ +-- Section: Performance +-- Question: What is the font usage over time? +-- Normalization: Pages + +SELECT + date, + client, + COUNT(DISTINCT IF(type = 'font', page, NULL)) AS count, + COUNT(DISTINCT page) AS total, + ROUND(COUNT(DISTINCT IF(type = 'font', page, NULL)) / COUNT(DISTINCT page), @precision) AS proportion +FROM + `httparchive.crawl.requests` +WHERE + date IS NOT NULL AND + is_root_page +GROUP BY + client, + date +ORDER BY + date, + client diff --git a/sql/2025/fonts/performance/fonts_family_by_service.sql b/sql/2025/fonts/performance/fonts_family_by_service.sql new file mode 100644 index 00000000000..70a03e4351b --- /dev/null +++ b/sql/2025/fonts/performance/fonts_family_by_service.sql @@ -0,0 +1,43 @@ +-- Section: Performance +-- Question: Which families are used broken down by service? 
+-- Normalization: Requests (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+font_requests AS (
+  -- One row per parsed font request made by a root page, together with the
+  -- number of such requests for the client.
+  SELECT
+    client,
+    SERVICE(url) AS service,
+    FAMILY(payload) AS family,
+    COUNT(*) OVER (PARTITION BY client) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page AND
+    type = 'font' AND
+    IS_PARSED(payload)
+)
+
+-- The 100 most requested families for each client and service.
+SELECT
+  client,
+  service,
+  family,
+  COUNT(*) AS count,
+  total,
+  ROUND(COUNT(*) / total, @precision) AS proportion,
+  ROW_NUMBER() OVER (PARTITION BY client, service ORDER BY COUNT(*) DESC) AS rank
+FROM
+  font_requests
+GROUP BY
+  client,
+  service,
+  family,
+  total
+QUALIFY
+  rank <= 100
+ORDER BY
+  client,
+  service,
+  count DESC
diff --git a/sql/2025/fonts/performance/fonts_format_file.sql b/sql/2025/fonts/performance/fonts_format_file.sql
new file mode 100644
index 00000000000..e834027ba0e
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_format_file.sql
@@ -0,0 +1,44 @@
+-- Section: Performance
+-- Question: Which file formats are used?
+-- Normalization: Requests and fonts
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+font_requests AS (
+  SELECT
+    client,
+    url,
+    FILE_FORMAT(STRING(summary.ext), STRING(summary.mimeType)) AS format,
+    -- The primary total counts requests; the secondary one counts distinct URLs.
+    COUNT(*) OVER (PARTITION BY client) AS total,
+    COUNT(DISTINCT url) OVER (PARTITION BY client) AS total_secondary
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page AND
+    type = 'font'
+)
+
+-- The 10 most requested formats for each client.
+SELECT
+  client,
+  format,
+  COUNT(*) AS count,
+  COUNT(DISTINCT url) AS count_secondary,
+  total,
+  total_secondary,
+  ROUND(COUNT(*) / total, @precision) AS proportion,
+  ROUND(COUNT(DISTINCT url) / total_secondary, @precision) AS proportion_secondary,
+  ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(*) DESC) AS rank
+FROM
+  font_requests
+GROUP BY
+  client,
+  format,
+  total,
+  total_secondary
+QUALIFY
+  rank <= 10
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/performance/fonts_format_file_by_service.sql b/sql/2025/fonts/performance/fonts_format_file_by_service.sql
new file mode 100644
index 00000000000..77d21045b33
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_format_file_by_service.sql
@@ -0,0 +1,48 @@
+-- Section: Performance
+-- Question: Which file formats are used broken down by service?
+-- Normalization: Requests and fonts
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+font_requests AS (
+  SELECT
+    client,
+    url,
+    SERVICE(url) AS service,
+    FILE_FORMAT(STRING(summary.ext), STRING(summary.mimeType)) AS format,
+    -- The primary total counts requests; the secondary one counts distinct URLs.
+    COUNT(*) OVER (PARTITION BY client) AS total,
+    COUNT(DISTINCT url) OVER (PARTITION BY client) AS total_secondary
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page AND
+    type = 'font'
+)
+
+-- The 10 most requested formats for each client and service.
+SELECT
+  client,
+  service,
+  format,
+  COUNT(*) AS count,
+  COUNT(DISTINCT url) AS count_secondary,
+  total,
+  total_secondary,
+  ROUND(COUNT(*) / total, @precision) AS proportion,
+  ROUND(COUNT(DISTINCT url) / total_secondary, @precision) AS proportion_secondary,
+  ROW_NUMBER() OVER (PARTITION BY client, service ORDER BY COUNT(*) DESC) AS rank
+FROM
+  font_requests
+GROUP BY
+  client,
+  service,
+  format,
+  total,
+  total_secondary
+QUALIFY
+  rank <= 10
+ORDER BY
+  client,
+  service,
+  count DESC
diff --git a/sql/2025/fonts/performance/fonts_service.sql b/sql/2025/fonts/performance/fonts_service.sql
new file mode 100644
index 00000000000..c8de825d282
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_service.sql
@@ -0,0 +1,47 @@
+-- Section: Performance
+-- Question: Which services are popular?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+page_totals AS (
+  -- The number of root pages for each date and client, used as the denominator.
+  SELECT
+    date,
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    is_root_page AND
+    date IN UNNEST(@dates)
+  GROUP BY
+    date,
+    client
+)
+
+-- The number of root pages requesting fonts from each service.
+SELECT
+  date,
+  client,
+  SERVICE(url) AS service,
+  COUNT(DISTINCT page) AS count,
+  total,
+  ROUND(COUNT(DISTINCT page) / total, @precision) AS proportion
+FROM
+  `httparchive.crawl.requests`
+INNER JOIN
+  page_totals
+USING (date, client)
+WHERE
+  date IN UNNEST(@dates) AND
+  is_root_page AND
+  type = 'font'
+GROUP BY
+  date,
+  client,
+  service,
+  total
+ORDER BY
+  date,
+  client,
+  count DESC
diff --git a/sql/2025/fonts/performance/fonts_services.sql b/sql/2025/fonts/performance/fonts_services.sql
new file mode 100644
index 00000000000..5098303d778
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_services.sql
@@ -0,0 +1,70 @@
+-- Section: Performance
+-- Question: Which service combinations are popular?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+page_services AS (
+  -- The sorted, comma-separated list of distinct services used by each page.
+  SELECT
+    date,
+    client,
+    page,
+    STRING_AGG(DISTINCT SERVICE(url), ', ' ORDER BY SERVICE(url)) AS services
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date IN UNNEST(@dates) AND
+    is_root_page AND
+    type = 'font'
+  GROUP BY
+    date,
+    client,
+    page
+),
+
+combinations AS (
+  -- The number of pages using each combination of services.
+  SELECT
+    date,
+    client,
+    services,
+    COUNT(DISTINCT page) AS count
+  FROM
+    page_services
+  GROUP BY
+    date,
+    client,
+    services
+),
+
+page_totals AS (
+  -- The number of root pages for each date and client, used as the denominator.
+  SELECT
+    date,
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date IN UNNEST(@dates) AND
+    is_root_page
+  GROUP BY
+    date,
+    client
+)
+
+SELECT
+  date,
+  client,
+  services,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  combinations
+INNER JOIN
+  page_totals
+USING (date, client)
+ORDER BY
+  date,
+  client,
+  count DESC
diff --git a/sql/2025/fonts/performance/fonts_size.sql b/sql/2025/fonts/performance/fonts_size.sql
new file mode 100644
index 00000000000..e7b5b53acf0
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_size.sql
@@ -0,0 +1,38 @@
+-- Section: Performance
+-- Question: What is the distribution of the file size?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+fonts AS (
+  -- One row per client and font URL with the response body size of an
+  -- arbitrary request for that URL.
+  SELECT
+    client,
+    url,
+    SAFE.INT64(ANY_VALUE(summary).respBodySize) AS size
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page AND
+    type = 'font' AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+)
+
+-- Quantiles are computed in steps of 0.1 percent, hence the factor of 10.
+SELECT
+  client,
+  percentile,
+  COUNT(*) AS count,
+  CAST(APPROX_QUANTILES(size, 1000)[OFFSET(percentile * 10)] AS INT64) AS size
+FROM
+  fonts
+CROSS JOIN
+  UNNEST([10, 25, 50, 75, 90, 99]) AS percentile
+GROUP BY
+  client,
+  percentile
+ORDER BY
+  client,
+  percentile
diff --git a/sql/2025/fonts/performance/fonts_size_by_country.sql b/sql/2025/fonts/performance/fonts_size_by_country.sql
new file mode 100644
index 00000000000..57c8e07a2a6
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_size_by_country.sql
@@ -0,0 +1,52 @@
+-- Section: Performance
+-- Question: What is the distribution of the file size broken down by country?
+-- Normalization: Requests (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+countries AS (
+  -- The distinct combinations of client, domain, and country for the month
+  -- of the crawl; any device other than desktop is treated as mobile.
+  SELECT
+    IF(device = 'desktop', 'desktop', 'mobile') AS client,
+    NET.HOST(origin) AS domain,
+    `chrome-ux-report`.experimental.GET_COUNTRY(country_code) AS country
+  FROM
+    `chrome-ux-report.materialized.country_summary`
+  WHERE
+    yyyymm = CAST(FORMAT_DATE('%Y%m', @date) AS INT64)
+  GROUP BY
+    client,
+    domain,
+    country
+),
+
+font_requests AS (
+  -- One row per parsed font request made by a root page.
+  SELECT
+    client,
+    NET.HOST(page) AS domain,
+    SAFE.INT64(summary.respBodySize) AS size
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page AND
+    type = 'font' AND
+    IS_PARSED(payload)
+)
+
+-- The element at offset 500 out of 1000 quantiles is the median.
+SELECT
+  client,
+  country,
+  COUNT(*) AS count,
+  CAST(APPROX_QUANTILES(size, 1000)[OFFSET(500)] AS INT64) AS size
+FROM
+  font_requests
+INNER JOIN
+  countries
+USING (client, domain)
+GROUP BY
+  client,
+  country
+ORDER BY
+  client,
+  country
diff --git a/sql/2025/fonts/performance/fonts_size_by_format.sql b/sql/2025/fonts/performance/fonts_size_by_format.sql
new file mode 100644
index 00000000000..32ccfc3b1f5
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_size_by_format.sql
@@ -0,0 +1,60 @@
+-- Section: Performance
+-- Question: What is the distribution of the file size broken down by format?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+fonts AS (
+  -- One row per client and font URL with the format and size taken from an
+  -- arbitrary request for that URL.
+  SELECT
+    client,
+    url,
+    FILE_FORMAT(STRING(ANY_VALUE(summary).ext), STRING(ANY_VALUE(summary).mimeType)) AS format,
+    SAFE.INT64(ANY_VALUE(summary).respBodySize) AS size
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page AND
+    type = 'font' AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+),
+
+format_ranks AS (
+  -- Formats ranked by the number of distinct fonts for each client.
+  SELECT
+    client,
+    format,
+    ROW_NUMBER() OVER (PARTITION BY client ORDER BY COUNT(DISTINCT url) DESC) AS rank
+  FROM
+    fonts
+  GROUP BY
+    client,
+    format
+)
+
+-- The size percentiles for the 10 most common formats of each client.
+SELECT
+  client,
+  format,
+  percentile,
+  COUNT(DISTINCT url) AS count,
+  CAST(APPROX_QUANTILES(size, 1000)[OFFSET(percentile * 10)] AS INT64) AS size
+FROM
+  fonts
+CROSS JOIN
+  UNNEST([10, 25, 50, 75, 90, 99]) AS percentile
+INNER JOIN
+  format_ranks
+USING (client, format)
+WHERE
+  rank <= 10
+GROUP BY
+  client,
+  format,
+  rank,
+  percentile
+ORDER BY
+  client,
+  rank,
+  percentile
diff --git a/sql/2025/fonts/performance/fonts_size_by_service.sql b/sql/2025/fonts/performance/fonts_size_by_service.sql
new file mode 100644
index 00000000000..1cee769ac3b
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_size_by_service.sql
@@ -0,0 +1,66 @@
+-- Section: Performance
+-- Question: What is the distribution of the file size broken down by service?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+WITH
+fonts AS (
+  -- One row per client and font URL with the service, format, and size, the
+  -- latter two taken from an arbitrary request for that URL.
+  SELECT
+    client,
+    url,
+    SERVICE(url) AS service,
+    FILE_FORMAT(STRING(ANY_VALUE(summary).ext), STRING(ANY_VALUE(summary).mimeType)) AS format,
+    SAFE.INT64(ANY_VALUE(summary).respBodySize) AS size
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page AND
+    type = 'font' AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+),
+
+format_ranks AS (
+  -- Formats ranked by the number of distinct fonts for each client and service.
+  SELECT
+    client,
+    service,
+    format,
+    ROW_NUMBER() OVER (PARTITION BY client, service ORDER BY COUNT(DISTINCT url) DESC) AS rank
+  FROM
+    fonts
+  GROUP BY
+    client,
+    service,
+    format
+)
+
+-- The size percentiles for the 10 most common formats of each client and service.
+SELECT
+  client,
+  service,
+  format,
+  percentile,
+  COUNT(DISTINCT url) AS count,
+  CAST(APPROX_QUANTILES(size, 1000)[OFFSET(percentile * 10)] AS INT64) AS size
+FROM
+  fonts
+CROSS JOIN
+  UNNEST([10, 25, 50, 75, 90, 99]) AS percentile
+INNER JOIN
+  format_ranks
+USING (client, service, format)
+WHERE
+  rank <= 10
+GROUP BY
+  client,
+  service,
+  format,
+  rank,
+  percentile
+ORDER BY
+  client,
+  service,
+  rank,
+  percentile
diff --git a/sql/2025/fonts/performance/fonts_size_by_table.sql b/sql/2025/fonts/performance/fonts_size_by_table.sql
new file mode 100644
index 00000000000..90751662c3f
--- /dev/null
+++ b/sql/2025/fonts/performance/fonts_size_by_table.sql
@@ -0,0 +1,55 @@
+-- Section: Performance
+-- Question: What is the distribution of the file size broken down by table?
+-- Normalization: Fonts (parsed only)
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Turns an object mapping table names to sizes into an array of name-size
+-- pairs. Anything that cannot be enumerated yields an empty array.
+CREATE TEMPORARY FUNCTION TABLES(table_sizes JSON)
+RETURNS ARRAY<STRUCT<name STRING, size INT64>>
+LANGUAGE js AS '''
+try {
+  return Object.entries(table_sizes).map(([name, size]) => ({ name, size }));
+} catch (e) {
+  return [];
+}
+''';
+
+WITH
+fonts AS (
+  -- One row per client and font URL with the tables taken from an arbitrary
+  -- request for that URL.
+  SELECT
+    client,
+    url,
+    TABLES(ANY_VALUE(payload)._font_details.table_sizes) AS tables
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    type = 'font' AND
+    is_root_page AND
+    IS_PARSED(payload)
+  GROUP BY
+    client,
+    url
+)
+
+SELECT
+  client,
+  table.name AS table,
+  percentile,
+  COUNT(0) AS count,
+  CAST(APPROX_QUANTILES(size, 1000)[OFFSET(percentile * 10)] AS INT64) AS size
+FROM
+  fonts,
+  UNNEST(tables) AS table,
+  UNNEST([10, 25, 50, 75, 90, 99]) AS percentile
+GROUP BY
+  client,
+  table,
+  percentile
+HAVING
+  -- Filter out spurious tables.
+  count > 1000
+ORDER BY
+  client,
+  table,
+  percentile
diff --git a/sql/2025/fonts/performance/pages_link_relationship.sql b/sql/2025/fonts/performance/pages_link_relationship.sql
new file mode 100644
index 00000000000..eda1b32daa7
--- /dev/null
+++ b/sql/2025/fonts/performance/pages_link_relationship.sql
@@ -0,0 +1,95 @@
+-- Section: Performance
+-- Question: What is the usage of link relationship in HTML?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Extracts the resource hints among the link nodes of a page. Each hint has
+-- the relationship name, the value of the as attribute, and the target URL.
+CREATE TEMPORARY FUNCTION HINTS(other JSON)
+RETURNS ARRAY<STRUCT<name STRING, type STRING, url STRING>>
+LANGUAGE js AS '''
+const names = new Set([
+  'dns-prefetch',
+  'preconnect',
+  'prefetch',
+  'preload',
+]);
+try {
+  return other.almanac['link-nodes'].nodes.reduce((results, node) => {
+    // A node without a textual rel is skipped; otherwise the exception would
+    // discard every hint of the page.
+    const name = typeof node.rel === 'string' ? node.rel.toLowerCase() : '';
+    if (names.has(name)) {
+      results.push({
+        'name': name,
+        'type': node.as,
+        'url': node.href
+      });
+    }
+    return results;
+  }, []);
+} catch (e) {
+  return [];
+}
+''';
+
+WITH
+hints AS (
+  -- The number of pages with font-related hints of each kind. A hint is
+  -- considered font related if it points at a font request of the page,
+  -- declares the font type, or targets a font service.
+  SELECT
+    pages.date,
+    pages.client,
+    hint.name AS hint,
+    COUNT(DISTINCT pages.page) AS count
+  FROM
+    `httparchive.crawl.pages` AS pages,
+    UNNEST(HINTS(custom_metrics.other)) AS hint
+  LEFT JOIN
+    `httparchive.crawl.requests` AS requests
+  ON
+    requests.date IN UNNEST(@dates) AND
+    requests.type = 'font' AND
+    requests.is_root_page AND
+    -- Only requests of the same crawl and client can belong to the page.
+    pages.date = requests.date AND
+    pages.client = requests.client AND
+    pages.page = requests.page AND
+    hint.url = requests.url
+  WHERE
+    pages.date IN UNNEST(@dates) AND
+    pages.is_root_page AND
+    (
+      requests.url IS NOT NULL OR
+      LOWER(hint.type) = 'font' OR
+      SERVICE(hint.url) != 'self-hosted'
+    )
+  GROUP BY
+    date,
+    client,
+    hint
+),
+
+pages AS (
+  -- The number of root pages for each date and client, used as the denominator.
+  SELECT
+    date,
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.pages`
+  WHERE
+    date IN UNNEST(@dates) AND
+    is_root_page
+  GROUP BY
+    date,
+    client
+)
+
+SELECT
+  date,
+  client,
+  hint,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  hints
+LEFT JOIN
+  pages
+USING (date, client)
+ORDER BY
+  date,
+  client,
+  count DESC
diff --git a/sql/2025/fonts/performance/styles_font_display.sql b/sql/2025/fonts/performance/styles_font_display.sql
new file mode 100644
index 00000000000..46eb25bb73b
--- /dev/null
+++ b/sql/2025/fonts/performance/styles_font_display.sql
@@ -0,0 +1,69 @@
+-- Section: Performance
+-- Question: What is the usage of font-display in CSS?
+-- Normalization: Pages
+
+-- Collects the font-display values declared in font-face rules. A value that
+-- contains none of the known keywords is reported as other.
+CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
+RETURNS ARRAY<STRING>
+LANGUAGE js
+OPTIONS (library = "gs://httparchive/lib/css-utils.js")
+AS '''
+try {
+  const values = ['auto', 'block', 'fallback', 'optional', 'swap'];
+  const result = [];
+  walkDeclarations(css, (declaration) => {
+    const value = declaration.value.toLowerCase();
+    result.push(values.find((other) => value.includes(other)) || 'other');
+  }, {
+    properties: 'font-display',
+    rules: (rule) => rule.type.toLowerCase() === 'font-face'
+  });
+  return result;
+} catch (e) {
+  return [];
+}
+''';
+
+WITH
+properties AS (
+  -- The number of pages using each value, with other turned into NULL.
+  SELECT
+    client,
+    NULLIF(property, 'other') AS property,
+    COUNT(DISTINCT page) AS count
+  FROM
+    `httparchive.crawl.parsed_css`,
+    UNNEST(PROPERTIES(css)) AS property
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client,
+    property
+),
+
+pages AS (
+  -- The number of root pages for each client, used as the denominator.
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client
+)
+
+SELECT
+  client,
+  property,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  properties
+JOIN
+  pages
+USING (client)
+ORDER BY
+  client,
+  count DESC
diff --git a/sql/2025/fonts/performance/styles_font_display_by_family.sql b/sql/2025/fonts/performance/styles_font_display_by_family.sql
new file mode 100644
index 00000000000..987584f2ff1
--- /dev/null
+++ b/sql/2025/fonts/performance/styles_font_display_by_family.sql
@@ -0,0 +1,95 @@
+-- Section: Performance
+-- Question: What is the usage of font-display in CSS broken down by family?
+-- Normalization: Pages
+
+-- INCLUDE https://github.com/HTTPArchive/almanac.httparchive.org/blob/main/sql/{year}/fonts/common.sql
+
+-- Collects the family and the font-display keyword of each font-face rule
+-- that declares font-display. The keyword is NULL when none is recognized.
+CREATE TEMPORARY FUNCTION PROPERTIES(css JSON)
+RETURNS ARRAY<STRUCT<family STRING, display STRING>>
+LANGUAGE js
+OPTIONS (library = ["gs://httparchive/lib/css-font-parser.js", "gs://httparchive/lib/css-utils.js"])
+AS '''
+try {
+  const values = ['auto', 'block', 'fallback', 'optional', 'swap'];
+  const result = [];
+  walkRules(css, (rule) => {
+    let found = false;
+    let family = undefined;
+    let display = undefined;
+    for (const declaration of rule.declarations) {
+      const name = declaration.property.toLowerCase();
+      if (name === 'font-family') {
+        family = parseFontFamilyProperty(declaration.value)[0];
+      }
+      if (name === 'font-display') {
+        found = true;
+        const value = declaration.value.toLowerCase();
+        display = values.find((other) => value.includes(other));
+      }
+      // Both pieces are known, so the remaining declarations are irrelevant.
+      if (family && display) {
+        break;
+      }
+    }
+    if (found) {
+      result.push({ family, display });
+    }
+  }, {
+    type: 'font-face'
+  });
+  return result;
+} catch (e) {
+  return [];
+}
+''';
+
+WITH
+properties AS (
+  -- The 10 families used on most pages for each client and font-display value.
+  SELECT
+    client,
+    display AS property,
+    FAMILY_INNER(family) AS family,
+    COUNT(DISTINCT page) AS count,
+    ROW_NUMBER() OVER (PARTITION BY client, display ORDER BY COUNT(DISTINCT page) DESC) AS rank
+  FROM
+    `httparchive.crawl.parsed_css`,
+    UNNEST(PROPERTIES(css)) AS property
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client,
+    property,
+    family
+  QUALIFY
+    rank <= 10
+),
+
+pages AS (
+  -- The number of root pages for each client, used as the denominator.
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total
+  FROM
+    `httparchive.crawl.requests`
+  WHERE
+    date = @date AND
+    is_root_page
+  GROUP BY
+    client
+)
+
+SELECT
+  client,
+  property,
+  family,
+  count,
+  total,
+  ROUND(count / total, @precision) AS proportion
+FROM
+  properties
+JOIN
+  pages
+USING (client)
+ORDER BY
+  client,
+  property,
+  count DESC
diff --git a/sql/2025/fonts/requirements.txt b/sql/2025/fonts/requirements.txt
new file mode 100644
index 00000000000..2a3e43fda2a
--- /dev/null
+++
b/sql/2025/fonts/requirements.txt @@ -0,0 +1,4 @@ +google-api-python-client +google-auth +google-cloud-bigquery +pandas