Skip to content

Commit 7383f00

Browse files
authored
Add function to analyze CSS color formats
This SQL file defines a temporary function to analyze color formats in CSS, including handling various color representations and calculating their usage statistics.
1 parent 40b5d50 commit 7383f00

1 file changed

Lines changed: 232 additions & 0 deletions

File tree

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
#standardSQL
2+
CREATE TEMPORARY FUNCTION getColorFormats(css JSON)
3+
RETURNS ARRAY<STRUCT<name STRING, value INT64>>
4+
LANGUAGE js
5+
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
6+
AS r'''
7+
try {
8+
function compute(ast) {
9+
// Accumulator for every statistic gathered while walking the stylesheet.
// Objects that start empty are filled lazily, one key per distinct name seen.
const usage = {
  // Hex notations, keyed by the number of hex digits after "#".
  hex: {3: 0, 4: 0, 6: 0, 8: 0},

  // Color function calls, and the subset of calls that carry an alpha value.
  functions: {},
  alpha: {},

  // Named colors and system colors.
  keywords: {},
  system: {},

  // The two special keywords get plain counters.
  currentcolor: 0,
  transparent: 0,

  // Function calls whose arguments do / do not contain a comma.
  args: {
    commas: 0,
    nocommas: 0
  },

  // color() calls by color space, plus display-p3 colors split by whether
  // they fall inside the sRGB gamut.
  spaces: {},
  p3: {
    sRGB_in: 0,
    sRGB_out: 0
  }
};
24+
25+
// Named colors that are counted as keywords, in alphabetical order
// (one row per initial letter).
const keywords = [
  "aliceblue", "antiquewhite", "aqua", "aquamarine", "azure",
  "beige", "bisque", "black", "blanchedalmond", "blue", "blueviolet", "brown", "burlywood",
  "cadetblue", "chartreuse", "chocolate", "coral", "cornflowerblue", "cornsilk", "crimson", "cyan",
  "darkblue", "darkcyan", "darkgoldenrod", "darkgray", "darkgreen", "darkgrey", "darkkhaki", "darkmagenta", "darkolivegreen", "darkorange", "darkorchid", "darkred", "darksalmon", "darkseagreen", "darkslateblue", "darkslategray", "darkslategrey", "darkturquoise", "darkviolet", "deeppink", "deepskyblue", "dimgray", "dimgrey", "dodgerblue",
  "firebrick", "floralwhite", "forestgreen", "fuchsia",
  "gainsboro", "ghostwhite", "gold", "goldenrod", "gray", "green", "greenyellow", "grey",
  "honeydew", "hotpink",
  "indianred", "indigo", "ivory",
  "khaki",
  "lavender", "lavenderblush", "lawngreen", "lemonchiffon", "lightblue", "lightcoral", "lightcyan", "lightgoldenrodyellow", "lightgray", "lightgreen", "lightgrey", "lightpink", "lightsalmon", "lightseagreen", "lightskyblue", "lightslategray", "lightslategrey", "lightsteelblue", "lightyellow", "lime", "limegreen", "linen",
  "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumpurple", "mediumseagreen", "mediumslateblue", "mediumspringgreen", "mediumturquoise", "mediumvioletred", "midnightblue", "mintcream", "mistyrose", "moccasin",
  "navajowhite", "navy",
  "oldlace", "olive", "olivedrab", "orange", "orangered", "orchid",
  "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip", "peachpuff", "peru", "pink", "plum", "powderblue", "purple",
  "rebeccapurple", "red", "rosybrown", "royalblue",
  "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell", "sienna", "silver", "skyblue", "slateblue", "slategray", "slategrey", "snow", "springgreen", "steelblue",
  "tan", "teal", "thistle", "tomato", "turquoise",
  "violet",
  "wheat", "white", "whitesmoke",
  "yellow", "yellowgreen"
];
39+
40+
// System color names that are counted, spelled in the mixed case used as the
// reporting key.
const system = [
  "ActiveBorder", "ActiveCaption", "AppWorkspace",
  "Background",
  "ButtonFace", "ButtonHighlight", "ButtonShadow", "ButtonText",
  "CaptionText",
  "GrayText",
  "Highlight", "HighlightText",
  "InactiveBorder", "InactiveCaption", "InactiveCaptionText",
  "InfoBackground", "InfoText",
  "Menu", "MenuText",
  "Scrollbar",
  "ThreeDDarkShadow", "ThreeDFace", "ThreeDHighlight", "ThreeDLightShadow", "ThreeDShadow",
  "Window", "WindowFrame", "WindowText"
];
45+
46+
// Whole-word, case-insensitive patterns for the two name lists. Both are global
// because they are consumed through String.prototype.matchAll.
// The lookbehind rejects a name that directly follows a "-", so the "red" in
// e.g. var(--color-red) is not counted as a color keyword.
const keywordAlternation = keywords.join("|");
const systemAlternation = system.join("|");

const keywordRegex = new RegExp(`\\b(?<!\-)(?:${keywordAlternation})\\b`, "gi");
const systemRegex = new RegExp(`\\b(?<!\-)(?:${systemAlternation})\\b`, "gi");
49+
// Matches the complete name of a CSS color function, case-insensitively.
// Deliberately NOT global: the pattern is anchored to the whole name, and a /g
// regex remembers `lastIndex` between .test()/.exec() calls. Reusing one shared
// global instance to check many names makes the check right after each
// successful match start past the "^" anchor and fail.
const functionNames = /^(?:rgba?|hsla?|color|lab|lch|hwb)$/i;
50+
51+
function countMatches(haystack, needle) {
52+
let ret = 0;
53+
54+
for (let match of haystack.matchAll(needle)) {
55+
ret++;
56+
}
57+
58+
return ret;
59+
}
60+
61+
/**
 * Report whether a display-p3 color can be represented in sRGB.
 *
 * @param {number[]} coords display-p3 RGB components, expected in the range [0-1]
 * @returns {boolean} true when every converted component lies within [0, 1]
 */
function P3inSRGB(coords) {
  const linearP3 = linearize_p3(coords);

  // The sRGB transfer function is intentionally not applied afterwards:
  // it does not change whether a value is out of gamut.
  for (const component of lin_P3_to_sRGB(linearP3)) {
    const inRange = component >= 0 && component <= 1;

    if (!inRange) {
      return false;
    }
  }

  return true;
}
69+
70+
/**
 * Undo gamma correction on display-p3 components to obtain linear-light values.
 *
 * @param {number[]} P3 display-p3 RGB components, expected in the range [0-1]
 * @returns {number[]} a new array with the linear-light value of each component
 */
function linearize_p3 (P3) {
  // Piecewise curve: a linear segment below the 0.04045 threshold,
  // a 2.4 power curve from there on.
  const toLinear = channel =>
    channel < 0.04045 ? channel / 12.92 : ((channel + 0.055) / 1.055) ** 2.4;

  return P3.map(toLinear);
}
80+
81+
/**
 * Convert linear-light display-p3 RGB to linear-light sRGB.
 *
 * The display-p3 -> CIE XYZ and CIE XYZ -> sRGB steps are both linear, so they
 * are pre-multiplied into the single 3x3 matrix below.
 *
 * @param {number[]} linP3 linear-light display-p3 [r, g, b], expected in [0-1]
 * @returns {number[]} linear-light sRGB [r, g, b]; values outside [0, 1] are
 *   possible and are not clamped
 */
function lin_P3_to_sRGB (linP3) {
  const [red, green, blue] = linP3;

  const p3ToSrgb = [
    [1.2247452561927687, -0.22490435913073928, 1.8500279863609137e-8],
    [-0.04205792199232122, 1.0420810071506164, -1.585738278880866e-8],
    [-0.019642279587426013, -0.07865491660582305, 1.098537193883219]
  ];

  // One output channel per matrix row.
  return p3ToSrgb.map(([fromRed, fromGreen, fromBlue]) =>
    fromRed * red + fromGreen * green + fromBlue * blue
  );
}
94+
95+
// Visit every declaration whose property can hold a color (see the `properties`
// filter at the bottom) and tally the color syntaxes found in its value.
//
// This code lives inside a raw SQL string (r'''...'''), so backslashes reach
// the JavaScript engine unchanged: regex literals here must use single
// backslashes (\b, \s). A doubled backslash would match a literal "\".
walkDeclarations(ast, ({value}) => {
  // Very long values are skipped entirely.
  if (value.length > 1000) return;

  // First blank out url() references to avoid them mucking the results.
  // Each call is replaced by "url()" padded with spaces to the original length,
  // so the positions reported for the remaining calls stay valid.
  for (const f of extractFunctionCalls(value, {names: "url"})) {
    const [start, end] = f.pos;
    value = value.substring(0, start) + "url()" + " ".repeat(end - start - 5) + value.substring(end);
  }

  // Hex colors by digit count. The trailing \b keeps the four lengths disjoint
  // (e.g. #aabbcc is not also counted as a 3-digit color).
  usage.hex[3] += countMatches(value, /#[a-f0-9]{3}\b/gi);
  usage.hex[4] += countMatches(value, /#[a-f0-9]{4}\b/gi);
  usage.hex[6] += countMatches(value, /#[a-f0-9]{6}\b/gi);
  usage.hex[8] += countMatches(value, /#[a-f0-9]{8}\b/gi);

  for (const f of extractFunctionCalls(value, {names: functionNames})) {
    const {args} = f;
    // Function names are matched case-insensitively, so normalize the name
    // before using it as a key; otherwise RGBA(...) would be tallied separately
    // and skip the alpha detection below.
    const name = f.name.toLowerCase();

    incrementByKey(usage.functions, name);
    incrementByKey(usage.args, args.includes(",") ? "commas" : "nocommas");

    switch (name) {
      case "rgba":
      case "hsla":
        // The function name implies that they use alpha.
        incrementByKey(usage.alpha, name);
        break;
      case "color":
        // color() starts with a color-space identifier, so a plain
        // three-coordinate color already has four tokens. Only the "/" syntax
        // signals alpha here.
        if (args.includes("/")) {
          incrementByKey(usage.alpha, name);
        }
        break;
      case "rgb":
      case "hsl":
      case "lab":
      case "lch":
      case "hwb":
        // Check if the function uses the special "/" syntax or fourth arg for alpha.
        if (args.includes("/") || args.trim().split(/[\s,/]+/).length === 4) {
          incrementByKey(usage.alpha, name);
        }
        break;
    }

    if (name === "color") {
      // Let's look at color() more closely: "<space> <numeric params>".
      const match = args.match(/^(?<space>[\w-]+)\s+(?<params>[-\d\s.%\/]+)$/);

      if (match) {
        const {space, params} = match.groups;

        incrementByKey(usage.spaces, space);

        if (space === "display-p3") {
          // Only the coordinates before an optional "/ alpha" are relevant, and
          // each one is scaled on its own: a percentage alpha must not rescale
          // coordinates that were written as plain numbers.
          const [coordText] = params.split("/");
          const coords = coordText.trim().split(/\s+/).map(c => parseFloat(c) / (c.endsWith("%") ? 100 : 1));

          usage.p3["sRGB_" + (P3inSRGB(coords) ? "in" : "out")]++;
        }
      }
    }
  }

  // Named colors are tallied under their lower-case spelling.
  for (const match of value.matchAll(keywordRegex)) {
    incrementByKey(usage.keywords, match[0].toLowerCase());
  }

  // System colors are tallied under the canonical spelling from `system`.
  for (const match of value.matchAll(systemRegex)) {
    incrementByKey(usage.system, system.find(kw => kw.toLowerCase() === match[0].toLowerCase()));
  }

  // currentColor / transparent increment the top-level counters of `usage`.
  for (const match of value.matchAll(/\b(?<!\-)(?:currentColor|transparent)\b/gi)) {
    incrementByKey(usage, match[0].toLowerCase());
  }
}, {
  properties: /^--|color$|^border|^background(-image)?$|\-shadow$|filter$/
});
166+
167+
usage.keywords = sortObject(usage.keywords);
168+
usage.system = sortObject(usage.system);
169+
170+
return usage;
171+
}
172+
173+
let color = compute(css);
174+
return [
175+
{name: 'rgb()', value: color.alpha.rgb},
176+
{name: 'rgba()', value: color.alpha.rgba},
177+
{name: 'hsl()', value: color.alpha.hsl},
178+
{name: 'hsla()', value: color.alpha.hsla},
179+
{name: 'color()', value: color.alpha.color},
180+
{name: 'lch()', value: color.alpha.lch},
181+
{name: 'hwb()', value: color.alpha.hwb},
182+
{name: 'lab()', value: color.alpha.lab}
183+
];
184+
} catch (e) {
185+
return [];
186+
}
187+
''';
188+
189+
-- Per client and color format: how many pages use it, and how often.
--
-- The denominator (totals) and the stylesheet scan below must describe the same
-- set of pages, otherwise pct_pages is not a real share. Both are therefore
-- restricted to the same crawl date, to root pages, and to rank <= 1,000,000.
WITH totals AS (
  SELECT
    client,
    COUNT(0) AS total_pages
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    rank <= 1000000 AND
    -- Remove here AND in the main query to include inner pages as well.
    is_root_page
  GROUP BY
    client
)

SELECT
  client,
  name AS format,
  -- Pages with at least one non-NULL count for this format.
  COUNT(DISTINCT page) AS pages,
  ANY_VALUE(total_pages) AS total_pages,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
  -- Occurrences of this format, and its share of all occurrences per client.
  SUM(value) AS freq,
  SUM(SUM(value)) OVER (PARTITION BY client) AS total,
  SUM(value) / SUM(SUM(value)) OVER (PARTITION BY client) AS pct
FROM (
  SELECT
    client,
    page,
    format.name,
    format.value
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(getColorFormats(css)) AS format
  WHERE
    date = '2025-07-01' AND
    is_root_page AND
    rank <= 1000000 AND
    -- Formats the stylesheet never used come back without a value.
    format.value IS NOT NULL
)
JOIN
  totals
USING (client)
GROUP BY
  client,
  format
ORDER BY
  pct DESC

0 commit comments

Comments
 (0)