|
| 1 | +#standardSQL |
| 2 | +CREATE TEMPORARY FUNCTION getP3Usage(css JSON) |
| 3 | +RETURNS ARRAY<STRUCT<name STRING, value INT64>> |
| 4 | +LANGUAGE js |
| 5 | +OPTIONS (library = "gs://httparchive/lib/css-utils.js") |
| 6 | +AS ''' |
| 7 | +try { |
| 8 | + function compute(ast) { |
| 9 | + let usage = { |
| 10 | + hex: { |
| 11 | + "3": 0, "4": 0, |
| 12 | + "6": 0, "8": 0 |
| 13 | + }, |
| 14 | + functions: {}, |
| 15 | + keywords: {}, |
| 16 | + system: {}, |
| 17 | + currentcolor: 0, |
| 18 | + transparent: 0, |
| 19 | + args: {commas: 0, nocommas: 0}, |
| 20 | + spaces: {}, |
| 21 | + p3: {sRGB_in: 0, sRGB_out: 0} |
| 22 | + }; |
| 23 | +
|
| 24 | + const keywords = [ |
| 25 | + "aliceblue", "antiquewhite", "aqua", "aquamarine", "azure", "beige", "bisque", "black", "blanchedalmond", "blue", "blueviolet", "brown", "burlywood", "cadetblue", "chartreuse", |
| 26 | + "chocolate", "coral", "cornflowerblue", "cornsilk", "crimson", "cyan", "darkblue", "darkcyan", "darkgoldenrod", "darkgray", "darkgreen", "darkgrey", "darkkhaki", "darkmagenta", |
| 27 | + "darkolivegreen", "darkorange", "darkorchid", "darkred", "darksalmon", "darkseagreen", "darkslateblue", "darkslategray", "darkslategrey", "darkturquoise", "darkviolet", |
| 28 | + "deeppink", "deepskyblue", "dimgray", "dimgrey", "dodgerblue", "firebrick", "floralwhite", "forestgreen", "fuchsia", "gainsboro", "ghostwhite", "gold", "goldenrod", "gray", |
| 29 | + "green", "greenyellow", "grey", "honeydew", "hotpink", "indianred", "indigo", "ivory", "khaki", "lavender", "lavenderblush", "lawngreen", "lemonchiffon", "lightblue", "lightcoral", |
| 30 | + "lightcyan", "lightgoldenrodyellow", "lightgray", "lightgreen", "lightgrey", "lightpink", "lightsalmon", "lightseagreen", "lightskyblue", "lightslategray", "lightslategrey", |
| 31 | + "lightsteelblue", "lightyellow", "lime", "limegreen", "linen", "magenta", "maroon", "mediumaquamarine", "mediumblue", "mediumorchid", "mediumpurple", "mediumseagreen", |
| 32 | + "mediumslateblue", "mediumspringgreen", "mediumturquoise", "mediumvioletred", "midnightblue", "mintcream", "mistyrose", "moccasin", "navajowhite", "navy", "oldlace", |
| 33 | + "olive", "olivedrab", "orange", "orangered", "orchid", "palegoldenrod", "palegreen", "paleturquoise", "palevioletred", "papayawhip", "peachpuff", "peru", "pink", "plum", |
| 34 | + "powderblue", "purple", "rebeccapurple", "red", "rosybrown", "royalblue", "saddlebrown", "salmon", "sandybrown", "seagreen", "seashell", "sienna", "silver", "skyblue", |
| 35 | + "slateblue", "slategray", "slategrey", "snow", "springgreen", "steelblue", "tan", "teal", "thistle", "tomato", "turquoise", "violet", "wheat", "white", "whitesmoke", |
| 36 | + "yellow", "yellowgreen" |
| 37 | + ]; |
| 38 | +
|
| 39 | + const system = [ |
| 40 | + "ActiveBorder", "ActiveCaption", "AppWorkspace", "Background", "ButtonFace", "ButtonHighlight", "ButtonShadow", "ButtonText", "CaptionText", |
| 41 | + "GrayText", "Highlight", "HighlightText", "InactiveBorder", "InactiveCaption", "InactiveCaptionText", "InfoBackground", "InfoText", |
| 42 | + "Menu", "MenuText", "Scrollbar", "ThreeDDarkShadow", "ThreeDFace", "ThreeDHighlight", "ThreeDLightShadow", "ThreeDShadow", "Window", "WindowFrame", "WindowText" |
| 43 | + ]; |
| 44 | +
|
| 45 | + // Lookbehind to prevent matching on e.g. var(--color-red) |
| 46 | + const keywordRegex = RegExp(`\\\\b(?<!\\-)(?:${keywords.join("|")})\\\\b`, "gi"); |
| 47 | + const systemRegex = RegExp(`\\\\b(?<!\\-)(?:${system.join("|")})\\\\b`, "gi"); |
| 48 | + const functionNames = /^(?:rgba?|hsla?|color|lab|lch|hwb)$/gi; |
| 49 | +
|
// Return the number of matches of `needle` in the string `haystack`.
// `needle` has to be a global (g-flagged) RegExp: String.prototype.matchAll
// throws a TypeError for a non-global one.
function countMatches(haystack, needle) {
  const hits = Array.from(haystack.matchAll(needle));

  return hits.length;
}
| 59 | +
|
// given an array of display-p3 RGB values in range [0-1],
// return true if inside sRGB gamut
//
// coords:  [r, g, b] gamma-encoded display-p3 components. Only the first
//          three entries are used; anything after them is ignored.
// epsilon: tolerance applied to both ends of the [0, 1] range check, so that
//          rounding error (e.g. white landing on 1.0000000000000002) does not
//          flip an in-gamut color to out-of-gamut.
// Returns false when any component is NaN.
function P3inSRGB(coords, epsilon = 0.000075) {
  const srgb = lin_P3_to_sRGB(linearize_p3(coords));
  // Note, we don't need to apply the sRGB transfer function
  // because it does not affect whether a value is out of gamut
  return srgb.every(c => c >= -epsilon && c <= 1 + epsilon);
}

// given an array of display-p3 RGB values in range [0-1],
// undo gamma correction to get linear light values
// (display-p3 uses the same transfer function as sRGB).
// Negative inputs are handled by mirroring the curve around zero, so the
// result keeps the sign of the input.
function linearize_p3 (P3) {
  return P3.map(val => {
    const abs = Math.abs(val);

    if (abs < 0.04045) {
      return val / 12.92;
    }

    return Math.sign(val) * ((abs + 0.055) / 1.055) ** 2.4;
  });
}

// given an array of linear-light display-p3 RGB values in range [0-1],
// convert to CIE XYZ and then to linear-light sRGB
// The two linear operations are combined into a single matrix.
// The matrix multiply is hard-coded, for efficiency
//
// Every row of the combined matrix sums to 1, so display-p3 white (1, 1, 1)
// maps to sRGB white. The blue input does not contribute to the red and green
// outputs, so those terms are omitted.
function lin_P3_to_sRGB (linP3) {
  const [r, g, b] = linP3;

  return [
    1.2249401762805598 * r + -0.2249401762805598 * g,
    -0.04205695470968816 * r + 1.0420569547096881 * g,
    -0.01963755459033443 * r + -0.07863604555063189 * g + 1.0982736001409663 * b
  ];
}
| 93 | + |
// Tally color syntax usage into `usage` (declared in the enclosing function)
// for every declaration whose property matches the `properties` filter below.
// walkDeclarations, extractFunctionCalls and incrementByKey are not defined in
// this file; presumably they come from the css-utils.js library — confirm.
// NOTE: backslashes in the regexes are doubled because this JavaScript is
// embedded in a SQL triple-quoted string.
walkDeclarations(ast, ({value}) => {
  // Skip very long values (more than 1000 characters)
  if (value.length > 1000) return;

  // First remove url() references to avoid them mucking the results.
  // The replacement is padded with spaces so the value keeps its length.
  for (const f of extractFunctionCalls(value, {names: "url"})) {
    const [start, end] = f.pos;
    value = value.substring(0, start) + "url()" + " ".repeat(end - start - 5) + value.substring(end);
  }

  usage.hex[3] += countMatches(value, /#[a-f0-9]{3}\\b/gi);
  usage.hex[4] += countMatches(value, /#[a-f0-9]{4}\\b/gi);
  usage.hex[6] += countMatches(value, /#[a-f0-9]{6}\\b/gi);
  usage.hex[8] += countMatches(value, /#[a-f0-9]{8}\\b/gi);

  for (const f of extractFunctionCalls(value, {names: functionNames})) {
    const {name, args} = f;

    incrementByKey(usage.functions, name);
    incrementByKey(usage.args, args.includes(",") ? "commas" : "nocommas");

    if (name !== "color") continue;

    // Let's look at color() more closely
    const match = args.match(/^(?<space>[\\w-]+)\\s+(?<params>[-\\d\\s.%\\/]+)$/);

    if (!match) continue;

    const {space, params} = match.groups;

    incrementByKey(usage.spaces, space);

    if (space === "display-p3") {
      // Drop the optional "/ alpha" part: it is not a color channel, and a
      // percentage alpha must not change how the channels are scaled.
      const channels = params.split("/")[0].trim().split(/\\s+/);
      // Scale each channel on its own: only values written with "%" are divided by 100.
      const coords = channels.map(c => c.endsWith("%") ? parseFloat(c) / 100 : parseFloat(c));

      // Only classify well-formed colors (exactly three numeric channels);
      // malformed ones would otherwise always be counted as out of gamut.
      if (coords.length === 3 && coords.every(c => Number.isFinite(c))) {
        usage.p3["sRGB_" + (P3inSRGB(coords) ? "in" : "out")]++;
      }
    }
  }

  for (const match of value.matchAll(keywordRegex)) {
    incrementByKey(usage.keywords, match[0].toLowerCase());
  }

  for (const match of value.matchAll(systemRegex)) {
    // Report the system color using the casing from the `system` list
    const found = match[0].toLowerCase();
    incrementByKey(usage.system, system.find(kw => kw.toLowerCase() === found));
  }

  // Lookbehind to prevent matching on e.g. var(--color-transparent)
  for (const match of value.matchAll(/\\b(?<!\\-)(?:currentColor|transparent)\\b/gi)) {
    incrementByKey(usage, match[0].toLowerCase());
  }
}, {
  properties: /^--|color$|^border|^background(-image)?$|\\-shadow$|filter$/
});
| 146 | +
|
| 147 | + usage.keywords = sortObject(usage.keywords); |
| 148 | + usage.system = sortObject(usage.system); |
| 149 | +
|
| 150 | + return usage; |
| 151 | + } |
| 152 | +
|
| 153 | + let color = compute(css); |
| 154 | + if (!color.p3.sRGB_in && !color.p3.sRGB_out) return []; |
| 155 | + return Object.entries(color.p3).map(([name, value]) => ({name, value})); |
| 156 | +} catch (e) { |
| 157 | + return []; |
| 158 | +} |
| 159 | +'''; |
| 160 | + |
-- Denominator: number of root pages per client among the top 1M ranked sites
-- of the 2025-07-01 crawl.
WITH totals AS (
  SELECT
    client,
    COUNT(0) AS total_pages
  FROM
    `httparchive.crawl.pages`
  WHERE
    date = '2025-07-01' AND
    rank <= 1000000 AND
    is_root_page -- remove if wanna look at home pages AND inner pages. Old tables only had home pages.
  GROUP BY
    client
)

-- One row per client and display-p3 bucket (sRGB_in / sRGB_out):
--   pages / pct_pages: distinct pages with at least one color in the bucket
--   freq:              number of occurrences in the bucket
--   total / pct:       occurrences across both buckets for the client, and the bucket's share
SELECT
  client,
  name AS p3,
  COUNT(DISTINCT page) AS pages,
  ANY_VALUE(total_pages) AS total_pages,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
  SUM(value) AS freq,
  SUM(SUM(value)) OVER (PARTITION BY client) AS total,
  SAFE_DIVIDE(SUM(value), SUM(SUM(value)) OVER (PARTITION BY client)) AS pct
FROM (
  SELECT
    client,
    page,
    p3.name,
    p3.value
  FROM
    `httparchive.crawl.parsed_css`,
    UNNEST(getP3Usage(css)) AS p3
  WHERE
    date = '2025-07-01' AND
    rank <= 1000000 AND
    is_root_page -- must match the totals CTE, otherwise inner pages inflate pages/freq against a root-page-only denominator
)
JOIN
  totals
USING (client)
GROUP BY
  client,
  p3
ORDER BY
  pct DESC
0 commit comments