-
-
Notifications
You must be signed in to change notification settings - Fork 209
Expand file tree
/
Copy pathprint_page_pseudo_classes.sql
More file actions
84 lines (76 loc) · 1.73 KB
/
print_page_pseudo_classes.sql
File metadata and controls
84 lines (76 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#standardSQL
-- getSelectorParts(css): parses `css` as a JSON-encoded stylesheet AST and
-- returns the names of the "pseudo-class" selector nodes found while walking
-- rules with the {type: 'page'} option. Returns NULL if anything throws
-- (for example when `css` is not valid JSON).
CREATE TEMPORARY FUNCTION getSelectorParts(css STRING)
RETURNS ARRAY<STRING> LANGUAGE js
OPTIONS (library = "gs://httparchive/lib/css-utils.js")
AS '''
// walkRules, walkSelectors, incrementByKey, sortObject and parsel are not
// defined here; presumably they come from the css-utils.js library above.
try {
  const stylesheet = JSON.parse(css);

  // One tally per selector node type of interest; only "pseudo-class" is kept.
  const tallies = {
    "pseudo-class": {}
  };

  // NOTE(review): {type: 'page'} presumably limits the walk to @page rules;
  // confirm against css-utils.js.
  walkRules(stylesheet, rule => {
    walkSelectors(rule, selector => {
      const selectorAst = parsel.parse(selector, {list: false});

      parsel.walk(selectorAst, node => {
        if (node.type in tallies) {
          incrementByKey(tallies[node.type], node.name);
        }
      }, {subtree: true});
    });
  }, {type: 'page'});

  // sortObject presumably reorders the entries; it fixes the order of the
  // names returned below.
  Object.keys(tallies).forEach(type => {
    tallies[type] = sortObject(tallies[type]);
  });

  // Keep only the names, dropping any entry whose tally is not a number.
  const names = [];
  for (const [name, count] of Object.entries(tallies['pseudo-class'])) {
    if (!isNaN(count)) {
      names.push(name);
    }
  }
  return names;
} catch (e) {
  // Best effort: a stylesheet that cannot be processed contributes nothing.
  return null;
}
''';
-- Reports, per client and pseudo-class name returned by getSelectorParts, how
-- many pages use it and what share of that client's pages this represents.

-- Row count of the 2022_07_01 summary_pages tables, keyed by table suffix.
WITH totals AS (
  SELECT
    _TABLE_SUFFIX AS client,
    COUNT(0) AS total_pages
  FROM
    `httparchive.summary_pages.2022_07_01_*` -- noqa: CV09
  GROUP BY
    client
),

-- One row per distinct (client, page, pseudo_class) combination.
page_pseudo_classes AS (
  SELECT DISTINCT
    client,
    page,
    pseudo_class
  FROM
    `httparchive.almanac.parsed_css`
  -- CROSS JOIN drops rows whose UDF result is NULL or empty. The previous
  -- LEFT JOIN kept them only for the WHERE clause below to discard them.
  CROSS JOIN
    UNNEST(getSelectorParts(css)) AS pseudo_class
  WHERE
    date = '2022-07-01' AND
    -- Defensive: guards against NULL elements inside the returned array.
    pseudo_class IS NOT NULL
)

SELECT
  client,
  pseudo_class,
  COUNT(DISTINCT page) AS pages,
  ANY_VALUE(total_pages) AS total_pages,
  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
  -- NOTE(review): page_pseudo_classes is DISTINCT on (client, page,
  -- pseudo_class), so freq equals pages whenever page is non-NULL. Confirm
  -- whether freq was meant to count every occurrence instead.
  COUNT(0) AS freq,
  SUM(COUNT(0)) OVER (PARTITION BY client) AS total_freq,
  COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct_freq
FROM
  page_pseudo_classes
INNER JOIN
  totals
USING (client)
GROUP BY
  client,
  pseudo_class
ORDER BY
  pct_pages DESC,
  -- Tie-breakers keep the output order deterministic between runs.
  client,
  pseudo_class