-
-
Notifications
You must be signed in to change notification settings - Fork 209
Expand file tree
/
Copy pathimage_dimension_popularity.sql
More file actions
55 lines (55 loc) · 1.15 KB
/
image_dimension_popularity.sql
File metadata and controls
55 lines (55 loc) · 1.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#standardSQL
# CSS-initiated image px dimension popularity
SELECT
client,
height,
width,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM (
SELECT
client,
page,
url AS img_url,
JSON_VALUE(payload, '$._initiator') AS css_url
FROM
`httparchive.almanac.requests`
WHERE
date = '2021-07-01' AND
type = 'image'
)
JOIN (
SELECT
client,
page,
url AS css_url
FROM
`httparchive.almanac.requests`
WHERE
date = '2021-07-01' AND
type = 'css'
)
USING (client, page, css_url)
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page,
JSON_EXTRACT_SCALAR(image, '$.url') AS img_url,
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalHeight') AS INT64) AS height,
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalWidth') AS INT64) AS width
FROM
`httparchive.pages.2021_07_01_*`,
UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._Images'), '$')) AS image
)
USING (client, page, img_url)
WHERE
height IS NOT NULL AND
width IS NOT NULL
GROUP BY
client,
height,
width
ORDER BY
pct DESC
LIMIT 500