Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ jobs:
VALIDATE_MARKDOWN: true
VALIDATE_PYTHON_PYLINT: true
VALIDATE_PYTHON_FLAKE8: true
VALIDATE_SQLFLUFF: true
VALIDATE_YAML: true

dependabot:
Expand Down Expand Up @@ -73,7 +72,6 @@ jobs:
steps.metadata.outputs.update-type == 'version-update:semver-minor'
) && (
contains(steps.metadata.outputs.dependency-names, 'prettier') ||
contains(steps.metadata.outputs.dependency-names, 'sqlfluff') ||
contains(steps.metadata.outputs.dependency-names, 'super-linter')
)
run: gh pr merge --admin --squash "$PR_URL"
Expand Down
50 changes: 44 additions & 6 deletions .github/workflows/lintsql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ on:
workflow_dispatch:
pull_request:
paths:
src/requirements.txt
- src/requirements.txt
- 'sql/**.sql'
jobs:
lint:
name: Lint SQL
Expand All @@ -23,9 +24,46 @@ jobs:
with:
python-version: '3.12'
- name: Lint SQL code
if: |
github.event_name == 'workflow_dispatch' ||
startsWith(github.event.pull_request.title,'Bump sqlfluff') == true
run: |
pip install -r src/requirements.txt
sqlfluff lint sql -p 4
pip install -r src/requirements.txt -q
if [ "${{ github.event_name }}" == "workflow_dispatch" ] || \
[[ "${{ github.event.pull_request.title }}" == Bump\ sqlfluff* ]]; then
# Lint all SQL files for workflow_dispatch or sqlfluff bumps
sqlfluff lint sql -p 4
else
# Lint only changed SQL files
git diff --name-only --diff-filter=ACMRT origin/${{ github.base_ref }}...HEAD \
| grep '\.sql$' \
| xargs -r sqlfluff lint
fi


dependabot:
name: Dependabot auto-merge
runs-on: ubuntu-latest
needs: lint
if: |
github.event.pull_request.user.login == 'dependabot[bot]' &&
github.repository == 'HTTPArchive/almanac.httparchive.org'

permissions:
contents: write
pull-requests: write

steps:
- name: Dependabot metadata
id: metadata
uses: dependabot/fetch-metadata@v2
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"

- name: Enable auto-merge for Dependabot PRs
if: |
(
steps.metadata.outputs.update-type == 'version-update:semver-patch' ||
steps.metadata.outputs.update-type == 'version-update:semver-minor'
) && contains(steps.metadata.outputs.dependency-names, 'sqlfluff')
run: gh pr merge --admin --squash "$PR_URL"
env:
PR_URL: ${{github.event.pull_request.html_url}}
GH_TOKEN: ${{secrets.GITHUB_TOKEN}}
34 changes: 15 additions & 19 deletions sql/2019/fonts/06_32.sql
Original file line number Diff line number Diff line change
@@ -1,23 +1,19 @@
#standardSQL
# 06_32: Top font hosts
SELECT
*
FROM (
SELECT
client,
NET.HOST(url) AS host,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct
FROM
`httparchive.almanac.requests`
WHERE
date = '2019-07-01' AND
type = 'font'
GROUP BY
client,
host
ORDER BY
freq / total DESC
)
client,
NET.HOST(url) AS host,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY client), 2) AS pct
FROM
`httparchive.almanac.requests`
WHERE
date = '2019-07-01' AND
type = 'font'
GROUP BY
client,
host
ORDER BY
freq / total DESC
LIMIT 100
Comment thread
tunetheweb marked this conversation as resolved.
96 changes: 46 additions & 50 deletions sql/2021/css/image_dimension_popularity.sql
Original file line number Diff line number Diff line change
@@ -1,59 +1,55 @@
#standardSQL
# CSS-initiated image px dimension popularity
SELECT
*
client,
height,
width,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM (
SELECT
client,
height,
width,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM (
SELECT
client,
page,
url AS img_url,
JSON_VALUE(payload, '$._initiator') AS css_url
FROM
`httparchive.almanac.requests`
WHERE
date = '2021-07-01' AND
type = 'image'
)
JOIN (
SELECT
client,
page,
url AS css_url
FROM
`httparchive.almanac.requests`
WHERE
date = '2021-07-01' AND
type = 'css'
)
USING (client, page, css_url)
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page,
JSON_EXTRACT_SCALAR(image, '$.url') AS img_url,
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalHeight') AS INT64) AS height,
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalWidth') AS INT64) AS width
FROM
`httparchive.pages.2021_07_01_*`,
UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._Images'), '$')) AS image
)
USING (client, page, img_url)
page,
url AS img_url,
JSON_VALUE(payload, '$._initiator') AS css_url
FROM
`httparchive.almanac.requests`
WHERE
height IS NOT NULL AND
width IS NOT NULL
GROUP BY
date = '2021-07-01' AND
type = 'image'
)
JOIN (
SELECT
client,
height,
width
ORDER BY
pct DESC
page,
url AS css_url
FROM
`httparchive.almanac.requests`
WHERE
date = '2021-07-01' AND
type = 'css'
)
USING (client, page, css_url)
JOIN (
SELECT
_TABLE_SUFFIX AS client,
url AS page,
JSON_EXTRACT_SCALAR(image, '$.url') AS img_url,
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalHeight') AS INT64) AS height,
SAFE_CAST(JSON_EXTRACT_SCALAR(image, '$.naturalWidth') AS INT64) AS width
FROM
`httparchive.pages.2021_07_01_*`,
UNNEST(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._Images'), '$')) AS image
)
LIMIT 500
USING (client, page, img_url)
WHERE
height IS NOT NULL AND
width IS NOT NULL
GROUP BY
client,
height,
width
ORDER BY
pct DESC
Comment thread
max-ostapenko marked this conversation as resolved.
LIMIT 500
Comment thread
tunetheweb marked this conversation as resolved.
Outdated
52 changes: 24 additions & 28 deletions sql/2021/css/keyframes_positions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,36 +24,32 @@ try {
''';

SELECT
*
FROM (
client,
position,
COUNT(DISTINCT page) AS pages,
ANY_VALUE(total) AS total_pages,
COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
FROM
`httparchive.almanac.parsed_css`,
UNNEST(getKeyframePositions(css)) AS position
JOIN (
SELECT
client,
position,
COUNT(DISTINCT page) AS pages,
ANY_VALUE(total) AS total_pages,
COUNT(DISTINCT page) / ANY_VALUE(total) AS pct_pages,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY client) AS total,
COUNT(0) / SUM(COUNT(0)) OVER (PARTITION BY client) AS pct
_TABLE_SUFFIX AS client,
COUNT(0) AS total
FROM
`httparchive.almanac.parsed_css`,
UNNEST(getKeyframePositions(css)) AS position
JOIN (
SELECT
_TABLE_SUFFIX AS client,
COUNT(0) AS total
FROM
`httparchive.summary_pages.2021_07_01_*`
GROUP BY
client
)
USING (client)
WHERE
date = '2021-07-01'
`httparchive.summary_pages.2021_07_01_*`
GROUP BY
client,
position
ORDER BY
pct DESC
client
)
USING (client)
WHERE
date = '2021-07-01'
GROUP BY
client,
position
ORDER BY
pct DESC
Comment thread
max-ostapenko marked this conversation as resolved.
LIMIT 500
2 changes: 1 addition & 1 deletion sql/2022/jamstack/jamstack_random_1000_urls.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ FROM
WHERE
methodology = '2022' AND
date = '2022-06-01'
LIMIT 1000
LIMIT 1000 -- noqa: AM09
2 changes: 1 addition & 1 deletion sql/2024/jamstack/hugo_astro_next.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
UNNEST(response_headers) AS header
WHERE
LOWER(header.name) = 'cache-control'
LIMIT 1
LIMIT 1 -- noqa: AM09
),
r'max-age=(\d+)'
) AS INT64
Expand Down
2 changes: 1 addition & 1 deletion sql/2024/jamstack/jamstack-overview.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
UNNEST(response_headers) AS header
WHERE
LOWER(header.name) = 'cache-control'
LIMIT 1
LIMIT 1 -- noqa: AM09
),
r'max-age=(\d+)'
) AS INT64
Expand Down
2 changes: 1 addition & 1 deletion sql/2024/jamstack/jamstack_distribution_by_rank.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
UNNEST(response_headers) AS header
WHERE
LOWER(header.name) = 'cache-control'
LIMIT 1
LIMIT 1 -- noqa: AM09
),
r'max-age=(\d+)'
) AS INT64
Expand Down
2 changes: 1 addition & 1 deletion sql/2024/jamstack/js_frameworks.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
UNNEST(response_headers) AS header
WHERE
LOWER(header.name) = 'cache-control'
LIMIT 1
LIMIT 1 -- noqa: AM09
),
r'max-age=(\d+)'
) AS INT64
Expand Down
2 changes: 1 addition & 1 deletion sql/2024/jamstack/paas.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
UNNEST(response_headers) AS header
WHERE
LOWER(header.name) = 'cache-control'
LIMIT 1
LIMIT 1 -- noqa: AM09
),
r'max-age=(\d+)'
) AS INT64
Expand Down
2 changes: 1 addition & 1 deletion sql/2024/jamstack/ssg.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CREATE TEMPORARY FUNCTION GET_MAX_AGE(response_headers ARRAY<STRUCT<name STRING,
UNNEST(response_headers) AS header
WHERE
LOWER(header.name) = 'cache-control'
LIMIT 1
LIMIT 1 -- noqa: AM09
),
r'max-age=(\d+)'
) AS INT64
Expand Down
2 changes: 1 addition & 1 deletion sql/2024/markup/content_encoding.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RETURNS STRING AS (
UNNEST(response_headers) AS header
WHERE
LOWER(header.name) = 'content-encoding'
LIMIT 1
LIMIT 1 -- noqa: AM09
)
);

Expand Down
1 change: 0 additions & 1 deletion sql/2024/sustainability/video_autoplay_values.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ WITH video_data AS (
WHERE
date = '2024-06-01' AND -- Updated date
is_root_page
LIMIT 10000 -- Limit the number of rows processed for faster testing
)

SELECT
Expand Down
Loading