Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
05c2be5
init
max-ostapenko Jan 12, 2026
73efcee
update chart formatting script
max-ostapenko Jan 13, 2026
40149d5
text
max-ostapenko Jan 14, 2026
79bfd6c
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 14, 2026
e7314a9
chapter + tools
max-ostapenko Jan 14, 2026
941752b
Optimised images with calibre/image-actions
github-actions[bot] Jan 14, 2026
44bf6f3
lint
max-ostapenko Jan 14, 2026
3f6c1e1
Merge branch 'privacy-markdown-2025' of https://github.com/HTTPArchiv…
max-ostapenko Jan 14, 2026
585fe72
lint
max-ostapenko Jan 14, 2026
8bb1809
lint
max-ostapenko Jan 14, 2026
92c584b
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 14, 2026
b6f4b85
readme for chart tools
max-ostapenko Jan 15, 2026
0f6ee30
revert changes
max-ostapenko Jan 15, 2026
eabf2c9
lint
max-ostapenko Jan 15, 2026
635cc18
fix
max-ostapenko Jan 15, 2026
aee4df9
CodeQL fix
max-ostapenko Jan 15, 2026
cfc3dbb
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
6a57098
Merge branch 'privacy-markdown-2025' of https://github.com/HTTPArchiv…
max-ostapenko Jan 15, 2026
c944424
jannis's suggestion
max-ostapenko Jan 15, 2026
59e77b8
Apply suggestion from @JannisBush
max-ostapenko Jan 15, 2026
ea1c907
nrllh as 3rd author
max-ostapenko Jan 15, 2026
160abc0
copilot review
max-ostapenko Jan 15, 2026
f06f529
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 15, 2026
b64a48a
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
abd35c4
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
1a35552
Metadata cleanup
tunetheweb Jan 15, 2026
8e8e86a
Internationalise links with no translations
tunetheweb Jan 15, 2026
6fa352f
Code formatting and smart quotes
tunetheweb Jan 15, 2026
23b356c
Headings
tunetheweb Jan 15, 2026
4cbc9b1
Big number
tunetheweb Jan 15, 2026
6d40b07
Misc edits
tunetheweb Jan 15, 2026
1aaec86
Chart title for Client Hints
tunetheweb Jan 15, 2026
08e3bfb
Merge branch 'main' into privacy-markdown-2025
max-ostapenko Jan 15, 2026
011f26b
Update src/content/en/2025/privacy.md
max-ostapenko Jan 15, 2026
c696dba
featured stats
max-ostapenko Jan 15, 2026
a2154e1
new images
max-ostapenko Jan 15, 2026
abdc018
Optimised images with calibre/image-actions
github-actions[bot] Jan 15, 2026
721ba3f
capitalized client names for charts
max-ostapenko Jan 15, 2026
c76e91d
lint
max-ostapenko Jan 15, 2026
1b74a27
mention the UA reduction upgrade
max-ostapenko Jan 15, 2026
98281db
fix
max-ostapenko Jan 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sql/2025/privacy/bounce_domains_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,6 @@ FROM bounce_sequences
ANY_VALUE(websites_pct) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop, cnt_mobile AS mobile_count, cnt_desktop AS desktop_count
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop, cnt_mobile AS mobile_count, cnt_desktop AS desktop_count
|> ORDER BY COALESCE(mobile_count, 0) + COALESCE(desktop_count, 0) DESC
|> LIMIT 100
2 changes: 1 addition & 1 deletion sql/2025/privacy/client_hints_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ GROUP BY client, value
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/client_hints_usage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,4 @@ GROUP BY all_accept_ch.client
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
4 changes: 1 addition & 3 deletions sql/2025/privacy/cookie_domains_third_party_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ GROUP BY client, cookie_domain
ANY_VALUE(pct_domains) AS pct_domains
FOR client IN ('desktop', 'mobile')
)
|> RENAME
pct_domains_mobile AS mobile,
pct_domains_desktop AS desktop
|> RENAME pct_domains_mobile AS Mobile, pct_domains_desktop AS Desktop
|> ORDER BY COALESCE(domain_count_mobile, 0) + COALESCE(domain_count_desktop, 0) DESC
|> LIMIT 1000
4 changes: 1 addition & 3 deletions sql/2025/privacy/cookies_first_party_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ GROUP BY client, cookie_name
ANY_VALUE(pct_domains) AS pct_domains
FOR client IN ('desktop', 'mobile')
)
|> RENAME
pct_domains_mobile AS mobile,
pct_domains_desktop AS desktop
|> RENAME pct_domains_mobile AS Mobile, pct_domains_desktop AS Desktop
|> ORDER BY COALESCE(domain_count_mobile, 0) + COALESCE(domain_count_desktop, 0) DESC
|> LIMIT 1000
4 changes: 1 addition & 3 deletions sql/2025/privacy/cookies_third_party_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ GROUP BY client, cookie_details
ANY_VALUE(pct_domains) AS pct_domains
FOR client IN ('desktop', 'mobile')
)
|> RENAME
pct_domains_mobile AS mobile,
pct_domains_desktop AS desktop
|> RENAME pct_domains_mobile AS Mobile, pct_domains_desktop AS Desktop
|> ORDER BY COALESCE(domain_count_mobile, 0) + COALESCE(domain_count_desktop, 0) DESC
|> LIMIT 1000
2 changes: 1 addition & 1 deletion sql/2025/privacy/dnt_usage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ FROM `httparchive.blink_features.usage`
ANY_VALUE(pct_urls) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY rank ASC
2 changes: 1 addition & 1 deletion sql/2025/privacy/fingerprinting_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ GROUP BY client, technology.technology
ANY_VALUE(websites_pct) AS websites_pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME websites_pct_mobile AS mobile, websites_pct_desktop AS desktop
|> RENAME websites_pct_mobile AS Mobile, websites_pct_desktop AS Desktop
|> ORDER BY websites_count_mobile + websites_count_desktop DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/iab_tcf_v2_cmps_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ GROUP BY client, cmpId
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/iab_tcf_v2_countries_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ FROM base_data
ANY_VALUE(pct_of_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/iab_usage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,5 @@ FROM aggregated,
ANY_VALUE(number_of_websites) AS websites_count
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY websites_count_desktop + websites_count_mobile DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/iab_usp_strings_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ FROM `httparchive.crawl.pages`
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/referrer_policy_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ FROM referrer_policy_custom_metrics
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/referrer_policy_usage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,5 @@ FROM aggregated,
ANY_VALUE(pct) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY mobile + desktop DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/related_origin_trials_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,5 @@ FROM aggregated
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/tracker_categories_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ FROM `httparchive.crawl.pages`,
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/tracker_distribution.sql
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,5 @@ FROM tracker_counts
ANY_VALUE(ccdf) AS ccdf
FOR client IN ('desktop', 'mobile')
)
|> RENAME ccdf_mobile AS mobile, ccdf_desktop AS desktop
|> RENAME ccdf_mobile AS Mobile, ccdf_desktop AS Desktop
|> ORDER BY number_of_trackers
2 changes: 1 addition & 1 deletion sql/2025/privacy/tracker_technologies_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ FROM `httparchive.crawl.pages`,
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/whotracksme_categories_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,5 @@ FROM aggregated
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
2 changes: 1 addition & 1 deletion sql/2025/privacy/whotracksme_trackers_top.sql
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ FROM `httparchive.crawl.requests`
ANY_VALUE(pct_websites) AS pct
FOR client IN ('desktop', 'mobile')
)
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
|> RENAME pct_mobile AS Mobile, pct_desktop AS Desktop
|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,39 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Authenticate the user\n",
"import sys\n",
"\n",
"if 'google.colab' in sys.modules:\n",
" from google.colab import auth\n",
"\n",
" auth.authenticate_user()\n",
" credentials = auth.get_user_credentials()\n",
"else:\n",
" import google.auth\n",
"\n",
" SCOPES = [\n",
" 'https://www.googleapis.com/auth/spreadsheets'\n",
" ]\n",
" credentials, project = google.auth.default(scopes=SCOPES)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "cOcbpC6qRou_"
},
"outputs": [],
"source": [
"from google.colab import auth\n",
"from googleapiclient.discovery import build\n",
"\n",
"sheets_service = build('sheets', 'v4', cache_discovery=False, credentials=credentials)\n",
"\n",
"def update_chart_size(spreadsheet_id, is_dry_run=False, target_width=600, target_height=371):\n",
" response = sheets_service.spreadsheets().get(spreadsheetId=spreadsheet_id, includeGridData=False).execute()\n",
" sheets = response.get('sheets', [])\n",
Expand Down Expand Up @@ -54,26 +78,22 @@
" print(f\"\"\"sheet: {sheet['properties']['title']},\n",
"chart: {chart['spec']['title']},\n",
"dimensions: {chart['position']['overlayPosition']['widthPixels']} x {chart['position']['overlayPosition'].get('heightPixels', 'N/A')}\n",
" \"\"\")\n",
"\n",
"# Authenticate the user\n",
"auth.authenticate_user()\n",
"sheets_service = build('sheets', 'v4', cache_discovery=False)"
" \"\"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {
"id": "vp1izUBSLxp9"
},
"outputs": [],
"source": [
"# Replace this with the ID of your Google Sheets file\n",
"SPREADSHEET_ID = '18r8cT6x9lPdM-rXvXjsqx84W7ZDdTDYGD59xr0UGOwg'\n",
"SPREADSHEET_ID = '1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4'\n",
"\n",
"# Call the function to update the chart width\n",
"update_chart_size(SPREADSHEET_ID, target_height=None, is_dry_run=True)"
"update_chart_size(SPREADSHEET_ID, target_height=None, is_dry_run=False)"
]
}
],
Expand All @@ -85,11 +105,21 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.14.2"
}
},
"nbformat": 4,
Expand Down
80 changes: 80 additions & 0 deletions sql/util/generate_figure_markup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import re
import os
from googleapiclient.discovery import build # pylint: disable=import-error
import google.auth # pylint: disable=import-error

# Configuration
# ID of the Google Sheets document whose sheets and charts are enumerated
# (passed as spreadsheetId to spreadsheets().get).
SPREADSHEET_ID = '1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4'
# Identifier interpolated into the ".../spreadsheets/d/e/<id>/pubchart" chart URL.
# NOTE(review): presumably the "publish to web" ID of the same spreadsheet - confirm.
PUBCHART_ID = '2PACX-1vRC5wrzy5NEsWNHn9w38RLsMURRScnP4jgjO1mDiVhsfFCY55tujlTUZhUaEWzmPtJza0QA7w8S4uK5'
SQL_DIR = '../2025/privacy' # Relative to this script's location

# OAuth scopes requested when loading application default credentials.
# NOTE(review): this script only calls spreadsheets().get; the narrower
# ".../auth/spreadsheets.readonly" scope may be sufficient - confirm before changing.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']


def get_sql_to_sheet_map(sql_dir):
    """Map derived sheet titles to the SQL file names they come from.

    Every ``*.sql`` entry in ``sql_dir`` is turned into a title by replacing
    each ``.sql`` occurrence and each run of non-alphanumeric characters with
    a space, stripping the result and title-casing it, e.g.
    ``bounce_domains_top.sql`` -> ``Bounce Domains Top``.

    Args:
        sql_dir: Path of the directory to scan (not recursive).

    Returns:
        A dict of ``{sheet_name: filename}``. It is empty when ``sql_dir`` is
        not an existing directory; a message is printed in that case.
    """
    mapping = {}
    # isdir (rather than exists) so that a path pointing at a regular file is
    # reported here instead of raising NotADirectoryError in os.listdir().
    if not os.path.isdir(sql_dir):
        print(f"Directory not found: {sql_dir}")
        return mapping
    # Sorted so that, if two file names collapse to the same title, the winner
    # does not depend on the platform's directory listing order.
    for filename in sorted(os.listdir(sql_dir)):
        if filename.endswith(".sql"):
            sheet_name = re.sub(r'(\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()
            mapping[sheet_name] = filename
    return mapping


def generate_figure_markup(spreadsheet_id, sql_dir):
    """Print a ``figure_markup`` snippet for every chart in a spreadsheet.

    Loads application default credentials, fetches the spreadsheet metadata
    (without grid data) and, for each chart on each sheet, prints a
    ``{{ figure_markup(...) }}`` block followed by a blank line. The
    ``description`` field is always emitted empty.

    Args:
        spreadsheet_id: ID of the Google Sheets document to inspect.
        sql_dir: Directory of ``*.sql`` files used to resolve each sheet title
            to its ``sql_file`` value (see ``get_sql_to_sheet_map``).

    Returns:
        None. If credentials cannot be loaded or the service cannot be built,
        a message is printed and the function returns without output.
    """
    try:
        credentials, _ = google.auth.default(scopes=SCOPES)
        sheets_service = build('sheets', 'v4', cache_discovery=False, credentials=credentials)
    except Exception as e:
        print(f"Authentication failed: {e}")
        print("Please ensure you have application default credentials set up.")
        return

    sql_map = get_sql_to_sheet_map(sql_dir)
    # Secondary index for sheet titles whose capitalisation differs from the
    # str.title() form derived from the file name (e.g. acronyms).
    sql_map_folded = {name.casefold(): filename for name, filename in sql_map.items()}

    response = sheets_service.spreadsheets().get(spreadsheetId=spreadsheet_id, includeGridData=False).execute()
    sheets = response.get('sheets', [])

    for sheet in sheets:
        sheet_name = sheet['properties']['title']
        sheet_id = sheet['properties']['sheetId']
        charts = sheet.get('charts', [])
        if not charts:
            # Nothing to emit for this sheet, so skip the SQL lookup as well.
            continue

        # Exact title match first, then case-insensitive; if neither hits,
        # emit an explicit placeholder for the author to fill in by hand.
        sql_file = sql_map.get(sheet_name) or sql_map_folded.get(sheet_name.casefold())
        if not sql_file:
            sql_file = "TODO.sql"

        for chart in charts:
            title = chart['spec'].get('title', 'Untitled Chart')
            chart_id = chart['chartId']

            # Slugify for image name
            image_name = re.sub(r'[^a-z0-9]+', '-', title.lower()).strip('-') + ".png"

            # Construct markup. The continuation lines of the literal are
            # emitted verbatim, so they are deliberately not indented here.
            markup = f"""{{{{ figure_markup(
image="{image_name}",
caption="{title}",
description="",
chart_url="https://docs.google.com/spreadsheets/d/e/{PUBCHART_ID}/pubchart?oid={chart_id}&format=interactive",
sheets_gid="{sheet_id}",
sql_file="{sql_file}"
)
}}}}"""
            print(markup)
            print()


if __name__ == "__main__":
    # SQL_DIR is written relative to this file, so anchor it to the script's
    # own directory rather than to the current working directory.
    here = os.path.dirname(os.path.abspath(__file__))
    absolute_sql_dir = os.path.normpath(os.path.join(here, SQL_DIR))

    # Two header lines followed by one blank line, then the generated markup.
    print(
        f"Processing Spreadsheet: {SPREADSHEET_ID}",
        f"SQL Directory: {absolute_sql_dir}\n",
        sep="\n",
    )

    generate_figure_markup(SPREADSHEET_ID, absolute_sql_dir)
2 changes: 2 additions & 0 deletions sql/util/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
pandas==2.3.3
google-cloud-bigquery==3.40.0
google-api-python-client==2.188.0
google-auth==2.47.0
requests==2.32.5
tabulate==0.9.0
gspread==6.2.1
Expand Down
1 change: 1 addition & 0 deletions src/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ templates/sitemap.xml
static/html/
static/js/web-vitals.js
.coverage
eng.traineddata
Loading
Loading