Skip to content

Commit 864fddd

Browse files
committed
formatting
1 parent 9ab94bd commit 864fddd

3 files changed

Lines changed: 59 additions & 56 deletions

File tree

sql/2025/privacy/tracker_technologies_top.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,5 @@ FROM `httparchive.crawl.pages`,
2929
ANY_VALUE(pct_websites) AS pct
3030
FOR client IN ('desktop', 'mobile')
3131
)
32-
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop,
32+
|> RENAME pct_mobile AS mobile, pct_desktop AS desktop
3333
|> ORDER BY websites_count_desktop + websites_count_mobile DESC

sql/util/bq_to_sheets.ipynb

Lines changed: 58 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@
9292
"name": "stdout",
9393
"output_type": "stream",
9494
"text": [
95-
"✓ Connected to spreadsheet with 26 existing sheets\n"
95+
"✓ Connected to spreadsheet with 25 existing sheets\n"
9696
]
9797
}
9898
],
@@ -122,7 +122,7 @@
122122
},
123123
{
124124
"cell_type": "code",
125-
"execution_count": null,
125+
"execution_count": 8,
126126
"metadata": {
127127
"cellView": "form",
128128
"colab": {
@@ -137,53 +137,61 @@
137137
"name": "stdout",
138138
"output_type": "stream",
139139
"text": [
140-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
141-
"| Query | TB Billed | Sheet | Status/Skip Reason |\n",
142-
"+===========================================================================+===============+===============+==========================+\n",
143-
"| _cname_domains_top.sql | | | Filename filter mismatch |\n",
144-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
145-
"| bounce_domains_top.sql | | | Filename filter mismatch |\n",
146-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
147-
"| client_hints_top.sql | | | Filename filter mismatch |\n",
148-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
149-
"| client_hints_usage.sql | | | Filename filter mismatch |\n",
150-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
151-
"| cookies_first_party_top.sql | | | Filename filter mismatch |\n",
152-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
153-
"| cookies_third_party_top.sql | | | Filename filter mismatch |\n",
154-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
155-
"| dnt_usage.sql | | | Filename filter mismatch |\n",
156-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
157-
"| fingerprinting_top.sql | | | Filename filter mismatch |\n",
158-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
159-
"| iab_tcf_v2_cmps_top.sql | | | Filename filter mismatch |\n",
160-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
161-
"| iab_tcf_v2_countries_top.sql | | | Filename filter mismatch |\n",
162-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
163-
"| iab_usage.sql | | | Filename filter mismatch |\n",
164-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
165-
"| iab_usp_strings_top.sql | | | Filename filter mismatch |\n",
166-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
167-
"| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n",
168-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
169-
"| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n",
170-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
171-
"| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n",
172-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
173-
"| referrer_policy_top.sql | | | Filename filter mismatch |\n",
174-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
175-
"| referrer_policy_usage.sql | | | Filename filter mismatch |\n",
176-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
177-
"| related_origin_trials_top.sql | | | Filename filter mismatch |\n",
178-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
179-
"| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n",
180-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
181-
"| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n",
182-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
183-
"| tracker_categories_top.sql | | | Filename filter mismatch |\n",
184-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n",
185-
"| tracker_distribution.sql | Processing... | Processing... | Processing... |\n",
186-
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n"
140+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
141+
"| Query | TB Billed | Sheet | Status/Skip Reason |\n",
142+
"+============================================================================+=============+==========================+==========================+\n",
143+
"| _cname_domains_top.sql | | | Filename filter mismatch |\n",
144+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
145+
"| _number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n",
146+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
147+
"| _number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n",
148+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
149+
"| _privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n",
150+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
151+
"| _top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n",
152+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
153+
"| _top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n",
154+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
155+
"| bounce_domains_top.sql | 5.131 | Bounce Domains Top | ✓ Uploaded |\n",
156+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
157+
"| client_hints_top.sql | 1.338 | Client Hints Top | ✓ Uploaded |\n",
158+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
159+
"| client_hints_usage.sql | | | Filename filter mismatch |\n",
160+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
161+
"| cookies_first_party_top.sql | | | Filename filter mismatch |\n",
162+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
163+
"| cookies_third_party_top.sql | | | Filename filter mismatch |\n",
164+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
165+
"| dnt_usage.sql | | | Filename filter mismatch |\n",
166+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
167+
"| fingerprinting_top.sql | | | Filename filter mismatch |\n",
168+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
169+
"| iab_tcf_v2_cmps_top.sql | 0.02 | Iab Tcf V2 Cmps Top | ✓ Uploaded |\n",
170+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
171+
"| iab_tcf_v2_countries_top.sql | 0.02 | Iab Tcf V2 Countries Top | ✓ Uploaded |\n",
172+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
173+
"| iab_usage.sql | | | Filename filter mismatch |\n",
174+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
175+
"| iab_usp_strings_top.sql | | | Filename filter mismatch |\n",
176+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
177+
"| referrer_policy_top.sql | | | Filename filter mismatch |\n",
178+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
179+
"| referrer_policy_usage.sql | | | Filename filter mismatch |\n",
180+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
181+
"| related_origin_trials_top.sql | | | Filename filter mismatch |\n",
182+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
183+
"| tracker_categories_top.sql | | | Filename filter mismatch |\n",
184+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
185+
"| tracker_distribution.sql | | | Filename filter mismatch |\n",
186+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
187+
"| tracker_technologies_top.sql | | | Filename filter mismatch |\n",
188+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
189+
"| whotracksme_categories_top.sql | | | Filename filter mismatch |\n",
190+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
191+
"| whotracksme_trackers_top.sql | | | Filename filter mismatch |\n",
192+
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n",
193+
"\n",
194+
"✓ Processed 25 queries\n"
187195
]
188196
}
189197
],
@@ -197,7 +205,7 @@
197205
"\n",
198206
"\n",
199207
"# Query filters and options\n",
200-
"filename_match = '(tracker_distribution).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n",
208+
"filename_match = '(iab_tcf_v2_cmps_top|iab_tcf_v2_countries_top|client_hints_top|bounce_domains_top).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n",
201209
"filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n",
202210
"dry_run = False # @param {type: \"boolean\"}\n",
203211
"overwrite_sheets = True # @param {type: \"boolean\"}\n",

sql/util/haveibeenpwned.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,19 @@
11
"""
22
Retrieves breach data from the Have I Been Pwned API and loads it into BigQuery.
3-
43
"""
54

65
import pandas as pd
76
import requests # pylint: disable=import-error
87
from bq_writer import bigquery, write_to_bq
98

10-
# Fetch breach data from API
119
response = requests.get("https://haveibeenpwned.com/api/v2/breaches", timeout=10)
1210
breaches = response.json()
1311
df = pd.DataFrame(breaches)
1412

15-
# Convert date fields
1613
df["BreachDate"] = pd.to_datetime(df["BreachDate"], errors="coerce")
1714
df["AddedDate"] = pd.to_datetime(df["AddedDate"], errors="coerce")
1815
df["ModifiedDate"] = pd.to_datetime(df["ModifiedDate"], errors="coerce")
1916

20-
# Define BigQuery schema
2117
schema = [
2218
bigquery.SchemaField("Name", "STRING"),
2319
bigquery.SchemaField("Title", "STRING"),
@@ -41,5 +37,4 @@
4137
bigquery.SchemaField("DisclosureUrl", "STRING"),
4238
]
4339

44-
# Write to BigQuery
4540
write_to_bq(df, "httparchive.almanac.breaches", schema, write_disposition="WRITE_TRUNCATE")

0 commit comments

Comments
 (0)