From 6aebd3572f1fb1fea4cf7a1eddbfd1f8a14ec20a Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Fri, 1 Aug 2025 16:25:16 +0200 Subject: [PATCH 01/27] dates updated --- sql/2025/privacy/ccpa_most_common_phrases.sql | 31 ++ sql/2025/privacy/ccpa_prevalence.sql | 27 ++ .../privacy/cookies_top_first_party_names.sql | 37 ++ .../cookies_top_third_party_domains.sql | 37 ++ .../privacy/cookies_top_third_party_names.sql | 37 ++ .../privacy/easylist-tracker-detection.sql | 41 ++ .../fingerprinting_most_common_apis.sql | 36 ++ .../fingerprinting_most_common_scripts.sql | 23 + .../privacy/fingerprinting_script_count.sql | 21 + .../privacy/most_common_bounce_domains.sql | 89 ++++ sql/2025/privacy/most_common_client_hints.sql | 52 +++ .../most_common_cmps_for_iab_tcf_v2.sql | 27 ++ .../privacy/most_common_cname_domains.sql | 92 ++++ .../most_common_countries_for_iab_tcf_v2.sql | 43 ++ .../privacy/most_common_referrer_policy.sql | 65 +++ .../most_common_strings_for_iab_usp.sql | 27 ++ .../most_common_tracker_categories.sql | 65 +++ ...stered_by_third_parties_and_publishers.sql | 94 ++++ ...er_of_privacy_sandbox_attested_domains.sql | 44 ++ .../number_of_websites_per_technology.sql | 34 ++ ...er_of_websites_per_technology_category.sql | 22 + ..._of_websites_using_each_fingerprinting.sql | 32 ++ .../number_of_websites_with_client_hints.sql | 44 ++ .../privacy/number_of_websites_with_dnt.sql | 34 ++ .../privacy/number_of_websites_with_gpc.sql | 34 ++ .../privacy/number_of_websites_with_iab.sql | 61 +++ .../number_of_websites_with_nb_trackers.sql | 96 ++++ ...number_of_websites_with_referrerpolicy.sql | 88 ++++ ...of_websites_with_related_origin_trials.sql | 105 +++++ ..._of_websites_with_whotracksme_trackers.sql | 43 ++ ...doption-by-third-parties-by-publishers.sql | 150 +++++++ ...inations_registered_by_most_publishers.sql | 83 ++++ ...tions_registered_by_most_third_parties.sql | 83 ++++ sql/util/bq_to_sheets.ipynb | 418 
+++++++++--------- 34 files changed, 2006 insertions(+), 209 deletions(-) create mode 100644 sql/2025/privacy/ccpa_most_common_phrases.sql create mode 100644 sql/2025/privacy/ccpa_prevalence.sql create mode 100644 sql/2025/privacy/cookies_top_first_party_names.sql create mode 100644 sql/2025/privacy/cookies_top_third_party_domains.sql create mode 100644 sql/2025/privacy/cookies_top_third_party_names.sql create mode 100644 sql/2025/privacy/easylist-tracker-detection.sql create mode 100644 sql/2025/privacy/fingerprinting_most_common_apis.sql create mode 100644 sql/2025/privacy/fingerprinting_most_common_scripts.sql create mode 100644 sql/2025/privacy/fingerprinting_script_count.sql create mode 100644 sql/2025/privacy/most_common_bounce_domains.sql create mode 100644 sql/2025/privacy/most_common_client_hints.sql create mode 100644 sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql create mode 100644 sql/2025/privacy/most_common_cname_domains.sql create mode 100644 sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql create mode 100644 sql/2025/privacy/most_common_referrer_policy.sql create mode 100644 sql/2025/privacy/most_common_strings_for_iab_usp.sql create mode 100644 sql/2025/privacy/most_common_tracker_categories.sql create mode 100644 sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql create mode 100644 sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql create mode 100644 sql/2025/privacy/number_of_websites_per_technology.sql create mode 100644 sql/2025/privacy/number_of_websites_per_technology_category.sql create mode 100644 sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql create mode 100644 sql/2025/privacy/number_of_websites_with_client_hints.sql create mode 100644 sql/2025/privacy/number_of_websites_with_dnt.sql create mode 100644 sql/2025/privacy/number_of_websites_with_gpc.sql create mode 100644 sql/2025/privacy/number_of_websites_with_iab.sql create mode 100644 
sql/2025/privacy/number_of_websites_with_nb_trackers.sql create mode 100644 sql/2025/privacy/number_of_websites_with_referrerpolicy.sql create mode 100644 sql/2025/privacy/number_of_websites_with_related_origin_trials.sql create mode 100644 sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql create mode 100644 sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql create mode 100644 sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql create mode 100644 sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql diff --git a/sql/2025/privacy/ccpa_most_common_phrases.sql b/sql/2025/privacy/ccpa_most_common_phrases.sql new file mode 100644 index 00000000000..ae38070672d --- /dev/null +++ b/sql/2025/privacy/ccpa_most_common_phrases.sql @@ -0,0 +1,31 @@ +WITH pages_with_phrase AS ( + SELECT + client, + rank_grouping, + page, + COUNT(DISTINCT page) OVER (PARTITION BY client, rank_grouping) AS total_pages_with_phrase_in_rank_group, + JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases') AS ccpa_link_phrases + FROM `httparchive.crawl.pages`, --TABLESAMPLE SYSTEM (0.01 PERCENT) + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping + WHERE date = '2025-07-01' AND + is_root_page = true AND + rank <= rank_grouping AND + array_length(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases')) > 0 +) + +SELECT + client, + rank_grouping, + link_phrase, + COUNT(DISTINCT page) AS num_pages, + COUNT(DISTINCT page) / any_value(total_pages_with_phrase_in_rank_group) AS pct_pages +FROM pages_with_phrase, + UNNEST(ccpa_link_phrases) AS link_phrase +GROUP BY + link_phrase, + rank_grouping, + client +ORDER BY + rank_grouping, + client, + num_pages DESC diff --git a/sql/2025/privacy/ccpa_prevalence.sql b/sql/2025/privacy/ccpa_prevalence.sql new file mode 100644 index 00000000000..c51db559ae7 --- /dev/null +++ b/sql/2025/privacy/ccpa_prevalence.sql @@ -0,0 +1,27 @@ 
+WITH pages AS ( + SELECT + client, + rank_grouping, + page, + JSON_VALUE(custom_metrics, '$.privacy.ccpa_link.hasCCPALink') AS has_ccpa_link + FROM `httparchive.crawl.pages`, -- TABLESAMPLE SYSTEM (0.0025 PERCENT) + UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping + WHERE date = '2025-07-01' AND + is_root_page = true AND + rank <= rank_grouping +) + +SELECT + client, + rank_grouping, + has_ccpa_link, + COUNT(DISTINCT page) AS num_pages +FROM pages +GROUP BY + has_ccpa_link, + rank_grouping, + client +ORDER BY + rank_grouping, + client, + has_ccpa_link diff --git a/sql/2025/privacy/cookies_top_first_party_names.sql b/sql/2025/privacy/cookies_top_first_party_names.sql new file mode 100644 index 00000000000..5b310e6fb75 --- /dev/null +++ b/sql/2025/privacy/cookies_top_first_party_names.sql @@ -0,0 +1,37 @@ +-- Most common cookie names, by number of domains on which they appear. Goal is to identify common trackers that use first-party cookies across sites. + +WITH pages AS ( + SELECT + client, + root_page, + custom_metrics, + COUNT(DISTINCT net.host(root_page)) OVER (PARTITION BY client) AS total_domains + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' +), + +cookies AS ( + SELECT + client, + cookie, + NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(root_page) AS firstparty_host, + total_domains + FROM pages, + UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie +) + +SELECT + client, + COUNT(DISTINCT firstparty_host) AS domain_count, + COUNT(DISTINCT firstparty_host) / any_value(total_domains) AS pct_domains, + JSON_VALUE(cookie, '$.name') AS cookie_name +FROM cookies +WHERE ENDS_WITH(firstparty_host, cookie_host) -- first-party: literal suffix match; LIKE would treat _ and % in cookie_host as wildcards +GROUP BY + client, + cookie_name +ORDER BY + domain_count DESC, + client DESC +LIMIT 500 diff --git a/sql/2025/privacy/cookies_top_third_party_domains.sql b/sql/2025/privacy/cookies_top_third_party_domains.sql new file mode 100644 index 00000000000..c8feb446e42 --- /dev/null +++ 
b/sql/2025/privacy/cookies_top_third_party_domains.sql @@ -0,0 +1,37 @@ +WITH pages AS ( + SELECT + page, + client, + root_page, + custom_metrics, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' +), + +cookies AS ( + SELECT + client, + page, + cookie, + NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(root_page) AS firstparty_host, + total_pages + FROM pages, + UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie +) + +SELECT + client, + cookie_host, + COUNT(DISTINCT page) AS page_count, + COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages +FROM cookies +WHERE NOT ENDS_WITH(firstparty_host, cookie_host) -- third-party: literal suffix test; LIKE would treat _ and % in cookie_host as wildcards +GROUP BY + client, + cookie_host +ORDER BY + page_count DESC, + client +LIMIT 500 diff --git a/sql/2025/privacy/cookies_top_third_party_names.sql b/sql/2025/privacy/cookies_top_third_party_names.sql new file mode 100644 index 00000000000..981a77da56d --- /dev/null +++ b/sql/2025/privacy/cookies_top_third_party_names.sql @@ -0,0 +1,37 @@ +-- Most common cookie names, by number of domains on which they appear. Goal is to identify common trackers that set cookies using many domains. 
+ +WITH pages AS ( + SELECT + client, + root_page, + custom_metrics, + COUNT(DISTINCT net.host(root_page)) OVER (PARTITION BY client) AS total_domains + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' +), + +cookies AS ( + SELECT + client, + cookie, + NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(root_page) AS firstparty_host, + total_domains + FROM pages, + UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie +) + +SELECT + client, + COUNT(DISTINCT firstparty_host) AS domain_count, + COUNT(DISTINCT firstparty_host) / any_value(total_domains) AS pct_domains, + JSON_VALUE(cookie, '$.name') AS cookie_name +FROM cookies +WHERE NOT ENDS_WITH(firstparty_host, cookie_host) -- third-party: literal suffix test; LIKE would treat _ and % in cookie_host as wildcards +GROUP BY + client, + cookie_name +ORDER BY + domain_count DESC, + client DESC +LIMIT 500 diff --git a/sql/2025/privacy/easylist-tracker-detection.sql b/sql/2025/privacy/easylist-tracker-detection.sql new file mode 100644 index 00000000000..15a9e2f5115 --- /dev/null +++ b/sql/2025/privacy/easylist-tracker-detection.sql @@ -0,0 +1,41 @@ +CREATE TEMP FUNCTION +CheckDomainInURL(url STRING, domain STRING) +RETURNS INT64 +LANGUAGE js AS """ + return url.includes(domain) ? 
1 : 0; +"""; + +-- We need to use the `easylist_adservers.csv` to populate the table to get the list of domains to block +-- https://github.com/easylist/easylist/blob/master/easylist/easylist_adservers.txt +WITH easylist_data AS ( + SELECT string_field_0 + FROM `httparchive.almanac.easylist_adservers` +), + +requests_data AS ( + SELECT url + FROM `httparchive.all.requests` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +block_status AS ( + SELECT + r.url, + MAX( + CASE + WHEN CheckDomainInURL(r.url, e.string_field_0) = 1 THEN 1 + ELSE 0 + END + ) AS should_block + FROM requests_data r + LEFT JOIN easylist_data e + ON CheckDomainInURL(r.url, e.string_field_0) = 1 + GROUP BY r.url +) + +SELECT + COUNT(0) AS blocked_url_count +FROM block_status +WHERE should_block = 1; diff --git a/sql/2025/privacy/fingerprinting_most_common_apis.sql b/sql/2025/privacy/fingerprinting_most_common_apis.sql new file mode 100644 index 00000000000..f7d952e0ad5 --- /dev/null +++ b/sql/2025/privacy/fingerprinting_most_common_apis.sql @@ -0,0 +1,36 @@ +CREATE TEMP FUNCTION getFingerprintingTypes(input STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ +if (input) { + try { + return Object.keys(JSON.parse(input)) + } catch (e) { + return [] + } +} else { + return [] +} +"""; + +WITH pages AS ( + SELECT + client, + page, + fingerprinting_type, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages + FROM `httparchive.crawl.pages`, + UNNEST(getFingerprintingTypes(JSON_EXTRACT(custom_metrics, '$.privacy.fingerprinting.counts'))) AS fingerprinting_type + WHERE date = '2025-07-01' +) + +SELECT + client, + fingerprinting_type, + COUNT(DISTINCT page) AS page_count, + COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages +FROM pages +GROUP BY + client, + fingerprinting_type +ORDER BY + page_count DESC diff --git a/sql/2025/privacy/fingerprinting_most_common_scripts.sql b/sql/2025/privacy/fingerprinting_most_common_scripts.sql new file mode 100644 index 00000000000..316c07b50d8 
--- /dev/null +++ b/sql/2025/privacy/fingerprinting_most_common_scripts.sql @@ -0,0 +1,23 @@ +WITH pages AS ( + SELECT + page, + client, + custom_metrics, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' +) + +SELECT + client, + script, + COUNT(DISTINCT page) AS page_count, + COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages +FROM pages, + UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script +GROUP BY + client, + script +ORDER BY + page_count DESC +LIMIT 100; diff --git a/sql/2025/privacy/fingerprinting_script_count.sql b/sql/2025/privacy/fingerprinting_script_count.sql new file mode 100644 index 00000000000..3ca08b05326 --- /dev/null +++ b/sql/2025/privacy/fingerprinting_script_count.sql @@ -0,0 +1,21 @@ +WITH pages AS ( + SELECT + page, + client, + ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script_count, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' +) + +SELECT + script_count, + client, + COUNT(DISTINCT page) AS page_count, + COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages +FROM pages +GROUP BY + script_count, + client +ORDER BY + script_count ASC; diff --git a/sql/2025/privacy/most_common_bounce_domains.sql b/sql/2025/privacy/most_common_bounce_domains.sql new file mode 100644 index 00000000000..91f007d26f2 --- /dev/null +++ b/sql/2025/privacy/most_common_bounce_domains.sql @@ -0,0 +1,89 @@ +-- Detection logic explained: +-- https://github.com/privacycg/proposals/issues/6 +-- https://github.com/privacycg/nav-tracking-mitigations/blob/main/bounce-tracking-explainer.md +WITH redirect_requests AS ( + SELECT + client, + url, + index, + response_headers, + page + FROM `httparchive.crawl.requests` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + type NOT IN ('css', 
'image', 'font', 'video', 'audio') AND + DIV(INT64(summary.status), 100) = 3 AND -- 3xx only; ROUND() also accepted 250-299 and rejected 350-399 + index <= 2 +), + +navigation_redirect AS ( + -- Find the first navigation redirect + SELECT + client, + url, + page, + response_header.value AS navigation_redirect_location + FROM redirect_requests, + UNNEST(response_headers) AS response_header + WHERE + index = 1 AND + LOWER(response_header.name) = 'location' AND + NET.REG_DOMAIN(response_header.value) != NET.REG_DOMAIN(page) +), + +bounce_redirect AS ( + -- Find the second navigation redirect + SELECT + client, + url, + page, + response_header.value AS bounce_redirect_location, + response_headers + FROM redirect_requests, + UNNEST(response_headers) AS response_header + WHERE + index = 2 AND + LOWER(response_header.name) = 'location' +), + +bounce_sequences AS ( + -- Combine the first and second navigation redirects + SELECT + nav.client, + NET.REG_DOMAIN(navigation_redirect_location) AS bounce_hostname, + COUNT(DISTINCT nav.page) AS number_of_pages + --ARRAY_AGG(bounce.bounce_tracking_cookies) AS bounce_tracking_cookies + FROM navigation_redirect AS nav + LEFT JOIN bounce_redirect AS bounce + ON + nav.client = bounce.client AND + nav.page = bounce.page AND + nav.navigation_redirect_location = bounce.url + WHERE bounce_redirect_location IS NOT NULL + GROUP BY + nav.client, + bounce_hostname +), + +pages_total AS ( + SELECT + client, + COUNT(DISTINCT page) AS total_pages + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' AND + is_root_page + GROUP BY client +) + +-- Count the number of websites with bounce tracking per bounce hostname +SELECT + client, + bounce_hostname, + number_of_pages, + number_of_pages / total_pages AS pct_pages +FROM bounce_sequences +JOIN pages_total +USING (client) +ORDER BY number_of_pages DESC +LIMIT 100 diff --git a/sql/2025/privacy/most_common_client_hints.sql b/sql/2025/privacy/most_common_client_hints.sql new file mode 100644 index 00000000000..88c2267abf2 --- /dev/null +++ 
b/sql/2025/privacy/most_common_client_hints.sql @@ -0,0 +1,52 @@ +# Pages that use Client Hints +WITH response_headers AS ( + SELECT + client, + page, + LOWER(response_header.name) AS header_name, + LOWER(response_header.value) AS header_value, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites + FROM `httparchive.all.requests`, + UNNEST(response_headers) response_header + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + is_main_document = TRUE +), + +meta_tags AS ( + SELECT + client, + page, + LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name, + LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value + FROM ( + SELECT + client, + page, + JSON_QUERY(custom_metrics, '$.almanac') AS metrics + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + ), + UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node + WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL +) + +SELECT + client, + IF(header_name = 'accept-ch', header_value, tag_value) AS value, + COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages, + COUNT(DISTINCT page) AS number_of_pages +FROM response_headers +FULL OUTER JOIN meta_tags +USING (client, page) +WHERE + header_name = 'accept-ch' OR + tag_name = 'accept-ch' +GROUP BY + client, + value +ORDER BY pct_pages DESC +LIMIT 200 diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql new file mode 100644 index 00000000000..53f76c63a2f --- /dev/null +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -0,0 +1,27 @@ +# Counts of CMPs using IAB Transparency & Consent Framework +# cf. 
https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata +# CMP vendor list: https://iabeurope.eu/cmp-list/ + +WITH cmps AS ( + SELECT + client, + page, + JSON_VALUE(custom_metrics, '$.privacy.iab_tcf_v2.data.cmpId') AS cmpId, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +) + +SELECT + client, + cmpId, + COUNT(0) / ANY_VALUE(total_pages) AS pct_pages, + COUNT(0) AS number_of_pages +FROM cmps +GROUP BY + client, + cmpId +ORDER BY + pct_pages DESC diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql new file mode 100644 index 00000000000..b260273dc7c --- /dev/null +++ b/sql/2025/privacy/most_common_cname_domains.sql @@ -0,0 +1,92 @@ +# Most common CNAME domains +CREATE TEMP FUNCTION convert_cname_json(json_str STRING) +RETURNS ARRAY<STRUCT<origin STRING, cname STRING>> +LANGUAGE js AS """ +try { + const obj = JSON.parse(json_str); + const result = []; + for (const key in obj) { + result.push({ + origin: key, + cname: obj[key] + }); + } + return result; +} catch (e) { + return []; +} +"""; + +# Adguard CNAME Trackers source: +# https://github.com/AdguardTeam/cname-trackers/blob/master/script/src/cloaked-trackers.json +WITH adguard_trackers AS ( + SELECT + domain + FROM UNNEST(['cz.affilbox.cz', 'pl02.prolitteris.2cnt.net', 'a8.net', 'mm.actionlink.jp', 'mr-in.com', 'ebis.ne.jp', '0i0i0i0.com', 'ads.bid', 'at-o.net', 'actonservice.com', 'actonsoftware.com', '2o7.net', 'data.adobedc.net', 'sc.adobedc.net', 'sc.omtrdc.net', 'adocean.pl', 'aquaplatform.com', 'cdn18685953.ahacdn.me', 'thirdparty.bnc.lt', 'api.clickaine.com', 'tagcommander.com', 'track.sp.crdl.io', 'dnsdelegation.io', 'storetail.io', 'e.customeriomail.com', 'dataunlocker.com', 'monopoly-drain.ga', 'friendly-community.tk', 'nc0.co', 'customer.etracker.com', 'eulerian.net', 'extole.com', 
'extole.io', 'fathomdns.com', 'genieespv.jp', 'ad-cloud.jp', 'goatcounter.com', 'heleric.com', 'iocnt.net', 'affex.org', 'k.keyade.com', 'ghochv3eng.trafficmanager.net', 'online-metrix.net', 'logly.co.jp', 'mailgun.org', 'ab1n.net', 'ntv.io', 'ntvpforever.com', 'postrelease.com', 'non.li', 'tracking.bp01.net', 't.eloqua.com', 'oghub.io', 'go.pardot.com', 'parsely.com', 'custom.plausible.io', 'popcashjs.b-cdn.net', 'rdtk.io', 'sailthru.com', 'exacttarget.com', 'a351fec2c318c11ea9b9b0a0ae18fb0b-1529426863.eu-central-1.elb.amazonaws.com', 'a5e652663674a11e997c60ac8a4ec150-1684524385.eu-central-1.elb.amazonaws.com', 'a88045584548111e997c60ac8a4ec150-1610510072.eu-central-1.elb.amazonaws.com', 'afc4d9aa2a91d11e997c60ac8a4ec150-2082092489.eu-central-1.elb.amazonaws.com', 'e.truedata.co', 'utiq-aws.net', 'webtrekk.net', 'wt-eu02.net', 'ak-is2.net', 'wizaly.com']) AS domain +), + +whotracksme AS ( + SELECT DISTINCT + domain, + category + FROM `httparchive.almanac.whotracksme` + WHERE date = '2025-07-01' +), + +cnames AS ( + SELECT + client, + cnames.cname, + page + --ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples + FROM `httparchive.crawl.pages`, + UNNEST(convert_cname_json(JSON_QUERY(custom_metrics, '$.privacy.request_hostnames_with_cname'))) AS cnames + WHERE date = '2025-07-01' AND + NET.REG_DOMAIN(cnames.origin) = NET.REG_DOMAIN(page) AND + NET.REG_DOMAIN(cnames.cname) != NET.REG_DOMAIN(page) + GROUP BY + client, + cnames.cname, + page +), + +pages_total AS ( + SELECT + client, + COUNT(DISTINCT page) AS total_pages + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' + GROUP BY client +), + +cname_stats AS ( + SELECT + client, + NET.REG_DOMAIN(cname) AS cname, + adguard_trackers.domain IS NOT NULL AS adguard_known_cname, + whotracksme.category AS whotracksme_category, + COUNT(DISTINCT page) AS number_of_pages + --ANY_VALUE(page_examples) + FROM cnames + LEFT JOIN adguard_trackers + ON ENDS_WITH(cnames.cname, adguard_trackers.domain) + LEFT JOIN whotracksme + 
ON ENDS_WITH(cnames.cname, whotracksme.domain) + GROUP BY + client, + cname, + adguard_known_cname, + whotracksme_category +) + +SELECT + client, + cname, + adguard_known_cname, + whotracksme_category, + number_of_pages, + number_of_pages / total_pages AS pct_pages +FROM cname_stats +LEFT JOIN pages_total +USING (client) +ORDER BY number_of_pages DESC diff --git a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql new file mode 100644 index 00000000000..891f58fdb62 --- /dev/null +++ b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql @@ -0,0 +1,43 @@ +# Counts of countries for publishers using IAB Transparency & Consent Framework +# cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata +# "Country code of the country that determines the legislation of +# reference. Normally corresponds to the country code of the country +# in which the publisher's business entity is established." 
+ +WITH totals AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + JSON_TYPE(custom_metrics.privacy.iab_tcf_v2.data) = 'object' + GROUP BY client +), + +cmps AS ( + SELECT + client, + --ANY_VALUE(custom_metrics.privacy.iab_tcf_v2.data) AS example, + STRING(custom_metrics.privacy.iab_tcf_v2.data.publisherCC) AS publisherCC, + COUNT(DISTINCT root_page) AS number_of_pages + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + JSON_TYPE(custom_metrics.privacy.iab_tcf_v2.data) = 'object' + GROUP BY + client, + publisherCC +) + +SELECT + client, + publisherCC, + --example, + number_of_pages / total_websites AS pct_of_pages +FROM cmps +JOIN totals +USING (client) +ORDER BY + client, + number_of_pages DESC diff --git a/sql/2025/privacy/most_common_referrer_policy.sql b/sql/2025/privacy/most_common_referrer_policy.sql new file mode 100644 index 00000000000..de96ae3a3a7 --- /dev/null +++ b/sql/2025/privacy/most_common_referrer_policy.sql @@ -0,0 +1,65 @@ +# Most common values for Referrer-Policy (at site level) + +WITH totals AS ( + SELECT + client, + COUNT(DISTINCT page) AS total_pages + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + GROUP BY client +), + +referrer_policy_custom_metrics AS ( + SELECT + client, + page, + LOWER(TRIM(policy_meta)) AS policy_meta + FROM `httparchive.crawl.pages`, + UNNEST(SPLIT(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy'), ',')) AS policy_meta + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +response_headers AS ( + SELECT + client, + page, + LOWER(response_header.name) AS name, + LOWER(response_header.value) AS value + FROM `httparchive.all.requests`, + UNNEST(response_headers) AS response_header + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND is_main_document = TRUE -- root pages only, matching the totals denominator +), + +referrer_policy_headers AS ( + SELECT + client, + page, + TRIM(policy_header) AS policy_header + 
FROM response_headers, + UNNEST(SPLIT(value, ',')) AS policy_header + WHERE + name = 'referrer-policy' +) + +SELECT + client, + COALESCE(policy_header, policy_meta) AS policy, + COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages, + COUNT(DISTINCT page) AS number_of_pages +FROM referrer_policy_custom_metrics +FULL OUTER JOIN referrer_policy_headers +USING (client, page) +JOIN totals +USING (client) +GROUP BY + client, + policy +ORDER BY + pct_pages DESC +LIMIT 100 diff --git a/sql/2025/privacy/most_common_strings_for_iab_usp.sql b/sql/2025/privacy/most_common_strings_for_iab_usp.sql new file mode 100644 index 00000000000..d6cfe4af0c9 --- /dev/null +++ b/sql/2025/privacy/most_common_strings_for_iab_usp.sql @@ -0,0 +1,27 @@ +# Counts of US Privacy String values for websites using IAB US Privacy Framework +# cf. https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md + +WITH usp_data AS ( + SELECT + client, + page, + JSON_VALUE(custom_metrics, '$.privacy.iab_usp.privacy_string.uspString') AS uspString, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS pages_total + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +) + +SELECT + client, + uspString, + COUNT(DISTINCT page) / ANY_VALUE(pages_total) AS pct_pages, + COUNT(DISTINCT page) AS number_of_pages +FROM usp_data +GROUP BY + client, + uspString +ORDER BY + pct_pages DESC +LIMIT 100 diff --git a/sql/2025/privacy/most_common_tracker_categories.sql b/sql/2025/privacy/most_common_tracker_categories.sql new file mode 100644 index 00000000000..c93aeadcac8 --- /dev/null +++ b/sql/2025/privacy/most_common_tracker_categories.sql @@ -0,0 +1,65 @@ +# Percent of pages that deploy at least one tracker from each tracker category +WITH whotracksme AS ( + SELECT + domain, + category, + tracker + FROM httparchive.almanac.whotracksme + WHERE date = '2025-07-01' +), + +totals AS ( + SELECT + client, + COUNT(DISTINCT page) AS total_websites + 
FROM httparchive.crawl.requests + WHERE + date = '2025-07-01' + GROUP BY client +), + +tracker_categories AS ( + SELECT + client, + category, + page + FROM httparchive.crawl.requests + JOIN whotracksme + ON ( + NET.HOST(url) = domain OR + ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) + ) + WHERE + date = '2025-07-01' AND + NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) -- third party +), + +aggregated AS ( + SELECT + client, + category, + COUNT(DISTINCT page) AS number_of_websites + FROM tracker_categories + GROUP BY + client, + category + UNION ALL + SELECT + client, + 'any' AS category, + COUNT(DISTINCT page) AS number_of_websites + FROM tracker_categories + GROUP BY + client +) + +SELECT + client, + category, + number_of_websites, + total_websites, + number_of_websites / total_websites AS pct_websites +FROM aggregated +JOIN totals +USING (client) +ORDER BY number_of_websites DESC diff --git a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql new file mode 100644 index 00000000000..c8fd1e038a0 --- /dev/null +++ b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql @@ -0,0 +1,94 @@ +#standardSQL +# Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) + +-- Extracting third-parties observed using ARA API on a publisher +CREATE TEMP FUNCTION jsonObjectKeys(input STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + return Object.keys(JSON.parse(input)); +"""; + +-- Extracting ARA API source registration details being passed by a given third-party (passed AS "key") +CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +RETURNS ARRAY<STRING> +LANGUAGE js AS """ + if (!input) { + return []; + } + const jsonObject = JSON.parse(input); + const values = jsonObject[key] || []; + 
const result = []; + + values.forEach(value => { + if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { + const parts = value.replace('attribution-reporting-register-source|', '').split('|'); + parts.forEach(part => { + if (part.startsWith('destination=')) { + const destinations = part.replace('destination=', '').split(','); + destinations.forEach(destination => { + result.push('destination=' + destination.trim()); + }); + } else { + result.push(part.trim()); + } + }); + } + }); + + return result; +"""; + +WITH ara_features AS ( + SELECT + client, + CASE + WHEN rank <= 1000 THEN '1000' + WHEN rank <= 10000 THEN '10000' + WHEN rank <= 100000 THEN '100000' + WHEN rank <= 1000000 THEN '1000000' + WHEN rank <= 10000000 THEN '10000000' + ELSE 'Other' + END AS rank_group, + NET.REG_DOMAIN(page) AS publisher, + CASE + WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) + ELSE NULL + END AS destination, + third_party_domain + FROM `httparchive.crawl.pages`, + UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, + UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + ara LIKE 'destination%' +) + +SELECT + client, + rank_group, + COUNT(destination) AS total_destinations, + COUNT(DISTINCT destination) AS distinct_destinations, + ROUND(COUNT(DISTINCT destination) * 100 / COUNT(destination), 2) AS destination_pct, + COUNT(third_party_domain) AS total_third_party_domains, + COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains, + ROUND(COUNT(DISTINCT third_party_domain) * 100 / COUNT(third_party_domain), 2) AS third_party_domain_pct, + COUNT(publisher) AS total_publishers, + COUNT(DISTINCT publisher) AS distinct_publishers, + ROUND(COUNT(DISTINCT publisher) * 100 / COUNT(publisher), 2) AS publisher_pct +FROM 
ara_features +WHERE destination IS NOT NULL AND third_party_domain IS NOT NULL +GROUP BY client, rank_group +ORDER BY + client, + CASE rank_group + WHEN '1000' THEN 1 + WHEN '10000' THEN 2 + WHEN '100000' THEN 3 + WHEN '1000000' THEN 4 + WHEN '10000000' THEN 5 + ELSE 6 + END; diff --git a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql new file mode 100644 index 00000000000..d439a0a9c1f --- /dev/null +++ b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql @@ -0,0 +1,44 @@ +#standardSQL +# Privacy Sandbox Attestation and Related Websites JSON status (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) + +WITH wellknown AS ( + SELECT + client, + NET.HOST(page) AS host, + CASE + WHEN rank <= 1000 THEN '1000' + WHEN rank <= 10000 THEN '10000' + WHEN rank <= 100000 THEN '100000' + WHEN rank <= 1000000 THEN '1000000' + WHEN rank <= 10000000 THEN '10000000' + ELSE 'Other' + END AS rank_group, + CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/related-website-set.json".found') AS BOOL) AS rws, + CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/privacy-sandbox-attestations.json".found') AS BOOL) AS attestation + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +) + +SELECT + client, + rank_group, + SUM(CASE WHEN rws THEN 1 ELSE 0 END) AS related_websites_set, + SUM(CASE WHEN attestation THEN 1 ELSE 0 END) AS privacy_sandbox_attestation +FROM + wellknown +WHERE + rws OR attestation +GROUP BY client, rank_group +ORDER BY + client, + CASE rank_group + WHEN '1000' THEN 1 + WHEN '10000' THEN 2 + WHEN '100000' THEN 3 + WHEN '1000000' THEN 4 + WHEN '10000000' THEN 5 + ELSE 6 + END; diff --git a/sql/2025/privacy/number_of_websites_per_technology.sql b/sql/2025/privacy/number_of_websites_per_technology.sql new file mode 100644 index 00000000000..ff808a4eafc --- /dev/null +++ 
b/sql/2025/privacy/number_of_websites_per_technology.sql @@ -0,0 +1,34 @@ +WITH technologies AS ( + SELECT + client, + page, + category, + technology, + COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites + FROM `httparchive.crawl.pages`, + UNNEST(technologies) AS tech, + UNNEST(categories) AS category + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +) + +SELECT + client, + technology, + COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages, + COUNT(DISTINCT page) AS number_of_pages, + ARRAY_AGG(DISTINCT category) AS categories +FROM technologies +WHERE + category IN ( + 'Analytics', 'Browser fingerprinting', 'Customer data platform', + 'Geolocation', + 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', + 'Cookie compliance' + ) +GROUP BY + client, + technology +ORDER BY + pct_pages DESC diff --git a/sql/2025/privacy/number_of_websites_per_technology_category.sql b/sql/2025/privacy/number_of_websites_per_technology_category.sql new file mode 100644 index 00000000000..ac40b734f59 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_per_technology_category.sql @@ -0,0 +1,22 @@ +SELECT + client, + category, + COUNT(DISTINCT IF(category = tech_category, page, NULL)) / COUNT(DISTINCT page) AS pct_pages, + COUNT(DISTINCT IF(category = tech_category, page, NULL)) AS number_of_pages +FROM `httparchive.crawl.pages`, + UNNEST(technologies) AS tech, + UNNEST(categories) AS tech_category, + UNNEST([ + 'Analytics', 'Browser fingerprinting', 'Customer data platform', + 'Geolocation', + 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', + 'Cookie compliance' + ]) AS category +WHERE + date = '2025-07-01' AND + is_root_page = TRUE +GROUP BY + client, + category +ORDER BY + pct_pages DESC diff --git a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql new file mode 100644 index 00000000000..67068c3fca4 --- /dev/null +++ 
b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql
@@ -0,0 +1,32 @@
+# Percent of websites using a fingerprinting library based on wappalyzer category
+WITH totals AS (
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total_websites
+  FROM httparchive.crawl.pages
+  WHERE
+    date = '2025-07-01'
+  GROUP BY
+    client
+)
+
+SELECT
+  client,
+  technology.technology,
+  total_websites,
+  COUNT(DISTINCT page) AS number_of_websites,
+  COUNT(DISTINCT page) / total_websites AS percent_of_websites
+FROM httparchive.crawl.pages
+JOIN totals USING (client),
+  UNNEST(technologies) AS technology,
+  UNNEST(technology.categories) AS category
+WHERE
+  date = '2025-07-01' AND
+  category = 'Browser fingerprinting'
+GROUP BY
+  client,
+  total_websites,
+  technology
+ORDER BY
+  client,
+  number_of_websites DESC
diff --git a/sql/2025/privacy/number_of_websites_with_client_hints.sql b/sql/2025/privacy/number_of_websites_with_client_hints.sql
new file mode 100644
index 00000000000..7953b46f929
--- /dev/null
+++ b/sql/2025/privacy/number_of_websites_with_client_hints.sql
@@ -0,0 +1,44 @@
+WITH response_headers AS ( -- one row per response header of each main document
+  SELECT
+    client,
+    page,
+    LOWER(response_header.name) AS header_name,
+    LOWER(response_header.value) AS header_value,
+    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites
+  FROM `httparchive.all.requests`,
+    UNNEST(response_headers) response_header
+  WHERE
+    date = '2025-07-01' AND
+    is_main_document = TRUE
+),
+
+meta_tags AS ( -- one row per <meta http-equiv=...> node reported by the almanac custom metric
+  SELECT
+    client,
+    page,
+    LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name,
+    LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value
+  FROM (
+    SELECT
+      client,
+      page,
+      JSON_QUERY(custom_metrics, '$.almanac') AS metrics -- JSON_QUERY keeps the object; JSON_VALUE returns NULL for non-scalars
+    FROM `httparchive.crawl.pages`
+    WHERE date = '2025-07-01'
+  ),
+    UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
+  WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
+)
+
+SELECT
+  client,
+  COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages,
+  COUNT(DISTINCT
page) AS number_of_pages
+FROM response_headers
+FULL OUTER JOIN meta_tags
+USING (client, page)
+WHERE
+  header_name = 'accept-ch' OR
+  tag_name = 'accept-ch'
+GROUP BY client
+ORDER BY pct_pages DESC
diff --git a/sql/2025/privacy/number_of_websites_with_dnt.sql b/sql/2025/privacy/number_of_websites_with_dnt.sql
new file mode 100644
index 00000000000..8cace44174e
--- /dev/null
+++ b/sql/2025/privacy/number_of_websites_with_dnt.sql
@@ -0,0 +1,34 @@
+# Pages that request DNT status, measured two ways: Blink feature counter vs. privacy custom metric
+
+WITH blink AS (
+  SELECT DISTINCT
+    client,
+    num_urls,
+    pct_urls
+  FROM `httparchive.blink_features.usage`
+  WHERE
+    yyyymmdd = '20250701' AND -- same crawl as the custom-metric side below, so both columns are comparable
+    feature IN ('NavigatorDoNotTrack')
+),
+
+pages AS (
+  SELECT
+    client,
+    COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) AS num_urls,
+    COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_urls
+  FROM `httparchive.crawl.pages`
+  WHERE
+    date = '2025-07-01' AND
+    is_root_page = TRUE
+  GROUP BY client
+)
+
+SELECT
+  COALESCE(blink.client, pages.client) AS client,
+  blink.num_urls AS number_of_pages_usage_per_blink,
+  blink.pct_urls AS pct_of_websites_usage_per_blink,
+  pages.num_urls AS number_of_pages_usage_per_custom_metric,
+  pages.pct_urls AS pct_of_websites_usage_per_custom_metric
+FROM blink
+FULL OUTER JOIN pages
+ON blink.client = pages.client
diff --git a/sql/2025/privacy/number_of_websites_with_gpc.sql b/sql/2025/privacy/number_of_websites_with_gpc.sql
new file mode 100644
index 00000000000..2b03afb7ebc
--- /dev/null
+++ b/sql/2025/privacy/number_of_websites_with_gpc.sql
@@ -0,0 +1,34 @@
+# Pages that provide `/.well-known/gpc.json` for Global Privacy Control
+
+WITH pages AS (
+  SELECT
+    client,
+    COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known,
+    COUNT(DISTINCT
IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) AS number_of_pages_well_known, + COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api, + COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) AS number_of_pages_js_api + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + GROUP BY client +), + +headers AS ( + SELECT + client, + COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_headers, + COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) AS number_of_pages_headers + FROM `httparchive.all.requests`, + UNNEST(response_headers) headers + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + is_main_document = TRUE + GROUP BY client +) + +SELECT * +FROM pages +FULL OUTER JOIN headers +USING (client) diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql new file mode 100644 index 00000000000..a2090b5df38 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -0,0 +1,61 @@ +# Counts of pages with IAB Frameworks +# TODO: check presence of multiple frameworks per page + +WITH privacy_custom_metrics_data AS ( + SELECT + client, + JSON_QUERY(custom_metrics, '$.privacy') AS metrics + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +) + +SELECT + client, + number_of_pages_with_tcfv1 / number_of_pages AS pct_pages_with_tcfv1, + number_of_pages_with_tcfv1, + number_of_pages_with_tcfv2 / number_of_pages AS pct_pages_with_tcfv2, + number_of_pages_with_tcfv2, + number_of_pages_with_usp / number_of_pages AS pct_pages_with_usp, + number_of_pages_with_usp, + number_of_pages_with_tcf / number_of_pages AS pct_pages_with_tcf, + 
number_of_pages_with_tcf, + number_of_pages_with_any / number_of_pages AS pct_pages_with_any, + number_of_pages_with_any, + number_of_pages_with_tcfv1_compliant / number_of_pages AS pct_pages_with_tcfv1_compliant, + number_of_pages_with_tcfv1_compliant, + number_of_pages_with_tcfv2_compliant / number_of_pages AS pct_pages_with_tcfv2_compliant, + number_of_pages_with_tcfv2_compliant, + number_of_pages_with_gpp / number_of_pages AS pct_pages_with_gpp, + number_of_pages_with_gpp, + number_of_pages_with_gpp_data / number_of_pages AS pct_pages_with_gpp_data, + number_of_pages_with_gpp_data +FROM ( + SELECT + client, + COUNT(0) AS number_of_pages, + COUNTIF(tcfv1) AS number_of_pages_with_tcfv1, + COUNTIF(tcfv2) AS number_of_pages_with_tcfv2, + COUNTIF(usp) AS number_of_pages_with_usp, + COUNTIF(tcfv1 OR tcfv2) AS number_of_pages_with_tcf, + COUNTIF(tcfv1 OR tcfv2 OR usp OR gpp) AS number_of_pages_with_any, + COUNTIF(tcfv1 AND tcfv1_compliant) AS number_of_pages_with_tcfv1_compliant, + COUNTIF(tcfv2 AND tcfv2_compliant) AS number_of_pages_with_tcfv2_compliant, + COUNTIF(gpp) AS number_of_pages_with_gpp, + COUNTIF(gpp_data) AS number_of_pages_with_gpp_data + FROM ( + SELECT + client, + JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true' AS tcfv1, + JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true' AS tcfv2, + JSON_VALUE(metrics, '$.iab_gpp.present') = 'true' AS gpp, + JSON_VALUE(metrics, '$.iab_usp.present') = 'true' AS usp, + JSON_VALUE(metrics, '$.iab_tcf_v1.compliant_setup') = 'true' AS tcfv1_compliant, + JSON_VALUE(metrics, '$.iab_tcf_v2.compliant_setup') = 'true' AS tcfv2_compliant, + JSON_VALUE(metrics, '$.iab_gpp.data') IS NOT NULL AS gpp_data + FROM + privacy_custom_metrics_data + ) + GROUP BY client +) diff --git a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql new file mode 100644 index 00000000000..e8570c65e93 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql @@ 
-0,0 +1,96 @@
+# Number of websites that deploy a certain number of trackers ('any' = every WhoTracks.me domain, 'any_tracker' = advertising/pornvertising/site_analytics/social_media only)
+WITH whotracksme AS (
+  SELECT
+    domain,
+    category,
+    tracker
+  FROM httparchive.almanac.whotracksme
+  WHERE date = '2025-07-01'
+),
+
+totals AS (
+  SELECT
+    client,
+    COUNT(DISTINCT page) AS total_websites
+  FROM httparchive.crawl.requests
+  WHERE date = '2025-07-01'
+  GROUP BY client
+)
+
+SELECT
+  client,
+  'any' AS type,
+  number_of_trackers,
+  COUNT(DISTINCT page) AS number_of_websites,
+  total_websites,
+  COUNT(DISTINCT page) / total_websites AS pct_websites
+FROM (
+  SELECT
+    client,
+    page,
+    COUNT(DISTINCT tracker) AS number_of_trackers
+  FROM httparchive.crawl.requests
+  JOIN whotracksme
+  ON (
+    NET.HOST(url) = domain OR
+    ENDS_WITH(NET.HOST(url), CONCAT('.', domain))
+  )
+  WHERE
+    date = '2025-07-01' AND
+    NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) -- third party
+  GROUP BY
+    client,
+    page
+)
+JOIN
+  totals
+USING (client)
+GROUP BY
+  client,
+  number_of_trackers,
+  total_websites
+UNION ALL
+SELECT
+  client,
+  'any_tracker' AS type,
+  number_of_trackers,
+  COUNT(DISTINCT page) AS number_of_websites,
+  total_websites,
+  COUNT(DISTINCT page) / total_websites AS pct_websites
+FROM (
+  SELECT
+    client,
+    page,
+    COUNT(DISTINCT tracker) AS number_of_trackers
+  FROM httparchive.crawl.requests -- same source table as the 'any' branch and the totals denominator
+  JOIN
+    whotracksme
+  ON (
+    NET.HOST(url) = domain OR
+    ENDS_WITH(NET.HOST(url), CONCAT('.', domain))
+  )
+  WHERE
+    date = '2025-07-01' AND
+    NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) AND -- third party
+    (
+      -- categories selected from https://whotracks.me/blog/tracker_categories.html
+      whotracksme.category = 'advertising' OR
+      whotracksme.category = 'pornvertising' OR
+      whotracksme.category = 'site_analytics' OR
+      whotracksme.category = 'social_media'
+    )
+  GROUP BY
+    client,
+    page
+)
+JOIN
+  totals
+USING (client)
+GROUP BY
+  client,
+  number_of_trackers,
+  total_websites
+ORDER BY
+  client,
+  type,
+  number_of_trackers
diff --git a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql
b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql new file mode 100644 index 00000000000..0b91d2169ab --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql @@ -0,0 +1,88 @@ +WITH referrer_policy_custom_metrics AS ( + SELECT + client, + page, + JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy') AS meta_policy, + ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.referrerPolicy.individual_requests')) > 0 AS individual_requests, + CAST(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.link_relations.A') AS INT64) > 0 AS link_relations + FROM + `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +referrer_policy_headers AS ( + SELECT + client, + page, + LOWER(response_header.value) AS header_policy + FROM + `httparchive.all.requests`, + UNNEST(response_headers) AS response_header + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + is_main_document = TRUE AND + response_header.name = 'referrer-policy' +) + +SELECT + client, + number_of_pages_with_entire_document_policy_meta / number_of_pages AS pct_pages_with_entire_document_policy_meta, + number_of_pages_with_entire_document_policy_meta, + number_of_pages_with_entire_document_policy_header / number_of_pages AS pct_pages_with_entire_document_policy_header, + number_of_pages_with_entire_document_policy_header, + number_of_pages_with_entire_document_policy / number_of_pages AS pct_pages_with_entire_document_policy, + number_of_pages_with_entire_document_policy, + number_of_pages_with_any_individual_requests / number_of_pages AS pct_pages_with_any_individual_requests, + number_of_pages_with_any_individual_requests, + number_of_pages_with_any_link_relations / number_of_pages AS pct_pages_with_any_link_relations, + number_of_pages_with_any_link_relations, + number_of_pages_with_any_referrer_policy / number_of_pages AS pct_pages_with_any_referrer_policy, + number_of_pages_with_any_referrer_policy +FROM ( + 
SELECT + client, + COUNT(DISTINCT page) AS number_of_pages, + COUNT(DISTINCT IF( + meta_policy IS NOT NULL, + page, NULL + )) AS number_of_pages_with_entire_document_policy_meta, + COUNT(DISTINCT IF( + header_policy IS NOT NULL, + page, NULL + )) AS number_of_pages_with_entire_document_policy_header, + COUNT( + DISTINCT IF( + meta_policy IS NOT NULL OR + header_policy IS NOT NULL, + page, NULL + ) + ) AS number_of_pages_with_entire_document_policy, + COUNT(DISTINCT IF( + individual_requests, + page, NULL + )) AS number_of_pages_with_any_individual_requests, + COUNT(DISTINCT IF( + link_relations, + page, NULL + )) AS number_of_pages_with_any_link_relations, + COUNT( + DISTINCT IF( + meta_policy IS NOT NULL OR + header_policy IS NOT NULL OR + individual_requests OR + link_relations, + page, NULL + ) + ) AS number_of_pages_with_any_referrer_policy + FROM + referrer_policy_custom_metrics + FULL OUTER JOIN + referrer_policy_headers + USING (client, page) + GROUP BY client +) +ORDER BY + client diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql new file mode 100644 index 00000000000..591c302fa86 --- /dev/null +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -0,0 +1,105 @@ +# Pages that participate in the privacy-relayed origin trials +CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( + SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) +); + +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) +RETURNS STRUCT< + token STRING, + origin STRING, + feature STRING, + expiry TIMESTAMP, + is_subdomain BOOL, + is_third_party BOOL +> AS ( + STRUCT( + DECODE_ORIGIN_TRIAL(token) AS token, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') AS 
INT64)) AS expiry, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party + ) +); + +WITH pages AS ( + SELECT + client, + page, + JSON_QUERY(custom_metrics, '$.origin-trials') AS ot_metrics, + JSON_QUERY(custom_metrics, '$.almanac') AS almanac_metrics + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +response_headers AS ( + SELECT + client, + page, + PARSE_ORIGIN_TRIAL(response_header.value) AS ot -- may not lowercase this value as it is a base64 string + FROM `httparchive.all.requests`, + UNNEST(response_headers) response_header + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + is_main_document = TRUE AND + LOWER(response_header.name) = 'origin-trial' +), + +meta_tags AS ( + SELECT + client, + page, + PARSE_ORIGIN_TRIAL(JSON_VALUE(meta_node, '$.content')) AS ot -- may not lowercase this value as it is a base64 string + FROM pages, + UNNEST(JSON_QUERY_ARRAY(almanac_metrics, '$.meta-nodes.nodes')) meta_node + WHERE + LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'origin-trial' +), + +ot_from_custom_metric AS ( + SELECT + client, + page, + PARSE_ORIGIN_TRIAL(JSON_VALUE(metric, '$.token')) AS ot + FROM pages, + UNNEST(JSON_QUERY_ARRAY(ot_metrics)) metric +) + +SELECT + client, + feature, + number_of_pages / total_pages AS pct_pages, + number_of_pages, + is_active +FROM ( + SELECT + client, + ot.feature, + ot.expiry >= CURRENT_TIMESTAMP() AS is_active, + COUNT(DISTINCT page) AS number_of_pages + FROM ( + SELECT * FROM response_headers + UNION ALL + SELECT * FROM meta_tags + UNION ALL + SELECT * FROM ot_from_custom_metric + ) + GROUP BY + client, + feature, + is_active +) +LEFT JOIN ( + SELECT + client, + COUNT(DISTINCT page) AS total_pages + FROM pages + GROUP BY + client +) +USING (client) +ORDER BY + number_of_pages DESC diff --git 
a/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql
new file mode 100644
index 00000000000..209061c0e72
--- /dev/null
+++ b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql
@@ -0,0 +1,43 @@
+WITH whotracksme AS ( -- tracker domain list, read from the shared almanac dataset like the sibling tracker queries
+  SELECT
+    domain,
+    category,
+    tracker
+  FROM `httparchive.almanac.whotracksme`
+  WHERE date = '2025-07-01'
+),
+
+pre_aggregated AS ( -- one row per (client, category, tracker, page) with a third-party request to that tracker
+  SELECT
+    client,
+    category,
+    page,
+    tracker,
+    COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages -- NOTE(review): evaluated after the JOIN/WHERE, so only pages with a matched tracker are counted
+  FROM `httparchive.all.requests`
+  JOIN whotracksme
+  ON NET.REG_DOMAIN(url) = domain
+  WHERE
+    date = '2025-07-01' AND
+    is_root_page = TRUE AND
+    NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) -- third party
+  GROUP BY
+    client,
+    category,
+    tracker,
+    page
+)
+
+SELECT
+  client,
+  category,
+  tracker,
+  COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages,
+  COUNT(DISTINCT page) AS number_of_pages
+FROM pre_aggregated
+GROUP BY
+  client,
+  category,
+  tracker
+ORDER BY
+  pct_pages DESC
diff --git a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql
new file mode 100644
index 00000000000..9d46cd2c71e
--- /dev/null
+++ b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql
@@ -0,0 +1,150 @@
+#standardSQL
+# Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers
+
+-- Extracting third-parties observed using PS APIs on a publisher
+CREATE TEMP FUNCTION jsonObjectKeys(input STRING)
+RETURNS ARRAY<STRING>
+LANGUAGE js AS """
+  if (!input) {
+    return [];
+  }
+  return Object.keys(JSON.parse(input));
+""";
+
+-- Extracting PS APIs being called by a given third-party (passed as "key")
+CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING)
+RETURNS ARRAY<STRING>
+LANGUAGE js AS """
+  if (!input) {
+    return [];
+  }
+  const jsonObject =
JSON.parse(input); + const values = jsonObject[key] || []; + + function splitByDelimiters(value) { + const delimiterRegex = new RegExp(',|, |\\n|\\u0000', 'g'); + return value.split(delimiterRegex).map(v => v.trim()).filter(v => v); + } + + const result = []; + const replacements = { + 'Ch': 'CH', 'Ua': 'UA', 'Wow64': 'WoW64', 'Dpr': 'DPR', 'Rtt': 'RTT', 'Ect': 'ECT', 'Etc': 'ETC', '-Architecture': '-Arch', '-Arc': '-Arch', '-Archh': '-Arch', + '-Factors': '-Factor', '-ETC': '-ECT', '-Modal': '-Model', '-UA-UA': '-UA', '-UAm': '-UA', 'UAmodel': 'UA-Model', 'UAplatform': 'UA-Platform', 'Secch-UA': 'Sec-CH-UA', + 'CH-Width': 'CH-Viewport-Width', '-UAodel': '-UA-Model', '-Platformua-Platform': '-Platform', '-Platformuser-Agent': '-Platform', '-Version"': '-Version' + }; + values.forEach(value => { + if (value.startsWith('accept-ch|')) { + const parts = splitByDelimiters(value.replace('accept-ch|', '')); + parts.forEach(part => { + if (["UA", "Arch", "Bitness", "Full-Version-List", "Mobile", "Model", "Platform", "Platform-Version", "WoW64"].includes(part)) { + result.push("Sec-CH-UA-" + part); + } else { + let formattedPart = part.split('-').map(segment => + segment.charAt(0).toUpperCase() + segment.slice(1).toLowerCase() + ).join('-'); + for (const [key, value] of Object.entries(replacements)) { + formattedPart = formattedPart.replace(new RegExp(key, 'g'), value); + } + result.push(formattedPart); + } + }); + } else { + result.push(value); + } + }); + + return result; +"""; + +WITH privacy_sandbox_features AS ( + SELECT + client, + CASE + WHEN rank <= 1000 THEN '1000' + WHEN rank <= 10000 THEN '10000' + WHEN rank <= 100000 THEN '100000' + WHEN rank <= 1000000 THEN '1000000' + WHEN rank <= 10000000 THEN '10000000' + ELSE 'Other' + END AS rank_group, + NET.REG_DOMAIN(page) AS publisher, + third_party_domain, + CASE + WHEN api LIKE '%opics%|%' + THEN + REPLACE(SUBSTR(api, 0, STRPOS(api, '|') - 1) || '-' || SPLIT(api, '|')[SAFE_OFFSET(1)], '|', '-') + WHEN api LIKE 
'attribution-reporting-register-source%' + THEN + SPLIT(api, '|')[OFFSET(0)] + ELSE + api + END AS feature + FROM `httparchive.crawl.pages`, + UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, + UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS api + WHERE + date = '2025-07-01' AND + is_root_page = TRUE +), + +grouped_features AS ( + SELECT + rank_group, + feature, + COUNT(DISTINCT publisher) AS publisher_count, + COUNT(DISTINCT third_party_domain) AS third_party_count + FROM privacy_sandbox_features + GROUP BY rank_group, feature +), + +aggregated_features AS ( + SELECT + feature, + SUM(CASE WHEN rank_group = '1000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_1000, + SUM(CASE WHEN rank_group = '1000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_1000, + SUM(CASE WHEN rank_group = '1000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_1000, + SUM(CASE WHEN rank_group = '1000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_1000, + SUM(CASE WHEN rank_group = '10000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_10000, + SUM(CASE WHEN rank_group = '10000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_10000, + SUM(CASE WHEN rank_group = '10000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_10000, + SUM(CASE WHEN rank_group = '10000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_10000, + SUM(CASE WHEN rank_group = '100000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_100000, + SUM(CASE WHEN rank_group = '100000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_100000, + SUM(CASE WHEN rank_group = '100000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_100000, + SUM(CASE WHEN rank_group = '100000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_100000, + SUM(CASE WHEN 
rank_group = '1000000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_1000000, + SUM(CASE WHEN rank_group = '1000000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_1000000, + SUM(CASE WHEN rank_group = '1000000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_1000000, + SUM(CASE WHEN rank_group = '1000000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_1000000, + SUM(CASE WHEN rank_group = '10000000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_10000000, + SUM(CASE WHEN rank_group = '10000000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_10000000, + SUM(CASE WHEN rank_group = '10000000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_10000000, + SUM(CASE WHEN rank_group = '10000000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_10000000 + FROM grouped_features + GROUP BY feature +) + +SELECT + feature AS privacy_sandbox_features, + total_publisher_leq_1000, + distinct_publisher_leq_1000, + total_third_parties_leq_1000, + distinct_third_parties_leq_1000, + total_publisher_leq_10000, + distinct_publisher_leq_10000, + total_third_parties_leq_10000, + distinct_third_parties_leq_10000, + total_publisher_leq_100000, + distinct_publisher_leq_100000, + total_third_parties_leq_100000, + distinct_third_parties_leq_100000, + total_publisher_leq_1000000, + distinct_publisher_leq_1000000, + total_third_parties_leq_1000000, + distinct_third_parties_leq_1000000, + total_publisher_leq_10000000, + distinct_publisher_leq_10000000, + total_third_parties_leq_10000000, + distinct_third_parties_leq_10000000 +FROM aggregated_features +ORDER BY feature; diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql new file mode 100644 index 00000000000..6e192dda53a --- /dev/null +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql @@ -0,0 +1,83 @@ 
+#standardSQL +# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) + +-- Extracting third-parties observed using ARA API on a publisher +CREATE TEMP FUNCTION jsonObjectKeys(input STRING) +RETURNS ARRAY +LANGUAGE js AS """ + if (!input) { + return []; + } + return Object.keys(JSON.parse(input)); +"""; + +-- Extracting ARA API source registration details being passed by a given third-party (passed as "key") +CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +RETURNS ARRAY +LANGUAGE js AS """ + if (!input) { + return []; + } + const jsonObject = JSON.parse(input); + const values = jsonObject[key] || []; + const result = []; + + values.forEach(value => { + if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { + const parts = value.replace('attribution-reporting-register-source|', '').split('|'); + parts.forEach(part => { + if (part.startsWith('destination=')) { + const destinations = part.replace('destination=', '').split(','); + destinations.forEach(destination => { + result.push('destination=' + destination.trim()); + }); + } else { + result.push(part.trim()); + } + }); + } + }); + + return result; +"""; + +WITH ara_features AS ( + SELECT + client, + CASE + WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) + ELSE NULL + END AS destination, + COUNT(NET.REG_DOMAIN(page)) AS total_publishers, + COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_publishers, + COUNT(third_party_domain) AS total_third_party_domains, + COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains + FROM `httparchive.crawl.pages`, + UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, + UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + 
ara LIKE 'destination%' + GROUP BY client, destination + HAVING destination IS NOT NULL +), + +ranked_features AS ( + SELECT + client, + destination, + total_publishers, + distinct_publishers, + total_third_party_domains, + distinct_third_party_domains, + ROW_NUMBER() OVER ( + PARTITION BY client + ORDER BY distinct_publishers DESC + ) AS publisher_rank + FROM ara_features +) + +SELECT * FROM ranked_features +WHERE publisher_rank <= 25 +ORDER BY client, distinct_publishers DESC; diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql new file mode 100644 index 00000000000..67c9142326f --- /dev/null +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql @@ -0,0 +1,83 @@ +#standardSQL +# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) + +-- Extracting third-parties observed using ARA API on a publisher +CREATE TEMP FUNCTION jsonObjectKeys(input STRING) +RETURNS ARRAY +LANGUAGE js AS """ + if (!input) { + return []; + } + return Object.keys(JSON.parse(input)); +"""; + +-- Extracting ARA API source registration details being passed by a given third-party (passed as "key") +CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +RETURNS ARRAY +LANGUAGE js AS """ + if (!input) { + return []; + } + const jsonObject = JSON.parse(input); + const values = jsonObject[key] || []; + const result = []; + + values.forEach(value => { + if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { + const parts = value.replace('attribution-reporting-register-source|', '').split('|'); + parts.forEach(part => { + if (part.startsWith('destination=')) { + const destinations = part.replace('destination=', '').split(','); + destinations.forEach(destination => { + result.push('destination=' + destination.trim()); + }); + } else { + 
result.push(part.trim()); + } + }); + } + }); + + return result; +"""; + +WITH ara_features AS ( + SELECT + client, + CASE + WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) + ELSE NULL + END AS destination, + COUNT(NET.REG_DOMAIN(page)) AS total_publishers, + COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_publishers, + COUNT(third_party_domain) AS total_third_party_domains, + COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains + FROM `httparchive.crawl.pages`, + UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, + UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + WHERE + date = '2025-07-01' AND + is_root_page = TRUE AND + ara LIKE 'destination%' + GROUP BY client, destination + HAVING destination IS NOT NULL +), + +ranked_features AS ( + SELECT + client, + destination, + total_publishers, + distinct_publishers, + total_third_party_domains, + distinct_third_party_domains, + ROW_NUMBER() OVER ( + PARTITION BY client + ORDER BY distinct_third_party_domains DESC + ) AS third_party_domain_rank + FROM ara_features +) + +SELECT * FROM ranked_features +WHERE third_party_domain_rank <= 25 +ORDER BY client, distinct_third_party_domains DESC; diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index e32047eecb3..b95cd9eab1a 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -1,216 +1,216 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OVkCxlRQH6Yt", - "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" - }, - "outputs": [], - "source": [ - "# @title Download repo\n", - "!git clone 
https://github.com/HTTPArchive/almanac.httparchive.org.git" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "U37785Bxt5tE" - }, - "outputs": [], - "source": [ - "# @title Configure the chapter to process\n", - "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", - "almanac_year = 2024 #@param {type: \"integer\"}\n", - "chapter_name = 'privacy' #@param {type: \"string\"}\n", - "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/18r8cT6x9lPdM-rXvXjsqx84W7ZDdTDYGD59xr0UGOwg/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UzhgG5xvbQ1E", - "outputId": "9cf3ef02-ec76-43ac-cd63-03edf7f2f619" - }, - "outputs": [], - "source": [ - "# @title Update chapter branch\n", - "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", - "!cd almanac.httparchive.org/ && git checkout $branch_name && git pull" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "45dBifFPJAtO" - }, - "outputs": [], - "source": [ - "# @title Authenticate\n", - "import google.auth\n", - "import os\n", - "from google.colab import auth\n", - "from google.cloud import bigquery\n", - "\n", - "import gspread\n", - "from gspread_dataframe import set_with_dataframe\n", - "\n", - "\n", - "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", - "auth.authenticate_user()\n", - "credentials, project = google.auth.default()\n", - "client = bigquery.Client()\n", - "gc = gspread.authorize(credentials)\n", - "\n", - "try:\n", - " ss = gc.open_by_url(spreadsheet_url)\n", - "except:\n", - " print('Spreadsheet not found')" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "collapsed": true, - "id": "nblNil985Tjt", - "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" - }, - "outputs": [], - "source": [ - "# @title Upload query results\n", - "\n", - "import glob\n", - "import re\n", - "from tabulate import tabulate\n", - "from IPython.display import clear_output\n", - "\n", - "\n", - "filename_match = '(number_of_websites_with_related_origin_trials|most_common_cname_domains)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "filename_match_exclude = '(ads_and_sellers_graph)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "dry_run = True # @param {type: \"boolean\"}\n", - "overwrite_sheets = True # @param {type: \"boolean\"}\n", - "maximum_tb_billed = None # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", - "\n", - "filename_include_regexp = r'{}'.format(filename_match)\n", - "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", - "folder = r'almanac.httparchive.org/sql/{year}/{chapter}/*.sql'.format(\n", - " year=almanac_year,\n", - " chapter=chapter_name.lower()\n", - ")\n", - "existing_sheets = [s.title for s in ss.worksheets()]\n", - "\n", - "# Print formatted logs\n", - "queries_processed_log = []\n", - "def print_logs_table(log=None, append=True):\n", - " if log:\n", - " queries_processed_log.append(log)\n", - " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed/billed', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", - " if not append:\n", - " del queries_processed_log[-1]\n", - " clear_output(wait=True)\n", - " print(table)\n", - "\n", - "# Find matching SQL queries and save results to Google Sheets.\n", - "for filepath in 
sorted(glob.iglob(folder)):\n", - " filename = filepath.split('/')[-1]\n", - "\n", - " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", - "\n", - " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", - "\n", - " with open(filepath) as f:\n", - " query = f.read()\n", - "\n", - " try:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(dry_run = True)\n", - " )\n", - " except Exception as e:\n", - " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", - " continue\n", - "\n", - " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", - " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", - "\n", - " if sheet_title in existing_sheets:\n", - " if overwrite_sheets:\n", - " st = ss.worksheet(sheet_title)\n", - " else:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", - " continue\n", - "\n", - " if dry_run:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", - " continue\n", - "\n", - " try:\n", - " if maximum_tb_billed:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(\n", - " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", - " )\n", - " )\n", - " else:\n", - " response = client.query(query)\n", - "\n", - " df = response.to_dataframe()\n", - " if ('st' not in locals() or st.title != sheet_title):\n", - " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", - " set_with_dataframe(st, df, resize=False)\n", - "\n", - " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", - " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", - "\n", - " except Exception as e:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", - " continue\n", - "\n", - " 
else:\n", - " print_logs_table([filename, None, None, 'Filename mismatch'])" - ] - } - ], - "metadata": { + "id": "OVkCxlRQH6Yt", + "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" + }, + "outputs": [], + "source": [ + "# @title Download repo\n", + "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "U37785Bxt5tE" + }, + "outputs": [], + "source": [ + "# @title Configure the chapter to process\n", + "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", + "almanac_year = 2024 #@param {type: \"integer\"}\n", + "chapter_name = 'privacy' #@param {type: \"string\"}\n", + "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" + "id": "UzhgG5xvbQ1E", + "outputId": "9cf3ef02-ec76-43ac-cd63-03edf7f2f619" + }, + "outputs": [], + "source": [ + "# @title Update chapter branch\n", + "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", + "!cd almanac.httparchive.org/ && git checkout $branch_name && git pull" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "45dBifFPJAtO" + }, + "outputs": [], + "source": [ + "# @title Authenticate\n", + "import google.auth\n", + "import os\n", + "from google.colab import auth\n", + "from google.cloud import bigquery\n", + "\n", + "import gspread\n", + "from gspread_dataframe import set_with_dataframe\n", + "\n", + "\n", + "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", + "auth.authenticate_user()\n", + "credentials, project = google.auth.default()\n", + "client = 
bigquery.Client()\n", + "gc = gspread.authorize(credentials)\n", + "\n", + "try:\n", + " ss = gc.open_by_url(spreadsheet_url)\n", + "except:\n", + " print('Spreadsheet not found')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python", - "version": "3.12.4" - } + "collapsed": true, + "id": "nblNil985Tjt", + "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" + }, + "outputs": [], + "source": [ + "# @title Upload query results\n", + "\n", + "import glob\n", + "import re\n", + "from tabulate import tabulate\n", + "from IPython.display import clear_output\n", + "\n", + "\n", + "filename_match = '(number_of_websites_with_related_origin_trials|most_common_cname_domains)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match_exclude = '(ads_and_sellers_graph)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "dry_run = True # @param {type: \"boolean\"}\n", + "overwrite_sheets = True # @param {type: \"boolean\"}\n", + "maximum_tb_billed = None # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "\n", + "filename_include_regexp = r'{}'.format(filename_match)\n", + "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", + "folder = r'almanac.httparchive.org/sql/{year}/{chapter}/*.sql'.format(\n", + " year=almanac_year,\n", + " chapter=chapter_name.lower()\n", + ")\n", + "existing_sheets = [s.title for s in ss.worksheets()]\n", + "\n", + "# Print formatted logs\n", + "queries_processed_log = []\n", + "def print_logs_table(log=None, append=True):\n", + " if log:\n", + " queries_processed_log.append(log)\n", + " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed/billed', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", + " if not append:\n", + " del 
queries_processed_log[-1]\n", + " clear_output(wait=True)\n", + " print(table)\n", + "\n", + "# Find matching SQL queries and save results to Google Sheets.\n", + "for filepath in sorted(glob.iglob(folder)):\n", + " filename = filepath.split('/')[-1]\n", + "\n", + " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", + "\n", + " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", + "\n", + " with open(filepath) as f:\n", + " query = f.read()\n", + "\n", + " try:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(dry_run = True)\n", + " )\n", + " except Exception as e:\n", + " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", + " continue\n", + "\n", + " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", + " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", + "\n", + " if sheet_title in existing_sheets:\n", + " if overwrite_sheets:\n", + " st = ss.worksheet(sheet_title)\n", + " else:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", + " continue\n", + "\n", + " if dry_run:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", + " continue\n", + "\n", + " try:\n", + " if maximum_tb_billed:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(\n", + " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", + " )\n", + " )\n", + " else:\n", + " response = client.query(query)\n", + "\n", + " df = response.to_dataframe()\n", + " if ('st' not in locals() or st.title != sheet_title):\n", + " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", + " set_with_dataframe(st, df, resize=False)\n", + "\n", + " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", + " print_logs_table([filename, f'{tb_billed:.3f}', 
sheet_title, None])\n", + "\n", + " except Exception as e:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", + " continue\n", + "\n", + " else:\n", + " print_logs_table([filename, None, None, 'Filename mismatch'])" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "name": "python", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } From 943ae2805c982da03b3be84f0521eef68c5876fe Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 01:48:51 +0200 Subject: [PATCH 02/27] query updates --- sql/2025/privacy/ccpa_most_common_phrases.sql | 31 -------------- sql/2025/privacy/ccpa_prevalence.sql | 27 ------------ .../privacy/cookies_top_first_party_names.sql | 10 ++--- .../cookies_top_third_party_domains.sql | 6 +-- .../privacy/cookies_top_third_party_names.sql | 8 ++-- .../privacy/easylist-tracker-detection.sql | 41 ------------------- .../fingerprinting_most_common_apis.sql | 36 ---------------- .../fingerprinting_most_common_scripts.sql | 23 ----------- .../privacy/fingerprinting_script_count.sql | 21 ---------- sql/2025/privacy/most_common_client_hints.sql | 14 +++---- .../most_common_cmps_for_iab_tcf_v2.sql | 2 +- .../privacy/most_common_cname_domains.sql | 13 +++--- .../privacy/most_common_referrer_policy.sql | 4 +- .../most_common_strings_for_iab_usp.sql | 2 +- ...stered_by_third_parties_and_publishers.sql | 19 ++------- ...er_of_privacy_sandbox_attested_domains.sql | 4 +- .../number_of_websites_with_client_hints.sql | 12 +++--- .../privacy/number_of_websites_with_dnt.sql | 6 +-- .../privacy/number_of_websites_with_gpc.sql | 10 ++--- .../privacy/number_of_websites_with_iab.sql | 16 ++++---- .../number_of_websites_with_nb_trackers.sql | 15 +++---- ...number_of_websites_with_referrerpolicy.sql | 12 
+++--- ...of_websites_with_related_origin_trials.sql | 14 +++---- ..._of_websites_with_whotracksme_trackers.sql | 4 +- ...doption-by-third-parties-by-publishers.sql | 19 ++------- ...inations_registered_by_most_publishers.sql | 19 ++------- ...tions_registered_by_most_third_parties.sql | 19 ++------- 27 files changed, 91 insertions(+), 316 deletions(-) delete mode 100644 sql/2025/privacy/ccpa_most_common_phrases.sql delete mode 100644 sql/2025/privacy/ccpa_prevalence.sql delete mode 100644 sql/2025/privacy/easylist-tracker-detection.sql delete mode 100644 sql/2025/privacy/fingerprinting_most_common_apis.sql delete mode 100644 sql/2025/privacy/fingerprinting_most_common_scripts.sql delete mode 100644 sql/2025/privacy/fingerprinting_script_count.sql diff --git a/sql/2025/privacy/ccpa_most_common_phrases.sql b/sql/2025/privacy/ccpa_most_common_phrases.sql deleted file mode 100644 index ae38070672d..00000000000 --- a/sql/2025/privacy/ccpa_most_common_phrases.sql +++ /dev/null @@ -1,31 +0,0 @@ -WITH pages_with_phrase AS ( - SELECT - client, - rank_grouping, - page, - COUNT(DISTINCT page) OVER (PARTITION BY client, rank_grouping) AS total_pages_with_phrase_in_rank_group, - JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases') AS ccpa_link_phrases - FROM `httparchive.crawl.pages`, --TABLESAMPLE SYSTEM (0.01 PERCENT) - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping - WHERE date = '2025-07-01' AND - is_root_page = true AND - rank <= rank_grouping AND - array_length(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.ccpa_link.CCPALinkPhrases')) > 0 -) - -SELECT - client, - rank_grouping, - link_phrase, - COUNT(DISTINCT page) AS num_pages, - COUNT(DISTINCT page) / any_value(total_pages_with_phrase_in_rank_group) AS pct_pages -FROM pages_with_phrase, - UNNEST(ccpa_link_phrases) AS link_phrase -GROUP BY - link_phrase, - rank_grouping, - client -ORDER BY - rank_grouping, - client, - num_pages DESC diff --git 
a/sql/2025/privacy/ccpa_prevalence.sql b/sql/2025/privacy/ccpa_prevalence.sql deleted file mode 100644 index c51db559ae7..00000000000 --- a/sql/2025/privacy/ccpa_prevalence.sql +++ /dev/null @@ -1,27 +0,0 @@ -WITH pages AS ( - SELECT - client, - rank_grouping, - page, - JSON_VALUE(custom_metrics, '$.privacy.ccpa_link.hasCCPALink') AS has_ccpa_link - FROM `httparchive.crawl.pages`, -- TABLESAMPLE SYSTEM (0.0025 PERCENT) - UNNEST([1000, 10000, 100000, 1000000, 10000000, 100000000]) AS rank_grouping - WHERE date = '2025-07-01' AND - is_root_page = true AND - rank <= rank_grouping -) - -SELECT - client, - rank_grouping, - has_ccpa_link, - COUNT(DISTINCT page) AS num_pages -FROM pages -GROUP BY - has_ccpa_link, - rank_grouping, - client -ORDER BY - rank_grouping, - client, - has_ccpa_link diff --git a/sql/2025/privacy/cookies_top_first_party_names.sql b/sql/2025/privacy/cookies_top_first_party_names.sql index 5b310e6fb75..c9d689c6c06 100644 --- a/sql/2025/privacy/cookies_top_first_party_names.sql +++ b/sql/2025/privacy/cookies_top_first_party_names.sql @@ -5,7 +5,7 @@ WITH pages AS ( client, root_page, custom_metrics, - COUNT(DISTINCT net.host(root_page)) OVER (PARTITION BY client) AS total_domains + COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' ), @@ -14,18 +14,18 @@ cookies AS ( SELECT client, cookie, - NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, NET.HOST(root_page) AS firstparty_host, total_domains FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie + UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie ) SELECT client, COUNT(DISTINCT firstparty_host) AS domain_count, - COUNT(DISTINCT firstparty_host) / any_value(total_domains) AS pct_domains, - JSON_VALUE(cookie, '$.name') AS cookie_name + COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains, + 
SAFE.STRING(cookie.name) AS cookie_name FROM cookies WHERE firstparty_host LIKE '%' || cookie_host GROUP BY diff --git a/sql/2025/privacy/cookies_top_third_party_domains.sql b/sql/2025/privacy/cookies_top_third_party_domains.sql index c8feb446e42..a4d1ea09aaa 100644 --- a/sql/2025/privacy/cookies_top_third_party_domains.sql +++ b/sql/2025/privacy/cookies_top_third_party_domains.sql @@ -14,18 +14,18 @@ cookies AS ( client, page, cookie, - NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, NET.HOST(root_page) AS firstparty_host, total_pages FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie + UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie ) SELECT client, cookie_host, COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages + COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages FROM cookies WHERE firstparty_host NOT LIKE '%' || cookie_host GROUP BY diff --git a/sql/2025/privacy/cookies_top_third_party_names.sql b/sql/2025/privacy/cookies_top_third_party_names.sql index 981a77da56d..8c5eb2cbc0d 100644 --- a/sql/2025/privacy/cookies_top_third_party_names.sql +++ b/sql/2025/privacy/cookies_top_third_party_names.sql @@ -14,18 +14,18 @@ cookies AS ( SELECT client, cookie, - NET.HOST(JSON_VALUE(cookie, '$.domain')) AS cookie_host, + NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, NET.HOST(root_page) AS firstparty_host, total_domains FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.cookies')) AS cookie + UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie ) SELECT client, COUNT(DISTINCT firstparty_host) AS domain_count, - COUNT(DISTINCT firstparty_host) / any_value(total_domains) AS pct_domains, - JSON_VALUE(cookie, '$.name') AS cookie_name + COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains, + SAFE.STRING(cookie.name) AS cookie_name FROM cookies WHERE firstparty_host NOT LIKE '%' || 
cookie_host GROUP BY diff --git a/sql/2025/privacy/easylist-tracker-detection.sql b/sql/2025/privacy/easylist-tracker-detection.sql deleted file mode 100644 index 15a9e2f5115..00000000000 --- a/sql/2025/privacy/easylist-tracker-detection.sql +++ /dev/null @@ -1,41 +0,0 @@ -CREATE TEMP FUNCTION -CheckDomainInURL(url STRING, domain STRING) -RETURNS INT64 -LANGUAGE js AS """ - return url.includes(domain) ? 1 : 0; -"""; - --- We need to use the `easylist_adservers.csv` to populate the table to get the list of domains to block --- https://github.com/easylist/easylist/blob/master/easylist/easylist_adservers.txt -WITH easylist_data AS ( - SELECT string_field_0 - FROM `httparchive.almanac.easylist_adservers` -), - -requests_data AS ( - SELECT url - FROM `httparchive.all.requests` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -), - -block_status AS ( - SELECT - r.url, - MAX( - CASE - WHEN CheckDomainInURL(r.url, e.string_field_0) = 1 THEN 1 - ELSE 0 - END - ) AS should_block - FROM requests_data r - LEFT JOIN easylist_data e - ON CheckDomainInURL(r.url, e.string_field_0) = 1 - GROUP BY r.url -) - -SELECT - COUNT(0) AS blocked_url_count -FROM block_status -WHERE should_block = 1; diff --git a/sql/2025/privacy/fingerprinting_most_common_apis.sql b/sql/2025/privacy/fingerprinting_most_common_apis.sql deleted file mode 100644 index f7d952e0ad5..00000000000 --- a/sql/2025/privacy/fingerprinting_most_common_apis.sql +++ /dev/null @@ -1,36 +0,0 @@ -CREATE TEMP FUNCTION getFingerprintingTypes(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ -if (input) { - try { - return Object.keys(JSON.parse(input)) - } catch (e) { - return [] - } -} else { - return [] -} -"""; - -WITH pages AS ( - SELECT - client, - page, - fingerprinting_type, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages`, - UNNEST(getFingerprintingTypes(JSON_EXTRACT(custom_metrics, '$.privacy.fingerprinting.counts'))) AS fingerprinting_type - WHERE date = 
'2025-07-01' -) - -SELECT - client, - fingerprinting_type, - COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages -FROM pages -GROUP BY - client, - fingerprinting_type -ORDER BY - page_count DESC diff --git a/sql/2025/privacy/fingerprinting_most_common_scripts.sql b/sql/2025/privacy/fingerprinting_most_common_scripts.sql deleted file mode 100644 index 316c07b50d8..00000000000 --- a/sql/2025/privacy/fingerprinting_most_common_scripts.sql +++ /dev/null @@ -1,23 +0,0 @@ -WITH pages AS ( - SELECT - page, - client, - custom_metrics, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' -) - -SELECT - client, - script, - COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages -FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script -GROUP BY - client, - script -ORDER BY - page_count DESC -LIMIT 100; diff --git a/sql/2025/privacy/fingerprinting_script_count.sql b/sql/2025/privacy/fingerprinting_script_count.sql deleted file mode 100644 index 3ca08b05326..00000000000 --- a/sql/2025/privacy/fingerprinting_script_count.sql +++ /dev/null @@ -1,21 +0,0 @@ -WITH pages AS ( - SELECT - page, - client, - ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.fingerprinting.likelyFingerprintingScripts')) AS script_count, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' -) - -SELECT - script_count, - client, - COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / any_value(total_pages) AS pct_pages -FROM pages -GROUP BY - script_count, - client -ORDER BY - script_count ASC; diff --git a/sql/2025/privacy/most_common_client_hints.sql b/sql/2025/privacy/most_common_client_hints.sql index 88c2267abf2..8358eb5c884 100644 --- a/sql/2025/privacy/most_common_client_hints.sql +++ 
b/sql/2025/privacy/most_common_client_hints.sql @@ -6,8 +6,8 @@ WITH response_headers AS ( LOWER(response_header.name) AS header_name, LOWER(response_header.value) AS header_value, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites - FROM `httparchive.all.requests`, - UNNEST(response_headers) response_header + FROM `httparchive.crawl.requests`, + UNNEST(response_headers) AS response_header WHERE date = '2025-07-01' AND is_root_page = TRUE AND @@ -18,20 +18,20 @@ meta_tags AS ( SELECT client, page, - LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name, - LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value + LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, + LOWER(SAFE.STRING(meta_node.content)) AS tag_value FROM ( SELECT client, page, - JSON_QUERY(custom_metrics, '$.almanac') AS metrics + custom_metrics.other.almanac AS metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND is_root_page = TRUE ), - UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node - WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL + UNNEST(JSON_QUERY_ARRAY(metrics.`meta-nodes`.nodes)) AS meta_node + WHERE SAFE.STRING(meta_node.`http-equiv`) IS NOT NULL ) SELECT diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql index 53f76c63a2f..e3952f1925c 100644 --- a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -6,7 +6,7 @@ WITH cmps AS ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.privacy.iab_tcf_v2.data.cmpId') AS cmpId, + SAFE.STRING(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages FROM `httparchive.crawl.pages` WHERE diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index b260273dc7c..625a1895933 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ 
b/sql/2025/privacy/most_common_cname_domains.sql @@ -1,9 +1,8 @@ # Most common CNAME domains -CREATE TEMP FUNCTION convert_cname_json(json_str STRING) +CREATE TEMP FUNCTION CONVERT_CNAME_JSON(obj JSON) RETURNS ARRAY> LANGUAGE js AS """ try { - const obj = JSON.parse(json_str); const result = []; for (const key in obj) { result.push({ @@ -37,10 +36,10 @@ cnames AS ( SELECT client, cnames.cname, - page - --ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples + page, + ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples FROM `httparchive.crawl.pages`, - UNNEST(convert_cname_json(JSON_QUERY(custom_metrics, '$.privacy.request_hostnames_with_cname'))) AS cnames + UNNEST(CONVERT_CNAME_JSON(custom_metrics.privacy.request_hostnames_with_cname)) AS cnames WHERE date = '2025-07-01' AND NET.REG_DOMAIN(cnames.origin) = NET.REG_DOMAIN(page) AND NET.REG_DOMAIN(cnames.cname) != NET.REG_DOMAIN(page) @@ -65,8 +64,8 @@ cname_stats AS ( NET.REG_DOMAIN(cname) AS cname, adguard_trackers.domain IS NOT NULL AS adguard_known_cname, whotracksme.category AS whotracksme_category, - COUNT(DISTINCT page) AS number_of_pages - --ANY_VALUE(page_examples) + COUNT(DISTINCT page) AS number_of_pages, + ANY_VALUE(page_examples) FROM cnames LEFT JOIN adguard_trackers ON ENDS_WITH(cnames.cname, adguard_trackers.domain) diff --git a/sql/2025/privacy/most_common_referrer_policy.sql b/sql/2025/privacy/most_common_referrer_policy.sql index de96ae3a3a7..2688340af9e 100644 --- a/sql/2025/privacy/most_common_referrer_policy.sql +++ b/sql/2025/privacy/most_common_referrer_policy.sql @@ -17,7 +17,7 @@ referrer_policy_custom_metrics AS ( page, LOWER(TRIM(policy_meta)) AS policy_meta FROM `httparchive.crawl.pages`, - UNNEST(SPLIT(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy'), ',')) AS policy_meta + UNNEST(SPLIT(SAFE.STRING(custom_metrics.privacy.referrerPolicy.entire_document_policy), ',')) AS policy_meta WHERE date = '2025-07-01' AND is_root_page = TRUE @@ -29,7 +29,7 @@ response_headers 
AS ( page, LOWER(response_header.name) AS name, LOWER(response_header.value) AS value - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) AS response_header WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/most_common_strings_for_iab_usp.sql b/sql/2025/privacy/most_common_strings_for_iab_usp.sql index d6cfe4af0c9..1a447353ae9 100644 --- a/sql/2025/privacy/most_common_strings_for_iab_usp.sql +++ b/sql/2025/privacy/most_common_strings_for_iab_usp.sql @@ -5,7 +5,7 @@ WITH usp_data AS ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.privacy.iab_usp.privacy_string.uspString') AS uspString, + SAFE.STRING(custom_metrics.privacy.iab_usp.privacy_string.uspString) AS uspString, COUNT(DISTINCT page) OVER (PARTITION BY client) AS pages_total FROM `httparchive.crawl.pages` WHERE diff --git a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql index c8fd1e038a0..b52d5491b09 100644 --- a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql +++ b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql @@ -1,24 +1,13 @@ #standardSQL # Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) --- Extracting third-parties observed using ARA API on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting ARA API source registration details being passed by a given third-party (passed AS "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { 
return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; const result = []; @@ -59,8 +48,8 @@ WITH ara_features AS ( END AS destination, third_party_domain FROM `httparchive.crawl.pages`, - UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara WHERE date = '2025-07-01' AND is_root_page = TRUE AND diff --git a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql index d439a0a9c1f..7bba913cc74 100644 --- a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql +++ b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql @@ -13,8 +13,8 @@ WITH wellknown AS ( WHEN rank <= 10000000 THEN '10000000' ELSE 'Other' END AS rank_group, - CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/related-website-set.json".found') AS BOOL) AS rws, - CAST(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/privacy-sandbox-attestations.json".found') AS BOOL) AS attestation + SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/related-website-set.json`.found) AS rws, + SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/privacy-sandbox-attestations.json`.found) AS attestation FROM `httparchive.crawl.pages` WHERE diff --git a/sql/2025/privacy/number_of_websites_with_client_hints.sql b/sql/2025/privacy/number_of_websites_with_client_hints.sql index 7953b46f929..a4fef23d131 100644 --- a/sql/2025/privacy/number_of_websites_with_client_hints.sql +++ b/sql/2025/privacy/number_of_websites_with_client_hints.sql @@ -5,7 +5,7 @@ WITH 
response_headers AS ( LOWER(response_header.name) AS header_name, LOWER(response_header.value) AS header_value, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) response_header WHERE date = '2025-07-01' AND @@ -16,18 +16,18 @@ meta_tags AS ( SELECT client, page, - LOWER(JSON_VALUE(meta_node, '$.http-equiv')) AS tag_name, - LOWER(JSON_VALUE(meta_node, '$.content')) AS tag_value + LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, + LOWER(SAFE.STRING(meta_node.content)) AS tag_value FROM ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.almanac') AS metrics + custom_metrics.other.almanac AS metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' ), - UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node - WHERE JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL + UNNEST(JSON_QUERY_ARRAY(metrics.`meta-nodes`.nodes)) AS meta_node + WHERE SAFE.STRING(meta_node.`http-equiv`) IS NOT NULL ) SELECT diff --git a/sql/2025/privacy/number_of_websites_with_dnt.sql b/sql/2025/privacy/number_of_websites_with_dnt.sql index 8cace44174e..66d50089d83 100644 --- a/sql/2025/privacy/number_of_websites_with_dnt.sql +++ b/sql/2025/privacy/number_of_websites_with_dnt.sql @@ -7,15 +7,15 @@ WITH blink AS ( pct_urls FROM `httparchive.blink_features.usage` WHERE - yyyymmdd = '20250601' AND + date = '2025-07-01' AND feature IN ('NavigatorDoNotTrack') ), pages AS ( SELECT client, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) AS num_urls, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_doNotTrack') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_urls + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_doNotTrack), page, NULL)) AS num_urls, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_doNotTrack), page, NULL)) / COUNT(DISTINCT page) AS pct_urls 
FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_gpc.sql b/sql/2025/privacy/number_of_websites_with_gpc.sql index 2b03afb7ebc..bf903a0ea42 100644 --- a/sql/2025/privacy/number_of_websites_with_gpc.sql +++ b/sql/2025/privacy/number_of_websites_with_gpc.sql @@ -3,10 +3,10 @@ WITH pages AS ( SELECT client, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.well-known."/.well-known/gpc.json".found') = 'true', page, NULL)) AS number_of_pages_well_known, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api, - COUNT(DISTINCT IF(JSON_VALUE(custom_metrics, '$.privacy.navigator_globalPrivacyControl') = 'true', page, NULL)) AS number_of_pages_js_api + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/gpc.json`.found), page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/gpc.json`.found), page, NULL)) AS number_of_pages_well_known, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_globalPrivacyControl), page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api, + COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_globalPrivacyControl), page, NULL)) AS number_of_pages_js_api FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND @@ -19,7 +19,7 @@ headers AS ( client, COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_headers, COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) AS number_of_pages_headers - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) headers WHERE date = 
'2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql index a2090b5df38..4865ef9cf4a 100644 --- a/sql/2025/privacy/number_of_websites_with_iab.sql +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -4,7 +4,7 @@ WITH privacy_custom_metrics_data AS ( SELECT client, - JSON_QUERY(custom_metrics, '$.privacy') AS metrics + custom_metrics.privacy AS metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND @@ -47,13 +47,13 @@ FROM ( FROM ( SELECT client, - JSON_VALUE(metrics, '$.iab_tcf_v1.present') = 'true' AS tcfv1, - JSON_VALUE(metrics, '$.iab_tcf_v2.present') = 'true' AS tcfv2, - JSON_VALUE(metrics, '$.iab_gpp.present') = 'true' AS gpp, - JSON_VALUE(metrics, '$.iab_usp.present') = 'true' AS usp, - JSON_VALUE(metrics, '$.iab_tcf_v1.compliant_setup') = 'true' AS tcfv1_compliant, - JSON_VALUE(metrics, '$.iab_tcf_v2.compliant_setup') = 'true' AS tcfv2_compliant, - JSON_VALUE(metrics, '$.iab_gpp.data') IS NOT NULL AS gpp_data + SAFE.BOOL(metrics.iab_tcf_v1.present) AS tcfv1, + SAFE.BOOL(metrics.iab_tcf_v2.present) AS tcfv2, + SAFE.BOOL(metrics.iab_gpp.present) AS gpp, + SAFE.BOOL(metrics.iab_usp.present) AS usp, + SAFE.BOOL(metrics.iab_tcf_v1.compliant_setup) AS tcfv1_compliant, + SAFE.BOOL(metrics.iab_tcf_v2.compliant_setup) AS tcfv2_compliant, + metrics.iab_gpp.data IS NOT NULL AS gpp_data FROM privacy_custom_metrics_data ) diff --git a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql index e8570c65e93..b54ad6d93cf 100644 --- a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql +++ b/sql/2025/privacy/number_of_websites_with_nb_trackers.sql @@ -1,10 +1,11 @@ -# Number of websites that deploy a certain number of trackers +-- Number of websites that deploy a certain number of trackers + WITH whotracksme AS ( SELECT domain, category, tracker - FROM almanac.whotracksme + FROM `httparchive.almanac.whotracksme` 
WHERE date = '2025-07-01' ), @@ -12,7 +13,7 @@ totals AS ( SELECT client, COUNT(DISTINCT page) AS total_websites - FROM httparchive.crawl.requests + FROM `httparchive.crawl.requests` WHERE date = '2025-07-01' GROUP BY client ) @@ -29,7 +30,7 @@ FROM ( client, page, COUNT(DISTINCT tracker) AS number_of_trackers - FROM httparchive.crawl.requests + FROM `httparchive.crawl.requests` JOIN whotracksme ON ( NET.HOST(url) = domain OR @@ -62,12 +63,12 @@ FROM ( client, page, COUNT(DISTINCT tracker) AS number_of_trackers - FROM httparchive.almanac.requests + FROM `httparchive.crawl.requests` JOIN whotracksme ON ( - NET.HOST(urlShort) = domain OR - ENDS_WITH(NET.HOST(urlShort), CONCAT('.', domain)) + NET.HOST(url) = domain OR + ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) ) WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql index 0b91d2169ab..92fb30a988d 100644 --- a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql +++ b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql @@ -2,11 +2,10 @@ WITH referrer_policy_custom_metrics AS ( SELECT client, page, - JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.entire_document_policy') AS meta_policy, - ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics, '$.privacy.referrerPolicy.individual_requests')) > 0 AS individual_requests, - CAST(JSON_VALUE(custom_metrics, '$.privacy.referrerPolicy.link_relations.A') AS INT64) > 0 AS link_relations - FROM - `httparchive.crawl.pages` + SAFE.STRING(custom_metrics.privacy.referrerPolicy.entire_document_policy) AS meta_policy, + ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics.privacy.referrerPolicy.individual_requests)) > 0 AS individual_requests, + SAFE.INT64(custom_metrics.privacy.referrerPolicy.link_relations.A) > 0 AS link_relations + FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND is_root_page = TRUE @@ -17,8 +16,7 @@ referrer_policy_headers AS ( client, 
page, LOWER(response_header.value) AS header_policy - FROM - `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) AS response_header WHERE date = '2025-07-01' AND diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index 591c302fa86..89e9eba8ccd 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -26,8 +26,8 @@ WITH pages AS ( SELECT client, page, - JSON_QUERY(custom_metrics, '$.origin-trials') AS ot_metrics, - JSON_QUERY(custom_metrics, '$.almanac') AS almanac_metrics + custom_metrics.other.`origin-trials` AS ot_metrics, + custom_metrics.other.almanac AS almanac_metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND @@ -39,7 +39,7 @@ response_headers AS ( client, page, PARSE_ORIGIN_TRIAL(response_header.value) AS ot -- may not lowercase this value as it is a base64 string - FROM `httparchive.all.requests`, + FROM `httparchive.crawl.requests`, UNNEST(response_headers) response_header WHERE date = '2025-07-01' AND @@ -52,18 +52,18 @@ meta_tags AS ( SELECT client, page, - PARSE_ORIGIN_TRIAL(JSON_VALUE(meta_node, '$.content')) AS ot -- may not lowercase this value as it is a base64 string + PARSE_ORIGIN_TRIAL(SAFE.STRING(meta_node.content)) AS ot -- may not lowercase this value as it is a base64 string FROM pages, - UNNEST(JSON_QUERY_ARRAY(almanac_metrics, '$.meta-nodes.nodes')) meta_node + UNNEST(JSON_QUERY_ARRAY(almanac_metrics.`meta-nodes`.nodes)) meta_node WHERE - LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'origin-trial' + LOWER(SAFE.STRING(meta_node.`http-equiv`)) = 'origin-trial' ), ot_from_custom_metric AS ( SELECT client, page, - PARSE_ORIGIN_TRIAL(JSON_VALUE(metric, '$.token')) AS ot + PARSE_ORIGIN_TRIAL(SAFE.STRING(metric.token)) AS ot FROM pages, UNNEST(JSON_QUERY_ARRAY(ot_metrics)) metric ) diff --git 
a/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql index 209061c0e72..9691070ebf9 100644 --- a/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql +++ b/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql @@ -3,7 +3,7 @@ WITH whotracksme AS ( domain, category, tracker - FROM `max-ostapenko.Public.whotracksme` + FROM `httparchive.almanac.whotracksme` WHERE date = '2025-07-01' ), @@ -14,7 +14,7 @@ pre_aggregated AS ( page, tracker, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.all.requests` + FROM `httparchive.crawl.requests` JOIN whotracksme ON NET.REG_DOMAIN(url) = domain WHERE diff --git a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql index 9d46cd2c71e..a971f2e560d 100644 --- a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql +++ b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql @@ -1,24 +1,13 @@ #standardSQL # Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers --- Extracting third-parties observed using PS APIs on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting PS APIs being called by a given third-party (passed as "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; function splitByDelimiters(value) { @@ -80,8 +69,8 @@ WITH privacy_sandbox_features AS ( api END AS feature FROM `httparchive.crawl.pages`, - 
UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS api + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS api WHERE date = '2025-07-01' AND is_root_page = TRUE diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql index 6e192dda53a..fe47074c3c2 100644 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql @@ -1,24 +1,13 @@ #standardSQL # Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) --- Extracting third-parties observed using ARA API on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; const result = []; @@ -53,8 +42,8 @@ WITH ara_features AS ( COUNT(third_party_domain) AS total_third_party_domains, COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains FROM `httparchive.crawl.pages`, - UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS 
third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara WHERE date = '2025-07-01' AND is_root_page = TRUE AND diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql index 67c9142326f..89da104a5c9 100644 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql @@ -1,24 +1,13 @@ #standardSQL # Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) --- Extracting third-parties observed using ARA API on a publisher -CREATE TEMP FUNCTION jsonObjectKeys(input STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!input) { - return []; - } - return Object.keys(JSON.parse(input)); -"""; - -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") -CREATE TEMP FUNCTION jsonObjectValues(input STRING, key STRING) +CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) RETURNS ARRAY LANGUAGE js AS """ - if (!input) { + if (!jsonObject) { return []; } - const jsonObject = JSON.parse(input); const values = jsonObject[key] || []; const result = []; @@ -53,8 +42,8 @@ WITH ara_features AS ( COUNT(third_party_domain) AS total_third_party_domains, COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains FROM `httparchive.crawl.pages`, - UNNEST(jsonObjectKeys(JSON_QUERY(custom_metrics, '$.privacy-sandbox.privacySandBoxAPIUsage'))) AS third_party_domain, - UNNEST(jsonObjectValues(JSON_QUERY(custom_metrics, 
'$.privacy-sandbox.privacySandBoxAPIUsage'), third_party_domain)) AS ara + UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, + UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara WHERE date = '2025-07-01' AND is_root_page = TRUE AND From 451cca9a0a50987bf00b99fc59ef764f7de9d03e Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:09:37 +0200 Subject: [PATCH 03/27] sheet exporter update --- sql/util/bq_to_sheets.ipynb | 300 ++++++++++++++++++++++++++---------- 1 file changed, 218 insertions(+), 82 deletions(-) diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index b95cd9eab1a..2581abee78c 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,19 +9,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "cellView": "form", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "OVkCxlRQH6Yt", - "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" + "id": "U37785Bxt5tE" }, "outputs": [], "source": [ - "# @title Download repo\n", - "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" + "# @title Configure the chapter to process\n", + "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", + "almanac_year = 2025 #@param {type: \"integer\"}\n", + "chapter_name = 'privacy' #@param {type: \"string\"}\n", + "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Vdfg06z4I44VZBgzY0BeNCmSHjWcWeYIObJU4K0yZb4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" ] }, { @@ -29,20 +28,21 @@ "execution_count": null, "metadata": { "cellView": "form", - "id": "U37785Bxt5tE" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OVkCxlRQH6Yt", + "outputId": "0e907d5e-3824-4b0c-935d-81e629702390" }, "outputs": [], "source": [ - "# @title Configure 
the chapter to process\n", - "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", - "almanac_year = 2024 #@param {type: \"integer\"}\n", - "chapter_name = 'privacy' #@param {type: \"string\"}\n", - "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" + "# @title Download repo (skip when running locally)\n", + "# !git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": { "cellView": "form", "colab": { @@ -51,42 +51,95 @@ "id": "UzhgG5xvbQ1E", "outputId": "9cf3ef02-ec76-43ac-cd63-03edf7f2f619" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Branch: privacy-sql-2025\n" + ] + } + ], "source": [ - "# @title Update chapter branch\n", + "# @title Update chapter branch (skip when running locally)\n", "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", - "!cd almanac.httparchive.org/ && git checkout $branch_name && git pull" + "print(f\"Branch: {branch_name}\")\n", + "# !cd almanac.httparchive.org/ && git checkout $branch_name && git pull" ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Run to authenticate if in Colab (skip when running locally)\n", + "# from google.colab import auth\n", + "# auth.authenticate_user()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip 
install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "# Run to authenticate if not in Colab\n", + "# Prepare the environments as described in src/README.md\n", + "!pip install gspread gspread_dataframe tabulate -q" + ] + }, + { + "cell_type": "code", + "execution_count": 40, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Spreadsheet authentication failed: \n", + "Note: Make sure you have access to the spreadsheet and proper Google credentials\n" + ] + } + ], "source": [ "# @title Authenticate\n", "import google.auth\n", "import os\n", - "from google.colab import auth\n", "from google.cloud import bigquery\n", "\n", "import gspread\n", "from gspread_dataframe import set_with_dataframe\n", "\n", - "\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", - "auth.authenticate_user()\n", + "\n", "credentials, project = google.auth.default()\n", "client = bigquery.Client()\n", "gc = gspread.authorize(credentials)\n", "\n", "try:\n", " ss = gc.open_by_url(spreadsheet_url)\n", - "except:\n", - " print('Spreadsheet not found')" + " existing_sheets = [s.title for s in ss.worksheets()]\n", + " print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")\n", + "except Exception as e:\n", + " print(f'Spreadsheet authentication failed: {e}')\n", + " print(\"Note: Make sure you have access to the spreadsheet and proper Google credentials\")\n", + " ss = None\n", + " existing_sheets = []" ] }, { @@ -101,7 +154,71 @@ "id": "nblNil985Tjt", "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| Query name | TB processed/billed | 
Sheet name | Upload skipped reason |\n", + "+===========================================================================+=======================+=======================================================================+=========================+\n", + "| cookies_top_first_party_names.sql | 0.081 | Cookies Top First Party Names | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| cookies_top_third_party_domains.sql | 0.083 | Cookies Top Third Party Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| cookies_top_third_party_names.sql | 0.081 | Cookies Top Third Party Names | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_bounce_domains.sql | 6.166 | Most Common Bounce Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_client_hints.sql | 5.217 | Most Common Client Hints | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | 0.021 | Most Common Cmps For Iab Tcf V2 | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_referrer_policy.sql | 3.66 | Most Common Referrer Policy | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_strings_for_iab_usp.sql | 0.021 | Most Common Strings For Iab Usp | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 1.573 | Number Of Ara Destinations Registered By Third Parties And Publishers | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | 1.571 | Number Of Privacy Sandbox Attested Domains | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology.sql | 0.025 | Number Of Websites Per Technology | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology_category.sql | 0.016 | Number Of Websites Per Technology Category | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_client_hints.sql | 2.895 | Number Of Websites With Client Hints | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_dnt.sql | 0.021 | Number Of Websites With Dnt | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_gpc.sql | 5.235 | Number Of Websites With Gpc | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_iab.sql | 0.019 | Number Of Websites With Iab | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | 3.664 | Number Of Websites With Referrerpolicy | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | 5.217 | Number Of Websites With Related Origin Trials | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | 0.978 | Number Of Websites With Whotracksme Trackers | Dry run |\n", + 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 1.573 | Privacy Sandbox Adoption By Third Parties By Publishers | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | 1.573 | Top Ara Destinations Registered By Most Publishers | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | 1.573 | Top Ara Destinations Registered By Most Third Parties | Dry run |\n", + "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n" + ] + } + ], "source": [ "# @title Upload query results\n", "\n", @@ -109,92 +226,102 @@ "import re\n", "from tabulate import tabulate\n", "from IPython.display import clear_output\n", + "import os\n", "\n", - "\n", - "filename_match = '(number_of_websites_with_related_origin_trials|most_common_cname_domains)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "filename_match_exclude = '(ads_and_sellers_graph)\\.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = '\\\\.sql$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match_exclude = '^$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in 
quotes\"}\n", "dry_run = True # @param {type: \"boolean\"}\n", - "overwrite_sheets = True # @param {type: \"boolean\"}\n", - "maximum_tb_billed = None # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "overwrite_sheets = False # @param {type: \"boolean\"}\n", + "maximum_tb_billed = 0.5 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", "\n", "filename_include_regexp = r'{}'.format(filename_match)\n", "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", - "folder = r'almanac.httparchive.org/sql/{year}/{chapter}/*.sql'.format(\n", - " year=almanac_year,\n", - " chapter=chapter_name.lower()\n", - ")\n", - "existing_sheets = [s.title for s in ss.worksheets()]\n", + "\n", + "folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", + "\n", + "print(f\"Looking for SQL files in: {folder}\")\n", "\n", "# Print formatted logs\n", "queries_processed_log = []\n", "def print_logs_table(log=None, append=True):\n", " if log:\n", " queries_processed_log.append(log)\n", - " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed/billed', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", + " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed - estimate', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", " if not append:\n", " del queries_processed_log[-1]\n", " clear_output(wait=True)\n", " print(table)\n", "\n", "# Find matching SQL queries and save results to Google Sheets.\n", - "for filepath in sorted(glob.iglob(folder)):\n", - " filename = filepath.split('/')[-1]\n", + "sql_files = list(glob.iglob(folder))\n", + "print(f\"Found {len(sql_files)} SQL files\")\n", + "\n", + "if not sql_files:\n", + " print(\"No SQL files found. 
Check the folder path.\")\n", + "else:\n", + " for filepath in sorted(sql_files):\n", + " filename = os.path.basename(filepath)\n", "\n", - " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", + " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", "\n", - " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", + " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", "\n", - " with open(filepath) as f:\n", - " query = f.read()\n", + " with open(filepath) as f:\n", + " query = f.read()\n", + "\n", + " try:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(dry_run = True)\n", + " )\n", + " except Exception as e:\n", + " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", + " continue\n", "\n", - " try:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(dry_run = True)\n", - " )\n", - " except Exception as e:\n", - " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", - " continue\n", + " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", + " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", "\n", - " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", - " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", + " if sheet_title in existing_sheets:\n", + " if overwrite_sheets:\n", + " st = ss.worksheet(sheet_title)\n", + " else:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", + " continue\n", "\n", - " if sheet_title in existing_sheets:\n", - " if overwrite_sheets:\n", - " st = ss.worksheet(sheet_title)\n", - " else:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet 
already exists'])\n", + " if dry_run:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", " continue\n", "\n", - " if dry_run:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", - " continue\n", + " # Skip actual execution if no spreadsheet connection\n", + " if ss is None:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'No spreadsheet connection'])\n", + " continue\n", "\n", - " try:\n", - " if maximum_tb_billed:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(\n", - " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", + " try:\n", + " if maximum_tb_billed:\n", + " response = client.query(\n", + " query,\n", + " job_config = bigquery.QueryJobConfig(\n", + " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", + " )\n", " )\n", - " )\n", - " else:\n", - " response = client.query(query)\n", + " else:\n", + " response = client.query(query)\n", "\n", - " df = response.to_dataframe()\n", - " if ('st' not in locals() or st.title != sheet_title):\n", - " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", - " set_with_dataframe(st, df, resize=False)\n", + " df = response.to_dataframe()\n", + " if ('st' not in locals() or st.title != sheet_title):\n", + " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", + " set_with_dataframe(st, df, resize=False)\n", "\n", - " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", - " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", + " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", + " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", "\n", - " except Exception as e:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", - " continue\n", + " except Exception as e:\n", + " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query 
error:\\n{e}'])\n", + " continue\n", "\n", - " else:\n", - " print_logs_table([filename, None, None, 'Filename mismatch'])" + " else:\n", + " print_logs_table([filename, None, None, 'Filename mismatch'])" ] } ], @@ -203,12 +330,21 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": ".venv (3.12.7)", + "language": "python", "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.12.4" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" } }, "nbformat": 4, From e394bb635b2ebb97db409d36e137b93385b89dd9 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:18:35 +0200 Subject: [PATCH 04/27] ID update --- sql/util/bq_to_sheets.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 2581abee78c..6ba694515c4 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -20,7 +20,7 @@ "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", "almanac_year = 2025 #@param {type: \"integer\"}\n", "chapter_name = 'privacy' #@param {type: \"string\"}\n", - "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Vdfg06z4I44VZBgzY0BeNCmSHjWcWeYIObJU4K0yZb4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" + "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" ] }, { @@ -144,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": { "cellView": "form", "colab": { From 
2a7db9b4a2d2693faaaf64c6cbab1d0de9aac334 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 2 Aug 2025 02:18:47 +0200 Subject: [PATCH 05/27] formatting --- sql/2025/privacy/most_common_bounce_domains.sql | 1 + sql/2025/privacy/most_common_client_hints.sql | 3 ++- sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql | 6 +++--- sql/2025/privacy/most_common_cname_domains.sql | 7 ++++--- .../privacy/most_common_countries_for_iab_tcf_v2.sql | 10 +++++----- sql/2025/privacy/most_common_referrer_policy.sql | 2 +- sql/2025/privacy/most_common_strings_for_iab_usp.sql | 4 ++-- sql/2025/privacy/most_common_tracker_categories.sql | 3 ++- ...ions_registered_by_third_parties_and_publishers.sql | 3 +-- .../number_of_privacy_sandbox_attested_domains.sql | 3 +-- .../number_of_websites_using_each_fingerprinting.sql | 3 ++- sql/2025/privacy/number_of_websites_with_dnt.sql | 2 +- sql/2025/privacy/number_of_websites_with_gpc.sql | 2 +- sql/2025/privacy/number_of_websites_with_iab.sql | 4 ++-- .../number_of_websites_with_related_origin_trials.sql | 3 ++- ...sandbox-adoption-by-third-parties-by-publishers.sql | 3 +-- ..._ara_destinations_registered_by_most_publishers.sql | 3 +-- ...a_destinations_registered_by_most_third_parties.sql | 3 +-- 18 files changed, 33 insertions(+), 32 deletions(-) diff --git a/sql/2025/privacy/most_common_bounce_domains.sql b/sql/2025/privacy/most_common_bounce_domains.sql index 91f007d26f2..b13f7552f8b 100644 --- a/sql/2025/privacy/most_common_bounce_domains.sql +++ b/sql/2025/privacy/most_common_bounce_domains.sql @@ -1,6 +1,7 @@ -- Detection logic explained: -- https://github.com/privacycg/proposals/issues/6 -- https://github.com/privacycg/nav-tracking-mitigations/blob/main/bounce-tracking-explainer.md + WITH redirect_requests AS ( SELECT client, diff --git a/sql/2025/privacy/most_common_client_hints.sql b/sql/2025/privacy/most_common_client_hints.sql index 8358eb5c884..dcf0d4c16da 100644 --- 
a/sql/2025/privacy/most_common_client_hints.sql +++ b/sql/2025/privacy/most_common_client_hints.sql @@ -1,4 +1,5 @@ -# Pages that use Client Hints +-- Pages that use Client Hints + WITH response_headers AS ( SELECT client, diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql index e3952f1925c..09dce3f75e8 100644 --- a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -1,6 +1,6 @@ -# Counts of CMPs using IAB Transparency & Consent Framework -# cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata -# CMP vendor list: https://iabeurope.eu/cmp-list/ +-- Counts of CMPs using IAB Transparency & Consent Framework +-- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata +-- CMP vendor list: https://iabeurope.eu/cmp-list/ WITH cmps AS ( SELECT diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index 625a1895933..6f0cb9ead0b 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ b/sql/2025/privacy/most_common_cname_domains.sql @@ -1,4 +1,5 @@ -# Most common CNAME domains +-- Most common CNAME domains + CREATE TEMP FUNCTION CONVERT_CNAME_JSON(obj JSON) RETURNS ARRAY> LANGUAGE js AS """ @@ -16,8 +17,8 @@ try { } """; -# Adguard CNAME Trackers source: -# https://github.com/AdguardTeam/cname-trackers/blob/master/script/src/cloaked-trackers.json +-- Adguard CNAME Trackers source: +-- https://github.com/AdguardTeam/cname-trackers/blob/master/script/src/cloaked-trackers.json WITH adguard_trackers AS ( SELECT domain diff --git a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql index 891f58fdb62..16dfe503255
100644 --- a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql @@ -1,8 +1,8 @@ -# Counts of countries for publishers using IAB Transparency & Consent Framework -# cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata -# "Country code of the country that determines the legislation of -# reference. Normally corresponds to the country code of the country -# in which the publisher's business entity is established." +-- Counts of countries for publishers using IAB Transparency & Consent Framework +-- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md#tcdata +-- "Country code of the country that determines the legislation of +-- reference. Normally corresponds to the country code of the country +-- in which the publisher's business entity is established." WITH totals AS ( SELECT diff --git a/sql/2025/privacy/most_common_referrer_policy.sql b/sql/2025/privacy/most_common_referrer_policy.sql index 2688340af9e..eed79b736e3 100644 --- a/sql/2025/privacy/most_common_referrer_policy.sql +++ b/sql/2025/privacy/most_common_referrer_policy.sql @@ -1,4 +1,4 @@ -# Most common values for Referrer-Policy (at site level) +-- Most common values for Referrer-Policy (at site level) WITH totals AS ( SELECT diff --git a/sql/2025/privacy/most_common_strings_for_iab_usp.sql b/sql/2025/privacy/most_common_strings_for_iab_usp.sql index 1a447353ae9..837b7ff2375 100644 --- a/sql/2025/privacy/most_common_strings_for_iab_usp.sql +++ b/sql/2025/privacy/most_common_strings_for_iab_usp.sql @@ -1,5 +1,5 @@ -# Counts of US Privacy String values for websites using IAB US Privacy Framework -# cf.
https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md +-- Counts of US Privacy String values for websites using IAB US Privacy Framework +-- cf. https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md WITH usp_data AS ( SELECT diff --git a/sql/2025/privacy/most_common_tracker_categories.sql b/sql/2025/privacy/most_common_tracker_categories.sql index c93aeadcac8..31fe6d707d5 100644 --- a/sql/2025/privacy/most_common_tracker_categories.sql +++ b/sql/2025/privacy/most_common_tracker_categories.sql @@ -1,4 +1,5 @@ -# Percent of pages that deploy at least one tracker from each tracker category +-- Percent of pages that deploy at least one tracker from each tracker category + WITH whotracksme AS ( SELECT domain, diff --git a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql index b52d5491b09..4b692ee59bd 100644 --- a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql +++ b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql @@ -1,5 +1,4 @@ -#standardSQL -# Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) +-- Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) -- Extracting ARA API source registration details being passed by a given third-party (passed AS "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) diff --git a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql index 7bba913cc74..a2f98701bd4 100644 --- a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql 
+++ b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql @@ -1,5 +1,4 @@ -#standardSQL -# Privacy Sandbox Attestation and Related Websites JSON status (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) +-- Privacy Sandbox Attestation and Related Websites JSON status (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) WITH wellknown AS ( SELECT diff --git a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql index 67068c3fca4..da695ed3be7 100644 --- a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql +++ b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql @@ -1,4 +1,5 @@ -# Percent of websites using a fingerprinting library based on wappalyzer category +-- Percent of websites using a fingerprinting library based on wappalyzer category + WITH totals AS ( SELECT client, diff --git a/sql/2025/privacy/number_of_websites_with_dnt.sql b/sql/2025/privacy/number_of_websites_with_dnt.sql index 66d50089d83..9e79e93848c 100644 --- a/sql/2025/privacy/number_of_websites_with_dnt.sql +++ b/sql/2025/privacy/number_of_websites_with_dnt.sql @@ -1,4 +1,4 @@ -# Pages that request DNT status +-- Pages that request DNT status WITH blink AS ( SELECT DISTINCT diff --git a/sql/2025/privacy/number_of_websites_with_gpc.sql b/sql/2025/privacy/number_of_websites_with_gpc.sql index bf903a0ea42..667b7bf9f6d 100644 --- a/sql/2025/privacy/number_of_websites_with_gpc.sql +++ b/sql/2025/privacy/number_of_websites_with_gpc.sql @@ -1,4 +1,4 @@ -# Pages that provide `/.well-known/gpc.json` for Global Privacy Control +-- Pages that provide `/.well-known/gpc.json` for Global Privacy Control WITH pages AS ( SELECT diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql index 4865ef9cf4a..667051ca60b 100644 --- 
a/sql/2025/privacy/number_of_websites_with_iab.sql +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -1,5 +1,5 @@ -# Counts of pages with IAB Frameworks -# TODO: check presence of multiple frameworks per page +-- Counts of pages with IAB Frameworks +-- TODO: check presence of multiple frameworks per page WITH privacy_custom_metrics_data AS ( SELECT diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index 89e9eba8ccd..679a8576a2b 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,4 +1,5 @@ -# Pages that participate in the privacy-relayed origin trials +-- Pages that participate in the privacy-relayed origin trials + CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) ); diff --git a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql index a971f2e560d..df1c63cb830 100644 --- a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql +++ b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql @@ -1,5 +1,4 @@ -#standardSQL -# Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers +-- Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers -- Extracting PS APIs being called by a given third-party (passed as "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql index fe47074c3c2..2ce936f278c 100644 --- 
a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql @@ -1,5 +1,4 @@ -#standardSQL -# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) +-- Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql index 89da104a5c9..5150224a2b5 100644 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql +++ b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql @@ -1,5 +1,4 @@ -#standardSQL -# Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) +-- Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) -- Extracting ARA API source registration details being passed by a given third-party (passed as "key") CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) From cd572c8337e2d9367495058113e02e4df0638d02 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 3 Aug 2025 00:34:47 +0200 Subject: [PATCH 06/27] lint --- sql/2025/privacy/most_common_cname_domains.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index 6f0cb9ead0b..2941e119e34 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ 
b/sql/2025/privacy/most_common_cname_domains.sql @@ -66,7 +66,7 @@ cname_stats AS ( adguard_trackers.domain IS NOT NULL AS adguard_known_cname, whotracksme.category AS whotracksme_category, COUNT(DISTINCT page) AS number_of_pages, - ANY_VALUE(page_examples) + ANY_VALUE(page_examples) AS page_examples FROM cnames LEFT JOIN adguard_trackers ON ENDS_WITH(cnames.cname, adguard_trackers.domain) From bb5959a7c2ac1cefe8c529c97848b5d9b0290058 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 13 Sep 2025 00:10:09 +0200 Subject: [PATCH 07/27] Refactor origin trial functions for improved readability and structure --- ...of_websites_with_related_origin_trials.sql | 31 ++++++++++--------- ...of_websites_with_related_origin_trials.sql | 31 ++++++++++--------- sql/util/functions.sql | 24 -------------- 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql index e55b352eadf..b8f84911ad2 100644 --- a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,24 +1,27 @@ # Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( - SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) -); - -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) -RETURNS STRUCT< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT +< token STRING, origin STRING, feature STRING, expiry TIMESTAMP, is_subdomain BOOL, is_third_party BOOL -> AS ( - STRUCT( - DECODE_ORIGIN_TRIAL(token) AS token, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') 
AS INT64)) AS expiry, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party +> +DETERMINISTIC AS ( + ( + WITH decoded_token AS ( + SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded + ) + SELECT STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + ) + FROM decoded_token ) ); diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index 679a8576a2b..bc395d721bb 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,25 +1,28 @@ -- Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( - SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) -); - -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) -RETURNS STRUCT< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT +< token STRING, origin STRING, feature STRING, expiry TIMESTAMP, is_subdomain BOOL, is_third_party BOOL -> AS ( - STRUCT( - DECODE_ORIGIN_TRIAL(token) AS token, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party +> 
+DETERMINISTIC AS ( + ( + WITH decoded_token AS ( + SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded + ) + SELECT STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + ) + FROM decoded_token ) ); diff --git a/sql/util/functions.sql b/sql/util/functions.sql index becc0ee67f8..b9f861b3683 100644 --- a/sql/util/functions.sql +++ b/sql/util/functions.sql @@ -8,27 +8,3 @@ try { return null; } """; - -# Origin Trials -CREATE OR REPLACE FUNCTION `httparchive.fn.DECODE_ORIGIN_TRIAL`(token STRING) RETURNS STRING DETERMINISTIC AS ( - SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) -); - -CREATE OR REPLACE FUNCTION `httparchive.fn.PARSE_ORIGIN_TRIAL`(token STRING) -RETURNS STRUCT< - token STRING, - origin STRING, - feature STRING, - expiry TIMESTAMP, - is_subdomain BOOL, - is_third_party BOOL -> AS ( - STRUCT( - DECODE_ORIGIN_TRIAL(token) AS token, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.origin') AS origin, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(DECODE_ORIGIN_TRIAL(token), '$.isThirdParty') = 'true' AS is_third_party - ) -); From 55cec4f91f4af0ae0c1d28eb5b3789ebbc9a9ff5 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 13 Sep 2025 00:43:47 +0200 Subject: [PATCH 08/27] lint --- ...of_websites_with_related_origin_trials.sql | 19 +++++++++-------- ...of_websites_with_related_origin_trials.sql | 21 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git 
a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql index b8f84911ad2..b9f7e0878e2 100644 --- a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,6 +1,5 @@ # Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT -< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT< token STRING, origin STRING, feature STRING, @@ -13,13 +12,15 @@ DETERMINISTIC AS ( WITH decoded_token AS ( SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded ) - SELECT STRUCT( - decoded AS token, - JSON_VALUE(decoded, '$.origin') AS origin, - JSON_VALUE(decoded, '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + + SELECT + STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party ) FROM decoded_token ) diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql index bc395d721bb..a667110aab9 100644 --- a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql @@ -1,7 +1,6 @@ -- Pages that participate in the privacy-relayed origin trials -CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT -< +CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) 
RETURNS STRUCT< token STRING, origin STRING, feature STRING, @@ -14,14 +13,16 @@ DETERMINISTIC AS ( WITH decoded_token AS ( SELECT SAFE_CONVERT_BYTES_TO_STRING(SUBSTR(SAFE.FROM_BASE64(token), 70)) AS decoded ) - SELECT STRUCT( - decoded AS token, - JSON_VALUE(decoded, '$.origin') AS origin, - JSON_VALUE(decoded, '$.feature') AS feature, - TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, - JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, - JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party - ) + + SELECT + STRUCT( + decoded AS token, + JSON_VALUE(decoded, '$.origin') AS origin, + JSON_VALUE(decoded, '$.feature') AS feature, + TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, + JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, + JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party + ) FROM decoded_token ) ); From c3a2ee75e05f6b5c49790c71bd5726beda073be7 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sat, 13 Sep 2025 01:00:39 +0200 Subject: [PATCH 09/27] lint --- .../privacy/number_of_websites_with_related_origin_trials.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql index b9f7e0878e2..7a57ed673bd 100644 --- a/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql +++ b/sql/2024/privacy/number_of_websites_with_related_origin_trials.sql @@ -21,7 +21,7 @@ DETERMINISTIC AS ( TIMESTAMP_SECONDS(CAST(JSON_VALUE(decoded, '$.expiry') AS INT64)) AS expiry, JSON_VALUE(decoded, '$.isSubdomain') = 'true' AS is_subdomain, JSON_VALUE(decoded, '$.isThirdParty') = 'true' AS is_third_party - ) + ) FROM decoded_token ) ); From bd7506d2043b22c321df19d082ea199f363359ee Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> 
Date: Sun, 19 Oct 2025 20:33:35 +0200 Subject: [PATCH 10/27] make bq_to_sheets.ipynb runnable and add deps to requirements --- sql/util/bq_to_sheets.ipynb | 207 ++++++++++++++++++------------------ src/requirements.txt | 4 + 2 files changed, 105 insertions(+), 106 deletions(-) diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 6ba694515c4..c504f20d8dd 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -37,12 +37,13 @@ "outputs": [], "source": [ "# @title Download repo (skip when running locally)\n", - "# !git clone https://github.com/HTTPArchive/almanac.httparchive.org.git" + "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git\n", + "!cd almanac.httparchive.org/" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 2, "metadata": { "cellView": "form", "colab": { @@ -56,7 +57,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Branch: privacy-sql-2025\n" + "Branch: privacy-sql-2025\n", + "M\tsql/util/bq_to_sheets.ipynb\n", + "M\tsrc/requirements.txt\n", + "Already on 'privacy-sql-2025'\n", + "Your branch is up to date with 'origin/privacy-sql-2025'.\n", + "Already up to date.\n" ] } ], @@ -64,7 +70,7 @@ "# @title Update chapter branch (skip when running locally)\n", "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", "print(f\"Branch: {branch_name}\")\n", - "# !cd almanac.httparchive.org/ && git checkout $branch_name && git pull" + "!git checkout $branch_name && git pull" ] }, { @@ -74,34 +80,13 @@ "outputs": [], "source": [ "# Run to authenticate if in Colab (skip when running locally)\n", - "# from google.colab import auth\n", - "# auth.authenticate_user()" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - 
"text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.2\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" - ] - } - ], - "source": [ - "# Run to authenticate if not in Colab\n", - "# Prepare the environments as described in src/README.md\n", - "!pip install gspread gspread_dataframe tabulate -q" + "from google.colab import auth\n", + "auth.authenticate_user()" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 3, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" @@ -111,14 +96,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Spreadsheet authentication failed: \n", - "Note: Make sure you have access to the spreadsheet and proper Google credentials\n" + "Successfully connected to spreadsheet with 1 existing sheets\n" ] } ], "source": [ "# @title Authenticate\n", "import google.auth\n", + "from google.auth.transport.requests import Request\n", + "from google.oauth2.credentials import Credentials\n", "import os\n", "from google.cloud import bigquery\n", "\n", @@ -127,24 +113,33 @@ "\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", "\n", - "credentials, project = google.auth.default()\n", - "client = bigquery.Client()\n", + "# !gcloud auth application-default login --scopes=https://www.googleapis.com/auth/spreadsheets,https://www.googleapis.com/auth/drive,https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform\n", + "\n", + "# Define the scopes needed for both BigQuery and Google Sheets\n", + "SCOPES = [\n", + " 'https://www.googleapis.com/auth/spreadsheets',\n", + " 'https://www.googleapis.com/auth/drive',\n", + " 'https://www.googleapis.com/auth/bigquery'\n", + "]\n", + "\n", + "# Get credentials 
with proper scopes\n", + "credentials, project = google.auth.default(scopes=SCOPES)\n", + "\n", + "# Refresh credentials if needed\n", + "if hasattr(credentials, 'refresh') and hasattr(credentials, 'expired') and credentials.expired:\n", + " credentials.refresh(Request())\n", + "\n", + "client = bigquery.Client(credentials=credentials)\n", "gc = gspread.authorize(credentials)\n", "\n", - "try:\n", - " ss = gc.open_by_url(spreadsheet_url)\n", - " existing_sheets = [s.title for s in ss.worksheets()]\n", - " print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")\n", - "except Exception as e:\n", - " print(f'Spreadsheet authentication failed: {e}')\n", - " print(\"Note: Make sure you have access to the spreadsheet and proper Google credentials\")\n", - " ss = None\n", - " existing_sheets = []" + "ss = gc.open_by_url(spreadsheet_url)\n", + "existing_sheets = [s.title for s in ss.worksheets()]\n", + "print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 4, "metadata": { "cellView": "form", "colab": { @@ -159,69 +154,68 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| Query name | TB processed/billed | Sheet name | Upload skipped reason |\n", - "+===========================================================================+=======================+=======================================================================+=========================+\n", - "| cookies_top_first_party_names.sql | 0.081 | Cookies Top First Party Names | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_domains.sql | 0.083 | Cookies Top Third Party Domains | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_names.sql | 0.081 | Cookies Top Third Party Names | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_bounce_domains.sql | 6.166 | Most Common Bounce Domains | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_client_hints.sql | 5.217 | Most Common Client Hints | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | 0.021 | Most Common Cmps For Iab Tcf V2 | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_referrer_policy.sql | 3.66 | Most Common Referrer Policy | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_strings_for_iab_usp.sql | 0.021 | Most Common Strings For Iab Usp | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 1.573 | Number Of Ara Destinations Registered By Third Parties And Publishers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | 1.571 | Number Of Privacy Sandbox Attested Domains | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology.sql | 0.025 | Number Of Websites Per Technology | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology_category.sql | 0.016 | Number Of Websites Per Technology Category | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_client_hints.sql | 2.895 | Number Of Websites With Client Hints | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_dnt.sql | 0.021 | Number Of Websites With Dnt | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_gpc.sql | 5.235 | Number Of Websites With Gpc | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_iab.sql | 0.019 | Number Of Websites With Iab | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | 3.664 | Number Of Websites With Referrerpolicy | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | 5.217 | Number Of Websites With Related Origin Trials | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | 0.978 | Number Of Websites With Whotracksme Trackers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 1.573 | Privacy Sandbox Adoption By Third Parties By Publishers | Dry run |\n", - 
"+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | 1.573 | Top Ara Destinations Registered By Most Publishers | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | 1.573 | Top Ara Destinations Registered By Most Third Parties | Dry run |\n", - "+---------------------------------------------------------------------------+-----------------------+-----------------------------------------------------------------------+-------------------------+\n" + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| Query name | TB processed - estimate | Sheet name | Upload skipped reason |\n", + "+===========================================================================+===========================+=======================================================================+=========================+\n", + "| cookies_top_first_party_names.sql | 0 | Cookies Top First Party Names | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| cookies_top_third_party_domains.sql | 0 | Cookies Top Third Party Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + 
"| cookies_top_third_party_names.sql | 0 | Cookies Top Third Party Names | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_bounce_domains.sql | 1.716 | Most Common Bounce Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_client_hints.sql | 1.337 | Most Common Client Hints | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | 0.011 | Most Common Cmps For Iab Tcf V2 | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_referrer_policy.sql | 1.012 | Most Common Referrer Policy | |\n", + 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_strings_for_iab_usp.sql | 0.011 | Most Common Strings For Iab Usp | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 0.855 | Number Of Ara Destinations Registered By Third Parties And Publishers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | 0.854 | Number Of Privacy Sandbox Attested Domains | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology.sql | 0.013 | Number Of Websites Per Technology | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_per_technology_category.sql | 0.008 | Number Of Websites Per Technology Category | |\n", + 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_client_hints.sql | 1.863 | Number Of Websites With Client Hints | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_dnt.sql | 0.011 | Number Of Websites With Dnt | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_gpc.sql | 1.346 | Number Of Websites With Gpc | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_iab.sql | 0.01 | Number Of Websites With Iab | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | |\n", + 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | 0.493 | Number Of Websites With Referrerpolicy | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | 2.193 | Number Of Websites With Related Origin Trials | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | 0.494 | Number Of Websites With Whotracksme Trackers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 0.855 | Privacy Sandbox Adoption By Third Parties By Publishers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | 0.855 | Top Ara Destinations Registered By Most Publishers | |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | 0.855 | Top Ara Destinations Registered By Most Third Parties 
| |\n", + "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n" ] } ], "source": [ "# @title Upload query results\n", - "\n", "import glob\n", "import re\n", "from tabulate import tabulate\n", @@ -229,13 +223,14 @@ "import os\n", "\n", "filename_match = '\\\\.sql$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "filename_match_exclude = '^$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", - "dry_run = True # @param {type: \"boolean\"}\n", + "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = False # @param {type: \"boolean\"}\n", - "maximum_tb_billed = 0.5 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "maximum_tb_billed = 7 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", "\n", - "filename_include_regexp = r'{}'.format(filename_match)\n", - "filename_exclude_regexp = r'{}'.format(filename_match_exclude)\n", + "# Handle empty filename_match and filename_match_exclude\n", + "filename_include_regexp = r'.*' if not filename_match or filename_match == '*' else r'{}'.format(filename_match)\n", + "filename_exclude_regexp = r'^$' if not filename_match_exclude else r'{}'.format(filename_match_exclude)\n", "\n", "folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", "\n", diff --git a/src/requirements.txt b/src/requirements.txt index 27be9241089..19c398b6671 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -10,3 +10,7 @@ sqlfluff==3.4.2 pandas==2.3.3 google-cloud-bigquery==3.38.0 requests==2.32.5 +db-dtypes==1.4.3 +tabulate==0.9.0 +gspread==6.2.1 +gspread-dataframe==4.0.0 From 
08aa531fe0bdba4e581a6b83144a035de4b016fe Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:28:53 +0200 Subject: [PATCH 11/27] Refactor privacy queries and utilities; make bq_to_sheets runnable --- .../most_common_cmps_for_iab_tcf_v2.sql | 5 +- .../privacy/number_of_websites_with_iab.sql | 96 ++++-- sql/util/bq_to_sheets.ipynb | 308 +++++++++--------- sql/util/bq_writer.py | 1 - sql/util/haveibeenpwned.py | 59 ++-- sql/util/whotracksme_trackers.py | 2 +- src/requirements.txt | 1 + 7 files changed, 244 insertions(+), 228 deletions(-) diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql index 09dce3f75e8..6e4541a41d2 100644 --- a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -6,12 +6,11 @@ WITH cmps AS ( SELECT client, page, - SAFE.STRING(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, + SAFE.INT64(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages FROM `httparchive.crawl.pages` WHERE - date = '2025-07-01' AND - is_root_page = TRUE + date = '2025-07-01' ) SELECT diff --git a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql index 667051ca60b..017ca4aac6f 100644 --- a/sql/2025/privacy/number_of_websites_with_iab.sql +++ b/sql/2025/privacy/number_of_websites_with_iab.sql @@ -2,36 +2,6 @@ -- TODO: check presence of multiple frameworks per page WITH privacy_custom_metrics_data AS ( - SELECT - client, - custom_metrics.privacy AS metrics - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -) - -SELECT - client, - number_of_pages_with_tcfv1 / number_of_pages AS pct_pages_with_tcfv1, - number_of_pages_with_tcfv1, - number_of_pages_with_tcfv2 / number_of_pages AS pct_pages_with_tcfv2, - number_of_pages_with_tcfv2, 
- number_of_pages_with_usp / number_of_pages AS pct_pages_with_usp, - number_of_pages_with_usp, - number_of_pages_with_tcf / number_of_pages AS pct_pages_with_tcf, - number_of_pages_with_tcf, - number_of_pages_with_any / number_of_pages AS pct_pages_with_any, - number_of_pages_with_any, - number_of_pages_with_tcfv1_compliant / number_of_pages AS pct_pages_with_tcfv1_compliant, - number_of_pages_with_tcfv1_compliant, - number_of_pages_with_tcfv2_compliant / number_of_pages AS pct_pages_with_tcfv2_compliant, - number_of_pages_with_tcfv2_compliant, - number_of_pages_with_gpp / number_of_pages AS pct_pages_with_gpp, - number_of_pages_with_gpp, - number_of_pages_with_gpp_data / number_of_pages AS pct_pages_with_gpp_data, - number_of_pages_with_gpp_data -FROM ( SELECT client, COUNT(0) AS number_of_pages, @@ -54,8 +24,70 @@ FROM ( SAFE.BOOL(metrics.iab_tcf_v1.compliant_setup) AS tcfv1_compliant, SAFE.BOOL(metrics.iab_tcf_v2.compliant_setup) AS tcfv2_compliant, metrics.iab_gpp.data IS NOT NULL AS gpp_data - FROM - privacy_custom_metrics_data + FROM ( + SELECT + client, + custom_metrics.privacy AS metrics + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' AND + is_root_page = TRUE + ) ) GROUP BY client ) + +SELECT + client, + metric.metric, + metric.pct_pages, + metric.number_of_pages +FROM ( + SELECT + client, + ARRAY<STRUCT<metric STRING, pct_pages FLOAT64, number_of_pages INT64>>[STRUCT( + 'tcfv1', + number_of_pages_with_tcfv1 / number_of_pages, + number_of_pages_with_tcfv1 + ), STRUCT( + 'tcfv2', + number_of_pages_with_tcfv2 / number_of_pages, + number_of_pages_with_tcfv2 + ), STRUCT( + 'usp', + number_of_pages_with_usp / number_of_pages, + number_of_pages_with_usp + ), STRUCT( + 'tcf', + number_of_pages_with_tcf / number_of_pages, + number_of_pages_with_tcf + ), STRUCT( + 'any_framework', + number_of_pages_with_any / number_of_pages, + number_of_pages_with_any + ), STRUCT( + 'tcfv1_compliant', + number_of_pages_with_tcfv1_compliant / number_of_pages, + number_of_pages_with_tcfv1_compliant + ), STRUCT( +
'tcfv2_compliant', + number_of_pages_with_tcfv2_compliant / number_of_pages, + number_of_pages_with_tcfv2_compliant + ), STRUCT( + 'gpp', + number_of_pages_with_gpp / number_of_pages, + number_of_pages_with_gpp + ), STRUCT( + 'gpp_data_available', + number_of_pages_with_gpp_data / number_of_pages, + number_of_pages_with_gpp_data + )] AS metrics + FROM privacy_custom_metrics_data +), + UNNEST(metrics) AS metric +ORDER BY + client; diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index c504f20d8dd..5c1a47985c8 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,14 +9,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" }, "outputs": [], "source": [ - "# @title Configure the chapter to process\n", + "# @title Configuration\n", "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", "almanac_year = 2025 #@param {type: \"integer\"}\n", "chapter_name = 'privacy' #@param {type: \"string\"}\n", @@ -36,7 +36,7 @@ }, "outputs": [], "source": [ - "# @title Download repo (skip when running locally)\n", + "# @title Download repo (Colab only - skip when running locally)\n", "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git\n", "!cd almanac.httparchive.org/" ] @@ -67,9 +67,9 @@ } ], "source": [ - "# @title Update chapter branch (skip when running locally)\n", + "# @title Update chapter branch (Colab only - skip when running locally)\n", "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", - "print(f\"Branch: {branch_name}\")\n", + "print(f\"Switching to branch: {branch_name}\")\n", "!git checkout $branch_name && git pull" ] }, @@ -79,14 +79,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Run to authenticate if in Colab (skip when running locally)\n", + "# @title Authenticate (Colab only - skip when running locally)\n", "from google.colab import auth\n", "auth.authenticate_user()" ] }, { "cell_type": "code", 
- "execution_count": 3, + "execution_count": 12, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" @@ -96,50 +96,40 @@ "name": "stdout", "output_type": "stream", "text": [ - "Successfully connected to spreadsheet with 1 existing sheets\n" + "✓ Connected to spreadsheet with 28 existing sheets\n" ] } ], "source": [ - "# @title Authenticate\n", + "# @title Setup BigQuery and Google Sheets clients\n", "import google.auth\n", - "from google.auth.transport.requests import Request\n", - "from google.oauth2.credentials import Credentials\n", "import os\n", "from google.cloud import bigquery\n", - "\n", "import gspread\n", "from gspread_dataframe import set_with_dataframe\n", "\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", "\n", - "# !gcloud auth application-default login --scopes=https://www.googleapis.com/auth/spreadsheets,https://www.googleapis.com/auth/drive,https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/cloud-platform\n", - "\n", - "# Define the scopes needed for both BigQuery and Google Sheets\n", + "# Authenticate with required scopes for BigQuery and Google Sheets\n", "SCOPES = [\n", " 'https://www.googleapis.com/auth/spreadsheets',\n", " 'https://www.googleapis.com/auth/drive',\n", " 'https://www.googleapis.com/auth/bigquery'\n", "]\n", "\n", - "# Get credentials with proper scopes\n", "credentials, project = google.auth.default(scopes=SCOPES)\n", - "\n", - "# Refresh credentials if needed\n", - "if hasattr(credentials, 'refresh') and hasattr(credentials, 'expired') and credentials.expired:\n", - " credentials.refresh(Request())\n", - "\n", "client = bigquery.Client(credentials=credentials)\n", "gc = gspread.authorize(credentials)\n", "\n", + "# Connect to spreadsheet\n", "ss = gc.open_by_url(spreadsheet_url)\n", "existing_sheets = [s.title for s in ss.worksheets()]\n", - "print(f\"Successfully connected to spreadsheet with {len(existing_sheets)} existing sheets\")" + "print(f\"✓ Connected to spreadsheet with 
{len(existing_sheets)} existing sheets\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 38, "metadata": { "cellView": "form", "colab": { @@ -154,169 +144,175 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| Query name | TB processed - estimate | Sheet name | Upload skipped reason |\n", - "+===========================================================================+===========================+=======================================================================+=========================+\n", - "| cookies_top_first_party_names.sql | 0 | Cookies Top First Party Names | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_domains.sql | 0 | Cookies Top Third Party Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| cookies_top_third_party_names.sql | 0 | Cookies Top Third Party Names | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_bounce_domains.sql | 1.716 | Most Common Bounce Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_client_hints.sql | 1.337 | Most Common Client Hints | 
|\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | 0.011 | Most Common Cmps For Iab Tcf V2 | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_cname_domains.sql | 0.021 | Most Common Cname Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_referrer_policy.sql | 1.012 | Most Common Referrer Policy | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_strings_for_iab_usp.sql | 0.011 | Most Common Strings For Iab Usp | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| most_common_tracker_categories.sql | 0.973 | Most Common Tracker Categories | |\n", - 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | 0.855 | Number Of Ara Destinations Registered By Third Parties And Publishers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | 0.854 | Number Of Privacy Sandbox Attested Domains | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology.sql | 0.013 | Number Of Websites Per Technology | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_per_technology_category.sql | 0.008 | Number Of Websites Per Technology Category | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | 0.025 | Number Of Websites Using Each Fingerprinting | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_client_hints.sql | 1.863 | Number Of Websites With Client Hints | |\n", - 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_dnt.sql | 0.011 | Number Of Websites With Dnt | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_gpc.sql | 1.346 | Number Of Websites With Gpc | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_iab.sql | 0.01 | Number Of Websites With Iab | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | 0.973 | Number Of Websites With Nb Trackers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | 0.493 | Number Of Websites With Referrerpolicy | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | 2.193 | Number Of Websites With Related Origin Trials | |\n", - 
"+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | 0.494 | Number Of Websites With Whotracksme Trackers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | 0.855 | Privacy Sandbox Adoption By Third Parties By Publishers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | 0.855 | Top Ara Destinations Registered By Most Publishers | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | 0.855 | Top Ara Destinations Registered By Most Third Parties | |\n", - "+---------------------------------------------------------------------------+---------------------------+-----------------------------------------------------------------------+-------------------------+\n" + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+=================================+==========================+\n", + "| cookies_top_first_party_names.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| cookies_top_third_party_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| cookies_top_third_party_names.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | 0 | Most Common Cmps For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", + "\n", + "✓ Processed 27 queries\n" ] } ], "source": [ - "# @title Upload query results\n", + "# @title Upload query results to Google Sheets\n", "import glob\n", "import re\n", "from tabulate import tabulate\n", - "from IPython.display import clear_output\n", - "import os\n", + "from IPython.display import clear_output, display, HTML\n", "\n", - "filename_match = '\\\\.sql$' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "# Query filters and options\n", + "filename_match = 'most_common_cmps_for_iab_tcf_v2.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", - "overwrite_sheets = False # @param {type: \"boolean\"}\n", - "maximum_tb_billed = 7 # @param {type: \"raw\", placeholder: \"Insert a number or empty to disable\"}\n", + "overwrite_sheets = True # @param {type: \"boolean\"}\n", + "maximum_tb_billed = 7 # @param {type: \"raw\", placeholder: \"Max TB to bill per query\"}\n", "\n", - "# Handle empty filename_match and filename_match_exclude\n", - "filename_include_regexp = r'.*' if not filename_match or filename_match == '*' else r'{}'.format(filename_match)\n", - 
"filename_exclude_regexp = r'^$' if not filename_match_exclude else r'{}'.format(filename_match_exclude)\n", + "# Setup file filters\n", + "filename_include_regexp = r'.*' if not filename_match or filename_match == '*' else filename_match\n", + "filename_exclude_regexp = r'^$' if not filename_match_exclude else filename_match_exclude\n", "\n", - "folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", + "# Build path to SQL files\n", + "sql_folder = os.path.join(os.getcwd(), '../', str(almanac_year), chapter_name.lower(), '*.sql')\n", + "print(f\"Looking for SQL files in: {sql_folder}\")\n", + "sql_files = sorted(glob.glob(sql_folder))\n", "\n", - "print(f\"Looking for SQL files in: {folder}\")\n", + "if not sql_files:\n", + " print(\"❌ No SQL files found. Check the folder path.\")\n", + "else:\n", + " print(f\"Found {len(sql_files)} SQL files\\n\")\n", "\n", - "# Print formatted logs\n", - "queries_processed_log = []\n", - "def print_logs_table(log=None, append=True):\n", - " if log:\n", - " queries_processed_log.append(log)\n", - " table = tabulate(queries_processed_log, headers=['Query name', 'TB processed - estimate', 'Sheet name', 'Upload skipped reason'], tablefmt=\"grid\")\n", - " if not append:\n", - " del queries_processed_log[-1]\n", - " clear_output(wait=True)\n", - " print(table)\n", + " # Progress tracking\n", + " queries_processed_log = []\n", "\n", - "# Find matching SQL queries and save results to Google Sheets.\n", - "sql_files = list(glob.iglob(folder))\n", - "print(f\"Found {len(sql_files)} SQL files\")\n", + " def log_result(filename, tb_processed=None, sheet_name=None, skip_reason=None, preview=False):\n", + " \"\"\"Add result to log and display table\"\"\"\n", + " log_entry = [filename, tb_processed, sheet_name, skip_reason]\n", + " if not preview:\n", + " queries_processed_log.append(log_entry)\n", "\n", - "if not sql_files:\n", - " print(\"No SQL files found. 
Check the folder path.\")\n", - "else:\n", - " for filepath in sorted(sql_files):\n", + " # Build table from current log plus preview entry if needed\n", + " display_log = queries_processed_log if not preview else queries_processed_log + [log_entry]\n", + " table = tabulate(display_log, headers=['Query', 'TB Billed', 'Sheet', 'Status/Skip Reason'], tablefmt=\"grid\")\n", + " clear_output(wait=True)\n", + " print(table)\n", + "\n", + " # Process each SQL file\n", + " for filepath in sql_files:\n", " filename = os.path.basename(filepath)\n", "\n", - " print_logs_table([filename, 'Processing...', 'Processing...', 'Processing...'], append=False)\n", + " # Show processing status\n", + " log_result(filename, 'Processing...', 'Processing...', 'Processing...', preview=True)\n", "\n", - " if re.search(filename_include_regexp, filename) and not re.search(filename_exclude_regexp, filename):\n", + " # Check if filename matches filters\n", + " if not re.search(filename_include_regexp, filename) or re.search(filename_exclude_regexp, filename):\n", + " log_result(filename, None, None, 'Filename filter mismatch')\n", + " continue\n", "\n", - " with open(filepath) as f:\n", - " query = f.read()\n", + " # Read query\n", + " with open(filepath) as f:\n", + " query = f.read()\n", "\n", - " try:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(dry_run = True)\n", - " )\n", - " except Exception as e:\n", - " print_logs_table([filename, None, None, f'Dry run query error:\\n{e}'])\n", - " continue\n", + " # Estimate query cost (dry run)\n", + " try:\n", + " dry_run_response = client.query(query, job_config=bigquery.QueryJobConfig(dry_run=True))\n", + " tb_processed = dry_run_response.total_bytes_processed / 1024**4\n", + " except Exception as e:\n", + " log_result(filename, None, None, f'Dry run error: {str(e)[:100]}...')\n", + " continue\n", "\n", - " tb_processed = response.total_bytes_processed/1024/1024/1024/1024\n", - " sheet_title = 
re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", + " # Generate sheet title from filename\n", + " sheet_title = re.sub(r'(\\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()\n", "\n", - " if sheet_title in existing_sheets:\n", - " if overwrite_sheets:\n", - " st = ss.worksheet(sheet_title)\n", - " else:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Sheet already exists'])\n", - " continue\n", + " # Skip execution if dry run mode\n", + " if dry_run:\n", + " log_result(filename, f'{tb_processed:.3f}', sheet_title, 'Dry run mode')\n", + " continue\n", + "\n", + " # Check if sheet already exists\n", + " if sheet_title in existing_sheets and not overwrite_sheets:\n", + " log_result(filename, f'{tb_processed:.3f}', sheet_title, 'Sheet exists (set overwrite_sheets=True)')\n", + " continue\n", "\n", - " if dry_run:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'Dry run'])\n", - " continue\n", + " # Execute query and upload to Sheets\n", + " try:\n", + " # Run query with billing limit\n", + " job_config = bigquery.QueryJobConfig()\n", + " if maximum_tb_billed:\n", + " job_config.maximum_bytes_billed = int(maximum_tb_billed * 1024**4)\n", "\n", - " # Skip actual execution if no spreadsheet connection\n", - " if ss is None:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', sheet_title, 'No spreadsheet connection'])\n", - " continue\n", + " query_response = client.query(query, job_config=job_config)\n", + " df = query_response.to_dataframe()\n", "\n", - " try:\n", - " if maximum_tb_billed:\n", - " response = client.query(\n", - " query,\n", - " job_config = bigquery.QueryJobConfig(\n", - " maximum_bytes_billed = maximum_tb_billed*1024*1024*1024*1024\n", - " )\n", - " )\n", - " else:\n", - " response = client.query(query)\n", + " # Get or create sheet\n", + " if sheet_title in existing_sheets:\n", + " sheet = ss.worksheet(sheet_title)\n", + " else:\n", + " sheet = ss.add_worksheet(sheet_title, 
rows=1, cols=1)\n", + " existing_sheets.append(sheet_title)\n", "\n", - " df = response.to_dataframe()\n", - " if ('st' not in locals() or st.title != sheet_title):\n", - " st = ss.add_worksheet(sheet_title, rows = 1, cols = 1)\n", - " set_with_dataframe(st, df, resize=False)\n", + " # Upload data\n", + " set_with_dataframe(sheet, df, resize=False)\n", "\n", - " tb_billed = response.total_bytes_billed/1024/1024/1024/1024\n", - " print_logs_table([filename, f'{tb_billed:.3f}', sheet_title, None])\n", + " tb_billed = query_response.total_bytes_billed / 1024**4\n", + " log_result(filename, f'{tb_billed:.3f}', sheet_title, '✓ Uploaded')\n", "\n", - " except Exception as e:\n", - " print_logs_table([filename, f'{tb_processed:.3f}', None, f'Query error:\\n{e}'])\n", - " continue\n", + " except Exception as e:\n", + " log_result(filename, f'{tb_processed:.3f}', None, f'Query error: {str(e)[:100]}...')\n", "\n", - " else:\n", - " print_logs_table([filename, None, None, 'Filename mismatch'])" + " print(f\"\\n✓ Processed {len(queries_processed_log)} queries\")" ] } ], diff --git a/sql/util/bq_writer.py b/sql/util/bq_writer.py index 3355dcfcb94..a72b6784ee0 100644 --- a/sql/util/bq_writer.py +++ b/sql/util/bq_writer.py @@ -18,7 +18,6 @@ def write_to_bq(df, table_id, schema, write_disposition="WRITE_APPEND"): client = bigquery.Client() job_config = bigquery.LoadJobConfig( - source_format=bigquery.SourceFormat.CSV, write_disposition=write_disposition, schema=schema, ) diff --git a/sql/util/haveibeenpwned.py b/sql/util/haveibeenpwned.py index 1ea1bb7f75f..8ad2fd1cba6 100644 --- a/sql/util/haveibeenpwned.py +++ b/sql/util/haveibeenpwned.py @@ -1,50 +1,33 @@ """ -This module retrieves data from the "haveibeenpwned" API and loads it into a BigQuery table.
+ """ import json -from datetime import datetime as DateTime +from datetime import datetime -import pandas +import pandas as pd import requests # pylint: disable=import-error -from bq_writer import write_to_bq, bigquery - - -# Retrieve data from the "haveibeenpwned" API -breaches = json.loads( - requests.get("https://haveibeenpwned.com/api/v2/breaches", timeout=10).content -) -df = pandas.DataFrame(breaches) - -year = DateTime.now().year -df["date"] = DateTime(year, 6, 1).date() -df["Name"] = df["Name"].astype(str) -df["Title"] = df["Title"].astype(str) -df["Domain"] = df["Domain"].astype(str) -df["BreachDate"] = pandas.to_datetime( - df["BreachDate"], format="%Y-%m-%d", errors="coerce" -).dt.date -df["AddedDate"] = pandas.to_datetime( - df["AddedDate"], format="%Y-%m-%d", errors="coerce" -).dt.date -df["ModifiedDate"] = pandas.to_datetime( - df["ModifiedDate"], format="%Y-%m-%d", errors="coerce" -).dt.date -df["Description"] = df["Description"].astype(str) -df["LogoPath"] = df["LogoPath"].astype(str) -df["DataClasses"] = df["DataClasses"].apply(json.dumps) +from bq_writer import bigquery, write_to_bq -# Append to httparchive.almanac.breaches +# Fetch breach data from API +response = requests.get("https://haveibeenpwned.com/api/v2/breaches", timeout=10) +breaches = response.json() +df = pd.DataFrame(breaches) +# Convert date fields +df["BreachDate"] = pd.to_datetime(df["BreachDate"], errors="coerce") +df["AddedDate"] = pd.to_datetime(df["AddedDate"], errors="coerce") +df["ModifiedDate"] = pd.to_datetime(df["ModifiedDate"], errors="coerce") +# Define BigQuery schema schema = [ - bigquery.SchemaField("date", "DATE"), bigquery.SchemaField("Name", "STRING"), bigquery.SchemaField("Title", "STRING"), bigquery.SchemaField("Domain", "STRING"), bigquery.SchemaField("BreachDate", "DATE"), - bigquery.SchemaField("AddedDate", "DATE"), - bigquery.SchemaField("ModifiedDate", "DATE"), + bigquery.SchemaField("AddedDate", "TIMESTAMP"), + bigquery.SchemaField("ModifiedDate", 
"TIMESTAMP"), bigquery.SchemaField("PwnCount", "INTEGER"), bigquery.SchemaField("Description", "STRING"), bigquery.SchemaField("LogoPath", "STRING"), @@ -53,7 +36,13 @@ bigquery.SchemaField("IsSensitive", "BOOLEAN"), bigquery.SchemaField("IsRetired", "BOOLEAN"), bigquery.SchemaField("IsSpamList", "BOOLEAN"), - bigquery.SchemaField("DataClasses", "STRING"), + bigquery.SchemaField("IsMalware", "BOOLEAN"), + bigquery.SchemaField("IsSubscriptionFree", "BOOLEAN"), + bigquery.SchemaField("IsStealerLog", "BOOLEAN"), + bigquery.SchemaField("DataClasses", "STRING", mode="REPEATED"), + bigquery.SchemaField("Attribution", "STRING"), + bigquery.SchemaField("DisclosureUrl", "STRING"), ] -write_to_bq(df, "httparchive.almanac.breaches", schema) +# Write to BigQuery +write_to_bq(df, "httparchive.almanac.breaches", schema, write_disposition="WRITE_TRUNCATE") diff --git a/sql/util/whotracksme_trackers.py b/sql/util/whotracksme_trackers.py index ec68f922e17..ad78ecf38ea 100644 --- a/sql/util/whotracksme_trackers.py +++ b/sql/util/whotracksme_trackers.py @@ -18,7 +18,7 @@ TRACKERS_QUERY = """ SELECT - '2024-06-01' AS date, + '2025-07-01' AS date, categories.name as category, tracker, domain diff --git a/src/requirements.txt b/src/requirements.txt index 19c398b6671..18b2eb93de8 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -8,6 +8,7 @@ sqlfluff==3.4.2 # sql/util/* dependencies pandas==2.3.3 +pandas-gbq==0.29.2 google-cloud-bigquery==3.38.0 requests==2.32.5 db-dtypes==1.4.3 From c2566e672516801efb80aad319bc18fa3fbc55ff Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:52:40 +0200 Subject: [PATCH 12/27] Potential fix for code scanning alert no. 
640: Unused import Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- sql/util/haveibeenpwned.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/util/haveibeenpwned.py b/sql/util/haveibeenpwned.py index 8ad2fd1cba6..38d27b86f36 100644 --- a/sql/util/haveibeenpwned.py +++ b/sql/util/haveibeenpwned.py @@ -4,7 +4,6 @@ """ import json -from datetime import datetime import pandas as pd import requests # pylint: disable=import-error From 42da6adb51c26897ba17d07f304cbf81d57a8062 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:54:36 +0200 Subject: [PATCH 13/27] Remove unused json import --- sql/util/haveibeenpwned.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/util/haveibeenpwned.py b/sql/util/haveibeenpwned.py index 38d27b86f36..269adf2dd6e 100644 --- a/sql/util/haveibeenpwned.py +++ b/sql/util/haveibeenpwned.py @@ -3,8 +3,6 @@ """ -import json - import pandas as pd import requests # pylint: disable=import-error from bq_writer import bigquery, write_to_bq From 8e543efe823aa4535e195507a31f7e861de29351 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 11 Jan 2026 03:00:08 +0100 Subject: [PATCH 14/27] Add SQL scripts for tracking first-party and third-party cookies; remove outdated scripts and update requirements --- sql/2025/privacy/cookies_top_first_party.sql | 27 +++++ .../privacy/cookies_top_first_party_names.sql | 37 ------ sql/2025/privacy/cookies_top_third_party.sql | 23 ++++ .../cookies_top_third_party_domains.sql | 37 ------ .../privacy/cookies_top_third_party_names.sql | 37 ------ .../privacy/most_common_bounce_domains.sql | 102 ++++++++--------- sql/2025/privacy/most_common_client_hints.sql | 89 +++++++-------- .../most_common_cmps_for_iab_tcf_v2.sql | 36 +++--- .../privacy/most_common_cname_domains.sql | 4 +- sql/util/bq_to_sheets.ipynb | 107 
++---------------- src/requirements.txt | 2 + 11 files changed, 166 insertions(+), 335 deletions(-) create mode 100644 sql/2025/privacy/cookies_top_first_party.sql delete mode 100644 sql/2025/privacy/cookies_top_first_party_names.sql create mode 100644 sql/2025/privacy/cookies_top_third_party.sql delete mode 100644 sql/2025/privacy/cookies_top_third_party_domains.sql delete mode 100644 sql/2025/privacy/cookies_top_third_party_names.sql diff --git a/sql/2025/privacy/cookies_top_first_party.sql b/sql/2025/privacy/cookies_top_first_party.sql new file mode 100644 index 00000000000..917e9cf41ab --- /dev/null +++ b/sql/2025/privacy/cookies_top_first_party.sql @@ -0,0 +1,27 @@ +/* Most common cookie names, by number of domains on which they appear. +Goal is to identify common trackers that use first-party cookies across sites. +*/ + +FROM `httparchive.crawl.pages` +|> WHERE date = '2025-07-01' -- AND rank = 1000 +|> EXTEND COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains +|> JOIN UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie +|> EXTEND +NET.HOST(root_page) AS firstparty_domain, +NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_domain, +SAFE.STRING(cookie.name) AS cookie_name +|> WHERE ENDS_WITH('.' || firstparty_domain, '.' 
|| cookie_domain) +|> AGGREGATE +COUNT(DISTINCT firstparty_domain) AS domain_count, +COUNT(DISTINCT firstparty_domain) / ANY_VALUE(total_domains) AS pct_domains +GROUP BY client, cookie_name +|> PIVOT ( + ANY_VALUE(domain_count) AS domain_count, + ANY_VALUE(pct_domains) AS pct_domains + FOR client IN ('desktop', 'mobile') +) +|> RENAME +pct_domains_mobile AS mobile, +pct_domains_desktop AS desktop +|> ORDER BY domain_count_mobile + domain_count_desktop DESC +|> LIMIT 1000 diff --git a/sql/2025/privacy/cookies_top_first_party_names.sql b/sql/2025/privacy/cookies_top_first_party_names.sql deleted file mode 100644 index c9d689c6c06..00000000000 --- a/sql/2025/privacy/cookies_top_first_party_names.sql +++ /dev/null @@ -1,37 +0,0 @@ --- Most common cookie names, by number of domains on which they appear. Goal is to identify common trackers that use first-party cookies across sites. - -WITH pages AS ( - SELECT - client, - root_page, - custom_metrics, - COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' -), - -cookies AS ( - SELECT - client, - cookie, - NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, - NET.HOST(root_page) AS firstparty_host, - total_domains - FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie -) - -SELECT - client, - COUNT(DISTINCT firstparty_host) AS domain_count, - COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains, - SAFE.STRING(cookie.name) AS cookie_name -FROM cookies -WHERE firstparty_host LIKE '%' || cookie_host -GROUP BY - client, - cookie_name -ORDER BY - domain_count DESC, - client DESC -LIMIT 500 diff --git a/sql/2025/privacy/cookies_top_third_party.sql b/sql/2025/privacy/cookies_top_third_party.sql new file mode 100644 index 00000000000..9122940796d --- /dev/null +++ b/sql/2025/privacy/cookies_top_third_party.sql @@ -0,0 +1,23 @@ +FROM `httparchive.crawl.pages` +|> WHERE date = '2025-07-01' -- AND rank = 
1000 +|> EXTEND COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains +|> JOIN UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie +|> EXTEND +NET.HOST(root_page) AS firstparty_domain, +NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_domain, +NET.HOST(SAFE.STRING(cookie.domain)) || ' / ' || SAFE.STRING(cookie.name) AS cookie_details +|> WHERE NOT ENDS_WITH('.' || firstparty_domain, '.' || cookie_domain) +|> AGGREGATE +COUNT(DISTINCT firstparty_domain) AS domain_count, +COUNT(DISTINCT firstparty_domain) / ANY_VALUE(total_domains) AS pct_domains +GROUP BY client, cookie_details +|> PIVOT ( + ANY_VALUE(domain_count) AS domain_count, + ANY_VALUE(pct_domains) AS pct_domains + FOR client IN ('desktop', 'mobile') +) +|> RENAME +pct_domains_mobile AS mobile, +pct_domains_desktop AS desktop +|> ORDER BY domain_count_mobile + domain_count_desktop DESC +|> LIMIT 1000 diff --git a/sql/2025/privacy/cookies_top_third_party_domains.sql b/sql/2025/privacy/cookies_top_third_party_domains.sql deleted file mode 100644 index a4d1ea09aaa..00000000000 --- a/sql/2025/privacy/cookies_top_third_party_domains.sql +++ /dev/null @@ -1,37 +0,0 @@ -WITH pages AS ( - SELECT - page, - client, - root_page, - custom_metrics, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' -), - -cookies AS ( - SELECT - client, - page, - cookie, - NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, - NET.HOST(root_page) AS firstparty_host, - total_pages - FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie -) - -SELECT - client, - cookie_host, - COUNT(DISTINCT page) AS page_count, - COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages -FROM cookies -WHERE firstparty_host NOT LIKE '%' || cookie_host -GROUP BY - client, - cookie_host -ORDER BY - page_count DESC, - client -LIMIT 500 diff --git a/sql/2025/privacy/cookies_top_third_party_names.sql 
b/sql/2025/privacy/cookies_top_third_party_names.sql deleted file mode 100644 index 8c5eb2cbc0d..00000000000 --- a/sql/2025/privacy/cookies_top_third_party_names.sql +++ /dev/null @@ -1,37 +0,0 @@ --- Most common cookie names, by number of domains on which they appear. Goal is to identify common trackers that set cookies using many domains. - -WITH pages AS ( - SELECT - client, - root_page, - custom_metrics, - COUNT(DISTINCT net.host(root_page)) OVER (PARTITION BY client) AS total_domains - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' -), - -cookies AS ( - SELECT - client, - cookie, - NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_host, - NET.HOST(root_page) AS firstparty_host, - total_domains - FROM pages, - UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie -) - -SELECT - client, - COUNT(DISTINCT firstparty_host) AS domain_count, - COUNT(DISTINCT firstparty_host) / ANY_VALUE(total_domains) AS pct_domains, - SAFE.STRING(cookie.name) AS cookie_name -FROM cookies -WHERE firstparty_host NOT LIKE '%' || cookie_host -GROUP BY - client, - cookie_name -ORDER BY - domain_count DESC, - client DESC -LIMIT 500 diff --git a/sql/2025/privacy/most_common_bounce_domains.sql b/sql/2025/privacy/most_common_bounce_domains.sql index b13f7552f8b..49280cb1a0a 100644 --- a/sql/2025/privacy/most_common_bounce_domains.sql +++ b/sql/2025/privacy/most_common_bounce_domains.sql @@ -1,90 +1,78 @@ +-- noqa: disable=PRS -- Detection logic explained: -- https://github.com/privacycg/proposals/issues/6 -- https://github.com/privacycg/nav-tracking-mitigations/blob/main/bounce-tracking-explainer.md WITH redirect_requests AS ( - SELECT + FROM `httparchive.crawl.requests` + |> WHERE + date = '2025-07-01' AND + --rank = 1000 AND + SAFE.INT64(summary.status) BETWEEN 300 AND 399 AND + index <= 2 + |> JOIN UNNEST(response_headers) AS header + |> WHERE LOWER(header.name) = 'location' + |> SELECT client, url, index, - response_headers, + NET.REG_DOMAIN(header.value) AS 
location_domain, page - FROM `httparchive.crawl.requests` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - type NOT IN ('css', 'image', 'font', 'video', 'audio') AND - ROUND(INT64(summary.status) / 100) = 3 AND - index <= 2 ), +-- Find the first navigation redirect navigation_redirect AS ( - -- Find the first navigation redirect - SELECT + FROM redirect_requests + |> WHERE + index = 1 AND + NET.REG_DOMAIN(page) = NET.REG_DOMAIN(url) AND + NET.REG_DOMAIN(url) != location_domain + |> SELECT client, - url, page, - response_header.value AS navigation_redirect_location - FROM redirect_requests, - UNNEST(response_headers) AS response_header - WHERE - index = 1 AND - LOWER(response_header.name) = 'location' AND - NET.REG_DOMAIN(response_header.value) != NET.REG_DOMAIN(page) + location_domain AS bounce_domain ), +-- Find the second navigation redirect bounce_redirect AS ( - -- Find the second navigation redirect - SELECT + FROM redirect_requests + |> WHERE + index = 2 AND + NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) AND + NET.REG_DOMAIN(url) != location_domain + |> SELECT client, url, page, - response_header.value AS bounce_redirect_location, - response_headers - FROM redirect_requests, - UNNEST(response_headers) AS response_header - WHERE - index = 2 AND - LOWER(response_header.name) = 'location' + location_domain AS bounce_redirect_location_domain ), +-- Combine the first and second navigation redirects bounce_sequences AS ( - -- Combine the first and second navigation redirects - SELECT - nav.client, - NET.REG_DOMAIN(navigation_redirect_location) AS bounce_hostname, - COUNT(DISTINCT nav.page) AS number_of_pages - --ARRAY_AGG(bounce.bounce_tracking_cookies) AS bounce_tracking_cookies FROM navigation_redirect AS nav - LEFT JOIN bounce_redirect AS bounce + |> JOIN bounce_redirect AS bounce ON nav.client = bounce.client AND - nav.page = bounce.page AND - nav.navigation_redirect_location = bounce.url - WHERE bounce_redirect_location IS NOT NULL - GROUP BY - 
nav.client, - bounce_hostname + nav.page = bounce.page + |> AGGREGATE COUNT(DISTINCT nav.page) AS pages_count + GROUP BY nav.client, bounce_domain ), pages_total AS ( - SELECT - client, - COUNT(DISTINCT page) AS total_pages FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' AND - is_root_page - GROUP BY client + |> WHERE date = '2025-07-01' --AND rank = 1000 + |> AGGREGATE COUNT(DISTINCT page) AS total_pages GROUP BY client ) --- Count the number of websites with bounce tracking per bounce hostname -SELECT - client, - bounce_hostname, - number_of_pages, - number_of_pages / total_pages AS pct_pages FROM bounce_sequences -JOIN pages_total -USING (client) -ORDER BY number_of_pages DESC -LIMIT 100 +|> JOIN pages_total USING (client) +|> EXTEND pages_count / total_pages AS pages_pct +|> DROP total_pages +|> PIVOT( + ANY_VALUE(pages_count) AS cnt, + ANY_VALUE(pages_pct) AS pages_pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME cnt_mobile AS mobile, cnt_desktop AS desktop +|> ORDER BY mobile + desktop DESC +|> LIMIT 100 diff --git a/sql/2025/privacy/most_common_client_hints.sql b/sql/2025/privacy/most_common_client_hints.sql index dcf0d4c16da..815f4638061 100644 --- a/sql/2025/privacy/most_common_client_hints.sql +++ b/sql/2025/privacy/most_common_client_hints.sql @@ -1,53 +1,48 @@ --- Pages that use Client Hints +-- noqa: disable=PRS +WITH totals AS ( + FROM `httparchive.crawl.pages` + |> WHERE date = '2025-07-01' AND is_root_page --AND rank = 1000 + |> AGGREGATE COUNT(*) AS total_websites GROUP BY client +), + +/* Get Accept-CH Headers */ +headers AS ( + FROM `httparchive.crawl.requests` + |> WHERE date = '2025-07-01' AND is_root_page AND is_main_document --AND rank = 1000 + |> JOIN UNNEST(response_headers) AS header + |> WHERE LOWER(header.name) = 'accept-ch' + |> LEFT JOIN UNNEST(SPLIT(LOWER(header.value), ',')) AS header_value + |> SELECT client, page, header_value -WITH response_headers AS ( - SELECT - client, - page, - LOWER(response_header.name) AS 
header_name, - LOWER(response_header.value) AS header_value, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites - FROM `httparchive.crawl.requests`, - UNNEST(response_headers) AS response_header - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - is_main_document = TRUE ), +/* Get Accept-CH Meta Tags */ meta_tags AS ( - SELECT - client, - page, - LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, - LOWER(SAFE.STRING(meta_node.content)) AS tag_value - FROM ( - SELECT - client, - page, - custom_metrics.other.almanac AS metrics - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE - ), - UNNEST(JSON_QUERY_ARRAY(metrics.`meta-nodes`.nodes)) AS meta_node - WHERE SAFE.STRING(meta_node.`http-equiv`) IS NOT NULL + FROM `httparchive.crawl.pages` + |> WHERE date = '2025-07-01' AND is_root_page --AND rank = 1000 + |> JOIN UNNEST(JSON_QUERY_ARRAY(custom_metrics.other.almanac.`meta-nodes`.nodes)) AS meta_node + |> EXTEND + LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, + |> WHERE tag_name = 'accept-ch' + |> LEFT JOIN UNNEST(SPLIT(LOWER(SAFE.STRING(meta_node.content)), ',')) AS tag_value + |> SELECT client, page, tag_value ) -SELECT - client, - IF(header_name = 'accept-ch', header_value, tag_value) AS value, - COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages, - COUNT(DISTINCT page) AS number_of_pages -FROM response_headers -FULL OUTER JOIN meta_tags -USING (client, page) -WHERE - header_name = 'accept-ch' OR - tag_name = 'accept-ch' -GROUP BY - client, - value -ORDER BY pct_pages DESC -LIMIT 200 +FROM headers +|> FULL OUTER JOIN meta_tags USING (client, page) +|> JOIN totals USING (client) +|> EXTEND TRIM(COALESCE(header_value, tag_value)) AS value +|> AGGREGATE +COUNT(DISTINCT page) AS number_of_pages, +COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages +GROUP BY client, value +|> PIVOT( + ANY_VALUE(number_of_pages) AS pages_count, + ANY_VALUE(pct_pages) AS pct + FOR client IN 
('desktop', 'mobile') +) +|> RENAME +pct_mobile AS mobile, +pct_desktop AS desktop +|> ORDER BY pages_count_mobile + pages_count_desktop DESC +|> LIMIT 200 diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql index 6e4541a41d2..3ac96acd71b 100644 --- a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql @@ -2,25 +2,19 @@ -- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md--tcdata -- CMP vendor list: https://iabeurope.eu/cmp-list/ -WITH cmps AS ( - SELECT - client, - page, - SAFE.INT64(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' +FROM `httparchive.crawl.pages` +|> WHERE date = '2025-07-01' --AND rank = 1000 +|> EXTEND +SAFE.INT64(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, +COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages +|> AGGREGATE +COUNT(0) AS number_of_pages, +COUNT(0) / ANY_VALUE(total_pages) AS pct_pages +GROUP BY client, cmpId +|> PIVOT ( + ANY_VALUE(number_of_pages) AS pages_count, + ANY_VALUE(pct_pages) AS pct + FOR client IN ('desktop', 'mobile') ) - -SELECT - client, - cmpId, - COUNT(0) / ANY_VALUE(total_pages) AS pct_pages, - COUNT(0) AS number_of_pages -FROM cmps -GROUP BY - client, - cmpId -ORDER BY - pct_pages DESC +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY pages_count_mobile + pages_count_desktop DESC diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index 2941e119e34..ae9a40dbd16 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ b/sql/2025/privacy/most_common_cname_domains.sql @@ -38,7 +38,7 @@ cnames AS ( client, cnames.cname, page, - ARRAY_AGG(DISTINCT page 
LIMIT 2) AS page_examples + --ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples FROM `httparchive.crawl.pages`, UNNEST(CONVERT_CNAME_JSON(custom_metrics.privacy.request_hostnames_with_cname)) AS cnames WHERE date = '2025-07-01' AND @@ -66,7 +66,7 @@ cname_stats AS ( adguard_trackers.domain IS NOT NULL AS adguard_known_cname, whotracksme.category AS whotracksme_category, COUNT(DISTINCT page) AS number_of_pages, - ANY_VALUE(page_examples) AS page_examples + --ANY_VALUE(page_examples) AS page_examples FROM cnames LEFT JOIN adguard_trackers ON ENDS_WITH(cnames.cname, adguard_trackers.domain) diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 5c1a47985c8..5f1f15294d1 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "cellView": "form", "colab": { @@ -52,20 +52,7 @@ "id": "UzhgG5xvbQ1E", "outputId": "9cf3ef02-ec76-43ac-cd63-03edf7f2f619" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Branch: privacy-sql-2025\n", - "M\tsql/util/bq_to_sheets.ipynb\n", - "M\tsrc/requirements.txt\n", - "Already on 'privacy-sql-2025'\n", - "Your branch is up to date with 'origin/privacy-sql-2025'.\n", - "Already up to date.\n" - ] - } - ], + "outputs": [], "source": [ "# @title Update chapter branch (Colab only - skip when running locally)\n", "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", @@ -86,20 +73,12 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✓ Connected to spreadsheet with 28 existing sheets\n" - ] - } - ], + "outputs": [], "source": [ "# @title Setup BigQuery 
and Google Sheets clients\n", "import google.auth\n", @@ -129,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": { "cellView": "form", "colab": { @@ -139,73 +118,7 @@ "id": "nblNil985Tjt", "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+=================================+==========================+\n", - "| cookies_top_first_party_names.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| cookies_top_third_party_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| cookies_top_third_party_names.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | 0 | Most Common Cmps For Iab Tcf 
V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+---------------------------------+--------------------------+\n", - "\n", - "✓ Processed 27 queries\n" - ] - } - ], + "outputs": [], "source": [ "# @title Upload query results to Google Sheets\n", "import glob\n", @@ -214,7 +127,7 @@ "from IPython.display import clear_output, display, HTML\n", "\n", "# Query filters and options\n", - "filename_match = 'most_common_cmps_for_iab_tcf_v2.sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = '(most_common_client_hints).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude 
= '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = True # @param {type: \"boolean\"}\n", @@ -321,7 +234,7 @@ "provenance": [] }, "kernelspec": { - "display_name": ".venv (3.12.7)", + "display_name": ".venv (3.14.2)", "language": "python", "name": "python3" }, @@ -335,7 +248,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.7" + "version": "3.14.2" } }, "nbformat": 4, diff --git a/src/requirements.txt b/src/requirements.txt index adb37c058c0..3c1a4d110eb 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -13,3 +13,5 @@ requests==2.32.5 tabulate==0.9.0 gspread==6.2.1 gspread-dataframe==4.0.0 +ipykernel==7.1.0 +db-dtypes==1.5.0 \ No newline at end of file From c195c07b6e3a8f8983f26527631b11c78a635049 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 11 Jan 2026 03:03:03 +0100 Subject: [PATCH 15/27] lint --- sql/2025/privacy/most_common_cname_domains.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql index ae9a40dbd16..9d9c7a6ff60 100644 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ b/sql/2025/privacy/most_common_cname_domains.sql @@ -37,7 +37,7 @@ cnames AS ( SELECT client, cnames.cname, - page, + page --ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples FROM `httparchive.crawl.pages`, UNNEST(CONVERT_CNAME_JSON(custom_metrics.privacy.request_hostnames_with_cname)) AS cnames @@ -65,7 +65,7 @@ cname_stats AS ( NET.REG_DOMAIN(cname) AS cname, adguard_trackers.domain IS NOT NULL AS adguard_known_cname, whotracksme.category AS whotracksme_category, - COUNT(DISTINCT page) AS number_of_pages, + COUNT(DISTINCT page) AS number_of_pages --ANY_VALUE(page_examples) AS page_examples FROM cnames LEFT JOIN adguard_trackers From 
5c300e4f53ced847646d779640c87ae121d6e75a Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 11 Jan 2026 03:58:55 +0100 Subject: [PATCH 16/27] Refactor SQL scripts for IAB TCF v2 and client hints; streamline queries and improve clarity --- .../most_common_countries_for_iab_tcf_v2.sql | 44 +- .../number_of_websites_with_client_hints.sql | 67 +- sql/util/bq_to_sheets.ipynb | 1614 ++++++++++++++++- src/requirements.txt | 2 +- 4 files changed, 1669 insertions(+), 58 deletions(-) diff --git a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql index 16dfe503255..f1a9afc1775 100644 --- a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql @@ -1,43 +1,41 @@ +-- noqa: disable=PRS -- Counts of countries for publishers using IAB Transparency & Consent Framework -- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md--tcdata -- "Country code of the country that determines the legislation of -- reference. Normally corresponds to the country code of the country -- in which the publisher's business entity is established." 
-WITH totals AS ( +WITH base_totals AS ( SELECT client, COUNT(DISTINCT root_page) AS total_websites FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - JSON_TYPE(custom_metrics.privacy.iab_tcf_v2.data) = 'object' + WHERE date = '2025-07-01' --AND rank = 1000 GROUP BY client ), -cmps AS ( +base_data AS ( SELECT client, - --ANY_VALUE(custom_metrics.privacy.iab_tcf_v2.data) AS example, - STRING(custom_metrics.privacy.iab_tcf_v2.data.publisherCC) AS publisherCC, - COUNT(DISTINCT root_page) AS number_of_pages + root_page, + STRING(custom_metrics.privacy.iab_tcf_v2.data.publisherCC) AS publisherCC FROM `httparchive.crawl.pages` WHERE - date = '2025-07-01' AND + date = '2025-07-01' AND --rank = 1000 AND JSON_TYPE(custom_metrics.privacy.iab_tcf_v2.data) = 'object' - GROUP BY - client, - publisherCC ) -SELECT - client, - publisherCC, - --example, - number_of_pages / total_websites AS pct_of_pages -FROM cmps -JOIN totals -USING (client) -ORDER BY - client, - number_of_pages DESC +FROM base_data +|> AGGREGATE + COUNT(DISTINCT root_page) AS number_of_pages +GROUP BY client, publisherCC +|> JOIN base_totals USING (client) +|> EXTEND number_of_pages / total_websites AS pct_of_pages +|> DROP total_websites +|> PIVOT( + ANY_VALUE(number_of_pages) AS pages_count, + ANY_VALUE(pct_of_pages) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY pages_count_mobile + pages_count_desktop DESC diff --git a/sql/2025/privacy/number_of_websites_with_client_hints.sql b/sql/2025/privacy/number_of_websites_with_client_hints.sql index a4fef23d131..bf349874deb 100644 --- a/sql/2025/privacy/number_of_websites_with_client_hints.sql +++ b/sql/2025/privacy/number_of_websites_with_client_hints.sql @@ -1,44 +1,61 @@ -WITH response_headers AS ( +-- noqa: disable=PRS +WITH base_totals AS ( SELECT client, - page, - LOWER(response_header.name) AS header_name, - LOWER(response_header.value) AS header_value, - COUNT(DISTINCT page) 
OVER (PARTITION BY client) AS total_websites + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' + --AND rank = 1000 + GROUP BY client +), + +accept_ch_headers AS ( + SELECT DISTINCT + client, + root_page FROM `httparchive.crawl.requests`, UNNEST(response_headers) response_header WHERE date = '2025-07-01' AND - is_main_document = TRUE + is_main_document = TRUE AND + --rank = 1000 AND + LOWER(response_header.name) = 'accept-ch' ), -meta_tags AS ( - SELECT +accept_ch_meta AS ( + SELECT DISTINCT client, - page, - LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, - LOWER(SAFE.STRING(meta_node.content)) AS tag_value + root_page FROM ( SELECT client, - page, + root_page, custom_metrics.other.almanac AS metrics FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' + --AND rank = 1000 ), UNNEST(JSON_QUERY_ARRAY(metrics.`meta-nodes`.nodes)) AS meta_node - WHERE SAFE.STRING(meta_node.`http-equiv`) IS NOT NULL + WHERE LOWER(SAFE.STRING(meta_node.`http-equiv`)) = 'accept-ch' +), + +-- Combine both sources +all_accept_ch AS ( + SELECT client, root_page FROM accept_ch_headers + UNION DISTINCT + SELECT client, root_page FROM accept_ch_meta ) -SELECT - client, - COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages, - COUNT(DISTINCT page) AS number_of_pages -FROM response_headers -FULL OUTER JOIN meta_tags -USING (client, page) -WHERE - header_name = 'accept-ch' OR - tag_name = 'accept-ch' -GROUP BY client -ORDER BY pct_pages DESC +FROM all_accept_ch +|> JOIN base_totals USING (client) +|> AGGREGATE + COUNT(DISTINCT all_accept_ch.root_page) AS number_of_websites, + COUNT(DISTINCT all_accept_ch.root_page) / ANY_VALUE(base_totals.total_websites) AS pct_websites +GROUP BY all_accept_ch.client +|> PIVOT( + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY 
websites_count_mobile + websites_count_desktop DESC diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 5f1f15294d1..c8e1abab3b9 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -78,14 +78,21 @@ "cellView": "form", "id": "45dBifFPJAtO" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✓ Connected to spreadsheet with 26 existing sheets\n" + ] + } + ], "source": [ "# @title Setup BigQuery and Google Sheets clients\n", "import google.auth\n", - "import os\n", "from google.cloud import bigquery\n", "import gspread\n", - "from gspread_dataframe import set_with_dataframe\n", + "\n", "\n", "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", "\n", @@ -108,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "cellView": "form", "colab": { @@ -118,16 +125,1605 @@ "id": "nblNil985Tjt", "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking for SQL files in: /Users/maxostapenko/Documents/GitHub/almanac.httparchive.org/sql/util/../2025/privacy/*.sql\n", + "Found 26 SQL files\n", + "\n", + "+-----------------------------+---------------+---------------+----------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=============================+===============+===============+======================+\n", + "| cookies_top_first_party.sql | Processing... | Processing... | Processing... 
|\n", + "+-----------------------------+---------------+---------------+----------------------+\n", + "+-----------------------------+-------------+---------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=============================+=============+=========+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+-----------------------------+-------------+---------+--------------------------+\n", + "+-----------------------------+---------------+---------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=============================+===============+===============+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+-----------------------------+---------------+---------------+--------------------------+\n", + "| cookies_top_third_party.sql | Processing... | Processing... | Processing... |\n", + "+-----------------------------+---------------+---------------+--------------------------+\n", + "+-----------------------------+-------------+---------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=============================+=============+=========+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+-----------------------------+-------------+---------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+-----------------------------+-------------+---------+--------------------------+\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+================================+===============+===============+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + 
"+--------------------------------+---------------+---------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_bounce_domains.sql | Processing... | Processing... | Processing... |\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+================================+=============+=========+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+================================+===============+===============+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "| 
most_common_client_hints.sql | Processing... | Processing... | Processing... |\n", + "+--------------------------------+---------------+---------------+--------------------------+\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+================================+=============+=========+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+--------------------------------+-------------+---------+--------------------------+\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=====================================+===============+===============+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + 
"+-------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | Processing... | Processing... | Processing... |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=====================================+=============+=========+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=====================================+===============+===============+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + 
"+-------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_cname_domains.sql | Processing... | Processing... | Processing... |\n", + "+-------------------------------------+---------------+---------------+--------------------------+\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+=====================================+=============+=========+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename 
filter mismatch |\n", + "+-------------------------------------+-------------+---------+--------------------------+\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+===============+===============+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | Processing... | Processing... | Processing... 
|\n", + "+------------------------------------------+---------------+---------------+--------------------------+\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/maxostapenko/Documents/GitHub/almanac.httparchive.org/.venv/lib/python3.14/site-packages/google/cloud/bigquery/table.py:1994: UserWarning: BigQuery Storage module not found, fetch data with the REST endpoint instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + 
"+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most 
Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | Processing... | Processing... | Processing... |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + 
"+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | Processing... | Processing... | Processing... |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | 
Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| 
most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | Processing... | Processing... | Processing... 
|\n", + "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+==========================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch 
|\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | Processing... | Processing... | Processing... |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", 
+ "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | Processing... | Processing... | Processing... |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | Processing... | Processing... | Processing... |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + 
"+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql 
| | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + 
"+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql 
| | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | Processing... | Processing... | Processing... |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter 
mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | Processing... | Processing... | Processing... 
|\n", + "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+======================================+==========================+\n", + "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "\n", + "✓ Processed 26 queries\n" + ] + } + ], "source": [ "# @title Upload query results to Google Sheets\n", "import glob\n", "import re\n", + "import os\n", + "from gspread_dataframe import set_with_dataframe\n", + "from IPython.display import clear_output\n", "from tabulate import tabulate\n", - "from IPython.display import clear_output, display, HTML\n", + "\n", "\n", "# Query filters and options\n", - "filename_match = '(most_common_client_hints).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = '(most_common_countries_for_iab_tcf_v2).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in 
quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = True # @param {type: \"boolean\"}\n", @@ -159,7 +1755,7 @@ " # Build table from current log plus preview entry if needed\n", " display_log = queries_processed_log if not preview else queries_processed_log + [log_entry]\n", " table = tabulate(display_log, headers=['Query', 'TB Billed', 'Sheet', 'Status/Skip Reason'], tablefmt=\"grid\")\n", - " clear_output(wait=True)\n", + " #clear_output(wait=True)\n", " print(table)\n", "\n", " # Process each SQL file\n", diff --git a/src/requirements.txt b/src/requirements.txt index 3c1a4d110eb..81e28ea1403 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -14,4 +14,4 @@ tabulate==0.9.0 gspread==6.2.1 gspread-dataframe==4.0.0 ipykernel==7.1.0 -db-dtypes==1.5.0 \ No newline at end of file +db-dtypes==1.5.0 From 8768f224f9552714e9fb6b097d244a5f3e551140 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Sun, 11 Jan 2026 21:16:29 +0100 Subject: [PATCH 17/27] Review and apply sql pivots --- ...nce_domains.sql => bounce_domains_top.sql} | 0 ..._client_hints.sql => client_hints_top.sql} | 0 ...lient_hints.sql => client_hints_usage.sql} | 0 ..._party.sql => cookies_first_party_top.sql} | 0 ..._party.sql => cookies_third_party_top.sql} | 0 sql/2025/privacy/dnt_usage.sql | 19 + sql/2025/privacy/fingerprinting_top.sql | 31 + ...iab_tcf_v2.sql => iab_tcf_v2_cmps_top.sql} | 0 ...cf_v2.sql => iab_tcf_v2_countries_top.sql} | 2 +- sql/2025/privacy/iab_usage.sql | 57 + sql/2025/privacy/iab_usp_strings_top.sql | 28 + .../privacy/most_common_cname_domains.sql | 92 - .../most_common_strings_for_iab_usp.sql | 27 - ...ers.sql => number_of_trackers_buckets.sql} | 2 + ..._of_websites_using_each_fingerprinting.sql | 33 - .../privacy/number_of_websites_with_dnt.sql | 34 - .../privacy/number_of_websites_with_gpc.sql | 34 - .../privacy/number_of_websites_with_iab.sql | 93 - 
...number_of_websites_with_referrerpolicy.sql | 86 - ...rer_policy.sql => referrer_policy_top.sql} | 51 +- sql/2025/privacy/referrer_policy_usage.sql | 58 + ...ials.sql => related_origin_trials_top.sql} | 0 ...er_technology.sql => technologies_top.sql} | 0 ...tegory.sql => technology_category_top.sql} | 0 ...egories.sql => tracker_categories_top.sql} | 0 ...ckers.sql => whotracksme_trackers_top.sql} | 0 sql/util/bq_to_sheets.ipynb | 1678 +---------------- 27 files changed, 299 insertions(+), 2026 deletions(-) rename sql/2025/privacy/{most_common_bounce_domains.sql => bounce_domains_top.sql} (100%) rename sql/2025/privacy/{most_common_client_hints.sql => client_hints_top.sql} (100%) rename sql/2025/privacy/{number_of_websites_with_client_hints.sql => client_hints_usage.sql} (100%) rename sql/2025/privacy/{cookies_top_first_party.sql => cookies_first_party_top.sql} (100%) rename sql/2025/privacy/{cookies_top_third_party.sql => cookies_third_party_top.sql} (100%) create mode 100644 sql/2025/privacy/dnt_usage.sql create mode 100644 sql/2025/privacy/fingerprinting_top.sql rename sql/2025/privacy/{most_common_cmps_for_iab_tcf_v2.sql => iab_tcf_v2_cmps_top.sql} (100%) rename sql/2025/privacy/{most_common_countries_for_iab_tcf_v2.sql => iab_tcf_v2_countries_top.sql} (93%) create mode 100644 sql/2025/privacy/iab_usage.sql create mode 100644 sql/2025/privacy/iab_usp_strings_top.sql delete mode 100644 sql/2025/privacy/most_common_cname_domains.sql delete mode 100644 sql/2025/privacy/most_common_strings_for_iab_usp.sql rename sql/2025/privacy/{number_of_websites_with_nb_trackers.sql => number_of_trackers_buckets.sql} (99%) delete mode 100644 sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql delete mode 100644 sql/2025/privacy/number_of_websites_with_dnt.sql delete mode 100644 sql/2025/privacy/number_of_websites_with_gpc.sql delete mode 100644 sql/2025/privacy/number_of_websites_with_iab.sql delete mode 100644 
sql/2025/privacy/number_of_websites_with_referrerpolicy.sql rename sql/2025/privacy/{most_common_referrer_policy.sql => referrer_policy_top.sql} (50%) create mode 100644 sql/2025/privacy/referrer_policy_usage.sql rename sql/2025/privacy/{number_of_websites_with_related_origin_trials.sql => related_origin_trials_top.sql} (100%) rename sql/2025/privacy/{number_of_websites_per_technology.sql => technologies_top.sql} (100%) rename sql/2025/privacy/{number_of_websites_per_technology_category.sql => technology_category_top.sql} (100%) rename sql/2025/privacy/{most_common_tracker_categories.sql => tracker_categories_top.sql} (100%) rename sql/2025/privacy/{number_of_websites_with_whotracksme_trackers.sql => whotracksme_trackers_top.sql} (100%) diff --git a/sql/2025/privacy/most_common_bounce_domains.sql b/sql/2025/privacy/bounce_domains_top.sql similarity index 100% rename from sql/2025/privacy/most_common_bounce_domains.sql rename to sql/2025/privacy/bounce_domains_top.sql diff --git a/sql/2025/privacy/most_common_client_hints.sql b/sql/2025/privacy/client_hints_top.sql similarity index 100% rename from sql/2025/privacy/most_common_client_hints.sql rename to sql/2025/privacy/client_hints_top.sql diff --git a/sql/2025/privacy/number_of_websites_with_client_hints.sql b/sql/2025/privacy/client_hints_usage.sql similarity index 100% rename from sql/2025/privacy/number_of_websites_with_client_hints.sql rename to sql/2025/privacy/client_hints_usage.sql diff --git a/sql/2025/privacy/cookies_top_first_party.sql b/sql/2025/privacy/cookies_first_party_top.sql similarity index 100% rename from sql/2025/privacy/cookies_top_first_party.sql rename to sql/2025/privacy/cookies_first_party_top.sql diff --git a/sql/2025/privacy/cookies_top_third_party.sql b/sql/2025/privacy/cookies_third_party_top.sql similarity index 100% rename from sql/2025/privacy/cookies_top_third_party.sql rename to sql/2025/privacy/cookies_third_party_top.sql diff --git a/sql/2025/privacy/dnt_usage.sql 
b/sql/2025/privacy/dnt_usage.sql new file mode 100644 index 00000000000..c713043bf79 --- /dev/null +++ b/sql/2025/privacy/dnt_usage.sql @@ -0,0 +1,19 @@ +-- Pages that use DNT feature + +FROM `httparchive.blink_features.usage` +|> WHERE + date = '2025-07-01' AND + --rank <= 10000 AND + feature = 'NavigatorDoNotTrack' +|> SELECT DISTINCT + client, + rank, + num_urls, + pct_urls +|> PIVOT ( + ANY_VALUE(num_urls) AS pages_count, + ANY_VALUE(pct_urls) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY rank ASC diff --git a/sql/2025/privacy/fingerprinting_top.sql b/sql/2025/privacy/fingerprinting_top.sql new file mode 100644 index 00000000000..5dc696ea56a --- /dev/null +++ b/sql/2025/privacy/fingerprinting_top.sql @@ -0,0 +1,31 @@ +-- noqa: disable=PRS +-- Percent of websites using a fingerprinting library based on wappalyzer category + +WITH base_totals AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS websites_total + FROM httparchive.crawl.pages + WHERE date = '2025-07-01' + GROUP BY client +) + +FROM httparchive.crawl.pages, + UNNEST(technologies) AS technology, + UNNEST(technology.categories) AS category +|> WHERE + date = '2025-07-01' AND + category = 'Browser fingerprinting' +|> AGGREGATE + COUNT(DISTINCT root_page) AS websites_count +GROUP BY client, technology.technology +|> JOIN base_totals USING (client) +|> EXTEND websites_count / websites_total AS websites_pct +|> DROP websites_total +|> PIVOT( + ANY_VALUE(websites_count) AS websites_count, + ANY_VALUE(websites_pct) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME websites_count_mobile AS mobile, websites_count_desktop AS desktop +|> ORDER BY mobile + desktop DESC diff --git a/sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql b/sql/2025/privacy/iab_tcf_v2_cmps_top.sql similarity index 100% rename from sql/2025/privacy/most_common_cmps_for_iab_tcf_v2.sql rename to sql/2025/privacy/iab_tcf_v2_cmps_top.sql diff --git 
a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql b/sql/2025/privacy/iab_tcf_v2_countries_top.sql similarity index 93% rename from sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql rename to sql/2025/privacy/iab_tcf_v2_countries_top.sql index f1a9afc1775..7874a03e8f5 100644 --- a/sql/2025/privacy/most_common_countries_for_iab_tcf_v2.sql +++ b/sql/2025/privacy/iab_tcf_v2_countries_top.sql @@ -18,7 +18,7 @@ base_data AS ( SELECT client, root_page, - STRING(custom_metrics.privacy.iab_tcf_v2.data.publisherCC) AS publisherCC + UPPER(SAFE.STRING(custom_metrics.privacy.iab_tcf_v2.data.publisherCC)) AS publisherCC FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' AND --rank = 1000 AND diff --git a/sql/2025/privacy/iab_usage.sql b/sql/2025/privacy/iab_usage.sql new file mode 100644 index 00000000000..272384e8d72 --- /dev/null +++ b/sql/2025/privacy/iab_usage.sql @@ -0,0 +1,57 @@ +-- noqa: disable=PRS +-- Counts of websites with IAB Frameworks + +WITH base_data AS ( + SELECT + client, + root_page, + SAFE.BOOL(custom_metrics.privacy.iab_tcf_v1.present) AS tcfv1, + SAFE.BOOL(custom_metrics.privacy.iab_tcf_v2.present) AS tcfv2, + SAFE.BOOL(custom_metrics.privacy.iab_gpp.present) AS gpp, + SAFE.BOOL(custom_metrics.privacy.iab_usp.present) AS usp, + SAFE.BOOL(custom_metrics.privacy.iab_tcf_v1.compliant_setup) AS tcfv1_compliant, + SAFE.BOOL(custom_metrics.privacy.iab_tcf_v2.compliant_setup) AS tcfv2_compliant, + custom_metrics.privacy.iab_gpp.data IS NOT NULL AS gpp_data + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' + --AND rank = 1000 +), + +aggregated AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS total_websites, + COUNT(DISTINCT IF(tcfv1, root_page, NULL)) AS websites_with_tcfv1, + COUNT(DISTINCT IF(tcfv2, root_page, NULL)) AS websites_with_tcfv2, + COUNT(DISTINCT IF(usp, root_page, NULL)) AS websites_with_usp, + COUNT(DISTINCT IF(tcfv1 OR tcfv2, root_page, NULL)) AS websites_with_tcf, + COUNT(DISTINCT IF(tcfv1 OR tcfv2 OR usp 
OR gpp, root_page, NULL)) AS websites_with_any, + COUNT(DISTINCT IF(tcfv1 AND tcfv1_compliant, root_page, NULL)) AS websites_with_tcfv1_compliant, + COUNT(DISTINCT IF(tcfv2 AND tcfv2_compliant, root_page, NULL)) AS websites_with_tcfv2_compliant, + COUNT(DISTINCT IF(gpp, root_page, NULL)) AS websites_with_gpp, + COUNT(DISTINCT IF(gpp_data, root_page, NULL)) AS websites_with_gpp_data + FROM base_data + GROUP BY client +) + +FROM aggregated, + UNNEST([ + STRUCT('tcfv1' AS metric, websites_with_tcfv1 / total_websites AS pct_websites, websites_with_tcfv1 AS number_of_websites), + STRUCT('tcfv2', websites_with_tcfv2 / total_websites, websites_with_tcfv2), + STRUCT('usp', websites_with_usp / total_websites, websites_with_usp), + STRUCT('tcf', websites_with_tcf / total_websites, websites_with_tcf), + STRUCT('any_framework', websites_with_any / total_websites, websites_with_any), + STRUCT('tcfv1_compliant', websites_with_tcfv1_compliant / total_websites, websites_with_tcfv1_compliant), + STRUCT('tcfv2_compliant', websites_with_tcfv2_compliant / total_websites, websites_with_tcfv2_compliant), + STRUCT('gpp', websites_with_gpp / total_websites, websites_with_gpp), + STRUCT('gpp_data_available', websites_with_gpp_data / total_websites, websites_with_gpp_data) + ]) AS metric +|> SELECT client, metric.metric, metric.pct_websites, metric.number_of_websites +|> PIVOT( + ANY_VALUE(pct_websites) AS pct, + ANY_VALUE(number_of_websites) AS websites_count + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY websites_count_desktop + websites_count_mobile DESC diff --git a/sql/2025/privacy/iab_usp_strings_top.sql b/sql/2025/privacy/iab_usp_strings_top.sql new file mode 100644 index 00000000000..8eb0b3ee93e --- /dev/null +++ b/sql/2025/privacy/iab_usp_strings_top.sql @@ -0,0 +1,28 @@ +-- noqa: disable=PRS +-- Counts of US Privacy String values for websites using IAB US Privacy Framework +-- cf. 
https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md + +WITH base_totals AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' + GROUP BY client +) + +FROM `httparchive.crawl.pages` +|> WHERE date = '2025-07-01' +|> EXTEND UPPER(SAFE.STRING(custom_metrics.privacy.iab_usp.privacy_string.uspString)) AS uspString +|> WHERE uspString IS NOT NULL +|> AGGREGATE COUNT(DISTINCT root_page) AS websites_count GROUP BY client, uspString +|> JOIN base_totals USING (client) +|> EXTEND websites_count / total_websites AS pct_websites +|> DROP total_websites +|> PIVOT( + ANY_VALUE(websites_count) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY websites_count_desktop + websites_count_mobile DESC diff --git a/sql/2025/privacy/most_common_cname_domains.sql b/sql/2025/privacy/most_common_cname_domains.sql deleted file mode 100644 index 9d9c7a6ff60..00000000000 --- a/sql/2025/privacy/most_common_cname_domains.sql +++ /dev/null @@ -1,92 +0,0 @@ --- Most common CNAME domains - -CREATE TEMP FUNCTION CONVERT_CNAME_JSON(obj JSON) -RETURNS ARRAY> -LANGUAGE js AS """ -try { - const result = []; - for (const key in obj) { - result.push({ - origin: key, - cname: obj[key] - }); - } - return result; -} catch (e) { - return []; -} -"""; - --- Adguard CNAME Trackers source: --- https://github.com/AdguardTeam/cname-trackers/blob/master/script/src/cloaked-trackers.json -WITH adguard_trackers AS ( - SELECT - domain - FROM UNNEST(['cz.affilbox.cz', 'pl02.prolitteris.2cnt.net', 'a8.net', 'mm.actionlink.jp', 'mr-in.com', 'ebis.ne.jp', '0i0i0i0.com', 'ads.bid', 'at-o.net', 'actonservice.com', 'actonsoftware.com', '2o7.net', 'data.adobedc.net', 'sc.adobedc.net', 'sc.omtrdc.net', 'adocean.pl', 'aquaplatform.com', 'cdn18685953.ahacdn.me', 'thirdparty.bnc.lt', 
'api.clickaine.com', 'tagcommander.com', 'track.sp.crdl.io', 'dnsdelegation.io', 'storetail.io', 'e.customeriomail.com', 'dataunlocker.com', 'monopoly-drain.ga', 'friendly-community.tk', 'nc0.co', 'customer.etracker.com', 'eulerian.net', 'extole.com', 'extole.io', 'fathomdns.com', 'genieespv.jp', 'ad-cloud.jp', 'goatcounter.com', 'heleric.com', 'iocnt.net', 'affex.org', 'k.keyade.com', 'ghochv3eng.trafficmanager.net', 'online-metrix.net', 'logly.co.jp', 'mailgun.org', 'ab1n.net', 'ntv.io', 'ntvpforever.com', 'postrelease.com', 'non.li', 'tracking.bp01.net', 't.eloqua.com', 'oghub.io', 'go.pardot.com', 'parsely.com', 'custom.plausible.io', 'popcashjs.b-cdn.net', 'rdtk.io', 'sailthru.com', 'exacttarget.com', 'a351fec2c318c11ea9b9b0a0ae18fb0b-1529426863.eu-central-1.elb.amazonaws.com', 'a5e652663674a11e997c60ac8a4ec150-1684524385.eu-central-1.elb.amazonaws.com', 'a88045584548111e997c60ac8a4ec150-1610510072.eu-central-1.elb.amazonaws.com', 'afc4d9aa2a91d11e997c60ac8a4ec150-2082092489.eu-central-1.elb.amazonaws.com', 'e.truedata.co', 'utiq-aws.net', 'webtrekk.net', 'wt-eu02.net', 'ak-is2.net', 'wizaly.com']) AS domain -), - -whotracksme AS ( - SELECT DISTINCT - domain, - category - FROM `httparchive.almanac.whotracksme` - WHERE date = '2025-07-01' -), - -cnames AS ( - SELECT - client, - cnames.cname, - page - --ARRAY_AGG(DISTINCT page LIMIT 2) AS page_examples - FROM `httparchive.crawl.pages`, - UNNEST(CONVERT_CNAME_JSON(custom_metrics.privacy.request_hostnames_with_cname)) AS cnames - WHERE date = '2025-07-01' AND - NET.REG_DOMAIN(cnames.origin) = NET.REG_DOMAIN(page) AND - NET.REG_DOMAIN(cnames.cname) != NET.REG_DOMAIN(page) - GROUP BY - client, - cnames.cname, - page -), - -pages_total AS ( - SELECT - client, - COUNT(DISTINCT page) AS total_pages - FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' - GROUP BY client -), - -cname_stats AS ( - SELECT - client, - NET.REG_DOMAIN(cname) AS cname, - adguard_trackers.domain IS NOT NULL AS adguard_known_cname, - 
whotracksme.category AS whotracksme_category, - COUNT(DISTINCT page) AS number_of_pages - --ANY_VALUE(page_examples) AS page_examples - FROM cnames - LEFT JOIN adguard_trackers - ON ENDS_WITH(cnames.cname, adguard_trackers.domain) - LEFT JOIN whotracksme - ON ENDS_WITH(cnames.cname, whotracksme.domain) - GROUP BY - client, - cname, - adguard_known_cname, - whotracksme_category -) - -SELECT - client, - cname, - adguard_known_cname, - whotracksme_category, - number_of_pages, - number_of_pages / total_pages AS pct_pages -FROM cname_stats -LEFT JOIN pages_total -USING (client) -ORDER BY number_of_pages DESC diff --git a/sql/2025/privacy/most_common_strings_for_iab_usp.sql b/sql/2025/privacy/most_common_strings_for_iab_usp.sql deleted file mode 100644 index 837b7ff2375..00000000000 --- a/sql/2025/privacy/most_common_strings_for_iab_usp.sql +++ /dev/null @@ -1,27 +0,0 @@ --- Counts of US Privacy String values for websites using IAB US Privacy Framework --- cf. https://github.com/InteractiveAdvertisingBureau/USPrivacy/blob/master/CCPA/US%20Privacy%20String.md - -WITH usp_data AS ( - SELECT - client, - page, - SAFE.STRING(custom_metrics.privacy.iab_usp.privacy_string.uspString) AS uspString, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS pages_total - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -) - -SELECT - client, - uspString, - COUNT(DISTINCT page) / ANY_VALUE(pages_total) AS pct_pages, - COUNT(DISTINCT page) AS number_of_pages -FROM usp_data -GROUP BY - client, - uspString -ORDER BY - pct_pages DESC -LIMIT 100 diff --git a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql b/sql/2025/privacy/number_of_trackers_buckets.sql similarity index 99% rename from sql/2025/privacy/number_of_websites_with_nb_trackers.sql rename to sql/2025/privacy/number_of_trackers_buckets.sql index b54ad6d93cf..1556b106b2f 100644 --- a/sql/2025/privacy/number_of_websites_with_nb_trackers.sql +++ 
b/sql/2025/privacy/number_of_trackers_buckets.sql @@ -50,7 +50,9 @@ GROUP BY client, number_of_trackers, total_websites + UNION ALL + SELECT client, 'any_tracker' AS type, diff --git a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql b/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql deleted file mode 100644 index da695ed3be7..00000000000 --- a/sql/2025/privacy/number_of_websites_using_each_fingerprinting.sql +++ /dev/null @@ -1,33 +0,0 @@ --- Percent of websites using a fingerprinting library based on wappalyzer category - -WITH totals AS ( - SELECT - client, - COUNT(DISTINCT page) AS total_websites - FROM httparchive.crawl.pages - WHERE - date = '2025-07-01' - GROUP BY - client -) - -SELECT - client, - technology.technology, - total_websites, - COUNT(DISTINCT page) AS number_of_websites, - COUNT(DISTINCT page) / total_websites AS percent_of_websites -FROM httparchive.crawl.pages -JOIN totals USING (client), - UNNEST(technologies) AS technology, - UNNEST(technology.categories) AS category -WHERE - date = '2025-07-01' AND - category = 'Browser fingerprinting' -GROUP BY - client, - total_websites, - technology -ORDER BY - client, - number_of_websites DESC diff --git a/sql/2025/privacy/number_of_websites_with_dnt.sql b/sql/2025/privacy/number_of_websites_with_dnt.sql deleted file mode 100644 index 9e79e93848c..00000000000 --- a/sql/2025/privacy/number_of_websites_with_dnt.sql +++ /dev/null @@ -1,34 +0,0 @@ --- Pages that request DNT status - -WITH blink AS ( - SELECT DISTINCT - client, - num_urls, - pct_urls - FROM `httparchive.blink_features.usage` - WHERE - date = '2025-07-01' AND - feature IN ('NavigatorDoNotTrack') -), - -pages AS ( - SELECT - client, - COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_doNotTrack), page, NULL)) AS num_urls, - COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_doNotTrack), page, NULL)) / COUNT(DISTINCT page) AS pct_urls - FROM `httparchive.crawl.pages` - WHERE - date = 
'2025-07-01' AND - is_root_page = TRUE - GROUP BY client -) - -SELECT - COALESCE(blink.client, pages.client) AS client, - blink.num_urls AS number_of_pages_usage_per_blink, - blink.pct_urls AS pct_of_websites_usage_per_blink, - pages.num_urls AS number_of_pages_usage_per_custom_metric, - pages.pct_urls AS pct_of_websites_usage_per_custom_metric -FROM blink -FULL OUTER JOIN pages -ON blink.client = pages.client diff --git a/sql/2025/privacy/number_of_websites_with_gpc.sql b/sql/2025/privacy/number_of_websites_with_gpc.sql deleted file mode 100644 index 667b7bf9f6d..00000000000 --- a/sql/2025/privacy/number_of_websites_with_gpc.sql +++ /dev/null @@ -1,34 +0,0 @@ --- Pages that provide `/.well-known/gpc.json` for Global Privacy Control - -WITH pages AS ( - SELECT - client, - COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/gpc.json`.found), page, NULL)) / COUNT(DISTINCT page) AS pct_pages_well_known, - COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/gpc.json`.found), page, NULL)) AS number_of_pages_well_known, - COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_globalPrivacyControl), page, NULL)) / COUNT(DISTINCT page) AS pct_pages_js_api, - COUNT(DISTINCT IF(SAFE.BOOL(custom_metrics.privacy.navigator_globalPrivacyControl), page, NULL)) AS number_of_pages_js_api - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE - GROUP BY client -), - -headers AS ( - SELECT - client, - COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) / COUNT(DISTINCT page) AS pct_pages_headers, - COUNT(DISTINCT IF(headers.name = 'sec-gpc' AND headers.value = '1', page, NULL)) AS number_of_pages_headers - FROM `httparchive.crawl.requests`, - UNNEST(response_headers) headers - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - is_main_document = TRUE - GROUP BY client -) - -SELECT * -FROM pages -FULL OUTER JOIN headers -USING (client) diff --git 
a/sql/2025/privacy/number_of_websites_with_iab.sql b/sql/2025/privacy/number_of_websites_with_iab.sql deleted file mode 100644 index 017ca4aac6f..00000000000 --- a/sql/2025/privacy/number_of_websites_with_iab.sql +++ /dev/null @@ -1,93 +0,0 @@ --- Counts of pages with IAB Frameworks --- TODO: check presence of multiple frameworks per page - -WITH privacy_custom_metrics_data AS ( - SELECT - client, - COUNT(0) AS number_of_pages, - COUNTIF(tcfv1) AS number_of_pages_with_tcfv1, - COUNTIF(tcfv2) AS number_of_pages_with_tcfv2, - COUNTIF(usp) AS number_of_pages_with_usp, - COUNTIF(tcfv1 OR tcfv2) AS number_of_pages_with_tcf, - COUNTIF(tcfv1 OR tcfv2 OR usp OR gpp) AS number_of_pages_with_any, - COUNTIF(tcfv1 AND tcfv1_compliant) AS number_of_pages_with_tcfv1_compliant, - COUNTIF(tcfv2 AND tcfv2_compliant) AS number_of_pages_with_tcfv2_compliant, - COUNTIF(gpp) AS number_of_pages_with_gpp, - COUNTIF(gpp_data) AS number_of_pages_with_gpp_data - FROM ( - SELECT - client, - SAFE.BOOL(metrics.iab_tcf_v1.present) AS tcfv1, - SAFE.BOOL(metrics.iab_tcf_v2.present) AS tcfv2, - SAFE.BOOL(metrics.iab_gpp.present) AS gpp, - SAFE.BOOL(metrics.iab_usp.present) AS usp, - SAFE.BOOL(metrics.iab_tcf_v1.compliant_setup) AS tcfv1_compliant, - SAFE.BOOL(metrics.iab_tcf_v2.compliant_setup) AS tcfv2_compliant, - metrics.iab_gpp.data IS NOT NULL AS gpp_data - FROM ( - SELECT - client, - custom_metrics.privacy AS metrics - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE - ) - ) - GROUP BY client -) - -SELECT - client, - metric.metric, - metric.pct_pages, - metric.number_of_pages -FROM ( - SELECT - client, - ARRAY>[STRUCT( - 'tcfv1', - number_of_pages_with_tcfv1 / number_of_pages, - number_of_pages_with_tcfv1 - ), STRUCT( - 'tcfv2', - number_of_pages_with_tcfv2 / number_of_pages, - number_of_pages_with_tcfv2 - ), STRUCT( - 'usp', - number_of_pages_with_usp / number_of_pages, - number_of_pages_with_usp - ), STRUCT( - 'tcf', - number_of_pages_with_tcf / 
number_of_pages, - number_of_pages_with_tcf - ), STRUCT( - 'any_framework', - number_of_pages_with_any / number_of_pages, - number_of_pages_with_any - ), STRUCT( - 'tcfv1_compliant', - number_of_pages_with_tcfv1_compliant / number_of_pages, - number_of_pages_with_tcfv1_compliant - ), STRUCT( - 'tcfv2_compliant', - number_of_pages_with_tcfv2_compliant / number_of_pages, - number_of_pages_with_tcfv2_compliant - ), STRUCT( - 'gpp', - number_of_pages_with_gpp / number_of_pages, - number_of_pages_with_gpp - ), STRUCT( - 'gpp_data_available', - number_of_pages_with_gpp_data / number_of_pages, - number_of_pages_with_gpp_data - )] AS metrics - FROM privacy_custom_metrics_data -), - UNNEST(metrics) AS metric -ORDER BY - client; diff --git a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql b/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql deleted file mode 100644 index 92fb30a988d..00000000000 --- a/sql/2025/privacy/number_of_websites_with_referrerpolicy.sql +++ /dev/null @@ -1,86 +0,0 @@ -WITH referrer_policy_custom_metrics AS ( - SELECT - client, - page, - SAFE.STRING(custom_metrics.privacy.referrerPolicy.entire_document_policy) AS meta_policy, - ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics.privacy.referrerPolicy.individual_requests)) > 0 AS individual_requests, - SAFE.INT64(custom_metrics.privacy.referrerPolicy.link_relations.A) > 0 AS link_relations - FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -), - -referrer_policy_headers AS ( - SELECT - client, - page, - LOWER(response_header.value) AS header_policy - FROM `httparchive.crawl.requests`, - UNNEST(response_headers) AS response_header - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - is_main_document = TRUE AND - response_header.name = 'referrer-policy' -) - -SELECT - client, - number_of_pages_with_entire_document_policy_meta / number_of_pages AS pct_pages_with_entire_document_policy_meta, - number_of_pages_with_entire_document_policy_meta, - 
number_of_pages_with_entire_document_policy_header / number_of_pages AS pct_pages_with_entire_document_policy_header, - number_of_pages_with_entire_document_policy_header, - number_of_pages_with_entire_document_policy / number_of_pages AS pct_pages_with_entire_document_policy, - number_of_pages_with_entire_document_policy, - number_of_pages_with_any_individual_requests / number_of_pages AS pct_pages_with_any_individual_requests, - number_of_pages_with_any_individual_requests, - number_of_pages_with_any_link_relations / number_of_pages AS pct_pages_with_any_link_relations, - number_of_pages_with_any_link_relations, - number_of_pages_with_any_referrer_policy / number_of_pages AS pct_pages_with_any_referrer_policy, - number_of_pages_with_any_referrer_policy -FROM ( - SELECT - client, - COUNT(DISTINCT page) AS number_of_pages, - COUNT(DISTINCT IF( - meta_policy IS NOT NULL, - page, NULL - )) AS number_of_pages_with_entire_document_policy_meta, - COUNT(DISTINCT IF( - header_policy IS NOT NULL, - page, NULL - )) AS number_of_pages_with_entire_document_policy_header, - COUNT( - DISTINCT IF( - meta_policy IS NOT NULL OR - header_policy IS NOT NULL, - page, NULL - ) - ) AS number_of_pages_with_entire_document_policy, - COUNT(DISTINCT IF( - individual_requests, - page, NULL - )) AS number_of_pages_with_any_individual_requests, - COUNT(DISTINCT IF( - link_relations, - page, NULL - )) AS number_of_pages_with_any_link_relations, - COUNT( - DISTINCT IF( - meta_policy IS NOT NULL OR - header_policy IS NOT NULL OR - individual_requests OR - link_relations, - page, NULL - ) - ) AS number_of_pages_with_any_referrer_policy - FROM - referrer_policy_custom_metrics - FULL OUTER JOIN - referrer_policy_headers - USING (client, page) - GROUP BY client -) -ORDER BY - client diff --git a/sql/2025/privacy/most_common_referrer_policy.sql b/sql/2025/privacy/referrer_policy_top.sql similarity index 50% rename from sql/2025/privacy/most_common_referrer_policy.sql rename to 
sql/2025/privacy/referrer_policy_top.sql index eed79b736e3..329c790db43 100644 --- a/sql/2025/privacy/most_common_referrer_policy.sql +++ b/sql/2025/privacy/referrer_policy_top.sql @@ -1,32 +1,29 @@ +-- noqa: disable=PRS -- Most common values for Referrer-Policy (at site level) -WITH totals AS ( +WITH base_totals AS ( SELECT client, - COUNT(DISTINCT page) AS total_pages + COUNT(DISTINCT root_page) AS total_websites FROM `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE + WHERE date = '2025-07-01' --AND rank = 1000 GROUP BY client ), referrer_policy_custom_metrics AS ( SELECT client, - page, + root_page, LOWER(TRIM(policy_meta)) AS policy_meta FROM `httparchive.crawl.pages`, UNNEST(SPLIT(SAFE.STRING(custom_metrics.privacy.referrerPolicy.entire_document_policy), ',')) AS policy_meta - WHERE - date = '2025-07-01' AND - is_root_page = TRUE + WHERE date = '2025-07-01' --AND rank = 1000 ), response_headers AS ( SELECT client, - page, + root_page, LOWER(response_header.name) AS name, LOWER(response_header.value) AS value FROM `httparchive.crawl.requests`, @@ -34,32 +31,30 @@ response_headers AS ( WHERE date = '2025-07-01' AND is_main_document = TRUE + --AND rank = 1000 ), referrer_policy_headers AS ( SELECT client, - page, + root_page, TRIM(policy_header) AS policy_header FROM response_headers, UNNEST(SPLIT(value, ',')) AS policy_header - WHERE - name = 'referrer-policy' + WHERE name = 'referrer-policy' ) -SELECT - client, - COALESCE(policy_header, policy_meta) AS policy, - COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages, - COUNT(DISTINCT page) AS number_of_pages FROM referrer_policy_custom_metrics -FULL OUTER JOIN referrer_policy_headers -USING (client, page) -JOIN totals -USING (client) -GROUP BY - client, - policy -ORDER BY - pct_pages DESC -LIMIT 100 +|> FULL OUTER JOIN referrer_policy_headers USING (client, root_page) +|> EXTEND COALESCE(policy_header, policy_meta) AS policy +|> AGGREGATE COUNT(DISTINCT root_page) AS 
number_of_websites GROUP BY client, policy +|> JOIN base_totals USING (client) +|> EXTEND number_of_websites / total_websites AS pct_websites +|> DROP total_websites +|> PIVOT( + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY websites_count_desktop + websites_count_mobile DESC diff --git a/sql/2025/privacy/referrer_policy_usage.sql b/sql/2025/privacy/referrer_policy_usage.sql new file mode 100644 index 00000000000..aa296a9217a --- /dev/null +++ b/sql/2025/privacy/referrer_policy_usage.sql @@ -0,0 +1,58 @@ +-- noqa: disable=PRS + +WITH referrer_policy_custom_metrics AS ( + SELECT + client, + root_page, + SAFE.STRING(custom_metrics.privacy.referrerPolicy.entire_document_policy) AS meta_policy, + ARRAY_LENGTH(JSON_QUERY_ARRAY(custom_metrics.privacy.referrerPolicy.individual_requests)) > 0 AS individual_requests, + SAFE.INT64(custom_metrics.privacy.referrerPolicy.link_relations.A) > 0 AS link_relations + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' +), + +referrer_policy_headers AS ( + SELECT + client, + root_page, + LOWER(response_header.value) AS header_policy + FROM `httparchive.crawl.requests`, + UNNEST(response_headers) AS response_header + WHERE + date = '2025-07-01' AND + is_main_document = TRUE AND + response_header.name = 'referrer-policy' +), + +aggregated AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS total_websites, + COUNT(DISTINCT IF(meta_policy IS NOT NULL, root_page, NULL)) / COUNT(DISTINCT root_page) AS pct_entire_document_policy_meta, + COUNT(DISTINCT IF(header_policy IS NOT NULL, root_page, NULL)) / COUNT(DISTINCT root_page) AS pct_entire_document_policy_header, + COUNT(DISTINCT IF(meta_policy IS NOT NULL OR header_policy IS NOT NULL, root_page, NULL)) / COUNT(DISTINCT root_page) AS pct_entire_document_policy, + COUNT(DISTINCT IF(individual_requests, root_page, NULL)) / COUNT(DISTINCT 
root_page) AS pct_any_individual_requests, + COUNT(DISTINCT IF(link_relations, root_page, NULL)) / COUNT(DISTINCT root_page) AS pct_any_link_relations, + COUNT(DISTINCT IF(meta_policy IS NOT NULL OR header_policy IS NOT NULL OR individual_requests OR link_relations, root_page, NULL)) / COUNT(DISTINCT root_page) AS pct_any_referrer_policy + FROM referrer_policy_custom_metrics + FULL OUTER JOIN referrer_policy_headers + USING (client, root_page) + GROUP BY client +) + +FROM aggregated, + UNNEST([ + STRUCT('entire_document_policy_meta' AS metric, pct_entire_document_policy_meta AS pct), + STRUCT('entire_document_policy_header', pct_entire_document_policy_header), + STRUCT('entire_document_policy', pct_entire_document_policy), + STRUCT('any_individual_requests', pct_any_individual_requests), + STRUCT('any_link_relations', pct_any_link_relations), + STRUCT('any_referrer_policy', pct_any_referrer_policy) + ]) AS metric_data +|> SELECT client, metric_data.metric, metric_data.pct +|> PIVOT( + ANY_VALUE(pct) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY mobile + desktop DESC diff --git a/sql/2025/privacy/number_of_websites_with_related_origin_trials.sql b/sql/2025/privacy/related_origin_trials_top.sql similarity index 100% rename from sql/2025/privacy/number_of_websites_with_related_origin_trials.sql rename to sql/2025/privacy/related_origin_trials_top.sql diff --git a/sql/2025/privacy/number_of_websites_per_technology.sql b/sql/2025/privacy/technologies_top.sql similarity index 100% rename from sql/2025/privacy/number_of_websites_per_technology.sql rename to sql/2025/privacy/technologies_top.sql diff --git a/sql/2025/privacy/number_of_websites_per_technology_category.sql b/sql/2025/privacy/technology_category_top.sql similarity index 100% rename from sql/2025/privacy/number_of_websites_per_technology_category.sql rename to sql/2025/privacy/technology_category_top.sql diff --git 
a/sql/2025/privacy/most_common_tracker_categories.sql b/sql/2025/privacy/tracker_categories_top.sql similarity index 100% rename from sql/2025/privacy/most_common_tracker_categories.sql rename to sql/2025/privacy/tracker_categories_top.sql diff --git a/sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql b/sql/2025/privacy/whotracksme_trackers_top.sql similarity index 100% rename from sql/2025/privacy/number_of_websites_with_whotracksme_trackers.sql rename to sql/2025/privacy/whotracksme_trackers_top.sql diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index c8e1abab3b9..16c466f84ca 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -17,10 +17,16 @@ "outputs": [], "source": [ "# @title Configuration\n", + "import os\n", + "import sys\n", + "\n", "GCP_PROJECT = 'httparchive' #@param {type: \"string\"}\n", "almanac_year = 2025 #@param {type: \"integer\"}\n", "chapter_name = 'privacy' #@param {type: \"string\"}\n", - "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}" + "spreadsheet_url = 'https://docs.google.com/spreadsheets/d/1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4/edit' #@param {type: \"string\", placeholder:\"Enter spreadsheet URL\"}\n", + "\n", + "IN_COLAB = 'google.colab' in sys.modules\n", + "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT" ] }, { @@ -36,9 +42,10 @@ }, "outputs": [], "source": [ - "# @title Download repo (Colab only - skip when running locally)\n", - "!git clone https://github.com/HTTPArchive/almanac.httparchive.org.git\n", - "!cd almanac.httparchive.org/" + "# @title Download repo\n", + "if IN_COLAB:\n", + " !git clone https://github.com/HTTPArchive/almanac.httparchive.org.git\n", + " !cd 
almanac.httparchive.org/" ] }, { @@ -54,10 +61,11 @@ }, "outputs": [], "source": [ - "# @title Update chapter branch (Colab only - skip when running locally)\n", - "branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", - "print(f\"Switching to branch: {branch_name}\")\n", - "!git checkout $branch_name && git pull" + "# @title Update chapter branch\n", + "if IN_COLAB:\n", + " branch_name = f'{chapter_name.lower()}-sql-{almanac_year}'\n", + " print(f\"Switching to branch: {branch_name}\")\n", + " !git checkout $branch_name && git pull" ] }, { @@ -66,14 +74,15 @@ "metadata": {}, "outputs": [], "source": [ - "# @title Authenticate (Colab only - skip when running locally)\n", - "from google.colab import auth\n", - "auth.authenticate_user()" + "# @title Authenticate in Google Colab\n", + "if IN_COLAB:\n", + " from google.colab import auth\n", + " auth.authenticate_user()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" @@ -83,7 +92,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "✓ Connected to spreadsheet with 26 existing sheets\n" + "✓ Connected to spreadsheet with 25 existing sheets\n" ] } ], @@ -94,8 +103,6 @@ "import gspread\n", "\n", "\n", - "os.environ[\"GOOGLE_CLOUD_PROJECT\"] = GCP_PROJECT\n", - "\n", "# Authenticate with required scopes for BigQuery and Google Sheets\n", "SCOPES = [\n", " 'https://www.googleapis.com/auth/spreadsheets',\n", @@ -115,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": { "cellView": "form", "colab": { @@ -130,1585 +137,61 @@ "name": "stdout", "output_type": "stream", "text": [ - "Looking for SQL files in: /Users/maxostapenko/Documents/GitHub/almanac.httparchive.org/sql/util/../2025/privacy/*.sql\n", - "Found 26 SQL files\n", - "\n", - "+-----------------------------+---------------+---------------+----------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason 
|\n", - "+=============================+===============+===============+======================+\n", - "| cookies_top_first_party.sql | Processing... | Processing... | Processing... |\n", - "+-----------------------------+---------------+---------------+----------------------+\n", - "+-----------------------------+-------------+---------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+=============================+=============+=========+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+-----------------------------+-------------+---------+--------------------------+\n", - "+-----------------------------+---------------+---------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+=============================+===============+===============+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+-----------------------------+---------------+---------------+--------------------------+\n", - "| cookies_top_third_party.sql | Processing... | Processing... | Processing... 
|\n", - "+-----------------------------+---------------+---------------+--------------------------+\n", - "+-----------------------------+-------------+---------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+=============================+=============+=========+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+-----------------------------+-------------+---------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+-----------------------------+-------------+---------+--------------------------+\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+================================+===============+===============+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_bounce_domains.sql | Processing... | Processing... | Processing... 
|\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+================================+=============+=========+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+================================+===============+===============+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_client_hints.sql | Processing... | Processing... | Processing... 
|\n", - "+--------------------------------+---------------+---------------+--------------------------+\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+================================+=============+=========+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+--------------------------------+-------------+---------+--------------------------+\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+=====================================+===============+===============+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| 
most_common_cmps_for_iab_tcf_v2.sql | Processing... | Processing... | Processing... |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+=====================================+=============+=========+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+=====================================+===============+===============+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - 
"+-------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_cname_domains.sql | Processing... | Processing... | Processing... |\n", - "+-------------------------------------+---------------+---------------+--------------------------+\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+=====================================+=============+=========+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+-------------------------------------+-------------+---------+--------------------------+\n", - 
"+------------------------------------------+---------------+---------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+===============+===============+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+---------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+---------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | Processing... | Processing... | Processing... 
|\n", - "+------------------------------------------+---------------+---------------+--------------------------+\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/maxostapenko/Documents/GitHub/almanac.httparchive.org/.venv/lib/python3.14/site-packages/google/cloud/bigquery/table.py:1994: UserWarning: BigQuery Storage module not found, fetch data with the REST endpoint instead.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - 
"+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most 
Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | Processing... | Processing... | Processing... |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - 
"+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | Processing... | Processing... | Processing... |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | 
Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| 
most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | Processing... | Processing... | Processing... 
|\n", - "+------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+==========================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch 
|\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | Processing... | Processing... | Processing... |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", 
- "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | Processing... | Processing... | Processing... |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | 
Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | Processing... | Processing... | Processing... |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - 
"+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql 
| | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - 
"+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql 
| | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | 
Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | Processing... | Processing... | Processing... |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | 
Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_countries_for_iab_tcf_v2.sql | 0.020 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter 
mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+--------------------------------------+--------------------------+\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+=============+======================================+==========================+\n", - "| cookies_top_first_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| cookies_top_third_party.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_bounce_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cmps_for_iab_tcf_v2.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_cname_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| 
most_common_countries_for_iab_tcf_v2.sql | 0.02 | Most Common Countries For Iab Tcf V2 | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_referrer_policy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_strings_for_iab_usp.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| most_common_tracker_categories.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_per_technology_category.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_using_each_fingerprinting.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_client_hints.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_dnt.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_gpc.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_iab.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_nb_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_referrerpolicy.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_related_origin_trials.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| number_of_websites_with_whotracksme_trackers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+--------------------------------------+--------------------------+\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+=============+=======================+==========================+\n", + "| _cname_domains_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| bounce_domains_top.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| client_hints_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| client_hints_usage.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| dnt_usage.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| fingerprinting_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| iab_tcf_v2_cmps_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| iab_tcf_v2_countries_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| iab_usage.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| number_of_trackers_buckets.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| referrer_policy_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| referrer_policy_usage.sql | 0 | Referrer Policy Usage | ✓ Uploaded |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| technologies_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| technology_category_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| tracker_categories_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", + "| whotracksme_trackers_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", "\n", - "✓ Processed 26 queries\n" + "✓ Processed 25 queries\n" ] } ], @@ -1716,14 +199,13 @@ "# @title Upload query results to Google Sheets\n", "import glob\n", "import re\n", - "import os\n", "from gspread_dataframe import set_with_dataframe\n", "from IPython.display import clear_output\n", "from tabulate import tabulate\n", "\n", "\n", "# Query filters and options\n", - "filename_match = '(most_common_countries_for_iab_tcf_v2).sql' # @param {type: \"raw\", placeholder: 
\"Enter regexp wrapped in quotes\"}\n", + "filename_match = '(referrer_policy_usage).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = True # @param {type: \"boolean\"}\n", @@ -1755,7 +237,7 @@ " # Build table from current log plus preview entry if needed\n", " display_log = queries_processed_log if not preview else queries_processed_log + [log_entry]\n", " table = tabulate(display_log, headers=['Query', 'TB Billed', 'Sheet', 'Status/Skip Reason'], tablefmt=\"grid\")\n", - " #clear_output(wait=True)\n", + " clear_output(wait=True)\n", " print(table)\n", "\n", " # Process each SQL file\n", From 951a7d2fbe6d1c10096dc5e0b99a4f5022f82657 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 12 Jan 2026 00:33:31 +0100 Subject: [PATCH 18/27] Remove deprecated SQL scripts, and add new scripts for tracker distribution and technologies analysis --- ...stered_by_third_parties_and_publishers.sql | 82 ----------- ...er_of_privacy_sandbox_attested_domains.sql | 43 ------ .../privacy/number_of_trackers_buckets.sql | 99 ------------- ...doption-by-third-parties-by-publishers.sql | 138 ------------------ .../privacy/related_origin_trials_top.sql | 68 +++++---- sql/2025/privacy/technologies_top.sql | 34 ----- sql/2025/privacy/technology_category_top.sql | 22 --- ...inations_registered_by_most_publishers.sql | 71 --------- ...tions_registered_by_most_third_parties.sql | 71 --------- sql/2025/privacy/tracker_categories_top.sql | 85 ++++------- sql/2025/privacy/tracker_distribution.sql | 54 +++++++ sql/2025/privacy/tracker_technologies_top.sql | 33 +++++ .../privacy/whotracksme_categories_top.sql | 66 +++++++++ sql/2025/privacy/whotracksme_trackers_top.sql | 66 ++++----- sql/util/bq_to_sheets.ipynb | 116 +++++++-------- 15 files 
changed, 299 insertions(+), 749 deletions(-) delete mode 100644 sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql delete mode 100644 sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql delete mode 100644 sql/2025/privacy/number_of_trackers_buckets.sql delete mode 100644 sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql delete mode 100644 sql/2025/privacy/technologies_top.sql delete mode 100644 sql/2025/privacy/technology_category_top.sql delete mode 100644 sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql delete mode 100644 sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql create mode 100644 sql/2025/privacy/tracker_distribution.sql create mode 100644 sql/2025/privacy/tracker_technologies_top.sql create mode 100644 sql/2025/privacy/whotracksme_categories_top.sql diff --git a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql b/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql deleted file mode 100644 index 4b692ee59bd..00000000000 --- a/sql/2025/privacy/number_of_ara_destinations_registered_by_third_parties_and_publishers.sql +++ /dev/null @@ -1,82 +0,0 @@ --- Number of Attribution Reporting API Destinations (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) - --- Extracting ARA API source registration details being passed by a given third-party (passed AS "key") -CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!jsonObject) { - return []; - } - const values = jsonObject[key] || []; - const result = []; - - values.forEach(value => { - if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { - const parts = value.replace('attribution-reporting-register-source|', '').split('|'); - parts.forEach(part => { - if 
(part.startsWith('destination=')) { - const destinations = part.replace('destination=', '').split(','); - destinations.forEach(destination => { - result.push('destination=' + destination.trim()); - }); - } else { - result.push(part.trim()); - } - }); - } - }); - - return result; -"""; - -WITH ara_features AS ( - SELECT - client, - CASE - WHEN rank <= 1000 THEN '1000' - WHEN rank <= 10000 THEN '10000' - WHEN rank <= 100000 THEN '100000' - WHEN rank <= 1000000 THEN '1000000' - WHEN rank <= 10000000 THEN '10000000' - ELSE 'Other' - END AS rank_group, - NET.REG_DOMAIN(page) AS publisher, - CASE - WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) - ELSE NULL - END AS destination, - third_party_domain - FROM `httparchive.crawl.pages`, - UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, - UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - ara LIKE 'destination%' -) - -SELECT - client, - rank_group, - COUNT(destination) AS total_destinations, - COUNT(DISTINCT destination) AS distinct_destinations, - ROUND(COUNT(DISTINCT destination) * 100 / COUNT(destination), 2) AS destination_pct, - COUNT(third_party_domain) AS total_third_party_domains, - COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains, - ROUND(COUNT(DISTINCT third_party_domain) * 100 / COUNT(third_party_domain), 2) AS third_party_domain_pct, - COUNT(publisher) AS total_publishers, - COUNT(DISTINCT publisher) AS distinct_publishers, - ROUND(COUNT(DISTINCT publisher) * 100 / COUNT(publisher), 2) AS publisher_pct -FROM ara_features -WHERE destination IS NOT NULL AND third_party_domain IS NOT NULL -GROUP BY client, rank_group -ORDER BY - client, - CASE rank_group - WHEN '1000' THEN 1 - WHEN '10000' THEN 2 - WHEN '100000' THEN 3 - WHEN '1000000' THEN 4 - WHEN '10000000' THEN 5 - ELSE 6 - END; 
diff --git a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql b/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql deleted file mode 100644 index a2f98701bd4..00000000000 --- a/sql/2025/privacy/number_of_privacy_sandbox_attested_domains.sql +++ /dev/null @@ -1,43 +0,0 @@ --- Privacy Sandbox Attestation and Related Websites JSON status (i.e., advertisers) registered, registering third-parties, and registering publishers (at site level) - -WITH wellknown AS ( - SELECT - client, - NET.HOST(page) AS host, - CASE - WHEN rank <= 1000 THEN '1000' - WHEN rank <= 10000 THEN '10000' - WHEN rank <= 100000 THEN '100000' - WHEN rank <= 1000000 THEN '1000000' - WHEN rank <= 10000000 THEN '10000000' - ELSE 'Other' - END AS rank_group, - SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/related-website-set.json`.found) AS rws, - SAFE.BOOL(custom_metrics.other.`well-known`.`/.well-known/privacy-sandbox-attestations.json`.found) AS attestation - FROM - `httparchive.crawl.pages` - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -) - -SELECT - client, - rank_group, - SUM(CASE WHEN rws THEN 1 ELSE 0 END) AS related_websites_set, - SUM(CASE WHEN attestation THEN 1 ELSE 0 END) AS privacy_sandbox_attestation -FROM - wellknown -WHERE - rws OR attestation -GROUP BY client, rank_group -ORDER BY - client, - CASE rank_group - WHEN '1000' THEN 1 - WHEN '10000' THEN 2 - WHEN '100000' THEN 3 - WHEN '1000000' THEN 4 - WHEN '10000000' THEN 5 - ELSE 6 - END; diff --git a/sql/2025/privacy/number_of_trackers_buckets.sql b/sql/2025/privacy/number_of_trackers_buckets.sql deleted file mode 100644 index 1556b106b2f..00000000000 --- a/sql/2025/privacy/number_of_trackers_buckets.sql +++ /dev/null @@ -1,99 +0,0 @@ --- Number of websites that deploy a certain number of trackers - -WITH whotracksme AS ( - SELECT - domain, - category, - tracker - FROM `httparchive.almanac.whotracksme` - WHERE date = '2025-07-01' -), - -totals AS ( - SELECT - client, - COUNT(DISTINCT 
page) AS total_websites - FROM `httparchive.crawl.requests` - WHERE date = '2025-07-01' - GROUP BY client -) - -SELECT - client, - 'any' AS type, - number_of_trackers, - COUNT(DISTINCT page) AS number_of_websites, - total_websites, - COUNT(DISTINCT page) / total_websites AS pct_websites -FROM ( - SELECT - client, - page, - COUNT(DISTINCT tracker) AS number_of_trackers - FROM `httparchive.crawl.requests` - JOIN whotracksme - ON ( - NET.HOST(url) = domain OR - ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) - ) - WHERE - date = '2025-07-01' AND - NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) - GROUP BY - client, - page -) -JOIN - totals -USING (client) -GROUP BY - client, - number_of_trackers, - total_websites - -UNION ALL - -SELECT - client, - 'any_tracker' AS type, - number_of_trackers, - COUNT(DISTINCT page) AS number_of_websites, - total_websites, - COUNT(DISTINCT page) / total_websites AS pct_websites -FROM ( - SELECT - client, - page, - COUNT(DISTINCT tracker) AS number_of_trackers - FROM `httparchive.crawl.requests` - JOIN - whotracksme - ON ( - NET.HOST(url) = domain OR - ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) - ) - WHERE - date = '2025-07-01' AND - NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) AND -- third party - ( - -- categories selected from https://whotracks.me/blog/tracker_categories.html - whotracksme.category = 'advertising' OR - whotracksme.category = 'pornvertising' OR - whotracksme.category = 'site_analytics' OR - whotracksme.category = 'social_media' - ) - GROUP BY - client, - page -) -JOIN - totals -USING (client) -GROUP BY - client, - number_of_trackers, - total_websites -ORDER BY - client, - type, - number_of_trackers diff --git a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql b/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql deleted file mode 100644 index df1c63cb830..00000000000 --- a/sql/2025/privacy/privacy-sandbox-adoption-by-third-parties-by-publishers.sql +++ /dev/null @@ 
-1,138 +0,0 @@ --- Adoption of different Privacy Sandbox (PS) features by different third-parties and by different publishers - --- Extracting PS APIs being called by a given third-party (passed as "key") -CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!jsonObject) { - return []; - } - const values = jsonObject[key] || []; - - function splitByDelimiters(value) { - const delimiterRegex = new RegExp(',|, |\\n|\\u0000', 'g'); - return value.split(delimiterRegex).map(v => v.trim()).filter(v => v); - } - - const result = []; - const replacements = { - 'Ch': 'CH', 'Ua': 'UA', 'Wow64': 'WoW64', 'Dpr': 'DPR', 'Rtt': 'RTT', 'Ect': 'ECT', 'Etc': 'ETC', '-Architecture': '-Arch', '-Arc': '-Arch', '-Archh': '-Arch', - '-Factors': '-Factor', '-ETC': '-ECT', '-Modal': '-Model', '-UA-UA': '-UA', '-UAm': '-UA', 'UAmodel': 'UA-Model', 'UAplatform': 'UA-Platform', 'Secch-UA': 'Sec-CH-UA', - 'CH-Width': 'CH-Viewport-Width', '-UAodel': '-UA-Model', '-Platformua-Platform': '-Platform', '-Platformuser-Agent': '-Platform', '-Version"': '-Version' - }; - values.forEach(value => { - if (value.startsWith('accept-ch|')) { - const parts = splitByDelimiters(value.replace('accept-ch|', '')); - parts.forEach(part => { - if (["UA", "Arch", "Bitness", "Full-Version-List", "Mobile", "Model", "Platform", "Platform-Version", "WoW64"].includes(part)) { - result.push("Sec-CH-UA-" + part); - } else { - let formattedPart = part.split('-').map(segment => - segment.charAt(0).toUpperCase() + segment.slice(1).toLowerCase() - ).join('-'); - for (const [key, value] of Object.entries(replacements)) { - formattedPart = formattedPart.replace(new RegExp(key, 'g'), value); - } - result.push(formattedPart); - } - }); - } else { - result.push(value); - } - }); - - return result; -"""; - -WITH privacy_sandbox_features AS ( - SELECT - client, - CASE - WHEN rank <= 1000 THEN '1000' - WHEN rank <= 10000 THEN '10000' - WHEN rank <= 100000 THEN '100000' - WHEN 
rank <= 1000000 THEN '1000000' - WHEN rank <= 10000000 THEN '10000000' - ELSE 'Other' - END AS rank_group, - NET.REG_DOMAIN(page) AS publisher, - third_party_domain, - CASE - WHEN api LIKE '%opics%|%' - THEN - REPLACE(SUBSTR(api, 0, STRPOS(api, '|') - 1) || '-' || SPLIT(api, '|')[SAFE_OFFSET(1)], '|', '-') - WHEN api LIKE 'attribution-reporting-register-source%' - THEN - SPLIT(api, '|')[OFFSET(0)] - ELSE - api - END AS feature - FROM `httparchive.crawl.pages`, - UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, - UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS api - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -), - -grouped_features AS ( - SELECT - rank_group, - feature, - COUNT(DISTINCT publisher) AS publisher_count, - COUNT(DISTINCT third_party_domain) AS third_party_count - FROM privacy_sandbox_features - GROUP BY rank_group, feature -), - -aggregated_features AS ( - SELECT - feature, - SUM(CASE WHEN rank_group = '1000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_1000, - SUM(CASE WHEN rank_group = '1000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_1000, - SUM(CASE WHEN rank_group = '1000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_1000, - SUM(CASE WHEN rank_group = '1000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_1000, - SUM(CASE WHEN rank_group = '10000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_10000, - SUM(CASE WHEN rank_group = '10000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_10000, - SUM(CASE WHEN rank_group = '10000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_10000, - SUM(CASE WHEN rank_group = '10000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_10000, - SUM(CASE WHEN rank_group = '100000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_100000, - SUM(CASE WHEN rank_group = '100000' 
THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_100000, - SUM(CASE WHEN rank_group = '100000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_100000, - SUM(CASE WHEN rank_group = '100000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_100000, - SUM(CASE WHEN rank_group = '1000000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_1000000, - SUM(CASE WHEN rank_group = '1000000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_1000000, - SUM(CASE WHEN rank_group = '1000000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_1000000, - SUM(CASE WHEN rank_group = '1000000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_1000000, - SUM(CASE WHEN rank_group = '10000000' THEN publisher_count ELSE 0 END) AS total_publisher_leq_10000000, - SUM(CASE WHEN rank_group = '10000000' THEN publisher_count ELSE 0 END) AS distinct_publisher_leq_10000000, - SUM(CASE WHEN rank_group = '10000000' THEN third_party_count ELSE 0 END) AS total_third_parties_leq_10000000, - SUM(CASE WHEN rank_group = '10000000' THEN third_party_count ELSE 0 END) AS distinct_third_parties_leq_10000000 - FROM grouped_features - GROUP BY feature -) - -SELECT - feature AS privacy_sandbox_features, - total_publisher_leq_1000, - distinct_publisher_leq_1000, - total_third_parties_leq_1000, - distinct_third_parties_leq_1000, - total_publisher_leq_10000, - distinct_publisher_leq_10000, - total_third_parties_leq_10000, - distinct_third_parties_leq_10000, - total_publisher_leq_100000, - distinct_publisher_leq_100000, - total_third_parties_leq_100000, - distinct_third_parties_leq_100000, - total_publisher_leq_1000000, - distinct_publisher_leq_1000000, - total_third_parties_leq_1000000, - distinct_third_parties_leq_1000000, - total_publisher_leq_10000000, - distinct_publisher_leq_10000000, - total_third_parties_leq_10000000, - distinct_third_parties_leq_10000000 -FROM aggregated_features -ORDER BY feature; diff --git 
a/sql/2025/privacy/related_origin_trials_top.sql b/sql/2025/privacy/related_origin_trials_top.sql index a667110aab9..be07979ce90 100644 --- a/sql/2025/privacy/related_origin_trials_top.sql +++ b/sql/2025/privacy/related_origin_trials_top.sql @@ -1,3 +1,4 @@ +-- noqa: disable=PRS -- Pages that participate in the privacy-relayed origin trials CREATE TEMP FUNCTION `PARSE_ORIGIN_TRIAL`(token STRING) RETURNS STRUCT< @@ -27,28 +28,36 @@ DETERMINISTIC AS ( ) ); -WITH pages AS ( +WITH base_totals AS ( SELECT client, - page, + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` + WHERE + date = '2025-07-01' + GROUP BY client +), + +pages AS ( + SELECT + client, + root_page, custom_metrics.other.`origin-trials` AS ot_metrics, custom_metrics.other.almanac AS almanac_metrics FROM `httparchive.crawl.pages` WHERE - date = '2025-07-01' AND - is_root_page = TRUE + date = '2025-07-01' ), response_headers AS ( SELECT client, - page, - PARSE_ORIGIN_TRIAL(response_header.value) AS ot -- may not lowercase this value as it is a base64 string + root_page, + PARSE_ORIGIN_TRIAL(response_header.value) AS ot FROM `httparchive.crawl.requests`, UNNEST(response_headers) response_header WHERE date = '2025-07-01' AND - is_root_page = TRUE AND is_main_document = TRUE AND LOWER(response_header.name) = 'origin-trial' ), @@ -56,8 +65,8 @@ response_headers AS ( meta_tags AS ( SELECT client, - page, - PARSE_ORIGIN_TRIAL(SAFE.STRING(meta_node.content)) AS ot -- may not lowercase this value as it is a base64 string + root_page, + PARSE_ORIGIN_TRIAL(SAFE.STRING(meta_node.content)) AS ot FROM pages, UNNEST(JSON_QUERY_ARRAY(almanac_metrics.`meta-nodes`.nodes)) meta_node WHERE @@ -67,24 +76,18 @@ meta_tags AS ( ot_from_custom_metric AS ( SELECT client, - page, + root_page, PARSE_ORIGIN_TRIAL(SAFE.STRING(metric.token)) AS ot FROM pages, UNNEST(JSON_QUERY_ARRAY(ot_metrics)) metric -) +), -SELECT - client, - feature, - number_of_pages / total_pages AS pct_pages, - number_of_pages, - 
is_active -FROM ( +aggregated AS ( SELECT client, ot.feature, - ot.expiry >= CURRENT_TIMESTAMP() AS is_active, - COUNT(DISTINCT page) AS number_of_pages + --ot.expiry >= CURRENT_TIMESTAMP() AS is_active, + COUNT(DISTINCT root_page) AS number_of_websites FROM ( SELECT * FROM response_headers UNION ALL @@ -94,17 +97,18 @@ FROM ( ) GROUP BY client, - feature, - is_active + feature + --is_active ) -LEFT JOIN ( - SELECT - client, - COUNT(DISTINCT page) AS total_pages - FROM pages - GROUP BY - client + +FROM aggregated +|> JOIN base_totals USING (client) +|> EXTEND number_of_websites / total_websites AS pct_websites +|> DROP total_websites +|> PIVOT( + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') ) -USING (client) -ORDER BY - number_of_pages DESC +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC -- NULL-safe: a feature seen on only one client still ranks by its count diff --git a/sql/2025/privacy/technologies_top.sql b/sql/2025/privacy/technologies_top.sql deleted file mode 100644 index ff808a4eafc..00000000000 --- a/sql/2025/privacy/technologies_top.sql +++ /dev/null @@ -1,34 +0,0 @@ -WITH technologies AS ( - SELECT - client, - page, - category, - technology, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_websites - FROM `httparchive.crawl.pages`, - UNNEST(technologies) AS tech, - UNNEST(categories) AS category - WHERE - date = '2025-07-01' AND - is_root_page = TRUE -) - -SELECT - client, - technology, - COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages, - COUNT(DISTINCT page) AS number_of_pages, - ARRAY_AGG(DISTINCT category) AS categories -FROM technologies -WHERE - category IN ( - 'Analytics', 'Browser fingerprinting', 'Customer data platform', - 'Geolocation', - 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', - 'Cookie compliance' - ) -GROUP BY - client, - technology -ORDER BY - pct_pages DESC diff --git a/sql/2025/privacy/technology_category_top.sql
b/sql/2025/privacy/technology_category_top.sql deleted file mode 100644 index ac40b734f59..00000000000 --- a/sql/2025/privacy/technology_category_top.sql +++ /dev/null @@ -1,22 +0,0 @@ -SELECT - client, - category, - COUNT(DISTINCT IF(category = tech_category, page, NULL)) / COUNT(DISTINCT page) AS pct_pages, - COUNT(DISTINCT IF(category = tech_category, page, NULL)) AS number_of_pages -FROM `httparchive.crawl.pages`, - UNNEST(technologies) AS tech, - UNNEST(categories) AS tech_category, - UNNEST([ - 'Analytics', 'Browser fingerprinting', 'Customer data platform', - 'Geolocation', - 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', - 'Cookie compliance' - ]) AS category -WHERE - date = '2025-07-01' AND - is_root_page = TRUE -GROUP BY - client, - category -ORDER BY - pct_pages DESC diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql deleted file mode 100644 index 2ce936f278c..00000000000 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_publishers.sql +++ /dev/null @@ -1,71 +0,0 @@ --- Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct publishers (at site level) - --- Extracting ARA API source registration details being passed by a given third-party (passed as "key") -CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) -RETURNS ARRAY -LANGUAGE js AS """ - if (!jsonObject) { - return []; - } - const values = jsonObject[key] || []; - const result = []; - - values.forEach(value => { - if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { - const parts = value.replace('attribution-reporting-register-source|', '').split('|'); - parts.forEach(part => { - if (part.startsWith('destination=')) { - const destinations = part.replace('destination=', '').split(','); - destinations.forEach(destination => { - result.push('destination=' + 
destination.trim()); - }); - } else { - result.push(part.trim()); - } - }); - } - }); - - return result; -"""; - -WITH ara_features AS ( - SELECT - client, - CASE - WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) - ELSE NULL - END AS destination, - COUNT(NET.REG_DOMAIN(page)) AS total_publishers, - COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_publishers, - COUNT(third_party_domain) AS total_third_party_domains, - COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains - FROM `httparchive.crawl.pages`, - UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, - UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - ara LIKE 'destination%' - GROUP BY client, destination - HAVING destination IS NOT NULL -), - -ranked_features AS ( - SELECT - client, - destination, - total_publishers, - distinct_publishers, - total_third_party_domains, - distinct_third_party_domains, - ROW_NUMBER() OVER ( - PARTITION BY client - ORDER BY distinct_publishers DESC - ) AS publisher_rank - FROM ara_features -) - -SELECT * FROM ranked_features -WHERE publisher_rank <= 25 -ORDER BY client, distinct_publishers DESC; diff --git a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql b/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql deleted file mode 100644 index 5150224a2b5..00000000000 --- a/sql/2025/privacy/top_ara_destinations_registered_by_most_third_parties.sql +++ /dev/null @@ -1,71 +0,0 @@ --- Top 25 Attribution Reporting API Destinations (i.e., advertisers) registered by the most number of distinct third-parties (at site level) - --- Extracting ARA API source registration details being passed by a given third-party (passed as "key") -CREATE TEMP FUNCTION JSON_OBJECT_VALUES(jsonObject JSON, key STRING) -RETURNS ARRAY 
-LANGUAGE js AS """ - if (!jsonObject) { - return []; - } - const values = jsonObject[key] || []; - const result = []; - - values.forEach(value => { - if (value.toLowerCase().startsWith('attribution-reporting-register-source|')) { - const parts = value.replace('attribution-reporting-register-source|', '').split('|'); - parts.forEach(part => { - if (part.startsWith('destination=')) { - const destinations = part.replace('destination=', '').split(','); - destinations.forEach(destination => { - result.push('destination=' + destination.trim()); - }); - } else { - result.push(part.trim()); - } - }); - } - }); - - return result; -"""; - -WITH ara_features AS ( - SELECT - client, - CASE - WHEN ara LIKE 'destination=%' THEN NET.REG_DOMAIN(REPLACE(ara, 'destination=', '')) - ELSE NULL - END AS destination, - COUNT(NET.REG_DOMAIN(page)) AS total_publishers, - COUNT(DISTINCT NET.REG_DOMAIN(page)) AS distinct_publishers, - COUNT(third_party_domain) AS total_third_party_domains, - COUNT(DISTINCT third_party_domain) AS distinct_third_party_domains - FROM `httparchive.crawl.pages`, - UNNEST(JSON_KEYS(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage)) AS third_party_domain, - UNNEST(JSON_OBJECT_VALUES(custom_metrics.other.`privacy-sandbox`.privacySandBoxAPIUsage, third_party_domain)) AS ara - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - ara LIKE 'destination%' - GROUP BY client, destination - HAVING destination IS NOT NULL -), - -ranked_features AS ( - SELECT - client, - destination, - total_publishers, - distinct_publishers, - total_third_party_domains, - distinct_third_party_domains, - ROW_NUMBER() OVER ( - PARTITION BY client - ORDER BY distinct_third_party_domains DESC - ) AS third_party_domain_rank - FROM ara_features -) - -SELECT * FROM ranked_features -WHERE third_party_domain_rank <= 25 -ORDER BY client, distinct_third_party_domains DESC; diff --git a/sql/2025/privacy/tracker_categories_top.sql b/sql/2025/privacy/tracker_categories_top.sql index 
31fe6d707d5..ffdc76c8dd1 100644 --- a/sql/2025/privacy/tracker_categories_top.sql +++ b/sql/2025/privacy/tracker_categories_top.sql @@ -1,66 +1,33 @@ --- Percent of pages that deploy at least one tracker from each tracker category +-- noqa: disable=PRS -WITH whotracksme AS ( - SELECT - domain, - category, - tracker - FROM httparchive.almanac.whotracksme - WHERE date = '2025-07-01' -), - -totals AS ( +WITH base_totals AS ( SELECT client, - COUNT(DISTINCT page) AS total_websites - FROM httparchive.crawl.requests - WHERE - date = '2025-07-01' + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' GROUP BY client -), +) -tracker_categories AS ( - SELECT - client, - category, - page - FROM httparchive.crawl.requests - JOIN whotracksme - ON ( - NET.HOST(url) = domain OR - ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) +FROM `httparchive.crawl.pages`, + UNNEST(technologies) AS tech, + UNNEST(tech.categories) AS category +|> WHERE + date = '2025-07-01' AND + category IN ( + 'Analytics', 'Browser fingerprinting', 'Customer data platform', + 'Geolocation', + 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', + 'Cookie compliance' ) - WHERE - date = '2025-07-01' AND - NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) -- third party -), - -aggregated AS ( - SELECT - client, - category, - COUNT(DISTINCT page) AS number_of_websites - FROM tracker_categories - GROUP BY - client, - category - UNION ALL - SELECT - client, - 'any' AS category, - COUNT(DISTINCT page) AS number_of_websites - FROM tracker_categories - GROUP BY - client +|> AGGREGATE COUNT(DISTINCT root_page) AS number_of_websites GROUP BY client, category +|> JOIN base_totals USING (client) +|> EXTEND number_of_websites / total_websites AS pct_websites +|> DROP total_websites +|> PIVOT( + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') ) - -SELECT - client, - category, - number_of_websites, 
- total_websites, - number_of_websites / total_websites AS pct_websites -FROM aggregated -JOIN totals -USING (client) -ORDER BY number_of_websites DESC +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC -- NULL-safe: a category seen on only one client still ranks by its count diff --git a/sql/2025/privacy/tracker_distribution.sql b/sql/2025/privacy/tracker_distribution.sql new file mode 100644 index 00000000000..52a738c6bf0 --- /dev/null +++ b/sql/2025/privacy/tracker_distribution.sql @@ -0,0 +1,54 @@ +-- noqa: disable=PRS +-- Number of websites that deploy a certain number of trackers + +WITH base_totals AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' + --AND rank = 1000 + GROUP BY client +), + +whotracksme AS ( + SELECT + NET.HOST(domain) AS domain, + tracker + FROM `httparchive.almanac.whotracksme` + WHERE date = '2025-07-01' + AND category IN ('advertising', 'pornvertising', 'site_analytics', 'social_media') +), + +tracker_counts AS ( + SELECT + client, + root_page, + COUNT(DISTINCT tracker) AS number_of_trackers + FROM `httparchive.crawl.requests` + JOIN whotracksme + ON + NET.HOST(url) = domain OR + ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) + WHERE + date = '2025-07-01' AND + --rank = 1000 AND + NOT ENDS_WITH('.' || NET.HOST(root_page), '.'
|| NET.HOST(url)) -- third-party + GROUP BY + client, + root_page +) + +FROM tracker_counts +|> AGGREGATE COUNT(DISTINCT root_page) AS number_of_websites GROUP BY client, number_of_trackers +|> EXTEND SUM(number_of_websites) OVER (PARTITION BY client ORDER BY number_of_trackers DESC) AS ccdf_websites +|> JOIN base_totals USING (client) +|> EXTEND ccdf_websites / total_websites AS ccdf +|> DROP total_websites, number_of_websites +|> PIVOT( + ANY_VALUE(ccdf_websites) AS websites_count, + ANY_VALUE(ccdf) AS ccdf + FOR client IN ('desktop', 'mobile') +) +|> RENAME ccdf_mobile AS mobile, ccdf_desktop AS desktop +|> ORDER BY number_of_trackers diff --git a/sql/2025/privacy/tracker_technologies_top.sql b/sql/2025/privacy/tracker_technologies_top.sql new file mode 100644 index 00000000000..73db1310343 --- /dev/null +++ b/sql/2025/privacy/tracker_technologies_top.sql @@ -0,0 +1,33 @@ +-- noqa: disable=PRS + +WITH base_totals AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` + WHERE date = '2025-07-01' + GROUP BY client +) + +FROM `httparchive.crawl.pages`, + UNNEST(technologies) AS tech, + UNNEST(tech.categories) AS category +|> WHERE + date = '2025-07-01' AND + category IN ( + 'Analytics', 'Browser fingerprinting', 'Customer data platform', + 'Geolocation', + 'Advertising', 'Retargeting', 'Personalisation', 'Segmentation', + 'Cookie compliance' + ) +|> AGGREGATE COUNT(DISTINCT root_page) AS number_of_websites GROUP BY client, tech.technology +|> JOIN base_totals USING (client) +|> EXTEND number_of_websites / total_websites AS pct_websites +|> DROP total_websites +|> PIVOT( + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC -- NULL-safe: a technology seen on only one client still ranks by its count diff --git a/sql/2025/privacy/whotracksme_categories_top.sql
b/sql/2025/privacy/whotracksme_categories_top.sql new file mode 100644 index 00000000000..5fe558a7c2e --- /dev/null +++ b/sql/2025/privacy/whotracksme_categories_top.sql @@ -0,0 +1,66 @@ +-- noqa: disable=PRS +-- Percent of websites that deploy at least one tracker from each tracker category + +WITH base_totals AS ( + SELECT + client, + COUNT(DISTINCT root_page) AS total_websites + FROM httparchive.crawl.pages + WHERE date = '2025-07-01' + GROUP BY client +), + +whotracksme AS ( + SELECT + NET.HOST(domain) AS domain, + category + FROM httparchive.almanac.whotracksme + WHERE date = '2025-07-01' +), + +tracker_categories AS ( + SELECT + client, + category, + root_page + FROM httparchive.crawl.requests + JOIN whotracksme + ON ( + NET.HOST(url) = domain OR + ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) + ) + WHERE + date = '2025-07-01' AND + NOT ENDS_WITH('.' || NET.HOST(root_page), '.' || NET.HOST(url)) -- third party +), + +aggregated AS ( + SELECT + client, + category, + COUNT(DISTINCT root_page) AS number_of_websites + FROM tracker_categories + GROUP BY + client, + category + UNION ALL + SELECT + client, + 'any' AS category, + COUNT(DISTINCT root_page) AS number_of_websites + FROM tracker_categories + GROUP BY + client +) + +FROM aggregated +|> JOIN base_totals USING (client) +|> EXTEND number_of_websites / total_websites AS pct_websites +|> DROP total_websites +|> PIVOT( + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC -- NULL-safe: a category seen on only one client still ranks by its count diff --git a/sql/2025/privacy/whotracksme_trackers_top.sql b/sql/2025/privacy/whotracksme_trackers_top.sql index 9691070ebf9..9f8a4362c39 100644 --- a/sql/2025/privacy/whotracksme_trackers_top.sql +++ b/sql/2025/privacy/whotracksme_trackers_top.sql @@ -1,43 +1,37 @@ -WITH whotracksme AS ( +-- noqa: disable=PRS + +WITH base_totals AS ( SELECT - domain, -
category, - tracker - FROM `httparchive.almanac.whotracksme` + client, + COUNT(DISTINCT root_page) AS total_websites + FROM `httparchive.crawl.pages` WHERE date = '2025-07-01' + GROUP BY client ), -pre_aggregated AS ( +whotracksme AS ( SELECT - client, - category, - page, - tracker, - COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages - FROM `httparchive.crawl.requests` - JOIN whotracksme - ON NET.REG_DOMAIN(url) = domain - WHERE - date = '2025-07-01' AND - is_root_page = TRUE AND - NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) -- third party - GROUP BY - client, - category, - tracker, - page + NET.HOST(domain) AS domain, + category || ' / ' || tracker AS tracker + FROM `httparchive.almanac.whotracksme` + WHERE date = '2025-07-01' ) -SELECT - client, - category, - tracker, - COUNT(DISTINCT page) / ANY_VALUE(total_pages) AS pct_pages, - COUNT(DISTINCT page) AS number_of_pages -FROM pre_aggregated -GROUP BY - client, - category, - tracker -ORDER BY - pct_pages DESC +FROM `httparchive.crawl.requests` +|> JOIN whotracksme + ON NET.HOST(url) = domain OR + ENDS_WITH(NET.HOST(url), '.' || domain) +|> WHERE + date = '2025-07-01' AND + NOT ENDS_WITH('.' || NET.HOST(root_page), '.' 
|| NET.HOST(url)) -- third party +|> AGGREGATE COUNT(DISTINCT root_page) AS number_of_websites GROUP BY client, tracker +|> JOIN base_totals USING (client) +|> EXTEND number_of_websites / total_websites AS pct_websites +|> DROP total_websites +|> PIVOT( + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct + FOR client IN ('desktop', 'mobile') +) +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC -- NULL-safe: a tracker seen on only one client still ranks by its count diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 16c466f84ca..b298964a8f2 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "cellView": "form", "colab": { @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "cellView": "form", "colab": { @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" @@ -92,7 +92,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "✓ Connected to spreadsheet with 25 existing sheets\n" + "✓ Connected to spreadsheet with 26 existing sheets\n" ] } ], @@ -122,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "cellView": "form", "colab": { @@ -137,61 +137,53 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", -
"+===========================================================================+=============+=======================+==========================+\n", - "| _cname_domains_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| bounce_domains_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| client_hints_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| client_hints_usage.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| dnt_usage.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| fingerprinting_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| iab_tcf_v2_cmps_top.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| iab_tcf_v2_countries_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| iab_usage.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| number_of_trackers_buckets.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| referrer_policy_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| 
referrer_policy_usage.sql | 0 | Referrer Policy Usage | ✓ Uploaded |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| technologies_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| technology_category_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| tracker_categories_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "| whotracksme_trackers_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+-------------+-----------------------+--------------------------+\n", - "\n", - "✓ Processed 25 queries\n" + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + 
"| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+===========================================================================+===============+===============+==========================+\n", + "| _cname_domains_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| bounce_domains_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| client_hints_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| client_hints_usage.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| dnt_usage.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| fingerprinting_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| iab_tcf_v2_cmps_top.sql | | | Filename filter mismatch |\n", + 
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| iab_tcf_v2_countries_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| iab_usage.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| referrer_policy_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| referrer_policy_usage.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", 
+ "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| tracker_categories_top.sql | | | Filename filter mismatch |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", + "| tracker_distribution.sql | Processing... | Processing... | Processing... |\n", + "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n" ] } ], @@ -205,7 +197,7 @@ "\n", "\n", "# Query filters and options\n", - "filename_match = '(referrer_policy_usage).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = '(tracker_distribution).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = True # @param {type: \"boolean\"}\n", From 9ab94bd0a49218c5bd26a2ef48802ec77ded988f Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 12 Jan 2026 01:29:10 +0100 Subject: [PATCH 19/27] updated metrics --- sql/2025/privacy/bounce_domains_top.sql | 32 +++++++++---------- sql/2025/privacy/client_hints_top.sql | 24 ++++++-------- sql/2025/privacy/iab_tcf_v2_cmps_top.sql | 
13 ++++---- sql/2025/privacy/iab_tcf_v2_countries_top.sql | 12 +++---- 4 files changed, 38 insertions(+), 43 deletions(-) diff --git a/sql/2025/privacy/bounce_domains_top.sql b/sql/2025/privacy/bounce_domains_top.sql index 49280cb1a0a..7303ac670e5 100644 --- a/sql/2025/privacy/bounce_domains_top.sql +++ b/sql/2025/privacy/bounce_domains_top.sql @@ -17,7 +17,7 @@ WITH redirect_requests AS ( url, index, NET.REG_DOMAIN(header.value) AS location_domain, - page + root_page ), -- Find the first navigation redirect @@ -25,11 +25,11 @@ navigation_redirect AS ( FROM redirect_requests |> WHERE index = 1 AND - NET.REG_DOMAIN(page) = NET.REG_DOMAIN(url) AND + NET.REG_DOMAIN(root_page) = NET.REG_DOMAIN(url) AND NET.REG_DOMAIN(url) != location_domain |> SELECT client, - page, + root_page, location_domain AS bounce_domain ), @@ -38,12 +38,12 @@ bounce_redirect AS ( FROM redirect_requests |> WHERE index = 2 AND - NET.REG_DOMAIN(page) != NET.REG_DOMAIN(url) AND + NET.REG_DOMAIN(root_page) != NET.REG_DOMAIN(url) AND NET.REG_DOMAIN(url) != location_domain |> SELECT client, url, - page, + root_page, location_domain AS bounce_redirect_location_domain ), @@ -53,26 +53,26 @@ bounce_sequences AS ( |> JOIN bounce_redirect AS bounce ON nav.client = bounce.client AND - nav.page = bounce.page - |> AGGREGATE COUNT(DISTINCT nav.page) AS pages_count + nav.root_page = bounce.root_page + |> AGGREGATE COUNT(DISTINCT nav.root_page) AS websites_count GROUP BY nav.client, bounce_domain ), -pages_total AS ( +websites_total AS ( FROM `httparchive.crawl.pages` |> WHERE date = '2025-07-01' --AND rank = 1000 - |> AGGREGATE COUNT(DISTINCT page) AS total_pages GROUP BY client + |> AGGREGATE COUNT(DISTINCT root_page) AS total_websites GROUP BY client ) FROM bounce_sequences -|> JOIN pages_total USING (client) -|> EXTEND pages_count / total_pages AS pages_pct -|> DROP total_pages +|> JOIN websites_total USING (client) +|> EXTEND websites_count / total_websites AS websites_pct +|> DROP total_websites |> PIVOT( 
- ANY_VALUE(pages_count) AS cnt, - ANY_VALUE(pages_pct) AS pages_pct + ANY_VALUE(websites_count) AS cnt, + ANY_VALUE(websites_pct) AS pct FOR client IN ('desktop', 'mobile') ) -|> RENAME cnt_mobile AS mobile, cnt_desktop AS desktop -|> ORDER BY mobile + desktop DESC +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop, cnt_mobile AS mobile_count, cnt_desktop AS desktop_count +|> ORDER BY mobile_count + desktop_count DESC |> LIMIT 100 diff --git a/sql/2025/privacy/client_hints_top.sql b/sql/2025/privacy/client_hints_top.sql index 815f4638061..17a355a6600 100644 --- a/sql/2025/privacy/client_hints_top.sql +++ b/sql/2025/privacy/client_hints_top.sql @@ -12,7 +12,7 @@ headers AS ( |> JOIN UNNEST(response_headers) AS header |> WHERE LOWER(header.name) = 'accept-ch' |> LEFT JOIN UNNEST(SPLIT(LOWER(header.value), ',')) AS header_value - |> SELECT client, page, header_value + |> SELECT client, root_page, header_value ), @@ -21,28 +21,24 @@ meta_tags AS ( FROM `httparchive.crawl.pages` |> WHERE date = '2025-07-01' AND is_root_page --AND rank = 1000 |> JOIN UNNEST(JSON_QUERY_ARRAY(custom_metrics.other.almanac.`meta-nodes`.nodes)) AS meta_node - |> EXTEND - LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name, + |> EXTEND LOWER(SAFE.STRING(meta_node.`http-equiv`)) AS tag_name |> WHERE tag_name = 'accept-ch' |> LEFT JOIN UNNEST(SPLIT(LOWER(SAFE.STRING(meta_node.content)), ',')) AS tag_value - |> SELECT client, page, tag_value + |> SELECT client, root_page, tag_value ) FROM headers -|> FULL OUTER JOIN meta_tags USING (client, page) +|> FULL OUTER JOIN meta_tags USING (client, root_page) |> JOIN totals USING (client) |> EXTEND TRIM(COALESCE(header_value, tag_value)) AS value |> AGGREGATE -COUNT(DISTINCT page) AS number_of_pages, -COUNT(DISTINCT page) / ANY_VALUE(total_websites) AS pct_pages +COUNT(DISTINCT root_page) AS number_of_websites, +COUNT(DISTINCT root_page) / ANY_VALUE(total_websites) AS pct_websites GROUP BY client, value |> PIVOT( - ANY_VALUE(number_of_pages) AS 
pages_count, - ANY_VALUE(pct_pages) AS pct + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct FOR client IN ('desktop', 'mobile') ) -|> RENAME -pct_mobile AS mobile, -pct_desktop AS desktop -|> ORDER BY pages_count_mobile + pages_count_desktop DESC -|> LIMIT 200 +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop +|> ORDER BY websites_count_desktop + websites_count_mobile DESC diff --git a/sql/2025/privacy/iab_tcf_v2_cmps_top.sql b/sql/2025/privacy/iab_tcf_v2_cmps_top.sql index 3ac96acd71b..f116451f84c 100644 --- a/sql/2025/privacy/iab_tcf_v2_cmps_top.sql +++ b/sql/2025/privacy/iab_tcf_v2_cmps_top.sql @@ -1,3 +1,4 @@ +-- noqa: disable=PRS -- Counts of CMPs using IAB Transparency & Consent Framework -- cf. https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/TCFv2/IAB%20Tech%20Lab%20-%20CMP%20API%20v2.md--tcdata -- CMP vendor list: https://iabeurope.eu/cmp-list/ @@ -6,15 +7,15 @@ FROM `httparchive.crawl.pages` |> WHERE date = '2025-07-01' --AND rank = 1000 |> EXTEND SAFE.INT64(custom_metrics.privacy.iab_tcf_v2.data.cmpId) AS cmpId, -COUNT(DISTINCT page) OVER (PARTITION BY client) AS total_pages +COUNT(DISTINCT root_page) OVER (PARTITION BY client) AS total_websites |> AGGREGATE -COUNT(0) AS number_of_pages, -COUNT(0) / ANY_VALUE(total_pages) AS pct_pages +COUNT(DISTINCT root_page) AS number_of_websites, +COUNT(DISTINCT root_page) / ANY_VALUE(total_websites) AS pct_websites GROUP BY client, cmpId |> PIVOT ( - ANY_VALUE(number_of_pages) AS pages_count, - ANY_VALUE(pct_pages) AS pct + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_websites) AS pct FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY pages_count_mobile + pages_count_desktop DESC +|> ORDER BY websites_count_desktop + websites_count_mobile DESC diff --git a/sql/2025/privacy/iab_tcf_v2_countries_top.sql b/sql/2025/privacy/iab_tcf_v2_countries_top.sql 
index 7874a03e8f5..a7359e134b5 100644 --- a/sql/2025/privacy/iab_tcf_v2_countries_top.sql +++ b/sql/2025/privacy/iab_tcf_v2_countries_top.sql @@ -26,16 +26,14 @@ base_data AS ( ) FROM base_data -|> AGGREGATE - COUNT(DISTINCT root_page) AS number_of_pages -GROUP BY client, publisherCC +|> AGGREGATE COUNT(DISTINCT root_page) AS number_of_websites GROUP BY client, publisherCC |> JOIN base_totals USING (client) -|> EXTEND number_of_pages / total_websites AS pct_of_pages +|> EXTEND number_of_websites / total_websites AS pct_of_websites |> DROP total_websites |> PIVOT( - ANY_VALUE(number_of_pages) AS pages_count, - ANY_VALUE(pct_of_pages) AS pct + ANY_VALUE(number_of_websites) AS websites_count, + ANY_VALUE(pct_of_websites) AS pct FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY pages_count_mobile + pages_count_desktop DESC +|> ORDER BY websites_count_desktop + websites_count_mobile DESC From 864fddd0f21b15b5af2e75698ba631305d650179 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 12 Jan 2026 01:30:27 +0100 Subject: [PATCH 20/27] formatting --- sql/2025/privacy/tracker_technologies_top.sql | 2 +- sql/util/bq_to_sheets.ipynb | 108 ++++++++++-------- sql/util/haveibeenpwned.py | 5 - 3 files changed, 59 insertions(+), 56 deletions(-) diff --git a/sql/2025/privacy/tracker_technologies_top.sql b/sql/2025/privacy/tracker_technologies_top.sql index 73db1310343..92385e7b6e4 100644 --- a/sql/2025/privacy/tracker_technologies_top.sql +++ b/sql/2025/privacy/tracker_technologies_top.sql @@ -29,5 +29,5 @@ FROM `httparchive.crawl.pages`, ANY_VALUE(pct_websites) AS pct FOR client IN ('desktop', 'mobile') ) -|> RENAME pct_mobile AS mobile, pct_desktop AS desktop, +|> RENAME pct_mobile AS mobile, pct_desktop AS desktop |> ORDER BY websites_count_desktop + websites_count_mobile DESC diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 
b298964a8f2..bcc7195844e 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -92,7 +92,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "✓ Connected to spreadsheet with 26 existing sheets\n" + "✓ Connected to spreadsheet with 25 existing sheets\n" ] } ], @@ -122,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "cellView": "form", "colab": { @@ -137,53 +137,61 @@ "name": "stdout", "output_type": "stream", "text": [ - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+===========================================================================+===============+===============+==========================+\n", - "| _cname_domains_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| bounce_domains_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| client_hints_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| client_hints_usage.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| dnt_usage.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| fingerprinting_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| iab_tcf_v2_cmps_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| iab_tcf_v2_countries_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| iab_usage.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - 
"+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| referrer_policy_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| referrer_policy_usage.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| tracker_categories_top.sql | | | Filename filter mismatch |\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n", - "| tracker_distribution.sql | Processing... | Processing... | Processing... 
|\n", - "+---------------------------------------------------------------------------+---------------+---------------+--------------------------+\n" + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+============================================================================+=============+==========================+==========================+\n", + "| _cname_domains_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| _number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| _number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| _privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| _top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| _top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", + 
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| bounce_domains_top.sql | 5.131 | Bounce Domains Top | ✓ Uploaded |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| client_hints_top.sql | 1.338 | Client Hints Top | ✓ Uploaded |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| client_hints_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| dnt_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| fingerprinting_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| iab_tcf_v2_cmps_top.sql | 0.02 | Iab Tcf V2 Cmps Top | ✓ Uploaded |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| iab_tcf_v2_countries_top.sql | 0.02 | Iab Tcf V2 Countries 
Top | ✓ Uploaded |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| iab_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| referrer_policy_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| referrer_policy_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| tracker_categories_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| tracker_distribution.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| tracker_technologies_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| whotracksme_categories_top.sql | | | Filename 
filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "| whotracksme_trackers_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "\n", + "✓ Processed 25 queries\n" ] } ], @@ -197,7 +205,7 @@ "\n", "\n", "# Query filters and options\n", - "filename_match = '(tracker_distribution).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = '(iab_tcf_v2_cmps_top|iab_tcf_v2_countries_top|client_hints_top|bounce_domains_top).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = True # @param {type: \"boolean\"}\n", diff --git a/sql/util/haveibeenpwned.py b/sql/util/haveibeenpwned.py index 269adf2dd6e..a89133467d9 100644 --- a/sql/util/haveibeenpwned.py +++ b/sql/util/haveibeenpwned.py @@ -1,23 +1,19 @@ """ Retrieves breach data from the Have I Been Pwned API and loads it into BigQuery. 
- """ import pandas as pd import requests # pylint: disable=import-error from bq_writer import bigquery, write_to_bq -# Fetch breach data from API response = requests.get("https://haveibeenpwned.com/api/v2/breaches", timeout=10) breaches = response.json() df = pd.DataFrame(breaches) -# Convert date fields df["BreachDate"] = pd.to_datetime(df["BreachDate"], errors="coerce") df["AddedDate"] = pd.to_datetime(df["AddedDate"], errors="coerce") df["ModifiedDate"] = pd.to_datetime(df["ModifiedDate"], errors="coerce") -# Define BigQuery schema schema = [ bigquery.SchemaField("Name", "STRING"), bigquery.SchemaField("Title", "STRING"), @@ -41,5 +37,4 @@ bigquery.SchemaField("DisclosureUrl", "STRING"), ] -# Write to BigQuery write_to_bq(df, "httparchive.almanac.breaches", schema, write_disposition="WRITE_TRUNCATE") From 8c4e816e05c451fe2e3c1b856bc9be65f6ee6bff Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 13 Jan 2026 10:20:44 +0100 Subject: [PATCH 21/27] 3p cookie domains --- .../cookie_domains_third_party_top.sql | 22 +++ sql/util/bq_to_sheets.ipynb | 136 ++++++++++-------- 2 files changed, 100 insertions(+), 58 deletions(-) create mode 100644 sql/2025/privacy/cookie_domains_third_party_top.sql diff --git a/sql/2025/privacy/cookie_domains_third_party_top.sql b/sql/2025/privacy/cookie_domains_third_party_top.sql new file mode 100644 index 00000000000..c5abc8d2e41 --- /dev/null +++ b/sql/2025/privacy/cookie_domains_third_party_top.sql @@ -0,0 +1,22 @@ +FROM `httparchive.crawl.pages` +|> WHERE date = '2025-07-01' -- AND rank = 1000 +|> EXTEND COUNT(DISTINCT NET.HOST(root_page)) OVER (PARTITION BY client) AS total_domains +|> JOIN UNNEST(JSON_QUERY_ARRAY(custom_metrics.cookies)) AS cookie +|> EXTEND +NET.HOST(root_page) AS firstparty_domain, +NET.HOST(SAFE.STRING(cookie.domain)) AS cookie_domain +|> WHERE NOT ENDS_WITH('.' || firstparty_domain, '.' 
|| cookie_domain) +|> AGGREGATE +COUNT(DISTINCT firstparty_domain) AS domain_count, +COUNT(DISTINCT firstparty_domain) / ANY_VALUE(total_domains) AS pct_domains +GROUP BY client, cookie_domain +|> PIVOT ( + ANY_VALUE(domain_count) AS domain_count, + ANY_VALUE(pct_domains) AS pct_domains + FOR client IN ('desktop', 'mobile') +) +|> RENAME +pct_domains_mobile AS mobile, +pct_domains_desktop AS desktop +|> ORDER BY domain_count_mobile + domain_count_desktop DESC +|> LIMIT 1000 diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index bcc7195844e..0bcb265e392 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" @@ -92,7 +92,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "✓ Connected to spreadsheet with 25 existing sheets\n" + "✓ Connected to spreadsheet with 20 existing sheets\n" ] } ], @@ -137,61 +137,63 @@ "name": "stdout", "output_type": "stream", "text": [ - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+============================================================================+=============+==========================+==========================+\n", - "| _cname_domains_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| _number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| _number_of_privacy_sandbox_attested_domains.sql | | | Filename 
filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| _privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| _top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| _top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| bounce_domains_top.sql | 5.131 | Bounce Domains Top | ✓ Uploaded |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| client_hints_top.sql | 1.338 | Client Hints Top | ✓ Uploaded |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| client_hints_usage.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", - 
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| dnt_usage.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| fingerprinting_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| iab_tcf_v2_cmps_top.sql | 0.02 | Iab Tcf V2 Cmps Top | ✓ Uploaded |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| iab_tcf_v2_countries_top.sql | 0.02 | Iab Tcf V2 Countries Top | ✓ Uploaded |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| iab_usage.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| referrer_policy_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| referrer_policy_usage.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", - 
"+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| tracker_categories_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| tracker_distribution.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| tracker_technologies_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| whotracksme_categories_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", - "| whotracksme_trackers_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------+--------------------------+\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| Query | TB Billed | Sheet | Status/Skip Reason |\n", + "+============================================================================+=============+================================+==========================+\n", + "| _cname_domains_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| _number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", + 
"+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| _number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| _privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| _top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| _top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| bounce_domains_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| client_hints_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| client_hints_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| cookie_domains_third_party_top.sql | 0.081 | Cookie Domains Third Party Top | ✓ Uploaded |\n", + 
"+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| dnt_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| fingerprinting_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| iab_tcf_v2_cmps_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| iab_tcf_v2_countries_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| iab_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| referrer_policy_top.sql | | | 
Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| referrer_policy_usage.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| tracker_categories_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| tracker_distribution.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| tracker_technologies_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| whotracksme_categories_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", + "| whotracksme_trackers_top.sql | | | Filename filter mismatch |\n", + "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", "\n", - "✓ Processed 25 queries\n" + "✓ Processed 26 queries\n" ] } ], @@ -205,7 +207,7 @@ "\n", "\n", "# Query filters and options\n", - "filename_match = 
'(iab_tcf_v2_cmps_top|iab_tcf_v2_countries_top|client_hints_top|bounce_domains_top).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = '(cookie_domains_third_party_top).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = True # @param {type: \"boolean\"}\n", @@ -305,6 +307,24 @@ "\n", " print(f\"\\n✓ Processed {len(queries_processed_log)} queries\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "QueryJob" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] } ], "metadata": { @@ -312,7 +332,7 @@ "provenance": [] }, "kernelspec": { - "display_name": ".venv (3.14.2)", + "display_name": ".venv", "language": "python", "name": "python3" }, From 220d0b870cf299e5499db85327e4b17dafdbaeef Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 13 Jan 2026 10:22:32 +0100 Subject: [PATCH 22/27] switch the columns for a chart --- sql/2025/privacy/fingerprinting_top.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/2025/privacy/fingerprinting_top.sql b/sql/2025/privacy/fingerprinting_top.sql index 5dc696ea56a..02dd77606d3 100644 --- a/sql/2025/privacy/fingerprinting_top.sql +++ b/sql/2025/privacy/fingerprinting_top.sql @@ -24,8 +24,8 @@ GROUP BY client, technology.technology |> DROP websites_total |> PIVOT( ANY_VALUE(websites_count) AS websites_count, - ANY_VALUE(websites_pct) AS pct + ANY_VALUE(websites_pct) AS websites_pct FOR client IN ('desktop', 'mobile') ) -|> RENAME websites_count_mobile AS mobile, websites_count_desktop AS desktop -|> ORDER BY mobile + desktop DESC +|> RENAME websites_pct_mobile AS 
mobile, websites_pct_desktop AS desktop +|> ORDER BY websites_count_mobile + websites_count_desktop DESC From edc9fb1d533a2b90ebe07a9fb9b89db6feedda55 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 14 Jan 2026 00:13:19 +0100 Subject: [PATCH 23/27] exclude android.clients.google.com --- sql/2025/privacy/tracker_distribution.sql | 8 ++++---- sql/2025/privacy/whotracksme_categories_top.sql | 6 +++--- sql/2025/privacy/whotracksme_trackers_top.sql | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/2025/privacy/tracker_distribution.sql b/sql/2025/privacy/tracker_distribution.sql index 52a738c6bf0..1eeb7a5efa7 100644 --- a/sql/2025/privacy/tracker_distribution.sql +++ b/sql/2025/privacy/tracker_distribution.sql @@ -26,14 +26,14 @@ tracker_counts AS ( root_page, COUNT(DISTINCT tracker) AS number_of_trackers FROM `httparchive.crawl.requests` - JOIN whotracksme + LEFT JOIN whotracksme ON NET.HOST(url) = domain OR ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) WHERE - date = '2025-07-01' AND - --rank = 1000 AND - NOT ENDS_WITH('.' || NET.HOST(root_page), '.' || NET.HOST(url)) -- third-party + date = '2025-07-01' + --AND rank = 1000 + AND url NOT IN ('https://android.clients.google.com/checkin', 'https://android.clients.google.com/c2dm/register3') GROUP BY client, root_page diff --git a/sql/2025/privacy/whotracksme_categories_top.sql b/sql/2025/privacy/whotracksme_categories_top.sql index 5fe558a7c2e..819c41eb535 100644 --- a/sql/2025/privacy/whotracksme_categories_top.sql +++ b/sql/2025/privacy/whotracksme_categories_top.sql @@ -30,8 +30,8 @@ tracker_categories AS ( ENDS_WITH(NET.HOST(url), CONCAT('.', domain)) ) WHERE - date = '2025-07-01' AND - NOT ENDS_WITH('.' || NET.HOST(root_page), '.' 
|| NET.HOST(url)) -- third party + date = '2025-07-01' + AND url NOT IN ('https://android.clients.google.com/checkin', 'https://android.clients.google.com/c2dm/register3') ), aggregated AS ( @@ -63,4 +63,4 @@ FROM aggregated FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/2025/privacy/whotracksme_trackers_top.sql b/sql/2025/privacy/whotracksme_trackers_top.sql index 9f8a4362c39..01fffd14aaf 100644 --- a/sql/2025/privacy/whotracksme_trackers_top.sql +++ b/sql/2025/privacy/whotracksme_trackers_top.sql @@ -22,8 +22,8 @@ FROM `httparchive.crawl.requests` ON NET.HOST(url) = domain OR ENDS_WITH(NET.HOST(url), '.' || domain) |> WHERE - date = '2025-07-01' AND - NOT ENDS_WITH('.' || NET.HOST(root_page), '.' || NET.HOST(url)) -- third party + date = '2025-07-01' + AND url NOT IN ('https://android.clients.google.com/checkin', 'https://android.clients.google.com/c2dm/register3') |> AGGREGATE COUNT(DISTINCT root_page) AS number_of_websites GROUP BY client, tracker |> JOIN base_totals USING (client) |> EXTEND number_of_websites / total_websites AS pct_websites @@ -34,4 +34,4 @@ FROM `httparchive.crawl.requests` FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC From d280cd3671015ffb45fbbee4c63a87a117f6d111 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 14 Jan 2026 00:15:29 +0100 Subject: [PATCH 24/27] fix order by --- sql/2025/privacy/bounce_domains_top.sql | 2 +- sql/2025/privacy/client_hints_top.sql | 2 +- sql/2025/privacy/client_hints_usage.sql | 17 +-- .../cookie_domains_third_party_top.sql | 2 +- 
sql/2025/privacy/cookies_first_party_top.sql | 2 +- sql/2025/privacy/cookies_third_party_top.sql | 2 +- sql/2025/privacy/iab_tcf_v2_cmps_top.sql | 2 +- sql/2025/privacy/iab_tcf_v2_countries_top.sql | 2 +- sql/2025/privacy/iab_usp_strings_top.sql | 2 +- sql/2025/privacy/referrer_policy_top.sql | 2 +- .../privacy/related_origin_trials_top.sql | 2 +- sql/2025/privacy/tracker_categories_top.sql | 2 +- sql/2025/privacy/tracker_technologies_top.sql | 2 +- sql/util/bq_to_sheets.ipynb | 108 ++---------------- 14 files changed, 26 insertions(+), 123 deletions(-) diff --git a/sql/2025/privacy/bounce_domains_top.sql b/sql/2025/privacy/bounce_domains_top.sql index 7303ac670e5..0d7fa802f0f 100644 --- a/sql/2025/privacy/bounce_domains_top.sql +++ b/sql/2025/privacy/bounce_domains_top.sql @@ -74,5 +74,5 @@ FROM bounce_sequences FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop, cnt_mobile AS mobile_count, cnt_desktop AS desktop_count -|> ORDER BY mobile_count + desktop_count DESC +|> ORDER BY COALESCE(mobile_count, 0) + COALESCE(desktop_count, 0) DESC |> LIMIT 100 diff --git a/sql/2025/privacy/client_hints_top.sql b/sql/2025/privacy/client_hints_top.sql index 17a355a6600..9a3033b5061 100644 --- a/sql/2025/privacy/client_hints_top.sql +++ b/sql/2025/privacy/client_hints_top.sql @@ -41,4 +41,4 @@ GROUP BY client, value FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/2025/privacy/client_hints_usage.sql b/sql/2025/privacy/client_hints_usage.sql index bf349874deb..00c4d5efeb8 100644 --- a/sql/2025/privacy/client_hints_usage.sql +++ b/sql/2025/privacy/client_hints_usage.sql @@ -26,17 +26,11 @@ accept_ch_meta AS ( SELECT DISTINCT client, root_page - FROM ( - SELECT - client, - root_page, - custom_metrics.other.almanac AS metrics - 
FROM `httparchive.crawl.pages` - WHERE date = '2025-07-01' - --AND rank = 1000 - ), - UNNEST(JSON_QUERY_ARRAY(metrics.`meta-nodes`.nodes)) AS meta_node - WHERE LOWER(SAFE.STRING(meta_node.`http-equiv`)) = 'accept-ch' + FROM `httparchive.crawl.pages`, + UNNEST(JSON_QUERY_ARRAY(custom_metrics.other.almanac.`meta-nodes`.nodes)) AS meta_node + WHERE date = '2025-07-01' + --AND rank = 1000 + AND LOWER(SAFE.STRING(meta_node.`http-equiv`)) = 'accept-ch' ), -- Combine both sources @@ -58,4 +52,3 @@ GROUP BY all_accept_ch.client FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_mobile + websites_count_desktop DESC diff --git a/sql/2025/privacy/cookie_domains_third_party_top.sql b/sql/2025/privacy/cookie_domains_third_party_top.sql index c5abc8d2e41..c38269479b8 100644 --- a/sql/2025/privacy/cookie_domains_third_party_top.sql +++ b/sql/2025/privacy/cookie_domains_third_party_top.sql @@ -18,5 +18,5 @@ GROUP BY client, cookie_domain |> RENAME pct_domains_mobile AS mobile, pct_domains_desktop AS desktop -|> ORDER BY domain_count_mobile + domain_count_desktop DESC +|> ORDER BY COALESCE(domain_count_mobile, 0) + COALESCE(domain_count_desktop, 0) DESC |> LIMIT 1000 diff --git a/sql/2025/privacy/cookies_first_party_top.sql b/sql/2025/privacy/cookies_first_party_top.sql index 917e9cf41ab..62f6cb0d2c7 100644 --- a/sql/2025/privacy/cookies_first_party_top.sql +++ b/sql/2025/privacy/cookies_first_party_top.sql @@ -23,5 +23,5 @@ GROUP BY client, cookie_name |> RENAME pct_domains_mobile AS mobile, pct_domains_desktop AS desktop -|> ORDER BY domain_count_mobile + domain_count_desktop DESC +|> ORDER BY COALESCE(domain_count_mobile, 0) + COALESCE(domain_count_desktop, 0) DESC |> LIMIT 1000 diff --git a/sql/2025/privacy/cookies_third_party_top.sql b/sql/2025/privacy/cookies_third_party_top.sql index 9122940796d..dff37874242 100644 --- a/sql/2025/privacy/cookies_third_party_top.sql +++ 
b/sql/2025/privacy/cookies_third_party_top.sql @@ -19,5 +19,5 @@ GROUP BY client, cookie_details |> RENAME pct_domains_mobile AS mobile, pct_domains_desktop AS desktop -|> ORDER BY domain_count_mobile + domain_count_desktop DESC +|> ORDER BY COALESCE(domain_count_mobile, 0) + COALESCE(domain_count_desktop, 0) DESC |> LIMIT 1000 diff --git a/sql/2025/privacy/iab_tcf_v2_cmps_top.sql b/sql/2025/privacy/iab_tcf_v2_cmps_top.sql index f116451f84c..602e10ca6af 100644 --- a/sql/2025/privacy/iab_tcf_v2_cmps_top.sql +++ b/sql/2025/privacy/iab_tcf_v2_cmps_top.sql @@ -18,4 +18,4 @@ GROUP BY client, cmpId FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC \ No newline at end of file diff --git a/sql/2025/privacy/iab_tcf_v2_countries_top.sql b/sql/2025/privacy/iab_tcf_v2_countries_top.sql index a7359e134b5..0f2115b7254 100644 --- a/sql/2025/privacy/iab_tcf_v2_countries_top.sql +++ b/sql/2025/privacy/iab_tcf_v2_countries_top.sql @@ -36,4 +36,4 @@ FROM base_data FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/2025/privacy/iab_usp_strings_top.sql b/sql/2025/privacy/iab_usp_strings_top.sql index 8eb0b3ee93e..c8c53b13d2e 100644 --- a/sql/2025/privacy/iab_usp_strings_top.sql +++ b/sql/2025/privacy/iab_usp_strings_top.sql @@ -25,4 +25,4 @@ FROM `httparchive.crawl.pages` FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/2025/privacy/referrer_policy_top.sql 
b/sql/2025/privacy/referrer_policy_top.sql index 329c790db43..0e6ff60b66f 100644 --- a/sql/2025/privacy/referrer_policy_top.sql +++ b/sql/2025/privacy/referrer_policy_top.sql @@ -57,4 +57,4 @@ FROM referrer_policy_custom_metrics FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/2025/privacy/related_origin_trials_top.sql b/sql/2025/privacy/related_origin_trials_top.sql index be07979ce90..a6542d1c904 100644 --- a/sql/2025/privacy/related_origin_trials_top.sql +++ b/sql/2025/privacy/related_origin_trials_top.sql @@ -111,4 +111,4 @@ FROM aggregated FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/2025/privacy/tracker_categories_top.sql b/sql/2025/privacy/tracker_categories_top.sql index ffdc76c8dd1..819106eb537 100644 --- a/sql/2025/privacy/tracker_categories_top.sql +++ b/sql/2025/privacy/tracker_categories_top.sql @@ -30,4 +30,4 @@ FROM `httparchive.crawl.pages`, FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/2025/privacy/tracker_technologies_top.sql b/sql/2025/privacy/tracker_technologies_top.sql index 92385e7b6e4..876bb9c3ded 100644 --- a/sql/2025/privacy/tracker_technologies_top.sql +++ b/sql/2025/privacy/tracker_technologies_top.sql @@ -30,4 +30,4 @@ FROM `httparchive.crawl.pages`, FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY websites_count_desktop + 
websites_count_mobile DESC +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC diff --git a/sql/util/bq_to_sheets.ipynb b/sql/util/bq_to_sheets.ipynb index 0bcb265e392..e8d95822b24 100644 --- a/sql/util/bq_to_sheets.ipynb +++ b/sql/util/bq_to_sheets.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "cellView": "form", "id": "U37785Bxt5tE" @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "cellView": "form", "colab": { @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "cellView": "form", "colab": { @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -82,20 +82,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "cellView": "form", "id": "45dBifFPJAtO" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✓ Connected to spreadsheet with 20 existing sheets\n" - ] - } - ], + "outputs": [], "source": [ "# @title Setup BigQuery and Google Sheets clients\n", "import google.auth\n", @@ -122,7 +114,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "cellView": "form", "colab": { @@ -132,71 +124,7 @@ "id": "nblNil985Tjt", "outputId": "658cf8f9-cee5-44d0-a6cd-abcabd4038e2" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| Query | TB Billed | Sheet | Status/Skip Reason |\n", - "+============================================================================+=============+================================+==========================+\n", - "| _cname_domains_top.sql | | 
| Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| _number_of_ara_destinations_registered_by_third_parties_and_publishers.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| _number_of_privacy_sandbox_attested_domains.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| _privacy-sandbox-adoption-by-third-parties-by-publishers.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| _top_ara_destinations_registered_by_most_publishers.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| _top_ara_destinations_registered_by_most_third_parties.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| bounce_domains_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| client_hints_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| client_hints_usage.sql | | | Filename filter 
mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| cookie_domains_third_party_top.sql | 0.081 | Cookie Domains Third Party Top | ✓ Uploaded |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| cookies_first_party_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| cookies_third_party_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| dnt_usage.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| fingerprinting_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| iab_tcf_v2_cmps_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| iab_tcf_v2_countries_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| iab_usage.sql | | | Filename filter mismatch |\n", - 
"+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| iab_usp_strings_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| referrer_policy_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| referrer_policy_usage.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| related_origin_trials_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| tracker_categories_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| tracker_distribution.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| tracker_technologies_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| whotracksme_categories_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "| 
whotracksme_trackers_top.sql | | | Filename filter mismatch |\n", - "+----------------------------------------------------------------------------+-------------+--------------------------------+--------------------------+\n", - "\n", - "✓ Processed 26 queries\n" - ] - } - ], + "outputs": [], "source": [ "# @title Upload query results to Google Sheets\n", "import glob\n", @@ -207,7 +135,7 @@ "\n", "\n", "# Query filters and options\n", - "filename_match = '(cookie_domains_third_party_top).sql' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", + "filename_match = 'whotracksme.+' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "filename_match_exclude = '' # @param {type: \"raw\", placeholder: \"Enter regexp wrapped in quotes\"}\n", "dry_run = False # @param {type: \"boolean\"}\n", "overwrite_sheets = True # @param {type: \"boolean\"}\n", @@ -307,24 +235,6 @@ "\n", " print(f\"\\n✓ Processed {len(queries_processed_log)} queries\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "QueryJob" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] } ], "metadata": { From 0d23ecb953ac009afc4eaa54ca5525d62294524a Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 14 Jan 2026 00:17:02 +0100 Subject: [PATCH 25/27] lint --- sql/2025/privacy/iab_tcf_v2_cmps_top.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/2025/privacy/iab_tcf_v2_cmps_top.sql b/sql/2025/privacy/iab_tcf_v2_cmps_top.sql index 602e10ca6af..3768bc77078 100644 --- a/sql/2025/privacy/iab_tcf_v2_cmps_top.sql +++ b/sql/2025/privacy/iab_tcf_v2_cmps_top.sql @@ -18,4 +18,4 @@ GROUP BY client, cmpId FOR client IN ('desktop', 'mobile') ) |> RENAME pct_mobile AS mobile, pct_desktop AS desktop -|> ORDER BY COALESCE(websites_count_desktop, 0) + 
COALESCE(websites_count_mobile, 0) DESC \ No newline at end of file +|> ORDER BY COALESCE(websites_count_desktop, 0) + COALESCE(websites_count_mobile, 0) DESC From 6c24d450ba2e9d2c739273ce7f426f935689c68d Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 14 Jan 2026 21:35:55 +0100 Subject: [PATCH 26/27] split requirements.txt --- sql/util/requirements.txt | 8 ++++++++ src/requirements.txt | 12 +----------- 2 files changed, 9 insertions(+), 11 deletions(-) create mode 100644 sql/util/requirements.txt diff --git a/sql/util/requirements.txt b/sql/util/requirements.txt new file mode 100644 index 00000000000..ac1288dc9f9 --- /dev/null +++ b/sql/util/requirements.txt @@ -0,0 +1,8 @@ +pandas==2.3.3 +google-cloud-bigquery==3.40.0 +requests==2.32.5 +tabulate==0.9.0 +gspread==6.2.1 +gspread-dataframe==4.0.0 +ipykernel==7.1.0 +db-dtypes==1.5.0 diff --git a/src/requirements.txt b/src/requirements.txt index 81e28ea1403..b3e1de6ac47 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -4,14 +4,4 @@ gunicorn==23.0.0 pytest==9.0.2 pytest-watch==4.2.0 pytest-cov==7.0.0 -sqlfluff==3.5.0 - -# sql/util/* dependencies -pandas==2.3.3 -google-cloud-bigquery==3.40.0 -requests==2.32.5 -tabulate==0.9.0 -gspread==6.2.1 -gspread-dataframe==4.0.0 -ipykernel==7.1.0 -db-dtypes==1.5.0 +sqlfluff==3.5.0 \ No newline at end of file From 8d9e84a1de8a590d056d2a92b832d39d622bbae5 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 14 Jan 2026 21:51:25 +0100 Subject: [PATCH 27/27] lint --- .github/linters/.python-lint | 9 +++------ src/requirements.txt | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/linters/.python-lint b/.github/linters/.python-lint index 6b46d7370db..c576a485feb 100644 --- a/.github/linters/.python-lint +++ b/.github/linters/.python-lint @@ -31,10 +31,6 @@ persistent=yes # Specify a configuration file. 
#rcfile= -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages -suggestion-mode=yes - # Allow loading of arbitrary C extensions. Extensions are imported into the # active Python interpreter and may run arbitrary code. unsafe-load-any-extension=no @@ -470,5 +466,6 @@ valid-metaclass-classmethod-first-arg=mcs [EXCEPTIONS] # Exceptions that will emit a warning when being caught. Defaults to -# "Exception" -overgeneral-exceptions=Exception +# ["builtins.BaseException", "builtins.Exception"] +overgeneral-exceptions = builtins.BaseException, + builtins.Exception diff --git a/src/requirements.txt b/src/requirements.txt index b3e1de6ac47..eb86638b690 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -4,4 +4,4 @@ gunicorn==23.0.0 pytest==9.0.2 pytest-watch==4.2.0 pytest-cov==7.0.0 -sqlfluff==3.5.0 \ No newline at end of file +sqlfluff==3.5.0