Skip to content

Commit e7314a9

Browse files
committed
chapter + tools
1 parent 79bfd6c commit e7314a9

30 files changed

Lines changed: 524 additions & 70 deletions

sql/util/generate_chart_markup.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import re
2+
import os
3+
import sys
4+
from googleapiclient.discovery import build
5+
import google.auth
6+
7+
# Configuration

# Spreadsheet whose sheets and charts are read through the Sheets API.
SPREADSHEET_ID = "1Svyw40Th7VbigX6lpR1lb1WXwTUVKZWrK7O2YELrml4"

# Identifier embedded in the generated ".../pubchart?oid=..." chart URLs.
PUBCHART_ID = "2PACX-1vRC5wrzy5NEsWNHn9w38RLsMURRScnP4jgjO1mDiVhsfFCY55tujlTUZhUaEWzmPtJza0QA7w8S4uK5"

# Directory holding the chapter's .sql files, relative to this script's location.
SQL_DIR = "../2025/privacy"

# Scopes requested when obtaining application default credentials.
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
13+
14+
def get_sql_to_sheet_map(sql_dir):
    """Map expected sheet titles to the SQL files found in ``sql_dir``.

    Each ``*.sql`` filename is turned into a sheet title by replacing the
    ``.sql`` suffix and every run of non-alphanumeric characters with a
    single space, trimming the result and title-casing it, e.g.
    ``cookie_usage.sql`` -> ``Cookie Usage``.

    Args:
        sql_dir: Path of the directory to scan (top level only).

    Returns:
        A dict of ``{sheet title: sql filename}``. It is empty when
        ``sql_dir`` is not an existing directory; in that case a
        "Directory not found" message is printed.
    """
    mapping = {}
    # isdir rather than exists: a path that exists but is a regular file
    # would otherwise reach os.listdir and raise NotADirectoryError.
    if not os.path.isdir(sql_dir):
        print(f"Directory not found: {sql_dir}")
        return mapping

    # Sorted so that, when two filenames collapse to the same title, the
    # surviving entry does not depend on filesystem listing order.
    for filename in sorted(os.listdir(sql_dir)):
        if filename.endswith(".sql"):
            sheet_name = re.sub(r'(\.sql|[^a-zA-Z0-9]+)', ' ', filename).strip().title()
            mapping[sheet_name] = filename
    return mapping
26+
27+
def generate_figure_markup(spreadsheet_id, sql_dir):
    """Print a ``figure_markup`` snippet for every chart in a spreadsheet.

    The spreadsheet's metadata is fetched through the Sheets API. For each
    chart on each sheet, one ``{{ figure_markup(...) }}`` block followed by a
    blank line is printed to stdout. The ``sql_file`` field is looked up from
    the sheet title via ``get_sql_to_sheet_map``; sheets with no matching
    file get the placeholder ``TODO.sql`` and a warning on stderr.

    Args:
        spreadsheet_id: ID of the spreadsheet to read.
        sql_dir: Directory containing the ``.sql`` files to match against
            sheet titles.

    Returns:
        None. If credentials cannot be obtained or the service cannot be
        built, a message is printed and the function returns early.

    Raises:
        Errors raised by the ``spreadsheets().get(...).execute()`` request are
        not caught here and propagate to the caller.
    """
    try:
        credentials, _ = google.auth.default(scopes=SCOPES)
        sheets_service = build('sheets', 'v4', cache_discovery=False, credentials=credentials)
    except Exception as e:
        print(f"Authentication failed: {e}")
        print("Please ensure you have application default credentials set up.")
        return

    sql_map = get_sql_to_sheet_map(sql_dir)
    # Secondary index used when a sheet title differs from the generated
    # title only by letter case or surrounding whitespace.
    folded_sql_map = {name.casefold(): filename for name, filename in sql_map.items()}

    response = sheets_service.spreadsheets().get(
        spreadsheetId=spreadsheet_id, includeGridData=False
    ).execute()

    for sheet in response.get('sheets', []):
        charts = sheet.get('charts', [])
        if not charts:
            # Nothing is printed for chart-less sheets, so skip the lookup
            # (and the warning) for them.
            continue

        sheet_name = sheet['properties']['title']
        sheet_id = sheet['properties']['sheetId']

        sql_file = _find_sql_file(sheet_name, sql_map, folded_sql_map)
        if sql_file is None:
            # stderr keeps the warning out of the markup printed on stdout.
            print(
                f"Warning: no SQL file matches sheet '{sheet_name}'; using TODO.sql",
                file=sys.stderr,
            )
            sql_file = "TODO.sql"

        for chart in charts:
            # `or` (not a .get default) so an empty title also falls back;
            # otherwise the image name would be just ".png".
            title = chart['spec'].get('title') or 'Untitled Chart'
            chart_id = chart['chartId']
            image_name = _slugify(title) + ".png"

            # NOTE(review): `title` is inserted verbatim; a title containing
            # a double quote would break the caption string - confirm whether
            # escaping is wanted.
            # NOTE(review): the leading whitespace inside this literal should
            # be confirmed against the chapter markdown convention.
            markup = f"""{{{{ figure_markup(
  image="{image_name}",
  caption="{title}",
  description="",
  chart_url="https://docs.google.com/spreadsheets/d/e/{PUBCHART_ID}/pubchart?oid={chart_id}&format=interactive",
  sheets_gid="{sheet_id}",
  sql_file="{sql_file}"
  )
}}}}"""
            print(markup)
            print()


def _find_sql_file(sheet_name, sql_map, folded_sql_map):
    """Return the SQL filename for ``sheet_name``, or ``None`` if none matches.

    An exact title match wins; otherwise the title is compared ignoring case
    and surrounding whitespace.
    """
    exact = sql_map.get(sheet_name)
    if exact:
        return exact
    return folded_sql_map.get(sheet_name.strip().casefold())


def _slugify(text):
    """Lower-case ``text`` and collapse each non ``[a-z0-9]`` run into one hyphen."""
    return re.sub(r'[^a-z0-9]+', '-', text.lower()).strip('-')
69+
70+
if __name__ == "__main__":
    # SQL_DIR is expressed relative to this file, so anchor it to the file's
    # own directory instead of the caller's working directory.
    here = os.path.dirname(os.path.abspath(__file__))
    resolved_sql_dir = os.path.normpath(os.path.join(here, SQL_DIR))

    # Echo the inputs before any markup is produced.
    print(f"Processing Spreadsheet: {SPREADSHEET_ID}")
    print(f"SQL Directory: {resolved_sql_dir}\n")

    generate_figure_markup(SPREADSHEET_ID, resolved_sql_dir)

sql/util/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
pandas==2.3.3
22
google-cloud-bigquery==3.40.0
3+
google-api-python-client==2.188.0
34
requests==2.32.5
45
tabulate==0.9.0
56
gspread==6.2.1

src/config/2025.json

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@
6868
"chapter_number": "8",
6969
"title": "Privacy",
7070
"slug": "privacy",
71-
"hero_dir": "2020",
72-
"todo": true
71+
"hero_dir": "2020"
7372
},
7473
{
7574
"part": "II",

src/config/contributors.json

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3240,6 +3240,10 @@
32403240
"analysts",
32413241
"developers",
32423242
"authors"
3243+
],
3244+
"2025": [
3245+
"analysts",
3246+
"editors"
32433247
]
32443248
},
32453249
"website": "https://maxostapenko.com"
@@ -3769,7 +3773,8 @@
37693773
],
37703774
"2025": [
37713775
"leads",
3772-
"committee"
3776+
"committee",
3777+
"authors"
37733778
]
37743779
},
37753780
"twitter": "nrllah",
@@ -5151,6 +5156,7 @@
51515156
"name": "Vinod Tiwari",
51525157
"teams": {
51535158
"2025": [
5159+
"authors",
51545160
"reviewers"
51555161
]
51565162
},
@@ -5418,5 +5424,18 @@
54185424
]
54195425
},
54205426
"twitter": "_cybai"
5427+
},
5428+
"RumaisaHabib": {
5429+
"avatar_url": "66083065",
5430+
"github": "RumaisaHabib",
5431+
"name": "Rumaisa Habib",
5432+
"teams": {
5433+
"2025": [
5434+
"authors"
5435+
]
5436+
},
5437+
"website": "https://rumaisahabib.com/",
5438+
"linkedin": "rumaisahabib"
54215439
}
5440+
54225441
}

src/content/en/2025/privacy.md

Lines changed: 189 additions & 66 deletions
Large diffs are not rendered by default.

src/eng.traineddata

4.96 MB
Binary file not shown.

src/package-lock.json

Lines changed: 128 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"test": "node ./tools/test",
1818
"generate": "node ./tools/generate",
1919
"figure-images": "node ./tools/generate/generate_figure_images",
20+
"chart-descriptions": "node ./tools/generate/generate_chart_descriptions",
2021
"watch": "node ./tools/generate/chapter_watcher",
2122
"ebooks": "node ./tools/generate/generate_ebook_pdfs",
2223
"deploy": "./tools/scripts/deploy.sh",
@@ -47,7 +48,8 @@
4748
"run-script-os": "1.1.6",
4849
"showdown": "2.1.0",
4950
"smartypants": "0.2.2",
51+
"tesseract.js": "^7.0.0",
5052
"web-vitals": "5.1.0",
5153
"xml-js": "1.6.11"
5254
}
53-
}
55+
}
37.3 KB
Loading

0 commit comments

Comments
 (0)