Skip to content
29 changes: 27 additions & 2 deletions scripts/leaderboard.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Fetch contributor stats from all NextCommunity repos and update the leaderboard."""

import html
import os
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
import json
from bisect import bisect_right
Expand All @@ -16,6 +18,9 @@
# HTML-comment markers; presumably they delimit the generated leaderboard
# region in the target document -- confirm against the code that uses them.
LEADERBOARD_START = "<!-- LEADERBOARD:START -->"
LEADERBOARD_END = "<!-- LEADERBOARD:END -->"
# Presumably the organisation's GitHub Pages site repository; its use is not
# visible in this part of the file.
SITE_REPO_NAME = "NextCommunity.github.io"

# GitHub usernames: alphanumeric and single hyphens, 1-39 characters.
_GITHUB_LOGIN_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-]{0,37}[A-Za-z0-9])?$")
Comment thread
jbampton marked this conversation as resolved.
Outdated
# Name of the ".github" repository. NOTE(review): presumably special-cased
# elsewhere in this script -- confirm against its users.
DOTGITHUB_REPO_NAME = ".github"

# Self-documenting record for each commit entry collected across all repos.
Expand Down Expand Up @@ -707,6 +712,24 @@ def build_leaderboard(token=None):
return sorted_contributors, had_errors, levels_data


def _contributor_cell(login):
    """Return a pure-HTML table cell with avatar and username link.

    The cell is an ``<a>`` pointing at the contributor's GitHub profile that
    wraps a 32x32 avatar ``<img>``, a ``<br>`` and the ``@login`` text.

    Args:
        login: GitHub username to render.

    Returns:
        The HTML snippet as a string.

    Raises:
        ValueError: If ``login`` does not match ``_GITHUB_LOGIN_RE`` in full.
    """
    # fullmatch, not match: the pattern ends in ``$``, which also matches just
    # before a trailing newline, so ``match`` would accept "name\n" and the raw
    # newline would land in the alt text and break the markdown table row.
    if not _GITHUB_LOGIN_RE.fullmatch(login):
        # NOTE(review): the visible call sites do not appear to catch this, so
        # a single invalid login (e.g. a legacy account or a manual entry in
        # EMAIL_ALIASES) would abort the whole leaderboard run. Consider
        # validating earlier (e.g. in build_leaderboard) and skipping the
        # contributor with a logged warning, or using a safe fallback.
        raise ValueError(f"Invalid GitHub login: {login!r}")

    # Defence in depth: percent-encode for the URL contexts and HTML-escape for
    # the attribute/text contexts even though validation already passed.
    safe_login = urllib.parse.quote(login, safe="")
    escaped_login = html.escape(login)
    return (
        f'<a href="https://github.com/{safe_login}">'
        f'<img src="https://avatars.githubusercontent.com/{safe_login}?s=64"'
        f' width="32" height="32" alt="{escaped_login}\'s avatar"><br>'
        f"@{escaped_login}</a>"
    )


def generate_markdown(contributors, levels_data):
"""Generate a gamified markdown leaderboard from contributor data."""
rank_badges = {1: "🥇", 2: "🥈", 3: "🥉"}
Expand Down Expand Up @@ -767,8 +790,9 @@ def generate_markdown(contributors, levels_data):
commits_display += f" · 🤝 {coauthored}"
commits_display += f" · 📦 {repos_count}"

contributor_cell = _contributor_cell(login)
lines.append(
f"| {rank} | [@{login}](https://github.com/{login})"
f"| {rank} | {contributor_cell}"
f" | {level} | {rarity_display} | {commits_display}"
f" | {prog} | {streak_display}"
f" | {badges} | {points_display} |"
Expand Down Expand Up @@ -812,8 +836,9 @@ def generate_markdown(contributors, levels_data):
breakdown_parts.append(f"📁 {other_c}")
breakdown = " · ".join(breakdown_parts) if breakdown_parts else "—"

contributor_cell = _contributor_cell(login)
lines.append(
f"| {i} | [@{login}](https://github.com/{login})"
f"| {i} | {contributor_cell}"
f" | {first_date} | {last_date}"
f" | {days_active} | {cpd}"
f" | {breakdown} | Top {pctile}% |"
Expand Down