feat: port remaining cohort scripts and make reusable

- Port 19 cohort scripts from /home/naomi/docs/cohort/
- Replace all hardcoded tokens and dotenv usage with os.environ
- Add pandas==3.0.1 dependency
- Add E501 to ruff ignore list for Discord message string content
- Make remove_resigned_members.py reusable (empty RESIGNED_IDS constant)
- Make update_roster_messages.py reusable (iterates all teams from JSON)
- Exclude 12 one-off/event-specific scripts as non-reusable
2026-02-23 15:23:10 -08:00
parent e481823e06
commit 4fdb5d06f1
20 changed files with 2108 additions and 1 deletions
@@ -0,0 +1,516 @@
+#!/usr/bin/env python3
+"""Catch-Up Activity Report.
+
+Generates a markdown report of Discord and GitHub activity since Feb 15, 2026.
+Covers Discord messages in team channels (+ threads) and GitHub activity
+(PRs opened, issues opened, issue comments, PR comments, PR reviews, commits).
+"""
+
+import asyncio
+import json
+import os
+import subprocess
+from datetime import datetime, timezone
+
+import aiohttp
+
+DISCORD_BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
+DISCORD_API_BASE = "https://discord.com/api/v10"
+GITHUB_API_BASE = "https://api.github.com"
+GITHUB_ORG = "nhcarrigan-spring-2026-cohort"
+
+CUTOFF = datetime(2026, 2, 15, 0, 0, 0, tzinfo=timezone.utc)
+CUTOFF_ISO = CUTOFF.isoformat().replace("+00:00", "Z")
+
+OUTPUT_FILE = "catch_up_report.md"
+
+TEXT_CHANNEL_IDS: dict[str, str] = {
+    "Crimson Dahlia": "1464316744909852682",
+    "Rose Camellia": "1464316751268286611",
+    "Amber Wisteria": "1464316761410113641",
+    "Ivory Orchid": "1464316770889240730",
+    "Teal Iris": "1464316776459407448",
+    "Peach Gardenia": "1464316785040953543",
+    "Violet Carnation": "1464316805261824032",
+    "Azure Lotus": "1464316814455472139",
+    "Coral Sunflower": "1464316819711066263",
+    "Indigo Tulip": "1464316826384072925",
+    "Scarlet Hydrangea": "1464316839306985506",
+    "Mint Narcissus": "1464316844251807952",
+    "Sage Marigold": "1464316850669093040",
+}
+
+
+def team_repo_slug(team_name: str) -> str:
+    """Convert a team name to its repository slug."""
+    return team_name.lower().replace(" ", "-")
+
+
+def get_github_token() -> str:
+    """Retrieve the GitHub token via the gh CLI."""
+    result = subprocess.run(
+        ["gh", "auth", "token"], capture_output=True, text=True, check=True
+    )
+    return result.stdout.strip()
+
+
+class ActivityCollector:
+    """Collects Discord and GitHub activity for the catch-up report."""
+
+    def __init__(self, discord_token: str, github_token: str) -> None:
+        self.discord_headers = {
+            "Authorization": f"Bot {discord_token}",
+            "Content-Type": "application/json",
+        }
+        self.github_headers = {
+            "Authorization": f"Bearer {github_token}",
+            "Accept": "application/vnd.github+json",
+            "X-GitHub-Api-Version": "2022-11-28",
+        }
+        self.session: aiohttp.ClientSession | None = None
+
+    async def __aenter__(self) -> "ActivityCollector":
+        self.session = aiohttp.ClientSession()
+        return self
+
+    async def __aexit__(
+        self, exc_type: object, exc_val: object, exc_tb: object
+    ) -> None:
+        if self.session:
+            await self.session.close()
+
+    async def get_discord_username(self, user_id: str) -> str:
+        """Fetch a Discord user's display name or username."""
+        url = f"{DISCORD_API_BASE}/users/{user_id}"
+        async with self.session.get(url, headers=self.discord_headers) as response:
+            if response.status == 429:
+                retry_after = float((await response.json()).get("retry_after", 1))
+                await asyncio.sleep(retry_after)
+                return await self.get_discord_username(user_id)
+            if response.status != 200:
+                return "*(unknown)*"
+            data = await response.json()
+            return data.get("global_name") or data.get("username") or "*(unknown)*"
+
+    async def _get_discord_thread_ids(self, channel_id: str) -> list[str]:
+        """Return IDs of all active and archived threads in a channel."""
+        thread_ids: list[str] = []
+
+        url = f"{DISCORD_API_BASE}/channels/{channel_id}/threads/active"
+        async with self.session.get(url, headers=self.discord_headers) as response:
+            if response.status == 200:
+                data = await response.json()
+                thread_ids.extend(t["id"] for t in data.get("threads", []))
+
+        for archive_type in ("public", "private"):
+            url = (
+                f"{DISCORD_API_BASE}/channels/{channel_id}"
+                f"/threads/archived/{archive_type}"
+            )
+            async with self.session.get(url, headers=self.discord_headers) as response:
+                if response.status == 200:
+                    data = await response.json()
+                    thread_ids.extend(t["id"] for t in data.get("threads", []))
+
+        return thread_ids
+
+    async def _count_messages_in_channel(
+        self, channel_id: str, label: str = ""
+    ) -> dict[str, int]:
+        """Count messages per Discord user ID since CUTOFF."""
+        counts: dict[str, int] = {}
+        before_id: str | None = None
+        page = 0
+
+        while True:
+            url = f"{DISCORD_API_BASE}/channels/{channel_id}/messages?limit=100"
+            if before_id:
+                url += f"&before={before_id}"
+
+            async with self.session.get(url, headers=self.discord_headers) as response:
+                if response.status == 429:
+                    retry_after = float((await response.json()).get("retry_after", 1))
+                    print(f"    [Discord] rate limited, waiting {retry_after:.1f}s...")
+                    await asyncio.sleep(retry_after)
+                    continue
+                if response.status != 200:
+                    print(f"  [Discord] channel {channel_id} → HTTP {response.status}")
+                    break
+
+                messages: list[dict] = await response.json()
+                if not messages:
+                    break
+
+                page += 1
+                prefix = f" ({label})" if label else ""
+                print(
+                    f"    [Discord]{prefix} page {page} — {len(messages)} messages fetched",  # noqa: E501
+                    end="\r",
+                )
+
+                reached_cutoff = False
+                for message in messages:
+                    ts = datetime.fromisoformat(
+                        message["timestamp"].replace("Z", "+00:00")
+                    )
+                    if ts < CUTOFF:
+                        reached_cutoff = True
+                        break
+                    if message["author"].get("bot", False):
+                        continue
+                    author_id = message["author"]["id"]
+                    counts[author_id] = counts.get(author_id, 0) + 1
+
+                if reached_cutoff or len(messages) < 100:
+                    print()
+                    break
+
+                before_id = messages[-1]["id"]
+                await asyncio.sleep(0.5)
+
+        return counts
+
+    async def collect_discord_counts(
+        self, team_name: str, channel_id: str, member_ids: list[str]
+    ) -> dict[str, int]:
+        """Return message counts per member for a team's channel and threads."""
+        print("  [Discord] Scanning main channel...")
+        totals: dict[str, int] = await self._count_messages_in_channel(
+            channel_id, label="main"
+        )
+
+        thread_ids = await self._get_discord_thread_ids(channel_id)
+        total_threads = len(thread_ids)
+        for i, thread_id in enumerate(thread_ids, start=1):
+            print(f"  [Discord] Scanning thread {i}/{total_threads}...")
+            thread_counts = await self._count_messages_in_channel(
+                thread_id, label=f"thread {i}/{total_threads}"
+            )
+            for user_id, count in thread_counts.items():
+                totals[user_id] = totals.get(user_id, 0) + count
+            await asyncio.sleep(0.3)
+
+        if total_threads == 0:
+            print("  [Discord] No threads found.")
+
+        return {member_id: totals.get(member_id, 0) for member_id in member_ids}
+
+    async def _github_get_all_pages(self, url: str, params: dict) -> list[dict]:
+        """Fetch all pages from a paginated GitHub REST API endpoint."""
+        results: list[dict] = []
+        page = 1
+
+        while True:
+            paged_params = {**params, "per_page": 100, "page": page}
+            async with self.session.get(
+                url, headers=self.github_headers, params=paged_params
+            ) as response:
+                if response.status in (404, 422):
+                    break
+                if response.status == 403:
+                    print(f"  [GitHub] rate limited on {url}, waiting 60s...")
+                    await asyncio.sleep(60)
+                    continue
+                if response.status != 200:
+                    print(f"  [GitHub] {url} → HTTP {response.status}")
+                    break
+
+                data: list[dict] = await response.json()
+                if not data:
+                    break
+
+                results.extend(data)
+
+                if len(data) < 100:
+                    break
+                page += 1
+                await asyncio.sleep(0.2)
+
+        return results
+
+    async def collect_github_counts(
+        self, team_name: str, github_usernames: list[str]
+    ) -> dict[str, dict[str, int]]:
+        """Return activity counts per member for a team's GitHub repository."""
+        repo_slug = team_repo_slug(team_name)
+        repo = f"{GITHUB_ORG}/{repo_slug}"
+        print(f"  [GitHub] repo: {repo}")
+
+        counts: dict[str, dict[str, int]] = {
+            username: {
+                "prs_opened": 0,
+                "issues_opened": 0,
+                "issue_comments": 0,
+                "pr_comments": 0,
+                "pr_reviews": 0,
+                "commits": 0,
+            }
+            for username in github_usernames
+            if username
+        }
+
+        def resolve_username(login: str) -> str | None:
+            lower = login.lower()
+            for u in github_usernames:
+                if u and u.lower() == lower:
+                    return u
+            return None
+
+        print("  [GitHub] Fetching PRs...")
+        prs = await self._github_get_all_pages(
+            f"{GITHUB_API_BASE}/repos/{repo}/pulls",
+            {"state": "all", "sort": "created", "direction": "desc"},
+        )
+        print(f"  [GitHub] {len(prs)} PRs fetched — counting opens since cutoff...")
+        for pr in prs:
+            created_at = datetime.fromisoformat(pr["created_at"].replace("Z", "+00:00"))
+            if created_at < CUTOFF:
+                break
+            login = pr["user"]["login"]
+            username = resolve_username(login)
+            if username:
+                counts[username]["prs_opened"] += 1
+
+        print("  [GitHub] Fetching issues...")
+        issues = await self._github_get_all_pages(
+            f"{GITHUB_API_BASE}/repos/{repo}/issues",
+            {
+                "state": "all",
+                "sort": "created",
+                "direction": "desc",
+                "since": CUTOFF_ISO,
+            },
+        )
+        print(f"  [GitHub] {len(issues)} issues/PRs fetched — counting issue opens...")
+        for issue in issues:
+            if "pull_request" in issue:
+                continue
+            created_at = datetime.fromisoformat(
+                issue["created_at"].replace("Z", "+00:00")
+            )
+            if created_at < CUTOFF:
+                continue
+            login = issue["user"]["login"]
+            username = resolve_username(login)
+            if username:
+                counts[username]["issues_opened"] += 1
+
+        print("  [GitHub] Fetching issue comments...")
+        issue_comments = await self._github_get_all_pages(
+            f"{GITHUB_API_BASE}/repos/{repo}/issues/comments",
+            {"sort": "created", "direction": "desc", "since": CUTOFF_ISO},
+        )
+        print(f"  [GitHub] {len(issue_comments)} issue comments fetched.")
+        for comment in issue_comments:
+            created_at = datetime.fromisoformat(
+                comment["created_at"].replace("Z", "+00:00")
+            )
+            if created_at < CUTOFF:
+                continue
+            login = comment["user"]["login"]
+            username = resolve_username(login)
+            if username:
+                counts[username]["issue_comments"] += 1
+
+        print("  [GitHub] Fetching PR review comments...")
+        pr_comments = await self._github_get_all_pages(
+            f"{GITHUB_API_BASE}/repos/{repo}/pulls/comments",
+            {"sort": "created", "direction": "desc", "since": CUTOFF_ISO},
+        )
+        print(f"  [GitHub] {len(pr_comments)} PR review comments fetched.")
+        for comment in pr_comments:
+            created_at = datetime.fromisoformat(
+                comment["created_at"].replace("Z", "+00:00")
+            )
+            if created_at < CUTOFF:
+                continue
+            login = comment["user"]["login"]
+            username = resolve_username(login)
+            if username:
+                counts[username]["pr_comments"] += 1
+
+        all_pr_numbers = [pr["number"] for pr in prs]
+        total_prs = len(all_pr_numbers)
+        print(f"  [GitHub] Fetching reviews for {total_prs} PRs...")
+        for i, pr_number in enumerate(all_pr_numbers, start=1):
+            print(f"  [GitHub] PR reviews: {i}/{total_prs}", end="\r")
+            reviews = await self._github_get_all_pages(
+                f"{GITHUB_API_BASE}/repos/{repo}/pulls/{pr_number}/reviews",
+                {},
+            )
+            for review in reviews:
+                submitted_at_raw = review.get("submitted_at")
+                if not submitted_at_raw:
+                    continue
+                submitted_at = datetime.fromisoformat(
+                    submitted_at_raw.replace("Z", "+00:00")
+                )
+                if submitted_at < CUTOFF:
+                    continue
+                login = review["user"]["login"]
+                username = resolve_username(login)
+                if username:
+                    counts[username]["pr_reviews"] += 1
+            await asyncio.sleep(0.1)
+        if total_prs > 0:
+            print()
+
+        member_list = list(counts.keys())
+        total_members = len(member_list)
+        print(f"  [GitHub] Fetching commits for {total_members} members...")
+        for i, username in enumerate(member_list, start=1):
+            print(f"  [GitHub] Commits: {i}/{total_members} ({username})", end="\r")
+            commits = await self._github_get_all_pages(
+                f"{GITHUB_API_BASE}/repos/{repo}/commits",
+                {"author": username, "since": CUTOFF_ISO},
+            )
+            counts[username]["commits"] = len(commits)
+            await asyncio.sleep(0.2)
+        if total_members > 0:
+            print()
+
+        return counts
+
+
+def build_report(
+    team_data: list[dict],
+    discord_to_github: dict[str, str],
+    discord_usernames: dict[str, str],
+    discord_results: dict[str, dict[str, int]],
+    github_results: dict[str, dict[str, dict[str, int]]],
+) -> str:
+    """Build the markdown activity report."""
+    lines = [
+        "# Catch-Up Activity Report",
+        "",
+        f"**Period:** 2026-02-15 00:00 UTC → "
+        f"{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M')} UTC",
+        "",
+        "## Activity by Team",
+        "",
+        "| Discord ID | Discord Username | GitHub Username | Team | "
+        "Discord Messages | PRs Opened | Issues Opened | Issue Comments | "
+        "PR Comments | PR Reviews | Commits |",
+        "|------------|-----------------|-----------------|------|"
+        "-----------------|------------|---------------|----------------|"
+        "-------------|------------|---------|",
+    ]
+
+    for team in team_data:
+        team_name = team["name"]
+        if team_name == "Jade Jasmine":
+            continue
+
+        member_ids = team["leaders"] + team["participants"]
+        team_discord_counts = discord_results.get(team_name, {})
+        team_github_counts = github_results.get(team_name, {})
+
+        for member_id in member_ids:
+            github_username = discord_to_github.get(member_id, "")
+            discord_username = discord_usernames.get(member_id, "*(unknown)*")
+            discord_msg_count = team_discord_counts.get(member_id, 0)
+
+            if github_username:
+                gh = team_github_counts.get(github_username, {})
+                prs = gh.get("prs_opened", 0)
+                issues = gh.get("issues_opened", 0)
+                issue_comments = gh.get("issue_comments", 0)
+                pr_comments = gh.get("pr_comments", 0)
+                pr_reviews = gh.get("pr_reviews", 0)
+                commits = gh.get("commits", 0)
+            else:
+                prs = issues = issue_comments = pr_comments = pr_reviews = commits = (
+                    "N/A"
+                )
+
+            lines.append(
+                f"| {member_id} | {discord_username} | {github_username or 'N/A'} "
+                f"| {team_name} | {discord_msg_count} | {prs} | {issues} "
+                f"| {issue_comments} | {pr_comments} | {pr_reviews} | {commits} |"
+            )
+
+    lines.append("")
+    lines.append(
+        f"*Generated at {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC*"
+    )
+
+    return "\n".join(lines)
+
+
+async def main() -> None:
+    """Run the catch-up activity report."""
+    print("Loading data files...")
+    with open("team_assignments.json") as f:
+        team_data: list[dict] = json.load(f)
+
+    with open("discord_to_github.json") as f:
+        discord_to_github: dict[str, str] = json.load(f)
+
+    print("Getting GitHub token via gh CLI...")
+    github_token = get_github_token()
+
+    print(f"\nCollecting activity since {CUTOFF.isoformat()}...\n")
+
+    discord_results: dict[str, dict[str, int]] = {}
+    github_results: dict[str, dict[str, dict[str, int]]] = {}
+    discord_usernames: dict[str, str] = {}
+
+    async with ActivityCollector(DISCORD_BOT_TOKEN, github_token) as collector:
+        all_member_ids: list[str] = []
+        for team in team_data:
+            if team["name"] == "Jade Jasmine":
+                continue
+            all_member_ids.extend(team["leaders"] + team["participants"])
+
+        unique_member_ids = list(dict.fromkeys(all_member_ids))
+        total_members = len(unique_member_ids)
+        print(f"Fetching Discord usernames for {total_members} members...")
+        for i, member_id in enumerate(unique_member_ids, start=1):
+            if member_id not in discord_usernames:
+                print(f"  username {i}/{total_members}...", end="\r")
+                discord_usernames[member_id] = await collector.get_discord_username(
+                    member_id
+                )
+                await asyncio.sleep(0.3)
+        print(f"  Done — {total_members} usernames fetched.          ")
+
+        for team in team_data:
+            team_name = team["name"]
+            if team_name == "Jade Jasmine":
+                continue
+
+            print(f"\n=== {team_name} ===")
+            channel_id = TEXT_CHANNEL_IDS[team_name]
+            member_ids = team["leaders"] + team["participants"]
+
+            discord_results[team_name] = await collector.collect_discord_counts(
+                team_name, channel_id, member_ids
+            )
+
+            github_usernames_for_team = [
+                discord_to_github[mid]
+                for mid in member_ids
+                if mid in discord_to_github and discord_to_github[mid]
+            ]
+
+            github_results[team_name] = await collector.collect_github_counts(
+                team_name, github_usernames_for_team
+            )
+
+    print("\nBuilding report...")
+    report = build_report(
+        team_data,
+        discord_to_github,
+        discord_usernames,
+        discord_results,
+        github_results,
+    )
+
+    with open(OUTPUT_FILE, "w") as f:
+        f.write(report)
+
+    print(f"\n✅ Report saved to {OUTPUT_FILE}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())