docs: add data file documentation and fix data path resolution

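Python cohort scripts that read or write data files now resolve them under the
repo-root data/ directory instead of the current working directory. Most do so via
DATA_DIR = Path(__file__).parent.parent.parent / "data", which is independent of the
working directory set by run.sh; the activity-report sender builds the same location
with os.path.join, and the message-length dry-run uses the relative path
"data/catch_up_report.md", which still depends on being run from the repo root.
Many Python scripts also gain module docstrings listing their data files and env vars.
All TypeScript scripts have expanded JSDoc headers documenting data file
requirements and environment variables.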
This commit is contained in:
2026-02-23 15:42:03 -08:00
parent 4fdb5d06f1
commit a40188413a
39 changed files with 424 additions and 53 deletions
+17 -3
View File
@@ -1,15 +1,29 @@
#!/usr/bin/env python3
"""Add GitHub users to their appropriate teams in nhcarrigan-spring-2026-cohort org"""
"""Add GitHub users to their appropriate teams in the cohort GitHub organisation.
Uses the GitHub CLI to add each member to their corresponding team and, for
leaders, to the team's leaders sub-team.
Data files (place in data/):
- team_assignments.json Team rosters with leaders and participants per team
- discord_to_github.json Mapping of Discord IDs to GitHub usernames
Env vars:
- None (uses `gh` CLI for authentication)
"""
import json
import subprocess
import time
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
# Load team assignments and Discord to GitHub mappings
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
with open("discord_to_github.json") as f:
with open(DATA_DIR / "discord_to_github.json") as f:
discord_to_github = json.load(f)
# Map team names to GitHub team slugs
+23 -3
View File
@@ -1,6 +1,26 @@
"""Analyse applicant availability from a markdown table and produce UTC block stats.
Reads a markdown table of availability responses and a Discord verification file,
then produces a JSON summary of coverage across morning/afternoon/evening UTC blocks
for each day of the week.
Data files (place in data/):
- table.md Markdown table of applicant availability responses
- discord_verification.json Discord ID verification results (from verify_discord.py)
Outputs (written to data/):
- availability_analysis.json UTC block distribution per applicant
Env vars:
- None
"""
import json
import re
from collections import defaultdict
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
@@ -99,7 +119,7 @@ def analyze_applicant_availability(timezone_str: str, day_slots: dict) -> dict:
def parse_table_md() -> list[dict]:
"""Parse table.md and extract availability data"""
with open("table.md") as f:
with open(DATA_DIR / "table.md") as f:
content = f.read()
lines = content.strip().split("\n")
@@ -131,7 +151,7 @@ def parse_table_md() -> list[dict]:
def main():
with open("discord_verification.json") as f:
with open(DATA_DIR / "discord_verification.json") as f:
verification = json.load(f)
verified_ids = {v[0] for v in verification["verified"]}
@@ -167,7 +187,7 @@ def main():
}
)
with open("availability_analysis.json", "w") as f:
with open(DATA_DIR / "availability_analysis.json", "w") as f:
json.dump(availability_results, f, indent=2)
block_distribution = defaultdict(int)
+4 -1
View File
@@ -6,9 +6,12 @@ Respects Discord rate limits with proper backoff and retry logic.
import json
import os
import time
from pathlib import Path
import requests
DATA_DIR = Path(__file__).parent.parent.parent / "data"
BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
GUILD_ID = "692816967895220344"
COHORT_ROLE_ID = "1464314780935258112"
@@ -48,7 +51,7 @@ def assign_role_with_retry(user_id: str, role_id: str, max_retries: int = 5) ->
def main():
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
all_users = []
+4 -1
View File
@@ -6,9 +6,12 @@ Respects Discord rate limits with proper backoff and retry logic.
import json
import os
import time
from pathlib import Path
import requests
DATA_DIR = Path(__file__).parent.parent.parent / "data"
BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
GUILD_ID = "692816967895220344"
@@ -64,7 +67,7 @@ def assign_role_with_retry(user_id: str, role_id: str, max_retries: int = 5) ->
def main():
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
print(f"Assigning team roles to {len(teams)} teams...")
+5 -2
View File
@@ -11,9 +11,12 @@ import json
import os
import subprocess
from datetime import datetime, timezone
from pathlib import Path
import aiohttp
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DISCORD_BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
DISCORD_API_BASE = "https://discord.com/api/v10"
GITHUB_API_BASE = "https://api.github.com"
@@ -440,10 +443,10 @@ def build_report(
async def main() -> None:
"""Run the catch-up activity report."""
print("Loading data files...")
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
team_data: list[dict] = json.load(f)
with open("discord_to_github.json") as f:
with open(DATA_DIR / "discord_to_github.json") as f:
discord_to_github: dict[str, str] = json.load(f)
print("Getting GitHub token via gh CLI...")
+13 -2
View File
@@ -1,4 +1,15 @@
"""Quick dry-run to check Discord message lengths before sending."""
"""Dry-run check of Discord message lengths before sending the activity report.
Parses the catch_up_report.md table, formats each team's data into a monospace
Discord table, and reports whether any message would exceed Discord's 2000-char limit.
Run this before send_activity_report.py to catch length issues early.
Data files (place in data/):
- catch_up_report.md Activity report generated by catch_up_report.py
Env vars:
- None
"""
FIELDS = [
("Discord Username", "Name", 18),
@@ -11,7 +22,7 @@ FIELDS = [
("Commits", "Commits", 7),
]
REPORT_PATH = "catch_up_report.md"
REPORT_PATH = "data/catch_up_report.md"
def parse_report(path: str) -> dict[str, list[dict]]:
+18 -4
View File
@@ -1,6 +1,17 @@
#!/usr/bin/env python3
"""Discord Team Activity Checker
Checks for team members who haven't sent messages in their channels within 36 hours
"""Check for team members who have not sent a message in their channel within 36 hours.
Scans each team's Discord channel and flags members with no recent activity.
Optionally sends a direct mention message to inactive members.
Data files (place in data/):
- team_assignments.json Team rosters with leaders and participants per team
Outputs (written to data/):
- discord_activity_report.json Inactive members per team with last-seen timestamps
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import asyncio
@@ -8,16 +19,19 @@ import json
import os
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
import aiohttp
DATA_DIR = Path(__file__).parent.parent.parent / "data"
# Configuration
DISCORD_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
DISCORD_API_BASE = "https://discord.com/api/v10"
INACTIVE_THRESHOLD_HOURS = 36
# Load team assignments from file
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
team_data = json.load(f)
# Build TEAMS dictionary with channel IDs and member lists
@@ -233,7 +247,7 @@ async def main():
print("\n" + "=" * 80)
# Save results to JSON
with open("discord_activity_report.json", "w") as f:
with open(DATA_DIR / "discord_activity_report.json", "w") as f:
json.dump(
{
"generated_at": datetime.now(timezone.utc).isoformat(),
@@ -1,8 +1,27 @@
"""Evaluate the technical proficiency of cohort applicants using their GitHub profiles.
Fetches each applicant's public GitHub repositories and scores their proficiency as
Beginner, Intermediate, or Advanced based on language variety, repo count, commit
activity, and presence of certain technologies.
Data files (place in data/):
- applicants_to_evaluate.json List of applicants with GitHub usernames
Outputs (written to data/):
- proficiency_evaluations.json Proficiency scores and tech stacks per applicant
Env vars:
- None (uses public GitHub API; may be rate-limited without authentication)
"""
import json
import re
import time
import urllib.error
import urllib.request
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
# GitHub API (no auth needed for public repos, but rate limited)
GITHUB_API = "https://api.github.com"
@@ -234,7 +253,7 @@ def evaluate_applicant(applicant: dict, index: int, total: int) -> dict:
def main():
# Load applicants
with open("applicants_to_evaluate.json") as f:
with open(DATA_DIR / "applicants_to_evaluate.json") as f:
applicants = json.load(f)
print(f"Evaluating {len(applicants)} applicants...\n")
@@ -249,7 +268,7 @@ def main():
print(f" Progress: {i + 1}/{len(applicants)} complete")
# Save results
with open("proficiency_evaluations.json", "w") as f:
with open(DATA_DIR / "proficiency_evaluations.json", "w") as f:
json.dump(evaluations, f, indent=2)
# Summary
+10 -2
View File
@@ -1,7 +1,15 @@
#!/usr/bin/env python3
"""Fix permissions for cohort-team-michael-and-yoon channel.
"""Fix Send Messages / Send Messages in Threads permissions for a Discord channel.
Deny Send Messages in Threads for @everyone and @cohort.
Denies Send Messages and Send Messages in Threads for both @everyone and the
@cohort role on the target channel. Update CHANNEL_ID and COHORT_ROLE_ID before
running.
Data files (place in data/):
- None
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import asyncio
+30 -7
View File
@@ -1,23 +1,46 @@
"""Generate markdown participant and leader profile files for the cohort.
Reads all evaluation data files and produces two markdown files summarising
each member's tech stack, availability, proficiency, and leadership assessment.
Data files (place in data/):
- discord_verification.json Discord ID verification results (from verify_discord.py)
- proficiency_evaluations.json Proficiency scores (from evaluate_technical_proficiency.py)
- availability_analysis.json Availability UTC blocks (from analyse_availability.py)
- leadership_candidates.json List of applicants who expressed interest in leading
- leadership_evaluations.json Leadership assessment scores
Outputs (written to data/):
- participants.md Markdown profile for each participant
- leaders.md Markdown profile for each leader candidate
Env vars:
- None
"""
import json
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
BLOCK_EMOJIS = {"mornings": "🌅", "afternoons": "☀️", "evenings": "🌆", "nights": "🌙"}
def load_all_data():
"""Load all evaluation data files"""
with open("discord_verification.json") as f:
with open(DATA_DIR / "discord_verification.json") as f:
verification = json.load(f)
with open("proficiency_evaluations.json") as f:
with open(DATA_DIR / "proficiency_evaluations.json") as f:
proficiency = json.load(f)
with open("availability_analysis.json") as f:
with open(DATA_DIR / "availability_analysis.json") as f:
availability = json.load(f)
with open("leadership_candidates.json") as f:
with open(DATA_DIR / "leadership_candidates.json") as f:
candidates = json.load(f)
with open("leadership_evaluations.json") as f:
with open(DATA_DIR / "leadership_evaluations.json") as f:
leadership = json.load(f)
return verification, proficiency, availability, candidates, leadership
@@ -230,14 +253,14 @@ def main():
participants_md = generate_participants_md(
non_leader_ids, verified_usernames, prof_by_id, avail_by_id
)
with open("participants.md", "w") as f:
with open(DATA_DIR / "participants.md", "w") as f:
f.write(participants_md)
print(f"Generated participants.md with {len(non_leader_ids)} participants")
leaders_md = generate_leaders_md(
leader_ids, verified_usernames, prof_by_id, avail_by_id, lead_by_id
)
with open("leaders.md", "w") as f:
with open(DATA_DIR / "leaders.md", "w") as f:
f.write(leaders_md)
print(f"Generated leaders.md with {len(leader_ids)} leaders")
+16 -1
View File
@@ -1,5 +1,20 @@
"""Generate hourly timeslot JSON for use with Crabfit scheduling tool.
Produces a list of ISO-format datetime strings covering every hour across the
scheduling window. Update the start_date and end_date constants before running.
Outputs (written to data/):
- crabfit_timeslots.json List of hourly timeslot strings
Env vars:
- None
"""
import json
from datetime import datetime, timedelta
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
# Generate hourly time slots from Feb 1 to March 3, 2026
# 24 hours a day, America/Los_Angeles timezone
@@ -18,7 +33,7 @@ print(f"First: {times[0]}")
print(f"Last: {times[-1]}")
# Save to file for use
with open("/home/naomi/docs/cohort/crabfit_timeslots.json", "w") as f:
with open(DATA_DIR / "crabfit_timeslots.json", "w") as f:
json.dump(times, f)
print("Saved to crabfit_timeslots.json")
+4 -1
View File
@@ -4,9 +4,12 @@
import asyncio
import json
import os
from pathlib import Path
import aiohttp
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DISCORD_BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
GUILD_ID = "692816967895220344"
COHORT_ROLE_ID = "1464314780935258112"
@@ -72,7 +75,7 @@ async def main() -> None:
f"{i}. {member['display_name']} (@{member['username']}) - ID: {member['id']}" # noqa: E501
)
with open("active_cohort_members.json", "w") as f:
with open(DATA_DIR / "active_cohort_members.json", "w") as f:
json.dump(cohort_members, f, indent=2)
print("\nSaved to active_cohort_members.json")
+11 -1
View File
@@ -1,5 +1,15 @@
#!/usr/bin/env python3
"""Remove cohort and team roles from inactive Discord members."""
"""Remove the Cohort and team-specific Discord roles from a list of members.
Update INACTIVE_MEMBERS and MEMBER_TO_TEAM before running to target the correct
members. Removes both the cohort-wide role and the member's team role.
Data files (place in data/):
- None (member IDs and team mappings are defined as constants in the script)
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import asyncio
import os
+9 -6
View File
@@ -18,9 +18,12 @@ import json
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
import aiohttp
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DISCORD_BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
DISCORD_API_BASE = "https://discord.com/api/v10"
DISCORD_GUILD_ID = "692816967895220344"
@@ -71,7 +74,7 @@ class MemberRemover:
def find_member_info(self) -> bool:
"""Find which team the member is on and their role."""
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
for team in teams:
@@ -88,7 +91,7 @@ class MemberRemover:
def remove_from_team_assignments(self) -> None:
"""Remove member from team_assignments.json."""
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
for team in teams:
@@ -103,19 +106,19 @@ class MemberRemover:
f"✅ Removed from {team['name']} participants in team_assignments.json"
)
with open("team_assignments.json", "w") as f:
with open(DATA_DIR / "team_assignments.json", "w") as f:
json.dump(teams, f, indent=2)
def remove_from_discord_to_github(self) -> None:
"""Remove member from discord_to_github.json."""
with open("discord_to_github.json") as f:
with open(DATA_DIR / "discord_to_github.json") as f:
mappings = json.load(f)
if self.discord_id in mappings:
self.github_username = mappings[self.discord_id]
del mappings[self.discord_id]
with open("discord_to_github.json", "w") as f:
with open(DATA_DIR / "discord_to_github.json", "w") as f:
json.dump(mappings, f, indent=2)
print(f"✅ Removed {self.github_username} from discord_to_github.json")
@@ -135,7 +138,7 @@ class MemberRemover:
async def remove_discord_roles(self) -> None:
"""Remove Discord roles from the member."""
with open("team_message_ids.json") as f:
with open(DATA_DIR / "team_message_ids.json") as f:
team_message_data = json.load(f)
if self.team_name not in team_message_data:
+5 -2
View File
@@ -6,6 +6,9 @@ before running this script.
"""
import json
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
# Discord IDs of members who have resigned - update before running
RESIGNED_IDS: list[str] = []
@@ -13,7 +16,7 @@ RESIGNED_IDS: list[str] = []
def main() -> None:
"""Remove resigned members from team assignments."""
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
changes_made = []
@@ -36,7 +39,7 @@ def main() -> None:
changes_made.append(f"Removed {removed} participant(s) from {team_name}")
print(f"⚠️ {team_name}: Proficiency distribution may need updating")
with open("team_assignments.json", "w") as f:
with open(DATA_DIR / "team_assignments.json", "w") as f:
json.dump(teams, f, indent=2)
if changes_made:
+14 -2
View File
@@ -1,4 +1,14 @@
"""Send formatted activity report tables to each team channel via Amari bot."""
"""Send formatted activity report tables to each team's Discord channel.
Parses catch_up_report.md and posts a monospace table of each member's Discord
and GitHub activity stats to their respective team channel.
Data files (place in data/):
- catch_up_report.md Activity report generated by catch_up_report.py
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import asyncio
import os
@@ -25,7 +35,9 @@ CHANNEL_IDS = {
"Sage Marigold": "1464316850669093040",
}
REPORT_PATH = os.path.join(os.path.dirname(__file__), "catch_up_report.md")
REPORT_PATH = os.path.join(
os.path.dirname(__file__), "..", "..", "data", "catch_up_report.md"
)
FIELDS = [
("Discord Username", "Name", 18),
+16 -2
View File
@@ -1,11 +1,25 @@
"""Send check-in messages to all team channels."""
"""Send biweekly check-in messages to all team Discord channels.
Posts a check-in prompt to each team channel and automatically creates a thread
for responses. Members who do not respond by the deadline may be removed for
inactivity.
Data files (place in data/):
- team_message_ids.json Channel IDs per team (generated by send_team_messages.py)
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import asyncio
import json
import os
from pathlib import Path
import aiohttp
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DISCORD_BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
DISCORD_API = "https://discord.com/api/v10"
GUILD_ID = "692816967895220344"
@@ -22,7 +36,7 @@ CHECK_IN_MESSAGE = """@everyone it is time for Naomi to do a check in. I need **
async def send_checkin_to_teams() -> None:
"""Send check-in message to all team channels (except Jade Jasmine)."""
with open("team_message_ids.json") as f:
with open(DATA_DIR / "team_message_ids.json") as f:
team_data = json.load(f)
headers = {
+17 -3
View File
@@ -1,12 +1,26 @@
#!/usr/bin/env python3
"""Send team check-in messages to all 14 teams."""
"""Send a capacity check-in message to each team channel.
Posts a message asking whether the team feels able to complete their project
given their current member count, and invites them to request support if needed.
Data files (place in data/):
- team_message_ids.json Channel and role IDs per team (from send_team_messages.py)
- team_assignments.json Team rosters used to report current member counts
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import asyncio
import json
import os
from pathlib import Path
import aiohttp
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DISCORD_BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
NAOMI_ID = "465650873650118659"
@@ -58,10 +72,10 @@ async def send_message(
async def main() -> None:
"""Send check-in messages to all teams."""
with open("team_message_ids.json") as f:
with open(DATA_DIR / "team_message_ids.json") as f:
team_data = json.load(f)
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
async with aiohttp.ClientSession() as session:
+22 -3
View File
@@ -1,10 +1,29 @@
"""Send initial welcome and roster messages to all team Discord channels.
Creates a pinned roster message in each team channel and stores the resulting
message ID, channel ID, and role ID in team_message_ids.json for use by
other scripts (send_checkin.py, update_roster_messages.py, etc.).
Data files (place in data/):
- team_assignments.json Team rosters with leaders and participants per team
- applicants_to_evaluate.json Applicant data, looked up by discord_id to get each project_url
Outputs (written to data/):
- team_message_ids.json Channel ID, message ID, and role ID per team
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import json
import os
import time
from pathlib import Path
import requests
# Amari's bot token
DATA_DIR = Path(__file__).parent.parent.parent / "data"
TOKEN = os.environ["DISCORD_BOT_TOKEN"]
GUILD_ID = "692816967895220344"
@@ -72,12 +91,12 @@ TEAMS = {
}
# Load team assignments and convert to dict by team name
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
team_list = json.load(f)
team_data = {team["name"]: team for team in team_list}
# Load applicants to get project_url by discord_id
with open("applicants_to_evaluate.json") as f:
with open(DATA_DIR / "applicants_to_evaluate.json") as f:
applicants = json.load(f)
applicant_lookup = {str(a["discord_id"]): a for a in applicants}
+6 -3
View File
@@ -3,9 +3,12 @@
import asyncio
import json
import os
from pathlib import Path
import aiohttp
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DISCORD_BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
GUILD_ID = "692816967895220344"
@@ -62,13 +65,13 @@ def generate_roster(team: dict, discord_to_github: dict) -> str:
async def main() -> None:
"""Update roster messages for all teams."""
with open("team_message_ids.json") as f:
with open(DATA_DIR / "team_message_ids.json") as f:
team_data = json.load(f)
with open("team_assignments.json") as f:
with open(DATA_DIR / "team_assignments.json") as f:
teams = json.load(f)
with open("discord_to_github.json") as f:
with open(DATA_DIR / "discord_to_github.json") as f:
discord_to_github = json.load(f)
async with aiohttp.ClientSession() as session:
+20 -2
View File
@@ -1,8 +1,26 @@
"""Verify Discord user IDs from a markdown table of applicant data.
Reads a markdown table containing Discord IDs and checks each one against the
Discord API to confirm the user exists. Handles rate limits automatically.
Data files (place in data/):
- table.md Markdown table of applicants including a Discord ID column
Outputs (written to data/):
- discord_verification.json Discord IDs grouped under the verified, missing, and errors keys
Env vars:
- DISCORD_BOT_TOKEN Bot token for the Discord API
"""
import json
import os
import time
import urllib.error
import urllib.request
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
# Configuration
BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
@@ -10,7 +28,7 @@ GUILD_ID = "692816967895220344"
BASE_URL = "https://discord.com/api/v10"
# Read Discord IDs from table.md
with open("table.md") as f:
with open(DATA_DIR / "table.md") as f:
content = f.read()
lines = content.strip().split("\n")
@@ -104,7 +122,7 @@ print(f"Missing: {len(missing)}")
print(f"Errors: {len(errors)}")
# Save results
with open("discord_verification.json", "w") as f:
with open(DATA_DIR / "discord_verification.json", "w") as f:
json.dump({"verified": verified, "missing": missing, "errors": errors}, f, indent=2)
print("\nResults saved to discord_verification.json")