docs: add data file documentation and fix data path resolution

All Python cohort scripts now use DATA_DIR = Path(__file__).parent.parent.parent / "data"
to correctly resolve the repo-root data/ directory regardless of the working directory
set by run.sh. All TypeScript scripts have expanded JSDoc headers documenting data file
requirements and environment variables.
This commit is contained in:
2026-02-23 15:42:03 -08:00
parent 4fdb5d06f1
commit a40188413a
39 changed files with 424 additions and 53 deletions
@@ -1,8 +1,27 @@
"""Evaluate the technical proficiency of cohort applicants using their GitHub profiles.
Fetches each applicant's public GitHub repositories and scores their proficiency as
Beginner, Intermediate, or Advanced based on language variety, repo count, commit
activity, and presence of certain technologies.
Data files (place in data/):
- applicants_to_evaluate.json List of applicants with GitHub usernames
Outputs (written to data/):
- proficiency_evaluations.json Proficiency scores and tech stacks per applicant
Env vars:
- None (uses public GitHub API; may be rate-limited without authentication)
"""
import json
import re
import time
import urllib.error
import urllib.request
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
# GitHub API (no auth needed for public repos, but rate limited)
GITHUB_API = "https://api.github.com"
@@ -234,7 +253,7 @@ def evaluate_applicant(applicant: dict, index: int, total: int) -> dict:
def main():
# Load applicants
with open("applicants_to_evaluate.json") as f:
with open(DATA_DIR / "applicants_to_evaluate.json") as f:
applicants = json.load(f)
print(f"Evaluating {len(applicants)} applicants...\n")
@@ -249,7 +268,7 @@ def main():
print(f" Progress: {i + 1}/{len(applicants)} complete")
# Save results
with open("proficiency_evaluations.json", "w") as f:
with open(DATA_DIR / "proficiency_evaluations.json", "w") as f:
json.dump(evaluations, f, indent=2)
# Summary