docs: add data file documentation and fix data path resolution

All Python cohort scripts now use DATA_DIR = Path(__file__).parent.parent.parent / "data"
to correctly resolve the repo-root data/ directory regardless of the working directory
set by run.sh. All TypeScript scripts have expanded JSDoc headers documenting data file
requirements and environment variables.
This commit is contained in:
2026-02-23 15:42:03 -08:00
parent 4fdb5d06f1
commit a40188413a
39 changed files with 424 additions and 53 deletions
+23 -3
View File
@@ -1,6 +1,26 @@
"""Analyse applicant availability from a markdown table and produce UTC block stats.
Reads a markdown table of availability responses and a Discord verification file,
then produces a JSON summary of coverage across morning/afternoon/evening UTC blocks
for each day of the week.
Data files (place in the repo-root data/ directory, resolved relative to this script rather than the working directory):
- table.md Markdown table of applicant availability responses
- discord_verification.json Discord ID verification results (from verify_discord.py)
Outputs (written to the same repo-root data/ directory):
- availability_analysis.json UTC block distribution per applicant
Env vars:
- None
"""
import json
import re
from collections import defaultdict
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
DAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
@@ -99,7 +119,7 @@ def analyze_applicant_availability(timezone_str: str, day_slots: dict) -> dict:
def parse_table_md() -> list[dict]:
"""Parse table.md and extract availability data"""
with open("table.md") as f:
with open(DATA_DIR / "table.md") as f:
content = f.read()
lines = content.strip().split("\n")
@@ -131,7 +151,7 @@ def parse_table_md() -> list[dict]:
def main():
with open("discord_verification.json") as f:
with open(DATA_DIR / "discord_verification.json") as f:
verification = json.load(f)
verified_ids = {v[0] for v in verification["verified"]}
@@ -167,7 +187,7 @@ def main():
}
)
with open("availability_analysis.json", "w") as f:
with open(DATA_DIR / "availability_analysis.json", "w") as f:
json.dump(availability_results, f, indent=2)
block_distribution = defaultdict(int)