Files
ephemere/python/cohort/generate_member_files.py
T
naomi a40188413a docs: add data file documentation and fix data path resolution
All Python cohort scripts now use DATA_DIR = Path(__file__).parent.parent.parent / "data"
to correctly resolve the repo-root data/ directory regardless of the working directory
set by run.sh. All TypeScript scripts have expanded JSDoc headers documenting data file
requirements and environment variables.
2026-02-23 15:42:03 -08:00

270 lines
8.4 KiB
Python

"""Generate markdown participant and leader profile files for the cohort.
Reads all evaluation data files and produces two markdown files summarising
each member's tech stack, availability, proficiency, and leadership assessment.
Data files (place in data/):
- discord_verification.json Discord ID verification results (from verify_discord.py)
- proficiency_evaluations.json Proficiency scores (from evaluate_technical_proficiency.py)
- availability_analysis.json Availability UTC blocks (from analyse_availability.py)
- leadership_candidates.json List of applicants who expressed interest in leading
- leadership_evaluations.json Leadership assessment scores
Outputs (written to data/):
- participants.md Markdown profile for each participant
- leaders.md Markdown profile for each leader candidate
Env vars:
- None
"""
import json
from pathlib import Path
DATA_DIR = Path(__file__).parent.parent.parent / "data"
BLOCK_EMOJIS = {"mornings": "🌅", "afternoons": "☀️", "evenings": "🌆", "nights": "🌙"}
def load_all_data():
"""Load all evaluation data files"""
with open(DATA_DIR / "discord_verification.json") as f:
verification = json.load(f)
with open(DATA_DIR / "proficiency_evaluations.json") as f:
proficiency = json.load(f)
with open(DATA_DIR / "availability_analysis.json") as f:
availability = json.load(f)
with open(DATA_DIR / "leadership_candidates.json") as f:
candidates = json.load(f)
with open(DATA_DIR / "leadership_evaluations.json") as f:
leadership = json.load(f)
return verification, proficiency, availability, candidates, leadership
def build_lookup_dicts(verification, proficiency, availability, leadership):
"""Build lookup dictionaries by discord_id"""
verified_usernames = {v[0]: v[1] for v in verification["verified"]}
prof_by_id = {p["discord_id"]: p for p in proficiency}
avail_by_id = {a["discord_id"]: a for a in availability}
lead_by_id = {l["discord_id"]: l for l in leadership}
return verified_usernames, prof_by_id, avail_by_id, lead_by_id
def format_availability_blocks(blocks):
"""Format availability blocks with emojis"""
if not blocks:
return "No consistent availability"
formatted = []
for block in ["mornings", "afternoons", "evenings", "nights"]:
if block in blocks:
formatted.append(f"{BLOCK_EMOJIS[block]} {block.capitalize()}")
return ", ".join(formatted)
def format_tech_stack(tech_stack):
"""Format tech stack list"""
if not tech_stack:
return "Not specified"
return ", ".join(sorted(tech_stack))
def generate_participants_md(
non_leader_ids, verified_usernames, prof_by_id, avail_by_id
):
"""Generate participants.md for non-leaders"""
lines = [
"# Cohort Participants",
"",
f"**Total Participants**: {len(non_leader_ids)}",
"",
"---",
"",
]
beginner_count = 0
intermediate_count = 0
advanced_count = 0
for discord_id in sorted(non_leader_ids):
if discord_id not in verified_usernames:
continue
username = verified_usernames.get(discord_id, "Unknown")
prof = prof_by_id.get(discord_id, {})
avail = avail_by_id.get(discord_id, {})
proficiency = prof.get("final_proficiency", "unknown")
tech_stack = prof.get("tech_stack", [])
blocks = avail.get("available_blocks", [])
notes = prof.get("notes", [])
if proficiency == "beginner":
beginner_count += 1
elif proficiency == "intermediate":
intermediate_count += 1
elif proficiency == "advanced":
advanced_count += 1
lines.append(f"## {discord_id}")
lines.append(f"**Username**: @{username}")
lines.append(f"**Technical Proficiency**: {proficiency.capitalize()}")
lines.append(f"**Tech Stack**: {format_tech_stack(tech_stack)}")
lines.append(f"**Availability**: {format_availability_blocks(blocks)}")
if notes:
lines.append(f"**Notes**: {', '.join(notes)}")
lines.append("")
verified_count = len([d for d in non_leader_ids if d in verified_usernames])
summary = [
"# Cohort Participants",
"",
f"**Total Participants**: {verified_count}",
"",
"### Proficiency Breakdown",
f"- Beginner: {beginner_count}",
f"- Intermediate: {intermediate_count}",
f"- Advanced: {advanced_count}",
"",
"---",
"",
]
return "\n".join(summary + lines[6:])
def leadership_fit_label(score):
"""Convert leadership score to label"""
if score >= 6:
return "Excellent"
elif score >= 4:
return "Good"
elif score >= 2:
return "Adequate"
else:
return "Needs Review"
def generate_leaders_md(
leader_ids, verified_usernames, prof_by_id, avail_by_id, lead_by_id
):
"""Generate leaders.md for leadership candidates"""
verified_leaders = [id for id in leader_ids if id in verified_usernames]
lines = [
"# Cohort Leaders",
"",
f"**Total Leaders**: {len(verified_leaders)}",
"",
"---",
"",
]
sorted_leaders = sorted(
verified_leaders,
key=lambda x: lead_by_id.get(x, {}).get("leadership_score", 0),
reverse=True,
)
for discord_id in sorted_leaders:
username = verified_usernames.get(discord_id, "Unknown")
prof = prof_by_id.get(discord_id, {})
avail = avail_by_id.get(discord_id, {})
lead = lead_by_id.get(discord_id, {})
proficiency = prof.get("final_proficiency", "unknown")
tech_stack = prof.get("tech_stack", [])
blocks = avail.get("available_blocks", [])
leadership_score = lead.get("leadership_score", 0)
leadership_fit = lead.get("leadership_fit", "unknown")
leadership_notes = lead.get("notes", [])
prof_notes = prof.get("notes", [])
lines.append(f"## {discord_id}")
lines.append(f"**Username**: @{username}")
fit = leadership_fit.capitalize()
lines.append(f"**Leadership Fit**: {fit} (Score: {leadership_score})")
lines.append(f"**Technical Proficiency**: {proficiency.capitalize()}")
lines.append(f"**Tech Stack**: {format_tech_stack(tech_stack)}")
lines.append(f"**Availability**: {format_availability_blocks(blocks)}")
if leadership_notes:
lines.append(f"**Leadership Notes**: {', '.join(leadership_notes)}")
if prof_notes:
lines.append(f"**Technical Notes**: {', '.join(prof_notes)}")
lines.append("")
excellent = sum(
1
for id in verified_leaders
if lead_by_id.get(id, {}).get("leadership_fit") == "excellent"
)
good = sum(
1
for id in verified_leaders
if lead_by_id.get(id, {}).get("leadership_fit") == "good"
)
adequate = sum(
1
for id in verified_leaders
if lead_by_id.get(id, {}).get("leadership_fit") == "adequate"
)
summary = [
"# Cohort Leaders",
"",
f"**Total Leaders**: {len(verified_leaders)}",
"",
"### Leadership Fit Breakdown",
f"- Excellent: {excellent}",
f"- Good: {good}",
f"- Adequate: {adequate}",
"",
"---",
"",
]
return "\n".join(summary + lines[6:])
def main():
verification, proficiency, availability, candidates, leadership = load_all_data()
verified_usernames, prof_by_id, avail_by_id, lead_by_id = build_lookup_dicts(
verification, proficiency, availability, leadership
)
leader_ids = set(candidates["leaders"])
non_leader_ids = set(candidates["non_leaders"])
verified_ids = set(verified_usernames.keys())
leader_ids = leader_ids & verified_ids
non_leader_ids = non_leader_ids & verified_ids
participants_md = generate_participants_md(
non_leader_ids, verified_usernames, prof_by_id, avail_by_id
)
with open(DATA_DIR / "participants.md", "w") as f:
f.write(participants_md)
print(f"Generated participants.md with {len(non_leader_ids)} participants")
leaders_md = generate_leaders_md(
leader_ids, verified_usernames, prof_by_id, avail_by_id, lead_by_id
)
with open(DATA_DIR / "leaders.md", "w") as f:
f.write(leaders_md)
print(f"Generated leaders.md with {len(leader_ids)} leaders")
if __name__ == "__main__":
main()