# ephemere/python/cohort/generate_member_files.py

"""Generate markdown participant and leader profile files for the cohort.

Reads all evaluation data files and produces two markdown files summarising
each member's tech stack, availability, proficiency, and leadership assessment.

Data files (place in data/):
  - discord_verification.json   Discord ID verification results (from verify_discord.py)
  - proficiency_evaluations.json  Proficiency scores (from evaluate_technical_proficiency.py)
  - availability_analysis.json  Availability UTC blocks (from analyse_availability.py)
  - leadership_candidates.json  List of applicants who expressed interest in leading
  - leadership_evaluations.json  Leadership assessment scores

Outputs (written to data/):
  - participants.md  Markdown profile for each participant
  - leaders.md       Markdown profile for each leader candidate

Env vars:
  - None
"""

import json
from pathlib import Path

# Folder holding every input and output file: the "data" directory under the
# third ancestor directory of this module.
DATA_DIR = Path(__file__).parent.parent.parent.joinpath("data")

# Emoji displayed in front of each availability block name in the markdown.
BLOCK_EMOJIS = {
    "mornings": "🌅",
    "afternoons": "☀️",
    "evenings": "🌆",
    "nights": "🌙",
}


def load_all_data(data_dir=None):
    """Load all evaluation data files.

    Args:
        data_dir: Optional directory containing the JSON input files. When
            omitted, the module-level ``DATA_DIR`` is used.

    Returns:
        A 5-tuple ``(verification, proficiency, availability, candidates,
        leadership)`` with the parsed contents of, respectively,
        ``discord_verification.json``, ``proficiency_evaluations.json``,
        ``availability_analysis.json``, ``leadership_candidates.json`` and
        ``leadership_evaluations.json``.

    Raises:
        OSError: If an input file cannot be opened (e.g. it is missing).
        json.JSONDecodeError: If an input file does not contain valid JSON.
    """
    base_dir = DATA_DIR if data_dir is None else Path(data_dir)

    def read_json(filename):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(base_dir / filename, encoding="utf-8") as f:
            return json.load(f)

    # Order matters: callers unpack the tuple positionally.
    return (
        read_json("discord_verification.json"),
        read_json("proficiency_evaluations.json"),
        read_json("availability_analysis.json"),
        read_json("leadership_candidates.json"),
        read_json("leadership_evaluations.json"),
    )


def build_lookup_dicts(verification, proficiency, availability, leadership):
    """Index the loaded data sets so records can be fetched by discord_id.

    Args:
        verification: Mapping with a ``"verified"`` sequence; element ``[0]``
            of each entry becomes the key and element ``[1]`` the value.
        proficiency: Iterable of records, each carrying a ``"discord_id"`` key.
        availability: Iterable of records, each carrying a ``"discord_id"`` key.
        leadership: Iterable of records, each carrying a ``"discord_id"`` key.

    Returns:
        A 4-tuple ``(verified_usernames, prof_by_id, avail_by_id, lead_by_id)``
        of dictionaries. When a key occurs more than once, the last record
        wins.
    """

    def index_by_discord_id(records):
        # Map each record to its own "discord_id" value.
        return {record["discord_id"]: record for record in records}

    verified_usernames = {}
    for entry in verification["verified"]:
        key, value = entry[0], entry[1]
        verified_usernames[key] = value

    prof_by_id = index_by_discord_id(proficiency)
    avail_by_id = index_by_discord_id(availability)
    lead_by_id = index_by_discord_id(leadership)

    return verified_usernames, prof_by_id, avail_by_id, lead_by_id


def format_availability_blocks(blocks):
    """Format availability blocks with emojis.

    Args:
        blocks: Collection of block names. Only ``"mornings"``,
            ``"afternoons"``, ``"evenings"`` and ``"nights"`` are recognised;
            any other entry is ignored.

    Returns:
        A comma-separated string such as ``"<emoji> Mornings, <emoji> Nights"``,
        always in morning-to-night order regardless of the input order.
        Returns ``"No consistent availability"`` when ``blocks`` is empty or
        ``None``, or when it contains no recognised block name.
    """
    if not blocks:
        return "No consistent availability"

    # Fixed display order, independent of how the caller ordered the blocks.
    display_order = ("mornings", "afternoons", "evenings", "nights")
    formatted = [
        f"{BLOCK_EMOJIS[name]} {name.capitalize()}"
        for name in display_order
        if name in blocks
    ]

    # Previously a list holding only unrecognised names produced an empty
    # string, which rendered as a blank "Availability" field.
    if not formatted:
        return "No consistent availability"
    return ", ".join(formatted)


def format_tech_stack(tech_stack):
    """Render a tech stack as a sorted, comma-separated string.

    Returns ``"Not specified"`` when ``tech_stack`` is empty or ``None``.
    """
    return ", ".join(sorted(tech_stack)) if tech_stack else "Not specified"


def generate_participants_md(
    non_leader_ids, verified_usernames, prof_by_id, avail_by_id
):
    """Generate the participants.md content for non-leaders.

    Args:
        non_leader_ids: Collection of discord_ids to include. IDs missing from
            ``verified_usernames`` are skipped and not counted.
        verified_usernames: Mapping of discord_id to username.
        prof_by_id: Mapping of discord_id to a proficiency record; the keys
            ``"final_proficiency"``, ``"tech_stack"`` and ``"notes"`` are read.
        avail_by_id: Mapping of discord_id to an availability record; the key
            ``"available_blocks"`` is read.

    Returns:
        The markdown document as a single string: a header with the verified
        participant total and a beginner/intermediate/advanced breakdown,
        followed by one ``## <discord_id>`` section per participant in sorted
        ID order.
    """
    # Sort first, then drop unverified members, so the profile list and the
    # reported total are derived from the same sequence.
    member_ids = [d for d in sorted(non_leader_ids) if d in verified_usernames]

    # Only these three levels appear in the breakdown; any other value
    # (including "unknown") is still printed on the profile but not counted.
    level_counts = {"beginner": 0, "intermediate": 0, "advanced": 0}
    profiles = []

    for discord_id in member_ids:
        username = verified_usernames[discord_id]
        prof = prof_by_id.get(discord_id, {})
        avail = avail_by_id.get(discord_id, {})

        # `or` also covers an explicit JSON null, which would otherwise
        # crash on .capitalize() below.
        proficiency = prof.get("final_proficiency") or "unknown"
        tech_stack = prof.get("tech_stack", [])
        blocks = avail.get("available_blocks", [])
        notes = prof.get("notes", [])

        if proficiency in level_counts:
            level_counts[proficiency] += 1

        profiles.append(f"## {discord_id}")
        profiles.append(f"**Username**: @{username}")
        profiles.append(f"**Technical Proficiency**: {proficiency.capitalize()}")
        profiles.append(f"**Tech Stack**: {format_tech_stack(tech_stack)}")
        profiles.append(f"**Availability**: {format_availability_blocks(blocks)}")
        if notes:
            profiles.append(f"**Notes**: {', '.join(notes)}")
        profiles.append("")

    header = [
        "# Cohort Participants",
        "",
        f"**Total Participants**: {len(member_ids)}",
        "",
        "### Proficiency Breakdown",
        f"- Beginner: {level_counts['beginner']}",
        f"- Intermediate: {level_counts['intermediate']}",
        f"- Advanced: {level_counts['advanced']}",
        "",
        "---",
        "",
    ]

    return "\n".join(header + profiles)


def leadership_fit_label(score):
    """Map a numeric leadership score to its descriptive label.

    Scores of 6 and above are "Excellent", 4 and above "Good", 2 and above
    "Adequate"; anything lower is "Needs Review".
    """
    # Thresholds are checked from highest to lowest; the first match wins.
    thresholds = ((6, "Excellent"), (4, "Good"), (2, "Adequate"))
    for minimum, label in thresholds:
        if score >= minimum:
            return label
    return "Needs Review"


def generate_leaders_md(
    leader_ids, verified_usernames, prof_by_id, avail_by_id, lead_by_id
):
    """Generate the leaders.md content for leadership candidates.

    Args:
        leader_ids: Collection of candidate discord_ids. IDs missing from
            ``verified_usernames`` are skipped and not counted.
        verified_usernames: Mapping of discord_id to username.
        prof_by_id: Mapping of discord_id to a proficiency record; the keys
            ``"final_proficiency"``, ``"tech_stack"`` and ``"notes"`` are read.
        avail_by_id: Mapping of discord_id to an availability record; the key
            ``"available_blocks"`` is read.
        lead_by_id: Mapping of discord_id to a leadership record; the keys
            ``"leadership_score"``, ``"leadership_fit"`` and ``"notes"`` are
            read.

    Returns:
        The markdown document as a single string: a header with the verified
        leader total and an excellent/good/adequate breakdown, followed by one
        ``## <discord_id>`` section per leader, highest leadership score
        first. Leaders with equal scores are listed in ascending ID order.
    """

    def score_of(discord_id):
        # A missing record, a missing key and an explicit JSON null all count
        # as 0, so sorting never compares None with a number.
        score = lead_by_id.get(discord_id, {}).get("leadership_score")
        return 0 if score is None else score

    verified_leaders = [d for d in leader_ids if d in verified_usernames]

    # Pre-sort by ID: sorted() is stable (also with reverse=True), so ties on
    # score come out in ID order rather than in the arbitrary iteration order
    # of a set, which can differ between runs.
    ranked_leaders = sorted(sorted(verified_leaders), key=score_of, reverse=True)

    # Only these three fits appear in the breakdown; any other value is still
    # printed on the profile but not counted.
    fit_counts = {"excellent": 0, "good": 0, "adequate": 0}
    profiles = []

    for discord_id in ranked_leaders:
        username = verified_usernames[discord_id]
        prof = prof_by_id.get(discord_id, {})
        avail = avail_by_id.get(discord_id, {})
        lead = lead_by_id.get(discord_id, {})

        # `or` also covers explicit JSON nulls, which would otherwise crash
        # on .capitalize() below.
        proficiency = prof.get("final_proficiency") or "unknown"
        leadership_fit = lead.get("leadership_fit") or "unknown"
        leadership_score = score_of(discord_id)

        tech_stack = prof.get("tech_stack", [])
        blocks = avail.get("available_blocks", [])
        leadership_notes = lead.get("notes", [])
        prof_notes = prof.get("notes", [])

        if leadership_fit in fit_counts:
            fit_counts[leadership_fit] += 1

        fit = leadership_fit.capitalize()
        profiles.append(f"## {discord_id}")
        profiles.append(f"**Username**: @{username}")
        profiles.append(f"**Leadership Fit**: {fit} (Score: {leadership_score})")
        profiles.append(f"**Technical Proficiency**: {proficiency.capitalize()}")
        profiles.append(f"**Tech Stack**: {format_tech_stack(tech_stack)}")
        profiles.append(f"**Availability**: {format_availability_blocks(blocks)}")
        if leadership_notes:
            profiles.append(f"**Leadership Notes**: {', '.join(leadership_notes)}")
        if prof_notes:
            profiles.append(f"**Technical Notes**: {', '.join(prof_notes)}")
        profiles.append("")

    header = [
        "# Cohort Leaders",
        "",
        f"**Total Leaders**: {len(verified_leaders)}",
        "",
        "### Leadership Fit Breakdown",
        f"- Excellent: {fit_counts['excellent']}",
        f"- Good: {fit_counts['good']}",
        f"- Adequate: {fit_counts['adequate']}",
        "",
        "---",
        "",
    ]

    return "\n".join(header + profiles)


def main():
    """Load the evaluation data and write participants.md and leaders.md.

    Both candidate groups are restricted to verified Discord IDs before the
    markdown is generated. The output files are written into ``DATA_DIR`` and
    a one-line summary per file is printed to stdout.

    Raises:
        KeyError: If the candidates data lacks a ``"leaders"`` or
            ``"non_leaders"`` key.
    """
    verification, proficiency, availability, candidates, leadership = load_all_data()

    verified_usernames, prof_by_id, avail_by_id, lead_by_id = build_lookup_dicts(
        verification, proficiency, availability, leadership
    )

    # Keep only members whose Discord ID was verified.
    verified_ids = set(verified_usernames)
    leader_ids = set(candidates["leaders"]) & verified_ids
    non_leader_ids = set(candidates["non_leaders"]) & verified_ids

    participants_md = generate_participants_md(
        non_leader_ids, verified_usernames, prof_by_id, avail_by_id
    )
    # The markdown contains emoji, so write UTF-8 explicitly; a locale default
    # such as cp1252 would raise UnicodeEncodeError.
    with open(DATA_DIR / "participants.md", "w", encoding="utf-8") as f:
        f.write(participants_md)
    print(f"Generated participants.md with {len(non_leader_ids)} participants")

    leaders_md = generate_leaders_md(
        leader_ids, verified_usernames, prof_by_id, avail_by_id, lead_by_id
    )
    with open(DATA_DIR / "leaders.md", "w", encoding="utf-8") as f:
        f.write(leaders_md)
    print(f"Generated leaders.md with {len(leader_ids)} leaders")


# Generate the files only when run as a script, not when imported.
if __name__ == "__main__":
    main()