# ephemere/python/cohort/check_lengths.py

"""Dry-run check of Discord message lengths before sending the activity report.

Parses the catch_up_report.md table, formats each team's data into a monospace
Discord table, and reports whether any message would exceed Discord's 2000-char limit.
Run this before send_activity_report.py to catch length issues early.

Data files (place in data/):
  - catch_up_report.md  Activity report generated by catch_up_report.py

Env vars:
  - None
"""

# Column spec for the Discord table, one tuple per column:
#   (key in the parsed report row, header label shown in the table, column width).
# format_table() left-aligns the first column and widens it to fit the longest
# username; every other column is right-aligned. Widths act as minimums because
# padding never truncates a longer value.
# NOTE(review): the "♟" in two labels below may be a mangled glyph (e.g. an
# emoji lost in an encoding round-trip) — confirm the intended character.
FIELDS = [
    ("Discord Username", "Name", 18),
    ("Discord Messages", "Msgs", 5),
    ("PRs Opened", "PRs", 4),
    ("Issues Opened", "Issues", 6),
    ("Issue Comments", "Issue♟", 7),
    ("PR Comments", "PR♟", 5),
    ("PR Reviews", "Reviews", 7),
    ("Commits", "Commits", 7),
]

# Path of the markdown report that main() parses (relative to the working directory).
REPORT_PATH = "data/catch_up_report.md"


def parse_report(path: str) -> dict[str, list[dict]]:
    """Parse the markdown table from catch_up_report.md into team → rows.

    The table is located by its header row, which must start with
    ``| Discord ID |``. The line directly after the header is skipped (it is
    expected to be the markdown separator row), and parsing stops at the first
    subsequent line that does not start with ``|``.

    Args:
        path: Path to the markdown report file (read as UTF-8).

    Returns:
        A dict mapping each value of the ``Team`` column to the list of rows
        for that team, in file order. Each row is a dict of header name →
        cell text (all values are stripped strings).

    Raises:
        ValueError: If no header row starting with ``| Discord ID |`` exists.
        KeyError: If the table has no ``Team`` column.
        OSError: If the file cannot be opened.
    """
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()

    header_idx = next(
        (i for i, line in enumerate(lines) if line.startswith("| Discord ID |")),
        None,
    )
    if header_idx is None:
        # Previously this fell through to lines[None] and raised an opaque TypeError.
        raise ValueError(
            f"No table header starting with '| Discord ID |' found in {path}"
        )

    headers = [h.strip() for h in lines[header_idx].strip().strip("|").split("|")]

    teams: dict[str, list[dict]] = {}
    # +2 skips the header row and the separator row that follows it.
    for raw in lines[header_idx + 2 :]:
        line = raw.strip()
        if not line.startswith("|"):
            # First non-table line (blank or prose) marks the end of the table.
            break
        cells = [c.strip() for c in line.strip("|").split("|")]
        row = dict(zip(headers, cells))
        teams.setdefault(row["Team"], []).append(row)
    return teams


def format_table(members: list[dict]) -> str:
    """Format a team's member list as a monospace table for Discord.

    Rows are sorted by ``Discord Messages`` (parsed as int) in descending
    order. Columns come from ``FIELDS``; the first column is left-aligned and
    widened to fit the longest ``Discord Username``, all other columns are
    right-aligned. Cells are joined with two spaces.

    Args:
        members: Row dicts containing every key listed in ``FIELDS``, with
            string values.

    Returns:
        The header row, a dashed separator row, and one row per member,
        joined with newlines. An empty ``members`` list yields just the
        header and separator.

    Raises:
        ValueError: If a ``Discord Messages`` value is not an integer string.
        KeyError: If a row lacks one of the ``FIELDS`` keys.
    """
    ordered = sorted(members, key=lambda r: int(r["Discord Messages"]), reverse=True)

    keys = [key for key, _, _ in FIELDS]
    labels = [label for _, label, _ in FIELDS]
    widths = [width for _, _, width in FIELDS]

    # default=0 keeps an empty team from crashing max(); the configured
    # minimum width then wins.
    longest_name = max((len(m["Discord Username"]) for m in ordered), default=0)
    widths[0] = max(widths[0], longest_name)

    def render(cells: list[str]) -> str:
        # Name column (index 0) is left-aligned; numeric columns right-aligned.
        return "  ".join(
            cell.ljust(width) if i == 0 else cell.rjust(width)
            for i, (cell, width) in enumerate(zip(cells, widths))
        )

    out = [render(labels), "  ".join("-" * width for width in widths)]
    out.extend(render([m[key] for key in keys]) for m in ordered)
    return "\n".join(out)


def main() -> None:
    """Print, per team, the would-be Discord message length and limit status.

    Reads the report at ``REPORT_PATH``, renders each team's table inside a
    code fence under a bold title line, and prints the character count along
    with ``OK`` or how far the message exceeds the 2000-character limit.
    """
    limit = 2000  # Discord's per-message character cap, per the module docstring
    for team, members in parse_report(REPORT_PATH).items():
        body = format_table(members)
        msg = f"**{team} — Activity Report (Feb 15–23)**\n```\n{body}\n```"
        size = len(msg)
        overflow = size - limit
        if overflow > 0:
            status = f"OVER by {overflow}"
        else:
            status = "OK"
        print(f"{team}: {size} chars — {status}")


# Run the length check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()