Files
ephemere/python/cohort/check_lengths.py
T
naomi a40188413a docs: add data file documentation and fix data path resolution
All Python cohort scripts now use DATA_DIR = Path(__file__).parent.parent.parent / "data"
to correctly resolve the repo-root data/ directory regardless of the working directory
set by run.sh. All TypeScript scripts have expanded JSDoc headers documenting data file
requirements and environment variables.
2026-02-23 15:42:03 -08:00

87 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Dry-run check of Discord message lengths before sending the activity report.
Parses the catch_up_report.md table, formats each team's data into a monospace
Discord table, and reports whether any message would exceed Discord's 2000-char limit.
Run this before send_activity_report.py to catch length issues early.
Data files (place in data/):
- catch_up_report.md: Activity report generated by catch_up_report.py
Env vars:
- None
"""
# Column spec for the Discord table, one tuple per column:
#   (key looked up in each parsed report row, header shown in the table,
#    minimum column width in characters).
# The first column is left-aligned and widened to fit the longest username;
# all other columns are right-aligned.
FIELDS: list[tuple[str, str, int]] = [
    ("Discord Username", "Name", 18),
    ("Discord Messages", "Msgs", 5),
    ("PRs Opened", "PRs", 4),
    ("Issues Opened", "Issues", 6),
    ("Issue Comments", "Issue♟", 7),
    ("PR Comments", "PR♟", 5),
    ("PR Reviews", "Reviews", 7),
    ("Commits", "Commits", 7),
]
from pathlib import Path

# Repo-root data/ directory, resolved from this file's own location
# (<repo>/python/cohort/check_lengths.py) so the script finds the report
# regardless of the working directory it is launched from.
DATA_DIR = Path(__file__).resolve().parent.parent.parent / "data"

# Kept as a plain string so existing callers that expect a str path still work.
REPORT_PATH = str(DATA_DIR / "catch_up_report.md")
def _split_table_row(line: str) -> list[str]:
    """Split one markdown table line into its whitespace-stripped cells."""
    return [cell.strip() for cell in line.strip().strip("|").split("|")]


def parse_report(path: str) -> dict[str, list[dict]]:
    """Parse the markdown table from catch_up_report.md into team → rows.

    The table is located by its header line, which must start with
    ``| Discord ID |``. The line directly after the header (the markdown
    separator row) is skipped, and data rows are read until the first line
    that does not start with ``|``.

    Args:
        path: Path of the markdown report to read (decoded as UTF-8).

    Returns:
        Mapping of each ``Team`` cell value to the list of its rows, in file
        order. Every row maps column header → cell text; all values are
        strings.

    Raises:
        ValueError: If the file contains no table header line.
        KeyError: If a data row has no ``Team`` column.
    """
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()

    header_index = next(
        (i for i, line in enumerate(lines) if line.startswith("| Discord ID |")),
        None,
    )
    if header_index is None:
        # Fail with a clear message instead of the TypeError that indexing
        # the line list with None would produce.
        raise ValueError(
            f"No table header starting with '| Discord ID |' found in {path}"
        )

    headers = _split_table_row(lines[header_index])
    teams: dict[str, list[dict]] = {}
    # +2 skips the header itself and the |---|---| separator row below it.
    for raw in lines[header_index + 2 :]:
        line = raw.strip()
        if not line.startswith("|"):
            # First non-table line marks the end of the table.
            break
        row = dict(zip(headers, _split_table_row(line)))
        teams.setdefault(row["Team"], []).append(row)
    return teams
def format_table(
    members: list[dict],
    *,
    fields: "list[tuple[str, str, int]] | None" = None,
) -> str:
    """Format a team's member list as a monospace table for Discord.

    Rows are sorted by their ``"Discord Messages"`` value (parsed as an int),
    highest first. The first column is left-aligned and widened to fit the
    longest value; every other column is right-aligned at its configured
    width. Cells longer than their column width are not truncated.

    Args:
        members: Row dicts whose values are strings. Each row must contain
            ``"Discord Messages"`` and every key named in the column spec.
        fields: Optional column spec as ``(row key, header, min width)``
            tuples. Defaults to the module-level ``FIELDS``.

    Returns:
        The header row, a dashed separator row, and one row per member,
        joined with newlines. An empty ``members`` list yields just the
        header and separator.

    Raises:
        KeyError: If a row lacks a required key.
        ValueError: If a ``"Discord Messages"`` value is not an integer.
    """
    if fields is None:
        fields = FIELDS

    ordered = sorted(members, key=lambda r: int(r["Discord Messages"]), reverse=True)
    keys = [key for key, _, _ in fields]
    headers = [label for _, label, _ in fields]
    widths = [width for _, _, width in fields]

    if widths:
        # Grow the first (name) column to fit the longest value; default=0
        # keeps an empty member list from raising inside max().
        longest = max((len(m[keys[0]]) for m in ordered), default=0)
        widths[0] = max(widths[0], longest)

    def render(cells: list[str]) -> str:
        # Column 0 is left-aligned, all remaining columns right-aligned.
        return " ".join(
            cell.ljust(width) if idx == 0 else cell.rjust(width)
            for idx, (cell, width) in enumerate(zip(cells, widths))
        )

    out = [render(headers), " ".join("-" * width for width in widths)]
    out.extend(render([m[key] for key in keys]) for m in ordered)
    return "\n".join(out)
# Maximum length of a single Discord message, in characters.
DISCORD_MESSAGE_LIMIT = 2000


def main() -> None:
    """Check Discord message lengths for all teams.

    Parses the report at ``REPORT_PATH``, builds the message that would be
    sent for each team (bold header line plus the table in a code fence), and
    prints one line per team with the message length and either ``OK`` or the
    number of characters over the limit.
    """
    teams = parse_report(REPORT_PATH)
    for team, members in teams.items():
        table = format_table(members)
        # NOTE(review): the date range previously read "Feb 1523" (the dash
        # was missing). The header must match what send_activity_report.py
        # actually sends for this length to be meaningful — confirm there.
        msg = f"**{team} — Activity Report (Feb 15–23)**\n```\n{table}\n```"
        length = len(msg)
        overflow = length - DISCORD_MESSAGE_LIMIT
        status = "OK" if overflow <= 0 else f"OVER by {overflow}"
        print(f"{team}: {length} chars — {status}")


if __name__ == "__main__":
    main()