generated from nhcarrigan/template
a40188413a
All Python cohort scripts now use DATA_DIR = Path(__file__).parent.parent.parent / "data" to correctly resolve the repo-root data/ directory regardless of the working directory set by run.sh. All TypeScript scripts have expanded JSDoc headers documenting data file requirements and environment variables.
129 lines
4.3 KiB
Python
129 lines
4.3 KiB
Python
"""Verify Discord user IDs from a markdown table of applicant data.
|
|
|
|
Reads a markdown table containing Discord IDs and checks each one against the
|
|
Discord API to confirm the user exists. Handles rate limits automatically.
|
|
|
|
Data files (place in data/):
|
|
- table.md Markdown table of applicants including a Discord ID column
|
|
|
|
Outputs (written to data/):
|
|
- discord_verification.json Verification result (valid/invalid) per Discord ID
|
|
|
|
Env vars:
|
|
- DISCORD_BOT_TOKEN Bot token for the Discord API
|
|
"""
|
|
|
|
import json
import os
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path

# Repo-root data/ directory, resolved relative to this file (three levels up)
# so the script works regardless of the current working directory.
DATA_DIR = Path(__file__).parent.parent.parent / "data"

# Configuration
# The bot token is mandatory: a missing DISCORD_BOT_TOKEN raises KeyError here,
# before any file or network access happens.
BOT_TOKEN = os.environ["DISCORD_BOT_TOKEN"]
# ID of the guild (server) whose membership is checked.
GUILD_ID = "692816967895220344"
# Root of the Discord REST API (v10).
BASE_URL = "https://discord.com/api/v10"

# Read Discord IDs from table.md.
# The file is read explicitly as UTF-8 so applicant data containing non-ASCII
# characters parses the same way on every platform instead of depending on the
# locale's default encoding.
with open(DATA_DIR / "table.md", encoding="utf-8") as f:
    content = f.read()

lines = content.strip().split("\n")

# Find the table header row: the first line that starts with "| Discord".
header_idx = next(
    (i for i, line in enumerate(lines) if line.startswith("| Discord")),
    None,
)

if header_idx is None:
    print("Could not find table header!")
    # sys.exit is always available; the bare exit() helper is only injected by
    # the site module and is meant for interactive use.
    sys.exit(1)

discord_idx = 0  # Discord ID is the first column

discord_ids = []
for line in lines[header_idx + 2 :]:  # Skip header and separator
    if not line.startswith("|"):
        continue
    # Splitting on "|" yields an empty fragment before the first pipe and one
    # after the last pipe; [1:-1] keeps only the real cells.
    cols = [c.strip() for c in line.split("|")[1:-1]]
    if len(cols) <= discord_idx:
        continue
    discord_id = cols[discord_idx]
    # Keep only purely numeric cells; empty or non-numeric cells are skipped.
    if discord_id.isdigit():
        discord_ids.append(discord_id)

print(f"Found {len(discord_ids)} Discord IDs to verify")

# Verify each ID against the guild

# Upper bound on how many times a single ID is retried after HTTP 429, so a
# persistent rate limit is eventually recorded as an error instead of looping
# forever.
MAX_RATE_LIMIT_RETRIES = 3


def _fetch_member(discord_id):
    """Return the decoded guild-member JSON object for *discord_id*.

    Sends an authenticated GET to the guild member endpoint. HTTP error
    statuses (e.g. 404, 429) surface as ``urllib.error.HTTPError`` and
    connection failures as ``urllib.error.URLError``; both are left for the
    caller to classify.
    """
    req = urllib.request.Request(f"{BASE_URL}/guilds/{GUILD_ID}/members/{discord_id}")
    req.add_header("Authorization", f"Bot {BOT_TOKEN}")
    # The context manager closes the connection even if decoding fails.
    with urllib.request.urlopen(req) as response:
        return json.loads(response.read().decode())


def _retry_delay(error):
    """Return how many seconds a 429 response asks the caller to wait.

    Prefers the ``retry_after`` field of the JSON body, then the
    ``Retry-After`` header, and finally falls back to 1 second so that an
    unexpected (e.g. non-JSON) body cannot crash the run.
    """
    try:
        delay = json.loads(error.read().decode()).get("retry_after", 1)
    except (ValueError, AttributeError):
        delay = None
    if not isinstance(delay, (int, float)):
        try:
            delay = float(error.headers.get("Retry-After"))
        except (AttributeError, TypeError, ValueError):
            delay = 1
    # time.sleep rejects negative values.
    return max(delay, 0)


verified = []  # (discord_id, username) for members found in the guild
missing = []  # discord_ids the API reported as 404 (not in the guild)
errors = []  # (discord_id, description) for every other failure

total = len(discord_ids)
for i, discord_id in enumerate(discord_ids, start=1):
    prefix = f"[{i}/{total}]"
    suffix = ""  # becomes " (after retry)" once a rate limit has been hit
    retries = 0

    while True:
        try:
            data = _fetch_member(discord_id)
        except urllib.error.HTTPError as e:
            if e.code == 429 and retries < MAX_RATE_LIMIT_RETRIES:
                # Rate limited - wait as instructed, then try this ID again
                retry_after = _retry_delay(e)
                print(f"{prefix} Rate limited, waiting {retry_after}s...")
                time.sleep(retry_after + 0.5)
                retries += 1
                suffix = " (after retry)"
                continue
            if e.code == 404:
                missing.append(discord_id)
                print(f"{prefix} ✗ {discord_id} - NOT IN SERVER{suffix}")
            else:
                errors.append((discord_id, f"HTTP {e.code}"))
                print(f"{prefix} ? {discord_id} - Error {e.code}")
        except urllib.error.URLError as e:
            # Network-level failure (DNS, refused connection, ...): record it
            # and keep going so results gathered so far are still saved.
            errors.append((discord_id, f"Network error: {e.reason}"))
            print(f"{prefix} ? {discord_id} - Error {e.reason}")
        else:
            username = data.get("user", {}).get("username", "Unknown")
            verified.append((discord_id, username))
            print(f"{prefix} ✓ {discord_id} - {username}{suffix}")
        break

    # Small delay to avoid rate limits
    time.sleep(0.1)

print("\n=== SUMMARY ===")
|
|
print(f"Verified: {len(verified)}")
|
|
print(f"Missing: {len(missing)}")
|
|
print(f"Errors: {len(errors)}")
|
|
|
|
# Save results
|
|
with open(DATA_DIR / "discord_verification.json", "w") as f:
|
|
json.dump({"verified": verified, "missing": missing, "errors": errors}, f, indent=2)
|
|
|
|
print("\nResults saved to discord_verification.json")
|