Files
ephemere/python/cohort/analyse_availability.py
naomi 6b5fa40599
CI / dependency-pin-check-typescript (push) Successful in 4s
CI / dependency-pin-check-python (push) Successful in 3s
CI / typescript (push) Successful in 9m38s
CI / python (push) Successful in 9m23s
Security Scan and Upload / Security & DefectDojo Upload (push) Successful in 1m6s
feat: add multi-lang support and cohort scripts (#1)
### Explanation

_No response_

### Issue

_No response_

### Attestations

- [ ] I have read and agree to the [Code of Conduct](https://docs.nhcarrigan.com/community/coc/).
- [ ] I have read and agree to the [Community Guidelines](https://docs.nhcarrigan.com/community/guide/).
- [ ] My contribution complies with the [Contributor Covenant](https://docs.nhcarrigan.com/dev/covenant/).

### Dependencies

- [ ] I have pinned the dependencies to a specific patch version.

### Style

- [ ] I have run the linter and resolved any errors.
- [ ] My pull request uses an appropriate title, matching the conventional commit standards.
- [ ] My scope of feat/fix/chore/etc. correctly matches the nature of changes in my pull request.

### Tests

- [ ] My contribution adds new code, and I have added tests to cover it.
- [ ] My contribution modifies existing code, and I have updated the tests to reflect these changes.
- [ ] All new and existing tests pass locally with my changes.
- [ ] Code coverage remains at or above the configured threshold.

### Documentation

_No response_

### Versioning

_No response_

Co-authored-by: Hikari <hikari@nhcarrigan.com>
Reviewed-on: #1
Co-authored-by: Naomi Carrigan <commits@nhcarrigan.com>
Co-committed-by: Naomi Carrigan <commits@nhcarrigan.com>
2026-01-23 20:07:16 -08:00

192 lines
6.0 KiB
Python

import json
import re
from collections import defaultdict
# Weekday names in calendar order; used as the keys when looking up each
# applicant's per-day time slots.
DAYS = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

# Named six-hour windows of the UTC day, stored as (start_hour, end_hour)
# pairs. An hour belongs to a window when start_hour <= hour < end_hour.
UTC_BLOCKS = {
    "mornings": (6, 12),  # 06:00 - 12:00 UTC
    "afternoons": (12, 18),  # 12:00 - 18:00 UTC
    "evenings": (18, 24),  # 18:00 - 00:00 UTC
    "nights": (0, 6),  # 00:00 - 06:00 UTC
}
def parse_utc_offset(timezone_str: str) -> float:
    """Extract the UTC offset, in hours, from a timezone label.

    The label is searched for a marker such as ``UTC-5`` or ``UTC+5:30``
    (for example ``'America/New_York (UTC-5)'``).

    Args:
        timezone_str: Free-form timezone text.

    Returns:
        The signed offset in fractional hours (``UTC+5:30`` -> ``5.5``), or
        ``0.0`` when no ``UTC`` marker followed by a number is present.
    """
    match = re.search(r"UTC([+-]?)(\d+)(?::(\d+))?", timezone_str)
    if match is None:
        return 0.0
    sign, hours, minutes = match.groups()
    magnitude = int(hours) + int(minutes or 0) / 60
    # Take the sign from the text rather than from the parsed hour value:
    # int("-0") is 0, so an offset like "UTC-0:30" would otherwise lose its
    # minus sign and come back as +0.5.
    return -magnitude if sign == "-" else magnitude
def parse_time_slots(time_str: str) -> list[tuple[int, int]]:
    """Turn a cell such as '07:00-08:00; 19:00-20:00' into hour pairs.

    The text is split on ';' and the first 'HH:MM-HH:MM' range found in each
    piece contributes one ``(start_hour, end_hour)`` tuple. Only the hour
    components are kept; the minutes are matched but discarded. Empty input
    and the placeholders 'n/a' / 'na' (any letter case) yield an empty list,
    as do pieces that contain no recognisable range.
    """
    if not time_str or time_str.lower() in ("n/a", "na", ""):
        return []

    range_pattern = re.compile(r"(\d{1,2}):(\d{2})\s*-\s*(\d{1,2}):(\d{2})")
    hits = (range_pattern.search(piece.strip()) for piece in time_str.split(";"))
    return [(int(hit.group(1)), int(hit.group(3))) for hit in hits if hit]
def local_hour_to_utc(local_hour: int, utc_offset: float) -> int:
    """Convert a local clock hour to the UTC hour it starts in.

    Args:
        local_hour: Hour of the day in local time.
        utc_offset: Local offset from UTC in (possibly fractional) hours.

    Returns:
        The UTC hour in the range 0-23.
    """
    utc_hour = local_hour - utc_offset
    # Floor rather than truncate: int() rounds toward zero, which would push
    # negative fractional results the wrong way (00:00 at UTC+5:30 is 18:30
    # UTC, i.e. hour 18, not 19). ``// 1`` floors for both signs.
    return int(utc_hour // 1) % 24
def get_utc_blocks_for_hour(utc_hour: int) -> list[str]:
    """Return the names of the UTC blocks that contain ``utc_hour``.

    Every entry of ``UTC_BLOCKS`` is treated as a half-open range
    ``start <= utc_hour < end``. The bounds are read from the table for all
    blocks instead of being repeated here for "nights" and "evenings", so the
    function cannot drift out of sync with ``UTC_BLOCKS``.

    Args:
        utc_hour: Hour of the day in UTC.

    Returns:
        Matching block names in ``UTC_BLOCKS`` order; empty if none match.
    """
    return [
        block_name
        for block_name, (start, end) in UTC_BLOCKS.items()
        if start <= utc_hour < end
    ]
def analyze_applicant_availability(
    timezone_str: str,
    day_slots: dict,
    *,
    min_block_hours: int = 3,
) -> dict:
    """Summarise one applicant's weekly availability in UTC blocks.

    Each local hour covered by the applicant's slots is converted to UTC and
    counted against the UTC block it falls in, summed over the whole week.

    Args:
        timezone_str: Timezone label containing a ``UTC±H[:MM]`` marker.
        day_slots: Maps a day name from ``DAYS`` to a list of
            ``(start_hour, end_hour)`` local-time tuples. Missing days are
            treated as having no slots.
        min_block_hours: Minimum number of counted hours a block needs across
            the week to be reported as available.

    Returns:
        A dict with the keys ``utc_offset``, ``timezone``,
        ``available_blocks``, ``block_counts`` and ``total_unique_utc_hours``.
    """
    utc_offset = parse_utc_offset(timezone_str)
    block_counts = defaultdict(int)
    all_utc_hours = set()

    for day in DAYS:
        for start_hour, end_hour in day_slots.get(day, []):
            # A slot that ends earlier than it starts (e.g. 22:00-02:00 or
            # 23:00-00:00) runs past midnight. Extend the end into the next
            # day so range() is not empty and the slot is not silently lost;
            # local_hour_to_utc wraps the hour back into 0-23.
            if end_hour < start_hour:
                end_hour += 24
            for hour in range(start_hour, end_hour):
                utc_hour = local_hour_to_utc(hour, utc_offset)
                all_utc_hours.add(utc_hour)
                for block in get_utc_blocks_for_hour(utc_hour):
                    block_counts[block] += 1

    # Indexing the defaultdict here also inserts a 0 for any block that was
    # never counted, so "block_counts" in the result always lists all four.
    available_blocks = [
        block
        for block in ["mornings", "afternoons", "evenings", "nights"]
        if block_counts[block] >= min_block_hours
    ]

    return {
        "utc_offset": utc_offset,
        "timezone": timezone_str,
        "available_blocks": available_blocks,
        "block_counts": dict(block_counts),
        "total_unique_utc_hours": len(all_utc_hours),
    }
def parse_table_md(path: str = "table.md") -> list[dict]:
    """Parse the applicant markdown table into one dict per row.

    The header is the first line that starts with ``| Discord ID``. The line
    directly after it (the markdown separator row) is skipped, and every later
    line that starts with ``|`` is read as a data row.

    Args:
        path: Location of the markdown file. Defaults to ``table.md`` in the
            current working directory.

    Returns:
        A list of dicts mapping header text to the stripped cell text. Rows
        with fewer cells than the header are skipped.

    Raises:
        ValueError: If no header line is found.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    with open(path, encoding="utf-8") as f:
        lines = f.read().strip().split("\n")

    header_idx = next(
        (i for i, line in enumerate(lines) if line.startswith("| Discord ID")),
        None,
    )
    if header_idx is None:
        raise ValueError("Could not find table header")

    # split("|") yields an empty string before the leading pipe and whatever
    # follows the trailing pipe; [1:-1] keeps only the real cells.
    headers = [h.strip() for h in lines[header_idx].split("|")[1:-1]]

    applicants = []
    for line in lines[header_idx + 2 :]:
        if not line.startswith("|"):
            continue
        cells = [c.strip() for c in line.split("|")[1:-1]]
        if len(cells) < len(headers):
            continue
        applicants.append(dict(zip(headers, cells)))
    return applicants
def main():
    """Analyse the availability of verified applicants and report on it.

    Reads ``discord_verification.json`` and the applicant table from the
    current working directory, analyses every applicant whose Discord ID is
    in the verified set, writes the per-applicant results to
    ``availability_analysis.json`` and prints a summary to stdout.
    """
    # Open with an explicit UTF-8 encoding so reading does not depend on the
    # platform's default encoding.
    with open("discord_verification.json", encoding="utf-8") as f:
        verification = json.load(f)
    # The first element of each "verified" entry is matched against the
    # table's "Discord ID" column below.
    verified_ids = {v[0] for v in verification["verified"]}
    print(f"Verified applicants: {len(verified_ids)}")

    applicants = parse_table_md()
    print(f"Total applicants in table: {len(applicants)}")

    availability_results = []
    for applicant in applicants:
        discord_id = applicant.get("Discord ID", "")
        if discord_id not in verified_ids:
            continue
        timezone = applicant.get("Timezone", "")
        day_slots = {day: parse_time_slots(applicant.get(day, "")) for day in DAYS}
        analysis = analyze_applicant_availability(timezone, day_slots)
        # Built key by key so the order of fields in the JSON output is fixed
        # here rather than by the analysis dict.
        availability_results.append(
            {
                "discord_id": discord_id,
                "timezone": timezone,
                "utc_offset": analysis["utc_offset"],
                "available_blocks": analysis["available_blocks"],
                "block_counts": analysis["block_counts"],
                "total_unique_utc_hours": analysis["total_unique_utc_hours"],
            }
        )

    with open("availability_analysis.json", "w", encoding="utf-8") as f:
        json.dump(availability_results, f, indent=2)

    block_distribution = defaultdict(int)
    for result in availability_results:
        for block in result["available_blocks"]:
            block_distribution[block] += 1

    print("\n=== AVAILABILITY ANALYSIS COMPLETE ===")
    print(f"Analyzed: {len(availability_results)} applicants")
    print("\nBlock Distribution (applicants available in each block):")
    for block in ["mornings", "afternoons", "evenings", "nights"]:
        print(f" {block.capitalize()}: {block_distribution[block]}")
    no_blocks = sum(1 for r in availability_results if not r["available_blocks"])
    print(f"\nApplicants with no clear block availability: {no_blocks}")
    print("\nResults saved to availability_analysis.json")


if __name__ == "__main__":
    main()