fahrengit-451/geoblock_watcher/watcher.py
Albert Armea 483617e41a Move geo_rules.yml into its own directory
This way, Docker still passes through changes made by text editors
2026-03-21 22:26:55 +00:00

248 lines
8.9 KiB
Python

#!/usr/bin/env python3
"""
geoblock_watcher.py
────────────────────────────────────────────────────────────────────────────
Watches geo_rules.yml for changes, renders three nginx config snippets into
/app/geoblock/, then signals the nginx container to reload its configuration.
Key constraint: nginx `return` requires a literal integer status code — it
cannot take a variable. We therefore render one map variable and one `if`
block *per distinct status code* per repo, so every `return` statement has a
hardcoded integer.
Rendered files
──────────────
repo_maps.conf       (stub — logic lives in repo_vars.conf)
repo_vars.conf       per-repo map blocks: region key → body string (or "")
repo_locations.conf  per-repo location blocks with one `if` per status code
"""
import hashlib
import logging
import os
import re
import signal
import sys
import time
from collections import defaultdict
from pathlib import Path
from typing import Any
import docker
import yaml
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
# Log to stdout at INFO level; every record carries a timestamp and a
# "[watcher]" tag.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [watcher] %(levelname)s %(message)s",
datefmt="%Y-%m-%dT%H:%M:%S",
stream=sys.stdout,
)
log = logging.getLogger(__name__)
# YAML rules file that apply_rules() reads and the watchdog handler monitors.
RULES_FILE = Path("/app/host/geo_rules.yml")
# Directory the three rendered nginx snippets are written into.
OUTPUT_DIR = Path("/app/geoblock")
# Name used to look up the nginx container to signal; overridable through the
# NGINX_CONTAINER_NAME environment variable.
NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx")
# Directives appended verbatim to every generated location block, after the
# per-status `if` checks.
PROXY_DIRECTIVES = """\
proxy_pass http://forgejo:3000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
client_max_body_size 512m;
proxy_request_buffering off;
proxy_buffering off;
proxy_read_timeout 600s;
proxy_send_timeout 600s;"""
# ── Helpers ───────────────────────────────────────────────────────────────────
def _var_name(repo_path: str) -> str:
    """Derive the nginx variable stem for a repo path.

    Leading and trailing slashes are dropped and every character outside
    [a-zA-Z0-9] is replaced by an underscore, e.g.
    /alice/my-repo → geoblock_alice_my_repo
    """
    trimmed = repo_path.strip("/")
    slug = re.sub(r"[^a-zA-Z0-9]", "_", trimmed)
    return "geoblock_" + slug
def _escape_body(body: str) -> str:
    """Escape *body* for use inside a quoted string in the generated config.

    Backslashes, double quotes and single quotes each get a backslash in
    front of them, and every newline is replaced by a single space.
    """
    # NOTE(review): "$" is passed through untouched; presumably nginx would
    # read it as the start of a variable inside a map value — confirm whether
    # bodies containing "$" need to be rejected or handled.
    escapes = str.maketrans(
        {
            "\\": "\\\\",
            '"': '\\"',
            "'": "\\'",
            "\n": " ",
        }
    )
    return body.translate(escapes)
# ── Renderer ──────────────────────────────────────────────────────────────────
def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]:
    """
    Render the nginx snippets for every repo listed in *rules_data*.

    Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf).

    For each repo we emit:
      • One map per distinct status code:
            map $geoip2_region_key $geoblock_<repo>_<status> { ... }
        Value is the escaped body string when blocked, "" otherwise.
      • One location block with one `if` per distinct status code:
            if ($geoblock_<repo>_<status> != "") {
                return <status> "$geoblock_<repo>_<status>";
            }
        followed by PROXY_DIRECTIVES.

    A key that is present but empty in the YAML (`repos:`, `rules:` or
    `locales:` with nothing under it) loads as None and is treated as an
    empty list. A missing or null `body` falls back to "Blocked".

    Raises KeyError when a repo has no "path" or a rule has no "status", and
    TypeError when a locale is not a string (e.g. an unquoted NO, which YAML
    1.1 loads as a boolean).
    """
    header = "# Generated by geoblock_watcher — do not edit manually.\n\n"
    vars_blocks: list[str] = []
    loc_blocks: list[str] = []

    # `.get(key, [])` only covers a *missing* key; an empty key yields None,
    # hence the `or []` fallbacks below.
    repos: list[dict] = rules_data.get("repos") or []
    for repo in repos:
        path: str = repo["path"].rstrip("/")
        base_var: str = _var_name(path)

        # Collect (locale, status, body) triples; group by status code
        # status_map: {status_int: [(locale, body_escaped), ...]}
        status_map: dict[int, list[tuple[str, str]]] = defaultdict(list)
        for rule in repo.get("rules") or []:
            status = int(rule["status"])
            raw_body = rule.get("body")
            body = _escape_body("Blocked" if raw_body is None else str(raw_body))
            for locale in rule.get("locales") or []:
                if not isinstance(locale, str):
                    # Unquoted NO / ON / OFF etc. are booleans in YAML 1.1.
                    raise TypeError(
                        f"locale {locale!r} for {path} is not a string — "
                        "quote it in the rules file"
                    )
                status_map[status].append((locale.strip(), body))

        # ── One map variable per distinct status code ──────────────────────
        for status, entries in status_map.items():
            var = f"{base_var}_{status}"
            vars_blocks.append(f"# {path} — HTTP {status}")
            vars_blocks.append(f"map $geoip2_region_key ${var} {{")
            vars_blocks.append(' default "";')
            # State-level rules first (more specific): exact keys
            vars_blocks.extend(
                f' "{locale}" "{body}";'
                for locale, body in entries
                if "-" in locale
            )
            # Country-level rules second: regex matching the bare country
            # code or the country code followed by "-<state>"
            vars_blocks.extend(
                f' "~^{re.escape(locale)}(-|$)" "{body}";'
                for locale, body in entries
                if "-" not in locale
            )
            vars_blocks.append("}")
            vars_blocks.append("")

        # ── Location block ─────────────────────────────────────────────────
        # NOTE(review): the `if`s are emitted in ascending status order, so
        # when a country rule and a state rule of that country use different
        # status codes, the lower status code is checked first regardless of
        # which rule is more specific — confirm this is intended.
        loc_blocks.append(f"# Geo-block for {path}")
        loc_blocks.append(f"location ^~ {path} {{")
        for status in sorted(status_map):
            var = f"{base_var}_{status}"
            loc_blocks.append(f' if (${var} != "") {{')
            loc_blocks.append(f' return {status} "${var}";')
            loc_blocks.append(" }")
        loc_blocks.append(PROXY_DIRECTIVES)
        loc_blocks.append("}")
        loc_blocks.append("")

    maps_conf = header + "# (Region key mapping done inline in repo_vars.conf)\n"
    vars_conf = header + "\n".join(vars_blocks)
    locs_conf = header + "\n".join(loc_blocks)
    return maps_conf, vars_conf, locs_conf
# ── Writer & nginx reload ─────────────────────────────────────────────────────
# SHA-256 hex digest of the rules file recorded by apply_rules() after its
# last completed run; "" until then.
_last_hash: str = ""


def _file_hash(path: Path) -> str:
    """Return the hex SHA-256 digest of the bytes stored at *path*."""
    digest = hashlib.sha256()
    digest.update(path.read_bytes())
    return digest.hexdigest()
def apply_rules(force: bool = False) -> None:
    """Re-render the nginx snippets from RULES_FILE and reload nginx.

    The rules file is read exactly once per call; the change-detection hash
    and the YAML parse both use that same snapshot, so the stored hash always
    describes the content that was actually rendered.

    Args:
        force: Render and reload even when the file content is unchanged
            since the last completed run.

    Every failure (unreadable file, invalid YAML, render error, unwritable
    output directory) is logged and the call returns without touching
    `_last_hash`, so the next call tries again.
    """
    global _last_hash

    # Read first, handle absence afterwards: an editor that saves by renaming
    # a temp file over the target can make the file vanish between an
    # exists() check and the read.
    try:
        raw = RULES_FILE.read_bytes()
    except FileNotFoundError:
        log.warning("Rules file not found: %s — skipping.", RULES_FILE)
        return
    except OSError as exc:
        log.error("Cannot read %s: %s — skipping.", RULES_FILE, exc)
        return

    current_hash = hashlib.sha256(raw).hexdigest()
    if not force and current_hash == _last_hash:
        log.debug("Rules file unchanged — nothing to do.")
        return

    log.info("Rules file changed — re-rendering nginx config snippets.")
    try:
        # Passing bytes lets the YAML loader do the decoding, so undecodable
        # input surfaces as a YAMLError instead of depending on the locale.
        rules_data = yaml.safe_load(raw) or {}
    except yaml.YAMLError as exc:
        log.error("YAML parse error in %s: %s — skipping reload.", RULES_FILE, exc)
        return

    try:
        maps_conf, vars_conf, locs_conf = render_clean(rules_data)
    except Exception as exc:
        log.error("Render error: %s — skipping reload.", exc, exc_info=True)
        return

    try:
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        # The generated header contains a non-ASCII dash, so the encoding is
        # pinned rather than left to the process locale.
        (OUTPUT_DIR / "repo_maps.conf").write_text(maps_conf, encoding="utf-8")
        (OUTPUT_DIR / "repo_vars.conf").write_text(vars_conf, encoding="utf-8")
        (OUTPUT_DIR / "repo_locations.conf").write_text(locs_conf, encoding="utf-8")
    except OSError as exc:
        log.error(
            "Failed to write config snippets to %s: %s — skipping reload.",
            OUTPUT_DIR,
            exc,
        )
        return

    log.info("Config snippets written to %s.", OUTPUT_DIR)
    _reload_nginx()
    _last_hash = current_hash
def _reload_nginx() -> None:
    """Send SIGHUP to the nginx container so it reloads its configuration.

    Best-effort: a missing container is logged as a warning and any other
    failure as an error; nothing is raised to the caller.
    """
    client = None
    try:
        client = docker.from_env()
        containers = client.containers.list(filters={"name": NGINX_CONTAINER})
        if not containers:
            log.warning("nginx container '%s' not found — skipping reload.", NGINX_CONTAINER)
            return
        # The name filter also returns containers whose name merely contains
        # NGINX_CONTAINER, so prefer an exact match and only fall back to the
        # first hit when there is none.
        container = next(
            (c for c in containers if c.name == NGINX_CONTAINER),
            containers[0],
        )
        container.kill(signal="HUP")
        log.info("Sent SIGHUP to nginx container '%s'.", container.name)
    except Exception as exc:
        log.error("Failed to reload nginx: %s", exc, exc_info=True)
    finally:
        # A new client is created on every reload; close it so its
        # connections do not accumulate over the lifetime of the watcher.
        if client is not None:
            client.close()
# ── Watchdog ──────────────────────────────────────────────────────────────────
class RulesHandler(FileSystemEventHandler):
    """Re-apply the rules whenever RULES_FILE is modified, created or replaced.

    Besides in-place writes (modified/created events) this also reacts to a
    file being moved onto RULES_FILE, which is how editors that save through
    a temporary file and a rename show up.
    """

    @staticmethod
    def _is_rules_file(path) -> bool:
        """Return True when *path* (str or bytes) resolves to RULES_FILE."""
        return Path(os.fsdecode(path)).resolve() == RULES_FILE.resolve()

    def _refresh(self) -> None:
        """Log the change, wait briefly, then re-apply the rules."""
        log.info("Detected change in %s.", RULES_FILE)
        time.sleep(0.2)  # debounce: give the writer a moment to finish
        try:
            apply_rules()
        except Exception as exc:
            # Keep a failed refresh from propagating out of the event
            # callback; the periodic poll in main() will try again.
            log.error("Failed to apply rules: %s", exc, exc_info=True)

    def on_modified(self, event):
        if self._is_rules_file(event.src_path):
            self._refresh()

    on_created = on_modified

    def on_moved(self, event):
        # Only a move *onto* the rules file counts as new content.
        if self._is_rules_file(event.dest_path):
            self._refresh()
def main() -> None:
    """Run the watcher until SIGTERM or SIGINT is received.

    Renders once at startup, then relies on two triggers: file-system events
    for the directory containing RULES_FILE, and a 60-second poll as a
    fallback for changes that produce no event.
    """
    log.info("geoblock_watcher starting. Watching %s", RULES_FILE)
    apply_rules(force=True)

    observer = Observer()
    observer.schedule(RulesHandler(), str(RULES_FILE.parent), recursive=False)
    observer.start()

    def _shutdown(signum, frame):
        log.info("Shutting down.")
        observer.stop()
        sys.exit(0)

    signal.signal(signal.SIGTERM, _shutdown)
    signal.signal(signal.SIGINT, _shutdown)

    try:
        while True:
            time.sleep(60)
            apply_rules()
    finally:
        # Always stop before joining: if the loop is left through an
        # unexpected exception the observer thread is still running, and a
        # bare join() would block forever.
        observer.stop()
        observer.join()


if __name__ == "__main__":
    main()