diff --git a/geoblock_watcher/watcher.py b/geoblock_watcher/watcher.py index d7c12f3..9869d90 100644 --- a/geoblock_watcher/watcher.py +++ b/geoblock_watcher/watcher.py @@ -5,21 +5,16 @@ geoblock_watcher.py Watches geo_rules.yml for changes, renders three nginx config snippets into /app/geoblock/, then signals the nginx container to reload its configuration. +Key constraint: nginx `return` requires a literal integer status code — it +cannot take a variable. We therefore render one map variable and one `if` +block *per distinct status code* per repo, so every `return` statement has a +hardcoded integer. + Rendered files ────────────── -repo_maps.conf - A single nginx `map` block body that maps the compound GeoIP key - ("CC-SUBDIV") → a per-repo decision token. This file is included - inside the existing map block in nginx.conf. - -repo_vars.conf - One `map` block per repo that translates the decision token to the - final "$geoblock_" variable value ("" = allow, or "status:body"). - -repo_locations.conf - One `location` block per repo. When the variable is non-empty the - block immediately returns the encoded status + body; otherwise the - request falls through to the main proxy_pass location. +repo_maps.conf (stub — logic lives in repo_vars.conf) +repo_vars.conf per-repo map blocks: region key → body string (or "") +repo_locations.conf per-repo location blocks with one `if` per status code """ import hashlib @@ -29,6 +24,7 @@ import re import signal import sys import time +from collections import defaultdict from pathlib import Path from typing import Any @@ -45,327 +41,111 @@ logging.basicConfig( ) log = logging.getLogger(__name__) -RULES_FILE = Path("/app/geo_rules.yml") -OUTPUT_DIR = Path("/app/geoblock") +RULES_FILE = Path("/app/geo_rules.yml") +OUTPUT_DIR = Path("/app/geoblock") NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx") +PROXY_DIRECTIVES = """\ + proxy_pass http://forgejo:3000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + client_max_body_size 512m; + proxy_request_buffering off; + proxy_buffering off; + proxy_read_timeout 600s; + proxy_send_timeout 600s;""" + + # ── Helpers ─────────────────────────────────────────────────────────────────── def _var_name(repo_path: str) -> str: - """Convert a repo path like /alice/my-repo → geoblock_alice_my_repo.""" + """Convert /alice/my-repo → geoblock_alice_my_repo""" sanitised = re.sub(r"[^a-zA-Z0-9]", "_", repo_path.strip("/")) return f"geoblock_{sanitised}" def _escape_body(body: str) -> str: - """Escape a string for safe embedding in an nginx config string literal.""" - return body.replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'").replace("\n", " ") - - -def _token(repo_index: int, rule_index: int) -> str: - """Unique short token used to link the map blocks together.""" - return f"repo{repo_index}_rule{rule_index}" + return ( + body + .replace("\\", "\\\\") + .replace('"', '\\"') + .replace("'", "\\'") + .replace("\n", " ") + ) # ── Renderer ────────────────────────────────────────────────────────────────── -def render(rules_data: dict[str, Any]) -> tuple[str, str, str]: - """ - Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf) as strings. - """ - repos: list[dict] = rules_data.get("repos", []) - - maps_lines: list[str] = [ - "# Generated by geoblock_watcher — do not edit manually.", - "# Included inside the map block in nginx.conf.", - "", - ] - - vars_lines: list[str] = [ - "# Generated by geoblock_watcher — do not edit manually.", - "", - ] - - loc_lines: list[str] = [ - "# Generated by geoblock_watcher — do not edit manually.", - "", - ] - - for ri, repo in enumerate(repos): - path: str = repo["path"].rstrip("/") - var: str = _var_name(path) - rules: list[dict] = repo.get("rules", []) - - # ── Map block: region key → token ───────────────────────────────────── - # Build a mapping from locale → token. More-specific (state-level) - # rules are added first so nginx map "first match" semantics apply. - state_entries: list[str] = [] - country_entries: list[str] = [] - - for rj, rule in enumerate(rules): - tok = _token(ri, rj) - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - value = f"{status}:{body}" - - for locale in rule.get("locales", []): - locale = locale.strip() - key = f'"{locale}"' - entry = f" {key:<20} {tok!r}_{ri}_{rj};" - if "-" in locale: - state_entries.append(entry) - else: - # Country-only key — pad subdivision with empty string so - # it matches both "CC-" (no subdivision) and we also add - # a regex fallback below. - country_entries.append(entry) - - # Emit the per-rule value variable (token → "status:body") - vars_lines.append(f"# {path} — rule {rj}: {rule.get('locales', [])}") - vars_lines.append(f'map $geoip2_region_key ${var}_r{rj} {{') - vars_lines.append(f' default "";') - - for locale in rule.get("locales", []): - locale = locale.strip() - if "-" in locale: - # State-level: exact match on "CC-SUBDIV" - vars_lines.append(f' "{locale}" "{value}";') - else: - # Country-level: match any subdivision of this country - vars_lines.append(f' ~^{re.escape(locale)}- "{value}";') - # Also match when subdivision is absent ("CC-") - vars_lines.append(f' "{locale}-" "{value}";') - - vars_lines.append("}") - vars_lines.append("") - - # Aggregate rule variables into the final per-repo variable. - # The first non-empty rule variable wins. - rule_vars = [f"${var}_r{rj}" for rj in range(len(rules))] - vars_lines.append(f"# Final decision variable for {path}") - vars_lines.append(f"map $geoip2_region_key ${var} {{") - vars_lines.append(f' default "";') - - for locale_list, status_body in _aggregate_locales(rules): - for locale in locale_list: - if "-" in locale: - vars_lines.append(f' "{locale}" "{status_body}";') - else: - vars_lines.append(f' ~^{re.escape(locale)}- "{status_body}";') - vars_lines.append(f' "{locale}-" "{status_body}";') - - vars_lines.append("}") - vars_lines.append("") - - # ── Location block ──────────────────────────────────────────────────── - # Intercept // and any sub-paths. - # nginx location matching: we use a case-sensitive prefix match. - # Git HTTPS also accesses //.git — covered by the prefix. - loc_lines.append(f"# Geo-block for {path}") - loc_lines.append(f"location ^~ {path} {{") - loc_lines.append(f" if (${var} != \"\") {{") - # Split "status:body" at runtime using map — but nginx `if` can't do - # string splitting, so we embed status and body as separate variables. - # We use a nested map approach: the decision var encodes both, and we - # resolve them with two additional map lookups. - loc_lines.append(f" set $__status ${var}_status;") - loc_lines.append(f" set $__body ${var}_body;") - loc_lines.append(f" return $__status \"$__body\";") - loc_lines.append(f" }}") - loc_lines.append(f" # No block — fall through to main proxy") - loc_lines.append(f" proxy_pass http://forgejo:3000;") - loc_lines.append(f" proxy_set_header Host $host;") - loc_lines.append(f" proxy_set_header X-Real-IP $remote_addr;") - loc_lines.append(f" proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;") - loc_lines.append(f" proxy_set_header X-Forwarded-Proto $scheme;") - loc_lines.append(f" client_max_body_size 512m;") - loc_lines.append(f" proxy_request_buffering off;") - loc_lines.append(f" proxy_buffering off;") - loc_lines.append(f" proxy_read_timeout 600s;") - loc_lines.append(f" proxy_send_timeout 600s;") - loc_lines.append(f"}}") - loc_lines.append("") - - # Status and body split maps for this repo - vars_lines.append(f"# Status / body split maps for {path}") - vars_lines.append(f"map ${var} ${var}_status {{") - vars_lines.append(f' default 403;') - for locale_list, status_body in _aggregate_locales(rules): - status = status_body.split(":", 1)[0] - for locale in locale_list: - if "-" in locale: - vars_lines.append(f' "{status_body}" {status};') - break - else: - vars_lines.append(f' "~^{re.escape(status_body)}" {status};') - # Simpler: map the encoded value directly - vars_lines = _replace_split_maps(vars_lines, var, rules) - vars_lines.append("") - - return ( - "\n".join(maps_lines), - "\n".join(vars_lines), - "\n".join(loc_lines), - ) - - -def _aggregate_locales(rules: list[dict]) -> list[tuple[list[str], str]]: - """Return [(locale_list, 'status:body'), …] for all rules.""" - result = [] - for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - result.append((rule.get("locales", []), f"{status}:{body}")) - return result - - -def _replace_split_maps(vars_lines: list[str], var: str, rules: list[dict]) -> list[str]: - """ - Replace the incomplete split-map stubs with correct status+body maps. - We rebuild the tail of vars_lines for the current repo. - """ - # Remove any partial split map lines we may have added above - while vars_lines and ( - vars_lines[-1].startswith(f"map ${var}_status") or - vars_lines[-1].startswith(f" ") or - vars_lines[-1] in ("}", "") - ): - last = vars_lines[-1] - vars_lines.pop() - if last.startswith(f"map ${var}_status"): - break - - # Status map - vars_lines.append(f"map ${var} ${var}_status {{") - vars_lines.append(f' default 403;') - seen_sv: set[str] = set() - for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - encoded = f"{status}:{body}" - if encoded not in seen_sv: - vars_lines.append(f' "{encoded}" {status};') - seen_sv.add(encoded) - vars_lines.append("}") - vars_lines.append("") - - # Body map - vars_lines.append(f"map ${var} ${var}_body {{") - vars_lines.append(f' default "Blocked";') - seen_bv: set[str] = set() - for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - encoded = f"{status}:{body}" - if encoded not in seen_bv: - vars_lines.append(f' "{encoded}" "{body}";') - seen_bv.add(encoded) - vars_lines.append("}") - - return vars_lines - - -# ── Clean renderer (replaces the incremental one above) ─────────────────────── - def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: """ - Cleanly render all three config files. Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf). + + For each repo we emit: + • One map per distinct status code: + map $geoip2_region_key $geoblock__ { ... } + Value is the escaped body string when blocked, "" otherwise. + • One location block with one `if` per distinct status code: + if ($geoblock__ != "") { return "...body..."; } """ repos: list[dict] = rules_data.get("repos", []) - header = "# Generated by geoblock_watcher — do not edit manually.\n\n" vars_blocks: list[str] = [] loc_blocks: list[str] = [] for repo in repos: - path: str = repo["path"].rstrip("/") - var: str = _var_name(path) + path: str = repo["path"].rstrip("/") + base_var: str = _var_name(path) rules: list[dict] = repo.get("rules", []) - # ── Per-repo decision map ────────────────────────────────────────────── - # Maps the compound GeoIP region key to "status:escapedBody" or "". - vars_blocks.append(f"# Decision map for {path}") - vars_blocks.append(f"map $geoip2_region_key ${var} {{") - vars_blocks.append(f' default "";') - - # State-level rules first (more specific → rendered first) + # Collect (locale, status, body) triples; group by status code + # status_map: {status_int: [(locale, body_escaped), ...]} + status_map: dict[int, list[tuple[str, str]]] = defaultdict(list) for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - encoded = f"{status}:{body}" + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) for locale in rule.get("locales", []): - locale = locale.strip() + status_map[status].append((locale.strip(), body)) + + # ── One map variable per distinct status code ────────────────────── + for status, entries in status_map.items(): + var = f"{base_var}_{status}" + vars_blocks.append(f"# {path} — HTTP {status}") + vars_blocks.append(f"map $geoip2_region_key ${var} {{") + vars_blocks.append(f' default "";') + + # State-level rules first (more specific) + for locale, body in entries: if "-" in locale: - vars_blocks.append(f' "{locale}" "{encoded}";') + vars_blocks.append(f' "{locale}" "{body}";') - # Country-level rules second - for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - encoded = f"{status}:{body}" - for locale in rule.get("locales", []): - locale = locale.strip() + # Country-level rules second + for locale, body in entries: if "-" not in locale: - # nginx map supports regex; match "CC-" and "CC-" - vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{encoded}";') + vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{body}";') - vars_blocks.append("}") - vars_blocks.append("") + vars_blocks.append("}") + vars_blocks.append("") - # ── Status split map ─────────────────────────────────────────────────── - vars_blocks.append(f"map ${var} ${var}_status {{") - vars_blocks.append(f" default 403;") - seen: set[str] = set() - for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - encoded = f"{status}:{body}" - if encoded not in seen: - vars_blocks.append(f' "{encoded}" {status};') - seen.add(encoded) - vars_blocks.append("}") - vars_blocks.append("") - - # ── Body split map ───────────────────────────────────────────────────── - vars_blocks.append(f"map ${var} ${var}_body {{") - vars_blocks.append(f' default "Blocked";') - seen = set() - for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) - encoded = f"{status}:{body}" - if encoded not in seen: - vars_blocks.append(f' "{encoded}" "{body}";') - seen.add(encoded) - vars_blocks.append("}") - vars_blocks.append("") - - # ── Location block ───────────────────────────────────────────────────── + # ── Location block ───────────────────────────────────────────────── loc_blocks.append(f"# Geo-block for {path}") loc_blocks.append(f"location ^~ {path} {{") - loc_blocks.append(f" if (${var} != \"\") {{") - loc_blocks.append(f" return ${var}_status \"${var}_body\";") - loc_blocks.append(f" }}") - loc_blocks.append(f" proxy_pass http://forgejo:3000;") - loc_blocks.append(f" proxy_set_header Host $host;") - loc_blocks.append(f" proxy_set_header X-Real-IP $remote_addr;") - loc_blocks.append(f" proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;") - loc_blocks.append(f" proxy_set_header X-Forwarded-Proto $scheme;") - loc_blocks.append(f" client_max_body_size 512m;") - loc_blocks.append(f" proxy_request_buffering off;") - loc_blocks.append(f" proxy_buffering off;") - loc_blocks.append(f" proxy_read_timeout 600s;") - loc_blocks.append(f" proxy_send_timeout 600s;") - loc_blocks.append(f"}}") + + for status in sorted(status_map.keys()): + var = f"{base_var}_{status}" + loc_blocks.append(f' if (${var} != "") {{') + loc_blocks.append(f' return {status} "${var}";') + loc_blocks.append(f' }}') + + loc_blocks.append(PROXY_DIRECTIVES) + loc_blocks.append("}") loc_blocks.append("") - # repo_maps.conf is now empty (we use inline regex maps in repo_vars.conf) - maps_conf = header + "# (Region key mapping is now done inline in repo_vars.conf)\n" + maps_conf = header + "# (Region key mapping done inline in repo_vars.conf)\n" vars_conf = header + "\n".join(vars_blocks) locs_conf = header + "\n".join(loc_blocks) - return maps_conf, vars_conf, locs_conf @@ -400,16 +180,14 @@ def apply_rules(force: bool = False) -> None: try: maps_conf, vars_conf, locs_conf = render_clean(rules_data) - except Exception as exc: # noqa: BLE001 + except Exception as exc: log.error("Render error: %s — skipping reload.", exc, exc_info=True) return OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - (OUTPUT_DIR / "repo_maps.conf").write_text(maps_conf) (OUTPUT_DIR / "repo_vars.conf").write_text(vars_conf) (OUTPUT_DIR / "repo_locations.conf").write_text(locs_conf) - log.info("Config snippets written to %s.", OUTPUT_DIR) _reload_nginx() @@ -417,7 +195,6 @@ def apply_rules(force: bool = False) -> None: def _reload_nginx() -> None: - """Send SIGHUP to the nginx container to trigger a graceful config reload.""" try: client = docker.from_env() containers = client.containers.list(filters={"name": NGINX_CONTAINER}) @@ -427,7 +204,7 @@ def _reload_nginx() -> None: container = containers[0] container.kill(signal="HUP") log.info("Sent SIGHUP to nginx container '%s'.", container.name) - except Exception as exc: # noqa: BLE001 + except Exception as exc: log.error("Failed to reload nginx: %s", exc, exc_info=True) @@ -440,21 +217,18 @@ class RulesHandler(FileSystemEventHandler): time.sleep(0.2) # debounce apply_rules() - # on_created handles the case where the file is replaced atomically on_created = on_modified def main() -> None: log.info("geoblock_watcher starting. Watching %s", RULES_FILE) - - # Initial render on startup apply_rules(force=True) observer = Observer() observer.schedule(RulesHandler(), str(RULES_FILE.parent), recursive=False) observer.start() - def _shutdown(signum, frame): # noqa: ANN001 + def _shutdown(signum, frame): log.info("Shutting down.") observer.stop() sys.exit(0) @@ -465,7 +239,7 @@ def main() -> None: try: while True: time.sleep(60) - apply_rules() # Periodic re-check (catches missed inotify events) + apply_rules() finally: observer.join()