Make geoblock watcher render in repos

This commit is contained in:
Albert Armea 2026-03-21 20:08:28 +00:00
parent e7036d99a8
commit 76effee03f

View file

@ -5,21 +5,16 @@ geoblock_watcher.py
Watches geo_rules.yml for changes, renders three nginx config snippets into Watches geo_rules.yml for changes, renders three nginx config snippets into
/app/geoblock/, then signals the nginx container to reload its configuration. /app/geoblock/, then signals the nginx container to reload its configuration.
Key constraint: nginx `return` requires a literal integer status code — it
cannot take a variable. We therefore render one map variable and one `if`
block *per distinct status code* per repo, so that every `return` statement
has a hardcoded integer.
Rendered files Rendered files
repo_maps.conf repo_maps.conf (stub logic lives in repo_vars.conf)
A single nginx `map` block body that maps the compound GeoIP key repo_vars.conf per-repo map blocks: region key body string (or "")
("CC-SUBDIV") a per-repo decision token. This file is included repo_locations.conf per-repo location blocks with one `if` per status code
inside the existing map block in nginx.conf.
repo_vars.conf
One `map` block per repo that translates the decision token to the
final "$geoblock_<var>" variable value ("" = allow, or "status:body").
repo_locations.conf
One `location` block per repo. When the variable is non-empty the
block immediately returns the encoded status + body; otherwise the
request falls through to the main proxy_pass location.
""" """
import hashlib import hashlib
@ -29,6 +24,7 @@ import re
import signal import signal
import sys import sys
import time import time
from collections import defaultdict
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
@ -45,327 +41,111 @@ logging.basicConfig(
) )
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Rules file that is rendered, and the directory the rendered nginx snippets
# are written to.
RULES_FILE = Path("/app/geo_rules.yml")
OUTPUT_DIR = Path("/app/geoblock")

# Name filter used to look up the nginx container that receives the reload
# signal; overridable through the NGINX_CONTAINER_NAME environment variable.
NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx")

# Proxy directives appended verbatim to every generated per-repo `location`
# block, so requests that are not geo-blocked are still forwarded upstream.
# Kept in one constant so the directive list is not repeated per renderer.
PROXY_DIRECTIVES = """\
proxy_pass http://forgejo:3000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
client_max_body_size 512m;
proxy_request_buffering off;
proxy_buffering off;
proxy_read_timeout 600s;
proxy_send_timeout 600s;"""
# ── Helpers ─────────────────────────────────────────────────────────────────── # ── Helpers ───────────────────────────────────────────────────────────────────
def _var_name(repo_path: str) -> str:
    """Convert a repo path like /alice/my-repo → geoblock_alice_my_repo.

    Leading and trailing slashes are dropped and every character that is not
    an ASCII letter or digit is replaced by ``_``; the result is prefixed
    with ``geoblock_``.

    NOTE: the mapping is lossy. Paths that differ only in punctuation, such
    as ``/a/b-c`` and ``/a/b_c``, produce the same name.
    """
    sanitised = re.sub(r"[^a-zA-Z0-9]", "_", repo_path.strip("/"))
    return f"geoblock_{sanitised}"
# Single-pass translation table for _escape_body: backslash and both quote
# characters get a leading backslash, a newline becomes a space.
_BODY_ESCAPES = str.maketrans(
    {
        "\\": "\\\\",
        '"': '\\"',
        "'": "\\'",
        "\n": " ",
    }
)


def _escape_body(body: str) -> str:
    """Escape a string for safe embedding in an nginx config string literal.

    Backslashes, double quotes and single quotes are backslash-escaped and
    each newline is replaced by a single space, so the result stays on one
    line inside a quoted nginx value.

    NOTE(review): ``$`` is left untouched, so a body containing ``$name``
    would presumably be treated by nginx as a variable reference — confirm
    that rule bodies come from a trusted source.
    """
    return body.translate(_BODY_ESCAPES)


def _token(repo_index: int, rule_index: int) -> str:
    """Unique short token used to link the map blocks together."""
    return f"repo{repo_index}_rule{rule_index}"
# ── Renderer ────────────────────────────────────────────────────────────────── # ── Renderer ──────────────────────────────────────────────────────────────────
def render(rules_data: dict[str, Any]) -> tuple[str, str, str]:
    """
    Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf) as strings.

    Compatibility entry point that delegates to ``render_clean``.

    The former incremental implementation could not produce a loadable
    configuration:

    * its location blocks emitted ``return $__status "$__body";``, but nginx
      `return` needs a literal integer status code and cannot take a
      variable;
    * it wrote a stub ``map ... _status {`` header that was never removed
      before the final status map was appended, leaving an unterminated
      duplicate ``map`` block;
    * it built several intermediate lists (map entries, per-rule variable
      names) that were never written anywhere.

    ``render_clean`` renders one map and one ``if`` per distinct status code
    instead, so this function now simply forwards to it. The return shape
    (three strings) is unchanged.
    """
    return render_clean(rules_data)
def _aggregate_locales(rules: list[dict]) -> list[tuple[list[str], str]]:
    """Pair every rule's locale list with its encoded ``"status:body"`` value.

    The status is coerced with ``int()`` and the body (default ``"Blocked"``)
    is passed through ``_escape_body``. A rule without a ``locales`` key
    contributes an empty list. Rules are returned in input order.
    """

    def _encode(rule: dict) -> str:
        # Same "status:body" encoding the renderer uses as a map value.
        code = int(rule["status"])
        text = _escape_body(str(rule.get("body", "Blocked")))
        return f"{code}:{text}"

    pairs = []
    for rule in rules:
        encoded = _encode(rule)
        pairs.append((rule.get("locales", []), encoded))
    return pairs
def _replace_split_maps(vars_lines: list[str], var: str, rules: list[dict]) -> list[str]:
    """
    Replace the incomplete split-map stubs with correct status+body maps.

    ``vars_lines`` is modified in place and also returned. If its tail is a
    partially written ``map $<var> $<var>_status {`` block (header followed
    only by entry lines, ``}`` or blank lines), that partial block is removed
    first. A complete status map and a complete body map are then appended,
    each with one entry per distinct ``"status:body"`` value in ``rules``.

    Args:
        vars_lines: Lines rendered so far for repo_vars.conf.
        var: Per-repo variable name without the leading ``$``.
        rules: Rule dicts with ``status`` and optional ``body`` keys.

    Returns:
        The same ``vars_lines`` list, extended with the two maps.
    """
    # The stub header is "map $<var> $<var>_status {". Matching on
    # "map $<var>_status" (as before) never hit, so the stub header stayed in
    # place and a second, duplicate header was appended after it.
    stub_header = f"map ${var} ${var}_status"

    # Walk back over lines that look like the inside of the stub. Only drop
    # them when the stub header itself is reached; if some other line is hit
    # first there is no stub, and earlier blocks must be left untouched.
    for idx in range(len(vars_lines) - 1, -1, -1):
        line = vars_lines[idx]
        if line.startswith(stub_header):
            del vars_lines[idx:]
            break
        if not (line.startswith(" ") or line in ("}", "")):
            break

    # One pass over the rules; dict insertion order keeps first-seen order
    # and drops repeated "status:body" values.
    encoded_pairs: dict[str, tuple[int, str]] = {}
    for rule in rules:
        status = int(rule["status"])
        body = _escape_body(str(rule.get("body", "Blocked")))
        encoded_pairs.setdefault(f"{status}:{body}", (status, body))

    # Status map
    vars_lines.append(f"map ${var} ${var}_status {{")
    vars_lines.append(" default 403;")
    for encoded, (status, _body) in encoded_pairs.items():
        vars_lines.append(f' "{encoded}" {status};')
    vars_lines.append("}")
    vars_lines.append("")

    # Body map
    vars_lines.append(f"map ${var} ${var}_body {{")
    vars_lines.append(' default "Blocked";')
    for encoded, (_status, body) in encoded_pairs.items():
        vars_lines.append(f' "{encoded}" "{body}";')
    vars_lines.append("}")
    return vars_lines
# ── Clean renderer (replaces the incremental one above) ───────────────────────
def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]:
    """
    Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf).

    For each repo we emit:
      • One map per distinct status code:
          map $geoip2_region_key $geoblock_<repo>_<status> { ... }
        Value is the escaped body string when blocked, "" otherwise.
      • One location block with one `if` per distinct status code:
          if ($geoblock_<repo>_<status> != "") { return <status> "...body..."; }

    Every `return` therefore carries a literal integer status code.

    Within one status code a locale is emitted once (the first rule that
    names it wins) and blank locale strings are ignored, so the generated
    `map` never contains duplicate or empty keys.

    Args:
        rules_data: Parsed rules document. ``repos`` is a list of dicts with
            a ``path`` and optional ``rules``; each rule has a ``status``,
            optional ``body`` (default "Blocked") and optional ``locales``.

    Raises:
        KeyError: If a repo has no ``path`` or a rule has no ``status``.
        ValueError: If a rule's ``status`` cannot be converted to ``int``.

    NOTE(review): the `if` blocks are emitted in ascending status order, so
    when a country rule and a state rule of that country use different
    status codes, the lower code is checked first — confirm that this is the
    intended precedence.
    """
    repos: list[dict] = rules_data.get("repos", [])

    header = "# Generated by geoblock_watcher — do not edit manually.\n\n"

    vars_blocks: list[str] = []
    loc_blocks: list[str] = []

    for repo in repos:
        path: str = repo["path"].rstrip("/")
        base_var: str = _var_name(path)
        rules: list[dict] = repo.get("rules", [])

        # Group locales by status code.
        # status_map: {status_int: {locale: body_escaped}}
        # The inner dict keeps first-seen order and drops repeated locales.
        status_map: dict[int, dict[str, str]] = defaultdict(dict)
        for rule in rules:
            status = int(rule["status"])
            body = _escape_body(str(rule.get("body", "Blocked")))
            for raw_locale in rule.get("locales", []):
                locale = raw_locale.strip()
                if locale:
                    status_map[status].setdefault(locale, body)

        statuses = sorted(status_map)

        # ── One map variable per distinct status code ──────────────────────
        for status in statuses:
            entries = status_map[status]
            var = f"{base_var}_{status}"
            vars_blocks.append(f"# {path} — HTTP {status}")
            vars_blocks.append(f"map $geoip2_region_key ${var} {{")
            vars_blocks.append(' default "";')

            # State-level rules first (more specific)
            for locale, body in entries.items():
                if "-" in locale:
                    vars_blocks.append(f' "{locale}" "{body}";')

            # Country-level rules second: match "CC" alone or "CC-<anything>"
            for locale, body in entries.items():
                if "-" not in locale:
                    vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{body}";')

            vars_blocks.append("}")
            vars_blocks.append("")

        # ── Location block ─────────────────────────────────────────────────
        loc_blocks.append(f"# Geo-block for {path}")
        loc_blocks.append(f"location ^~ {path} {{")

        for status in statuses:
            var = f"{base_var}_{status}"
            loc_blocks.append(f' if (${var} != "") {{')
            loc_blocks.append(f' return {status} "${var}";')
            loc_blocks.append(" }")

        # Requests that are not blocked are proxied as usual.
        loc_blocks.append(PROXY_DIRECTIVES)
        loc_blocks.append("}")
        loc_blocks.append("")

    maps_conf = header + "# (Region key mapping done inline in repo_vars.conf)\n"
    vars_conf = header + "\n".join(vars_blocks)
    locs_conf = header + "\n".join(loc_blocks)

    return maps_conf, vars_conf, locs_conf
@ -400,16 +180,14 @@ def apply_rules(force: bool = False) -> None:
try: try:
maps_conf, vars_conf, locs_conf = render_clean(rules_data) maps_conf, vars_conf, locs_conf = render_clean(rules_data)
except Exception as exc: # noqa: BLE001 except Exception as exc:
log.error("Render error: %s — skipping reload.", exc, exc_info=True) log.error("Render error: %s — skipping reload.", exc, exc_info=True)
return return
OUTPUT_DIR.mkdir(parents=True, exist_ok=True) OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
(OUTPUT_DIR / "repo_maps.conf").write_text(maps_conf) (OUTPUT_DIR / "repo_maps.conf").write_text(maps_conf)
(OUTPUT_DIR / "repo_vars.conf").write_text(vars_conf) (OUTPUT_DIR / "repo_vars.conf").write_text(vars_conf)
(OUTPUT_DIR / "repo_locations.conf").write_text(locs_conf) (OUTPUT_DIR / "repo_locations.conf").write_text(locs_conf)
log.info("Config snippets written to %s.", OUTPUT_DIR) log.info("Config snippets written to %s.", OUTPUT_DIR)
_reload_nginx() _reload_nginx()
@ -417,7 +195,6 @@ def apply_rules(force: bool = False) -> None:
def _reload_nginx() -> None: def _reload_nginx() -> None:
"""Send SIGHUP to the nginx container to trigger a graceful config reload."""
try: try:
client = docker.from_env() client = docker.from_env()
containers = client.containers.list(filters={"name": NGINX_CONTAINER}) containers = client.containers.list(filters={"name": NGINX_CONTAINER})
@ -427,7 +204,7 @@ def _reload_nginx() -> None:
container = containers[0] container = containers[0]
container.kill(signal="HUP") container.kill(signal="HUP")
log.info("Sent SIGHUP to nginx container '%s'.", container.name) log.info("Sent SIGHUP to nginx container '%s'.", container.name)
except Exception as exc: # noqa: BLE001 except Exception as exc:
log.error("Failed to reload nginx: %s", exc, exc_info=True) log.error("Failed to reload nginx: %s", exc, exc_info=True)
@ -440,21 +217,18 @@ class RulesHandler(FileSystemEventHandler):
time.sleep(0.2) # debounce time.sleep(0.2) # debounce
apply_rules() apply_rules()
# on_created handles the case where the file is replaced atomically
on_created = on_modified on_created = on_modified
def main() -> None: def main() -> None:
log.info("geoblock_watcher starting. Watching %s", RULES_FILE) log.info("geoblock_watcher starting. Watching %s", RULES_FILE)
# Initial render on startup
apply_rules(force=True) apply_rules(force=True)
observer = Observer() observer = Observer()
observer.schedule(RulesHandler(), str(RULES_FILE.parent), recursive=False) observer.schedule(RulesHandler(), str(RULES_FILE.parent), recursive=False)
observer.start() observer.start()
def _shutdown(signum, frame): # noqa: ANN001 def _shutdown(signum, frame):
log.info("Shutting down.") log.info("Shutting down.")
observer.stop() observer.stop()
sys.exit(0) sys.exit(0)
@ -465,7 +239,7 @@ def main() -> None:
try: try:
while True: while True:
time.sleep(60) time.sleep(60)
apply_rules() # Periodic re-check (catches missed inotify events) apply_rules()
finally: finally:
observer.join() observer.join()