diff --git a/config/geo_rules.yml.example b/config/geo_rules.yml.example index 79026fd..5b88258 100644 --- a/config/geo_rules.yml.example +++ b/config/geo_rules.yml.example @@ -19,6 +19,11 @@ # is the semantically correct choice for legal/jurisdiction blocks. # body : Plain-text response body. Keep it short — it is embedded directly # in the nginx config as a string literal. +# body_file : Path to a self-contained HTML file (relative to config/geoblock_pages/) +# served as the response body instead of a plain-text string. The +# file must be self-contained (all CSS, JS, and image resources +# inlined) because it is served directly by nginx. +# Use either body or body_file — not both — for a given rule. # ───────────────────────────────────────────────────────────────────────────── repos: @@ -44,3 +49,4 @@ repos: # - locales: ["XX", "XX-YY"] # status: 403 # body: "Access restricted." + # # body_file: blocked.html # OR: HTML file in config/geoblock_pages/ diff --git a/config/geoblock_pages/.gitignore b/config/geoblock_pages/.gitignore new file mode 100644 index 0000000..deb8ab0 --- /dev/null +++ b/config/geoblock_pages/.gitignore @@ -0,0 +1,3 @@ +# This directory is for site-specific pages that should not be checked in +* +!.gitignore diff --git a/docker-compose.yml b/docker-compose.yml index 3c0c489..4e2d590 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,6 +44,7 @@ services: # container start, substituting ${DOMAIN}. Mounting conf.d from the host # would shadow that rendered output and break the virtual host. - ./nginx/geoblock:/etc/nginx/geoblock:ro # rendered by geoblock_watcher + - ./config/geoblock_pages:/etc/nginx/geoblock_pages:ro # HTML block pages - ./certs/live:/etc/letsencrypt/live:ro - ./certs/archive:/etc/letsencrypt/archive:ro - ./certs/options-ssl-nginx.conf:/etc/letsencrypt/options-ssl-nginx.conf:ro @@ -90,6 +91,7 @@ services: restart: unless-stopped volumes: - ./config:/app/host:ro + - ./config/geoblock_pages:/app/geoblock_pages:ro # HTML block pages (validation) - ./nginx/geoblock:/app/geoblock # shared with nginx (rw here) - /var/run/docker.sock:/var/run/docker.sock networks: diff --git a/geoblock_watcher/watcher.py b/geoblock_watcher/watcher.py index e69ea33..7ec9bd5 100644 --- a/geoblock_watcher/watcher.py +++ b/geoblock_watcher/watcher.py @@ -41,9 +41,11 @@ logging.basicConfig( ) log = logging.getLogger(__name__) -RULES_FILE = Path("/app/host/geo_rules.yml") -OUTPUT_DIR = Path("/app/geoblock") -NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx") +RULES_FILE = Path("/app/host/geo_rules.yml") +OUTPUT_DIR = Path("/app/geoblock") +GEOBLOCK_PAGES_DIR = Path("/app/geoblock_pages") +NGINX_PAGES_ROOT = "/etc/nginx/geoblock_pages" +NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx") PROXY_DIRECTIVES = """\ proxy_pass http://forgejo:3000; @@ -78,6 +80,12 @@ def _escape_body(body: str) -> str: # ── Renderer ────────────────────────────────────────────────────────────────── +def _normalize_file_path(body_file: str) -> str: + """Ensure the path starts with / for use in nginx try_files.""" + path = body_file.strip() + return path if path.startswith("/") else f"/{path}" + + def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: """ Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf). @@ -85,46 +93,79 @@ def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: For each repo we emit: • One map per distinct status code: map $geoip2_region_key $geoblock__ { ... } - Value is the escaped body string when blocked, "" otherwise. - • One location block with one `if` per distinct status code: - if ($geoblock__ != "") { return "...body..."; } + For text body rules, the value is the escaped body string when blocked. + For body_file rules, the value is "1" when blocked (a flag). + • One location block with one `if` per distinct status code. + Text body: if ($var != "") { return "$var"; } + File body: error_page @_page; + if ($var != "") { return ; } + • For body_file rules, a named location outside the repo location block: + location @_page { internal; root ...; try_files =500; } """ repos: list[dict] = rules_data.get("repos", []) header = "# Generated by geoblock_watcher — do not edit manually.\n\n" - vars_blocks: list[str] = [] - loc_blocks: list[str] = [] + vars_blocks: list[str] = [] + loc_blocks: list[str] = [] + named_locs: list[str] = [] # file-serving named locations (server-level) for repo in repos: - path: str = repo["path"].rstrip("/") - base_var: str = _var_name(path) + path: str = repo["path"].rstrip("/") + base_var: str = _var_name(path) rules: list[dict] = repo.get("rules", []) - # Collect (locale, status, body) triples; group by status code - # status_map: {status_int: [(locale, body_escaped), ...]} - status_map: dict[int, list[tuple[str, str]]] = defaultdict(list) + # status_info: status -> {"is_file": bool, "file": str, "entries": [(locale, val)]} + status_info: dict[int, dict] = {} + for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) + status = int(rule["status"]) + is_file = "body_file" in rule + + if status not in status_info: + status_info[status] = { + "is_file": is_file, + "file": _normalize_file_path(rule["body_file"]) if is_file else "", + "entries": [], + } + else: + existing_is_file = status_info[status]["is_file"] + if existing_is_file != is_file: + log.warning( + "%s: status %d has mixed body/body_file rules — " + "treating all as %s.", + path, status, "body_file" if existing_is_file else "body", + ) + + if is_file: + if GEOBLOCK_PAGES_DIR.exists(): + full = GEOBLOCK_PAGES_DIR / rule["body_file"].lstrip("/") + if not full.exists(): + log.warning("body_file not found: %s", full) + value = "1" + else: + value = _escape_body(str(rule.get("body", "Blocked"))) + for locale in rule.get("locales", []): - status_map[status].append((locale.strip(), body)) + status_info[status]["entries"].append((locale.strip(), value)) # ── One map variable per distinct status code ────────────────────── - for status, entries in status_map.items(): - var = f"{base_var}_{status}" + for status, info in status_info.items(): + var = f"{base_var}_{status}" + entries = info["entries"] + vars_blocks.append(f"# {path} — HTTP {status}") vars_blocks.append(f"map $geoip2_region_key ${var} {{") vars_blocks.append(f' default "";') # State-level rules first (more specific) - for locale, body in entries: + for locale, val in entries: if "-" in locale: - vars_blocks.append(f' "{locale}" "{body}";') + vars_blocks.append(f' "{locale}" "{val}";') # Country-level rules second - for locale, body in entries: + for locale, val in entries: if "-" not in locale: - vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{body}";') + vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{val}";') vars_blocks.append("}") vars_blocks.append("") @@ -133,11 +174,28 @@ def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: loc_blocks.append(f"# Geo-block for {path}") loc_blocks.append(f"location ^~ {path} {{") - for status in sorted(status_map.keys()): - var = f"{base_var}_{status}" - loc_blocks.append(f' if (${var} != "") {{') - loc_blocks.append(f' return {status} "${var}";') - loc_blocks.append(f' }}') + for status in sorted(status_info.keys()): + info = status_info[status] + var = f"{base_var}_{status}" + + if info["is_file"]: + loc_blocks.append(f' error_page {status} @{var}_page;') + loc_blocks.append(f' if (${var} != "") {{') + loc_blocks.append(f' return {status};') + loc_blocks.append(f' }}') + + named_locs.append(f"# HTML error page for {path} — HTTP {status}") + named_locs.append(f"location @{var}_page {{") + named_locs.append(f" internal;") + named_locs.append(f" default_type text/html;") + named_locs.append(f" root {NGINX_PAGES_ROOT};") + named_locs.append(f" try_files {info['file']} =500;") + named_locs.append(f"}}") + named_locs.append("") + else: + loc_blocks.append(f' if (${var} != "") {{') + loc_blocks.append(f' return {status} "${var}";') + loc_blocks.append(f' }}') loc_blocks.append(PROXY_DIRECTIVES) loc_blocks.append("}") @@ -145,7 +203,7 @@ def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: maps_conf = header + "# (Region key mapping done inline in repo_vars.conf)\n" vars_conf = header + "\n".join(vars_blocks) - locs_conf = header + "\n".join(loc_blocks) + locs_conf = header + "\n".join(loc_blocks + named_locs) return maps_conf, vars_conf, locs_conf