From 08d61b7ac72ce30d673e53de1f4a475d6cc05d9a Mon Sep 17 00:00:00 2001 From: Albert Armea Date: Sun, 22 Mar 2026 02:58:27 +0000 Subject: [PATCH] Add support for HTML responses when blocked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change written by Claude Code: ❯ Consider the geofencing rules defined at config/geo_rules.yml.example and consumed by geoblock_watcher/watcher.py. Make it so that you can pass in an HTML file response instead of a short plain text body. For this exercise, you may assume that the entire contents of the HTML (HTML, CSS, JS, image resources) will be included inline in the file. You may have to modify the docker-compose.yml to provide a new (read-only) bind-mount for these files. --- config/geo_rules.yml.example | 6 ++ config/geoblock_pages/.gitignore | 3 + docker-compose.yml | 2 + geoblock_watcher/watcher.py | 114 +++++++++++++++++++++++-------- 4 files changed, 97 insertions(+), 28 deletions(-) create mode 100644 config/geoblock_pages/.gitignore diff --git a/config/geo_rules.yml.example b/config/geo_rules.yml.example index 79026fd..5b88258 100644 --- a/config/geo_rules.yml.example +++ b/config/geo_rules.yml.example @@ -19,6 +19,11 @@ # is the semantically correct choice for legal/jurisdiction blocks. # body : Plain-text response body. Keep it short — it is embedded directly # in the nginx config as a string literal. +# body_file : Path to a self-contained HTML file (relative to config/geoblock_pages/) +# served as the response body instead of a plain-text string. The +# file must be self-contained (all CSS, JS, and image resources +# inlined) because it is served directly by nginx. +# Use either body or body_file — not both — for a given rule. # ───────────────────────────────────────────────────────────────────────────── repos: @@ -44,3 +49,4 @@ repos: # - locales: ["XX", "XX-YY"] # status: 403 # body: "Access restricted." 
+ # # body_file: blocked.html # OR: HTML file in config/geoblock_pages/ diff --git a/config/geoblock_pages/.gitignore b/config/geoblock_pages/.gitignore new file mode 100644 index 0000000..deb8ab0 --- /dev/null +++ b/config/geoblock_pages/.gitignore @@ -0,0 +1,3 @@ +# This directory is for site-specific pages that should not be checked in +* +!.gitignore diff --git a/docker-compose.yml b/docker-compose.yml index 3c0c489..4e2d590 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,6 +44,7 @@ services: # container start, substituting ${DOMAIN}. Mounting conf.d from the host # would shadow that rendered output and break the virtual host. - ./nginx/geoblock:/etc/nginx/geoblock:ro # rendered by geoblock_watcher + - ./config/geoblock_pages:/etc/nginx/geoblock_pages:ro # HTML block pages - ./certs/live:/etc/letsencrypt/live:ro - ./certs/archive:/etc/letsencrypt/archive:ro - ./certs/options-ssl-nginx.conf:/etc/letsencrypt/options-ssl-nginx.conf:ro @@ -90,6 +91,7 @@ services: restart: unless-stopped volumes: - ./config:/app/host:ro + - ./config/geoblock_pages:/app/geoblock_pages:ro # HTML block pages (validation) - ./nginx/geoblock:/app/geoblock # shared with nginx (rw here) - /var/run/docker.sock:/var/run/docker.sock networks: diff --git a/geoblock_watcher/watcher.py b/geoblock_watcher/watcher.py index e69ea33..7ec9bd5 100644 --- a/geoblock_watcher/watcher.py +++ b/geoblock_watcher/watcher.py @@ -41,9 +41,11 @@ logging.basicConfig( ) log = logging.getLogger(__name__) -RULES_FILE = Path("/app/host/geo_rules.yml") -OUTPUT_DIR = Path("/app/geoblock") -NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx") +RULES_FILE = Path("/app/host/geo_rules.yml") +OUTPUT_DIR = Path("/app/geoblock") +GEOBLOCK_PAGES_DIR = Path("/app/geoblock_pages") +NGINX_PAGES_ROOT = "/etc/nginx/geoblock_pages" +NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx") PROXY_DIRECTIVES = """\ proxy_pass http://forgejo:3000; @@ -78,6 +80,12 @@ def _escape_body(body: str) -> 
str: # ── Renderer ────────────────────────────────────────────────────────────────── +def _normalize_file_path(body_file: str) -> str: + """Ensure the path starts with / for use in nginx try_files.""" + path = body_file.strip() + return path if path.startswith("/") else f"/{path}" + + def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: """ Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf). @@ -85,46 +93,79 @@ def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: For each repo we emit: • One map per distinct status code: map $geoip2_region_key $geoblock_<repo>_<status> { ... } - Value is the escaped body string when blocked, "" otherwise. - • One location block with one `if` per distinct status code: - if ($geoblock_<repo>_<status> != "") { return <status> "...body..."; } + For text body rules, the value is the escaped body string when blocked. + For body_file rules, the value is "1" when blocked (a flag). + • One location block with one `if` per distinct status code. + Text body: if ($var != "") { return <status> "$var"; } + File body: error_page <status> @<var>_page; + if ($var != "") { return <status>; } + • For body_file rules, a named location outside the repo location block: + location @<var>_page { internal; root ...; try_files <file> =500; } """ repos: list[dict] = rules_data.get("repos", []) header = "# Generated by geoblock_watcher — do not edit manually.\n\n" - vars_blocks: list[str] = [] - loc_blocks: list[str] = [] + vars_blocks: list[str] = [] + loc_blocks: list[str] = [] + named_locs: list[str] = [] # file-serving named locations (server-level) for repo in repos: - path: str = repo["path"].rstrip("/") - base_var: str = _var_name(path) + path: str = repo["path"].rstrip("/") + base_var: str = _var_name(path) rules: list[dict] = repo.get("rules", []) - # Collect (locale, status, body) triples; group by status code - # status_map: {status_int: [(locale, body_escaped), ...]} - status_map: dict[int, list[tuple[str, str]]] = defaultdict(list) + # status_info: status -> {"is_file": bool, 
"file": str, "entries": [(locale, val)]} + status_info: dict[int, dict] = {} + for rule in rules: - status = int(rule["status"]) - body = _escape_body(str(rule.get("body", "Blocked"))) + status = int(rule["status"]) + is_file = "body_file" in rule + + if status not in status_info: + status_info[status] = { + "is_file": is_file, + "file": _normalize_file_path(rule["body_file"]) if is_file else "", + "entries": [], + } + else: + existing_is_file = status_info[status]["is_file"] + if existing_is_file != is_file: + log.warning( + "%s: status %d has mixed body/body_file rules — " + "treating all as %s.", + path, status, "body_file" if existing_is_file else "body", + ) + + if is_file: + if GEOBLOCK_PAGES_DIR.exists(): + full = GEOBLOCK_PAGES_DIR / rule["body_file"].lstrip("/") + if not full.exists(): + log.warning("body_file not found: %s", full) + value = "1" + else: + value = _escape_body(str(rule.get("body", "Blocked"))) + for locale in rule.get("locales", []): - status_map[status].append((locale.strip(), body)) + status_info[status]["entries"].append((locale.strip(), value)) # ── One map variable per distinct status code ────────────────────── - for status, entries in status_map.items(): - var = f"{base_var}_{status}" + for status, info in status_info.items(): + var = f"{base_var}_{status}" + entries = info["entries"] + vars_blocks.append(f"# {path} — HTTP {status}") vars_blocks.append(f"map $geoip2_region_key ${var} {{") vars_blocks.append(f' default "";') # State-level rules first (more specific) - for locale, body in entries: + for locale, val in entries: if "-" in locale: - vars_blocks.append(f' "{locale}" "{body}";') + vars_blocks.append(f' "{locale}" "{val}";') # Country-level rules second - for locale, body in entries: + for locale, val in entries: if "-" not in locale: - vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{body}";') + vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{val}";') vars_blocks.append("}") vars_blocks.append("") @@ -133,11 
+174,28 @@ def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: loc_blocks.append(f"# Geo-block for {path}") loc_blocks.append(f"location ^~ {path} {{") - for status in sorted(status_map.keys()): - var = f"{base_var}_{status}" - loc_blocks.append(f' if (${var} != "") {{') - loc_blocks.append(f' return {status} "${var}";') - loc_blocks.append(f' }}') + for status in sorted(status_info.keys()): + info = status_info[status] + var = f"{base_var}_{status}" + + if info["is_file"]: + loc_blocks.append(f' error_page {status} @{var}_page;') + loc_blocks.append(f' if (${var} != "") {{') + loc_blocks.append(f' return {status};') + loc_blocks.append(f' }}') + + named_locs.append(f"# HTML error page for {path} — HTTP {status}") + named_locs.append(f"location @{var}_page {{") + named_locs.append(f" internal;") + named_locs.append(f" default_type text/html;") + named_locs.append(f" root {NGINX_PAGES_ROOT};") + named_locs.append(f" try_files {info['file']} =500;") + named_locs.append(f"}}") + named_locs.append("") + else: + loc_blocks.append(f' if (${var} != "") {{') + loc_blocks.append(f' return {status} "${var}";') + loc_blocks.append(f' }}') loc_blocks.append(PROXY_DIRECTIVES) loc_blocks.append("}") @@ -145,7 +203,7 @@ def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: maps_conf = header + "# (Region key mapping done inline in repo_vars.conf)\n" vars_conf = header + "\n".join(vars_blocks) - locs_conf = header + "\n".join(loc_blocks) + locs_conf = header + "\n".join(loc_blocks + named_locs) return maps_conf, vars_conf, locs_conf