From cf99cd50f2e8f1c599bbb2edda8913374939776c Mon Sep 17 00:00:00 2001 From: Albert Armea Date: Sat, 21 Mar 2026 18:34:50 +0000 Subject: [PATCH] Initial commit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change written by Claude Sonnet 4.6: I need to set up a Git hosting service for personal projects where certain repositories need to be blocked using the locale corresponding to the user’s IP address. Here are my constraints: * The entire system will be running on a single VPS instance. I should be able to `docker compose up -d` the whole thing. * For each repository that this feature is enabled, I need to be able to set the blocked locales down to the state level, along with a custom HTTP status code and response body. * You may assume that the IP address of the request is where it actually came from — for this exercise, if the user uses a VPN to bypass the restriction, that is on them. * To simplify a reverse proxy setup, you may assume that all Git operations will happen over HTTPS. I will firewall off SSH access. * I will be using Let's Encrypt for HTTPS. Some suggestions from prior research: * nginx seems like a reasonable reverse proxy that supports all of the requirements, but you may use a different one if it is simpler to implement or maintain. * I can obtain a MaxMind API key to get a geo-IP lookup table. If you use this, you will need to add a service that automatically retrieves the table at a reasonable frequency. * Forgejo seems like a reasonable, lightweight Git service, but you may use a different one if you’re aware of one that actually supports these requirements out of the box. Write me a production-ready `docker-compose.yml` and supporting scripts or configuration scaffolding for me to implement this. 
--- .env.example | 12 + .gitignore | 1 + README.md | 247 +++++++++++++++ bootstrap_certs.sh | 91 ++++++ docker-compose.yml | 123 ++++++++ geo_rules.yml | 46 +++ geoblock_watcher/Dockerfile | 8 + geoblock_watcher/watcher.py | 474 +++++++++++++++++++++++++++++ nginx/Dockerfile | 56 ++++ nginx/conf.d/git.conf | 56 ++++ nginx/geoblock/repo_locations.conf | 3 + nginx/geoblock/repo_maps.conf | 7 + nginx/geoblock/repo_vars.conf | 3 + nginx/nginx.conf | 54 ++++ 14 files changed, 1181 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100644 bootstrap_certs.sh create mode 100644 docker-compose.yml create mode 100644 geo_rules.yml create mode 100644 geoblock_watcher/Dockerfile create mode 100644 geoblock_watcher/watcher.py create mode 100644 nginx/Dockerfile create mode 100644 nginx/conf.d/git.conf create mode 100644 nginx/geoblock/repo_locations.conf create mode 100644 nginx/geoblock/repo_maps.conf create mode 100644 nginx/geoblock/repo_vars.conf create mode 100644 nginx/nginx.conf diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..556e45b --- /dev/null +++ b/.env.example @@ -0,0 +1,12 @@ +# Copy this file to .env and fill in your values. +# NEVER commit .env to version control. 
+ +# Your public domain name (used by nginx and Forgejo) +DOMAIN=git.example.com + +# MaxMind GeoLite2 credentials — sign up at https://www.maxmind.com/en/geolite2/signup +MAXMIND_ACCOUNT_ID=your_account_id +MAXMIND_LICENSE_KEY=your_license_key + +# Set to true after initial setup to prevent public registration +DISABLE_REGISTRATION=true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4c49bd7 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.env diff --git a/README.md b/README.md new file mode 100644 index 0000000..4991429 --- /dev/null +++ b/README.md @@ -0,0 +1,247 @@ +# Self-hosted Git (Forgejo) with State-Level Geo-Blocking + +A single-VPS Docker Compose stack providing: + +- **Forgejo** — lightweight, Gitea-compatible Git hosting +- **nginx** — reverse proxy with TLS termination and GeoIP2 blocking +- **MaxMind GeoLite2** — IP → country + state/province database (auto-updated) +- **geoblock_watcher** — watches `geo_rules.yml` and hot-reloads nginx when rules change +- **Certbot** — automatic Let's Encrypt certificate renewal + +--- + +## Directory Layout + +``` +. 
+├── docker-compose.yml +├── .env.example ← copy to .env and fill in +├── geo_rules.yml ← ✏️ edit this to configure geo-blocking +├── bootstrap_certs.sh ← run once before first `docker compose up` +├── nginx/ +│ ├── Dockerfile ← builds nginx + GeoIP2 dynamic module +│ ├── nginx.conf ← main nginx config (loads GeoIP2 module) +│ ├── conf.d/ +│ │ └── git.conf ← virtual host (HTTP→HTTPS redirect + proxy) +│ └── geoblock/ ← rendered by geoblock_watcher at runtime +│ ├── repo_maps.conf +│ ├── repo_vars.conf +│ └── repo_locations.conf +└── geoblock_watcher/ + ├── Dockerfile + └── watcher.py +``` + +--- + +## Prerequisites + +| Requirement | Notes | +|---|---| +| Docker Engine ≥ 26 + Compose v2 | `docker compose version` | +| A public domain name | DNS A record → your VPS IP | +| Ports 80 and 443 open | Firewall / security group | +| MaxMind account | Free — [sign up here](https://www.maxmind.com/en/geolite2/signup) | +| `openssl` on the host | Used by `bootstrap_certs.sh` for the dummy cert | + +--- + +## Quick Start + +### 1. Configure environment + +```bash +cp .env.example .env +$EDITOR .env # fill in DOMAIN, MAXMIND_*, LETSENCRYPT_EMAIL +``` + +`.env` variables: + +| Variable | Description | +|---|---| +| `DOMAIN` | Your public domain, e.g. `git.example.com` | +| `LETSENCRYPT_EMAIL` | Email for Let's Encrypt expiry notices | +| `MAXMIND_ACCOUNT_ID` | From your MaxMind account portal | +| `MAXMIND_LICENSE_KEY` | From your MaxMind account portal | +| `DISABLE_REGISTRATION` | Set `true` after creating your admin account | + +### 2. Bootstrap TLS certificates (first run only) + +```bash +chmod +x bootstrap_certs.sh +./bootstrap_certs.sh +``` + +This will: +1. Create a temporary self-signed cert so nginx can start +2. Bring up the stack +3. Obtain a real Let's Encrypt cert via the ACME webroot challenge +4. Reload nginx with the real cert +5. Print next steps + +### 3. Complete Forgejo setup + +Visit `https://your-domain/` and complete the web installer. 
Create your +admin account. Then set `DISABLE_REGISTRATION=true` in `.env` and run: + +```bash +docker compose up -d forgejo +``` + +### 4. Configure geo-blocking + +Edit `geo_rules.yml` — the watcher will detect the change within seconds and +hot-reload nginx automatically. No restart needed. + +--- + +## Geo-Blocking Configuration + +`geo_rules.yml` is the single source of truth. Example: + +```yaml +repos: + + - path: /alice/secret-project + rules: + # Block California and Texas with HTTP 451 + - locales: ["US-CA", "US-TX"] + status: 451 + body: "This repository is unavailable in your jurisdiction." + + # Block all of Germany and France with HTTP 403 + - locales: ["DE", "FR"] + status: 403 + body: "Access to this repository is restricted in your country." + + - path: /alice/another-repo + rules: + - locales: ["CN", "RU"] + status: 403 + body: "Access denied." +``` + +### Locale format + +| Format | Example | Matches | +|---|---|---| +| Country (ISO 3166-1 α-2) | `"US"` | All IPs in the United States | +| Country + State (ISO 3166-2) | `"US-CA"` | IPs in California | + +State-level rules take precedence over country-level rules for the same repo. 
+ +**Common US state codes:** `US-AL` `US-AK` `US-AZ` `US-AR` `US-CA` `US-CO` +`US-CT` `US-DE` `US-FL` `US-GA` `US-HI` `US-ID` `US-IL` `US-IN` `US-IA` +`US-KS` `US-KY` `US-LA` `US-ME` `US-MD` `US-MA` `US-MI` `US-MN` `US-MS` +`US-MO` `US-MT` `US-NE` `US-NV` `US-NH` `US-NJ` `US-NM` `US-NY` `US-NC` +`US-ND` `US-OH` `US-OK` `US-OR` `US-PA` `US-RI` `US-SC` `US-SD` `US-TN` +`US-TX` `US-UT` `US-VT` `US-VA` `US-WA` `US-WV` `US-WI` `US-WY` + +For other countries, find subdivision codes at: +https://www.iso.org/obp/ui/#search (search for the country, then see "Subdivision") + +### HTTP status codes + +| Code | Meaning | When to use | +|---|---|---| +| `403` | Forbidden | General access restriction | +| `451` | Unavailable For Legal Reasons | Legal / jurisdictional block (RFC 7725) | + +### Hot reload + +The watcher polls every 60 seconds and also reacts to inotify events +immediately. After saving `geo_rules.yml`, nginx will reload within seconds. +No traffic is dropped — nginx does a graceful configuration reload (SIGHUP). + +--- + +## GeoIP Database Updates + +The `geoipupdate` container fetches a fresh **GeoLite2-City** database every +72 hours (MaxMind publishes updates twice a week). The database is stored in +the `geoip_db` Docker volume and mounted read-only into nginx. + +The GeoIP2 module memory-maps the database when nginx loads its configuration, +so a replaced file is only picked up after a reload +(`docker compose exec nginx nginx -s reload`) — unless the `geoip2` block in +`nginx/nginx.conf` sets `auto_reload <interval>`, in which case nginx re-opens +the file on its own once it notices the change. + +--- + +## Certificate Renewal + +The `certbot` container runs `certbot renew` every 12 hours. 
When a +certificate is renewed, run: + +```bash +docker compose exec nginx nginx -s reload +``` + +Or add this as a cron job on the host (`-T` disables TTY allocation, which +cron does not provide): + +```cron +0 */12 * * * docker compose -f /path/to/docker-compose.yml exec -T nginx nginx -s reload +``` + +--- + +## Operations + +### View logs + +```bash +docker compose logs -f nginx # access + error logs +docker compose logs -f geoblock_watcher +docker compose logs -f forgejo +docker compose logs -f geoipupdate +``` + +### Test geo-blocking (from a blocked region) + +Use a proxy or VPN to send a request from a blocked locale. Before testing, +confirm that nginx has accepted and loaded the rendered rules: + +```bash +# Verify nginx config is valid after a rules change +docker compose exec nginx nginx -t + +# Force a manual nginx reload +docker compose exec nginx nginx -s reload +``` + +### Verify the GeoIP database is loaded + +```bash +docker compose exec nginx nginx -T | grep geoip2 +``` + +### Check which database version is in use + +```bash +docker compose exec geoipupdate cat /usr/share/GeoIP/GeoLite2-City_*/COPYRIGHT_AND_LICENSE +``` + +--- + +## Security Notes + +- **SSH is disabled** in Forgejo; all Git operations use HTTPS. +- **Registration is disabled** by default after initial setup — only the admin + can create accounts. +- nginx **does not forward** `X-Forwarded-For` from downstream; it sets it + from `$remote_addr` (the actual connected IP). This is intentional — we + explicitly trust the direct connection IP as stated in the requirements. +- The `docker.sock` mount on `geoblock_watcher` is the minimum necessary + to send SIGHUP to the nginx container. If this is a concern, you can + replace it with a small privileged sidecar that only accepts a reload signal. + +--- + +## Troubleshooting + +| Symptom | Check | +|---|---| +| nginx won't start | `docker compose logs nginx` — likely a config syntax error | +| GeoIP variables always empty | Is the `geoip_db` volume populated? 
Check `docker compose logs geoipupdate` | +| Rules not applied | Check `docker compose logs geoblock_watcher` — look for YAML parse errors | +| Certificate errors | Ensure port 80 is open and DNS resolves before running `bootstrap_certs.sh` | +| 502 Bad Gateway | Forgejo not healthy yet — check `docker compose logs forgejo` | diff --git a/bootstrap_certs.sh b/bootstrap_certs.sh new file mode 100644 index 0000000..7d04a64 --- /dev/null +++ b/bootstrap_certs.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# bootstrap_certs.sh +# ───────────────────────────────────────────────────────────────────────────── +# Run this ONCE before `docker compose up -d` to obtain the initial Let's +# Encrypt certificate. nginx must be able to serve the ACME challenge, so we +# bring up only the services needed for that, run certbot, then download the +# Certbot recommended TLS options, and finally start everything. +# +# Prerequisites: +# • docker compose v2 installed +# • DNS for $DOMAIN already pointing to this server's IP +# • Ports 80 and 443 open in your firewall +# • .env file present (copy from .env.example and fill in) +# ───────────────────────────────────────────────────────────────────────────── + +set -euo pipefail + +if [[ ! -f .env ]]; then + echo "ERROR: .env file not found. Copy .env.example → .env and fill in your values." + exit 1 +fi + +# shellcheck disable=SC1091 +source .env + +DOMAIN="${DOMAIN:?DOMAIN must be set in .env}" +EMAIL="${LETSENCRYPT_EMAIL:?LETSENCRYPT_EMAIL must be set in .env}" +CERTS_DIR="./certs" + +echo "==> Creating certificate directory structure..." +mkdir -p "${CERTS_DIR}/live/${DOMAIN}" +mkdir -p "${CERTS_DIR}/archive" + +# ── Download Certbot recommended TLS options ────────────────────────────────── +if [[ ! -f "${CERTS_DIR}/options-ssl-nginx.conf" ]]; then + echo "==> Downloading recommended TLS options..." 
+ curl -sSL \ + "https://raw.githubusercontent.com/certbot/certbot/master/certbot-nginx/certbot_nginx/_internal/tls_configs/options-ssl-nginx.conf" \ + -o "${CERTS_DIR}/options-ssl-nginx.conf" +fi + +if [[ ! -f "${CERTS_DIR}/ssl-dhparams.pem" ]]; then + echo "==> Downloading DH parameters..." + curl -sSL \ + "https://raw.githubusercontent.com/certbot/certbot/master/certbot/certbot/ssl-dhparams.pem" \ + -o "${CERTS_DIR}/ssl-dhparams.pem" +fi + +# ── Create a dummy certificate so nginx can start (needed for ACME challenge) ─ +DUMMY_LIVE="${CERTS_DIR}/live/${DOMAIN}" +if [[ ! -f "${DUMMY_LIVE}/fullchain.pem" ]]; then + echo "==> Generating temporary self-signed certificate..." + openssl req -x509 -nodes -newkey rsa:4096 -days 1 \ + -keyout "${DUMMY_LIVE}/privkey.pem" \ + -out "${DUMMY_LIVE}/fullchain.pem" \ + -subj "/CN=${DOMAIN}" +fi + +# ── Start nginx (and dependencies) ─────────────────────────────────────────── +echo "==> Starting nginx with temporary certificate..." +docker compose up -d nginx forgejo geoipupdate geoblock_watcher + +echo "==> Waiting for nginx to be ready..." +sleep 5 + +# ── Obtain the real certificate via webroot challenge ──────────────────────── +echo "==> Requesting Let's Encrypt certificate for ${DOMAIN}..." +docker compose run --rm certbot certonly \ + --webroot \ + --webroot-path /var/www/certbot \ + --email "${EMAIL}" \ + --agree-tos \ + --no-eff-email \ + -d "${DOMAIN}" + +# ── Reload nginx with the real certificate ──────────────────────────────────── +echo "==> Reloading nginx with the real certificate..." +docker compose exec nginx nginx -s reload + +# ── Start remaining services ────────────────────────────────────────────────── +echo "==> Starting all services..." +docker compose up -d + +echo "" +echo "✓ Bootstrap complete. Your Git service should be live at https://${DOMAIN}/" +echo "" +echo "Next steps:" +echo " 1. Visit https://${DOMAIN}/ and complete the Forgejo setup wizard." +echo " 2. Create your admin account." +echo " 3. 
Set DISABLE_REGISTRATION=true in .env, then: docker compose up -d forgejo" +echo " 4. Edit geo_rules.yml to configure per-repo geo-blocking." diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..fb3a3ff --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,123 @@ +services: + + # ── Forgejo ──────────────────────────────────────────────────────────────── + forgejo: + image: codeberg.org/forgejo/forgejo:9 + container_name: forgejo + restart: unless-stopped + environment: + - USER_UID=1000 + - USER_GID=1000 + - FORGEJO__server__DOMAIN=${DOMAIN} + - FORGEJO__server__ROOT_URL=https://${DOMAIN}/ + - FORGEJO__server__HTTP_PORT=3000 + - FORGEJO__server__DISABLE_SSH=true + - FORGEJO__service__DISABLE_REGISTRATION=${DISABLE_REGISTRATION:-true} + - FORGEJO__database__DB_TYPE=sqlite3 + - FORGEJO__database__PATH=/data/forgejo/forgejo.db + - FORGEJO__log__LEVEL=Info + volumes: + - forgejo_data:/data + - /etc/timezone:/etc/timezone:ro + - /etc/localtime:/etc/localtime:ro + networks: + - internal + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost:3000/api/healthz"] + interval: 30s + timeout: 5s + retries: 3 + + # ── nginx (reverse proxy + GeoIP blocking) ───────────────────────────────── + nginx: + build: + context: ./nginx + dockerfile: Dockerfile + container_name: nginx + restart: unless-stopped + ports: + - "80:80" + - "443:443" + volumes: + - ./nginx/conf.d:/etc/nginx/conf.d:ro # static config fragments + - ./nginx/geoblock:/etc/nginx/geoblock:ro # rendered map snippet (written by watcher) + - ./certs/live:/etc/letsencrypt/live:ro + - ./certs/archive:/etc/letsencrypt/archive:ro + - ./certs/options-ssl-nginx.conf:/etc/letsencrypt/options-ssl-nginx.conf:ro + - ./certs/ssl-dhparams.pem:/etc/letsencrypt/ssl-dhparams.pem:ro + - certbot_webroot:/var/www/certbot:ro + - geoip_db:/usr/share/GeoIP:ro + - nginx_logs:/var/log/nginx + networks: + - internal + depends_on: + - forgejo + environment: + - DOMAIN=${DOMAIN} + healthcheck: + test: 
["CMD", "nginx", "-t"] + interval: 60s + timeout: 5s + retries: 3 + + # ── MaxMind GeoIP database updater ──────────────────────────────────────── + geoipupdate: + image: ghcr.io/maxmind/geoipupdate:v7 + container_name: geoipupdate + restart: unless-stopped + environment: + - GEOIPUPDATE_ACCOUNT_ID=${MAXMIND_ACCOUNT_ID} + - GEOIPUPDATE_LICENSE_KEY=${MAXMIND_LICENSE_KEY} + - GEOIPUPDATE_EDITION_IDS=GeoLite2-City + - GEOIPUPDATE_FREQUENCY=72 # hours — MaxMind updates twice a week + - GEOIPUPDATE_DB_DIR=/usr/share/GeoIP + volumes: + - geoip_db:/usr/share/GeoIP + networks: + - internal + + # ── Geo-block config watcher ─────────────────────────────────────────────── + # Watches geo_rules.yml; re-renders the nginx map snippet and reloads nginx + # whenever rules change. + geoblock_watcher: + build: + context: ./geoblock_watcher + dockerfile: Dockerfile + container_name: geoblock_watcher + restart: unless-stopped + volumes: + - ./geo_rules.yml:/app/geo_rules.yml:ro + - ./nginx/geoblock:/app/geoblock # shared with nginx (rw here) + - /var/run/docker.sock:/var/run/docker.sock + networks: + - internal + depends_on: + - nginx + + # ── Certbot (Let's Encrypt) ──────────────────────────────────────────────── + certbot: + image: certbot/certbot:latest + container_name: certbot + restart: unless-stopped + volumes: + - ./certs:/etc/letsencrypt + - certbot_webroot:/var/www/certbot + entrypoint: > + /bin/sh -c " + trap exit TERM; + while :; do + certbot renew --webroot -w /var/www/certbot --quiet; + sleep 12h & + wait $${!}; + done + " + +volumes: + forgejo_data: + geoip_db: + certbot_webroot: + nginx_logs: + +networks: + internal: + driver: bridge diff --git a/geo_rules.yml b/geo_rules.yml new file mode 100644 index 0000000..79026fd --- /dev/null +++ b/geo_rules.yml @@ -0,0 +1,46 @@ +# geo_rules.yml +# ───────────────────────────────────────────────────────────────────────────── +# Define geo-blocking rules per repository. 
+# +# Each entry targets a Forgejo repository identified by its URL path +# (/<owner>/<repo>). When a request for that repo (or any sub-path, e.g. +# /<owner>/<repo>.git or /<owner>/<repo>/raw/…) arrives from a blocked +# locale, nginx returns the configured HTTP status and body. +# +# Locale format: +# Country only : "US" (ISO 3166-1 alpha-2) +# Country+State : "US-CA" (ISO 3166-2, country + subdivision code) +# +# You can mix country-level and state-level rules in the same repo block. +# More-specific rules (state) take precedence over less-specific ones (country) +# because nginx `map` tries exact strings (state keys) before regular +# expressions (country keys), regardless of the order they are written in. +# +# status: Any valid HTTP status code. 451 ("Unavailable For Legal Reasons") +# is the semantically correct choice for legal/jurisdiction blocks. +# body : Plain-text response body. Keep it short — it is embedded directly +# in the nginx config as a string literal. +# ───────────────────────────────────────────────────────────────────────────── + +repos: + + - path: /alice/secret-project + rules: + - locales: ["US-CA", "US-TX"] + status: 451 + body: "This repository is unavailable in your jurisdiction." + - locales: ["DE", "FR"] + status: 403 + body: "Access to this repository is restricted in your country." + + - path: /alice/another-repo + rules: + - locales: ["CN", "RU"] + status: 403 + body: "Access denied." + + # Template — copy and fill in for each additional repo: + # - path: /owner/repo-name + # rules: + # - locales: ["XX", "XX-YY"] + # status: 403 + # body: "Access restricted." diff --git a/geoblock_watcher/Dockerfile b/geoblock_watcher/Dockerfile new file mode 100644 index 0000000..b3966ec --- /dev/null +++ b/geoblock_watcher/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.13-alpine + +RUN pip install --no-cache-dir pyyaml watchdog docker + +WORKDIR /app +COPY watcher.py . 
+ +CMD ["python", "-u", "watcher.py"] diff --git a/geoblock_watcher/watcher.py b/geoblock_watcher/watcher.py new file mode 100644 index 0000000..d7c12f3 --- /dev/null +++ b/geoblock_watcher/watcher.py @@ -0,0 +1,474 @@ +#!/usr/bin/env python3 +""" +geoblock_watcher.py +──────────────────────────────────────────────────────────────────────────── +Watches geo_rules.yml for changes, renders three nginx config snippets into +/app/geoblock/, then signals the nginx container to reload its configuration. + +Rendered files +────────────── +repo_maps.conf + A single nginx `map` block body that maps the compound GeoIP key + ("CC-SUBDIV") → a per-repo decision token. This file is included + inside the existing map block in nginx.conf. + +repo_vars.conf + One `map` block per repo that translates the decision token to the + final "$geoblock_" variable value ("" = allow, or "status:body"). + +repo_locations.conf + One `location` block per repo. When the variable is non-empty the + block immediately returns the encoded status + body; otherwise the + request falls through to the main proxy_pass location. 
+""" + +import hashlib +import logging +import os +import re +import signal +import sys +import time +from pathlib import Path +from typing import Any + +import docker +import yaml +from watchdog.events import FileSystemEventHandler +from watchdog.observers import Observer + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [watcher] %(levelname)s %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S", + stream=sys.stdout, +) +log = logging.getLogger(__name__) + +RULES_FILE = Path("/app/geo_rules.yml") +OUTPUT_DIR = Path("/app/geoblock") +NGINX_CONTAINER = os.environ.get("NGINX_CONTAINER_NAME", "nginx") + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +def _var_name(repo_path: str) -> str: + """Convert a repo path like /alice/my-repo → geoblock_alice_my_repo.""" + sanitised = re.sub(r"[^a-zA-Z0-9]", "_", repo_path.strip("/")) + return f"geoblock_{sanitised}" + + +def _escape_body(body: str) -> str: + """Escape a string for safe embedding in an nginx config string literal.""" + return body.replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'").replace("\n", " ") + + +def _token(repo_index: int, rule_index: int) -> str: + """Unique short token used to link the map blocks together.""" + return f"repo{repo_index}_rule{rule_index}" + + +# ── Renderer ────────────────────────────────────────────────────────────────── + +def render(rules_data: dict[str, Any]) -> tuple[str, str, str]: + """ + Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf) as strings. 
+ """ + repos: list[dict] = rules_data.get("repos", []) + + maps_lines: list[str] = [ + "# Generated by geoblock_watcher — do not edit manually.", + "# Included inside the map block in nginx.conf.", + "", + ] + + vars_lines: list[str] = [ + "# Generated by geoblock_watcher — do not edit manually.", + "", + ] + + loc_lines: list[str] = [ + "# Generated by geoblock_watcher — do not edit manually.", + "", + ] + + for ri, repo in enumerate(repos): + path: str = repo["path"].rstrip("/") + var: str = _var_name(path) + rules: list[dict] = repo.get("rules", []) + + # ── Map block: region key → token ───────────────────────────────────── + # Build a mapping from locale → token. More-specific (state-level) + # rules are added first so nginx map "first match" semantics apply. + state_entries: list[str] = [] + country_entries: list[str] = [] + + for rj, rule in enumerate(rules): + tok = _token(ri, rj) + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) + value = f"{status}:{body}" + + for locale in rule.get("locales", []): + locale = locale.strip() + key = f'"{locale}"' + entry = f" {key:<20} {tok!r}_{ri}_{rj};" + if "-" in locale: + state_entries.append(entry) + else: + # Country-only key — pad subdivision with empty string so + # it matches both "CC-" (no subdivision) and we also add + # a regex fallback below. 
+ country_entries.append(entry) + + # Emit the per-rule value variable (token → "status:body") + vars_lines.append(f"# {path} — rule {rj}: {rule.get('locales', [])}") + vars_lines.append(f'map $geoip2_region_key ${var}_r{rj} {{') + vars_lines.append(f' default "";') + + for locale in rule.get("locales", []): + locale = locale.strip() + if "-" in locale: + # State-level: exact match on "CC-SUBDIV" + vars_lines.append(f' "{locale}" "{value}";') + else: + # Country-level: match any subdivision of this country + vars_lines.append(f' ~^{re.escape(locale)}- "{value}";') + # Also match when subdivision is absent ("CC-") + vars_lines.append(f' "{locale}-" "{value}";') + + vars_lines.append("}") + vars_lines.append("") + + # Aggregate rule variables into the final per-repo variable. + # The first non-empty rule variable wins. + rule_vars = [f"${var}_r{rj}" for rj in range(len(rules))] + vars_lines.append(f"# Final decision variable for {path}") + vars_lines.append(f"map $geoip2_region_key ${var} {{") + vars_lines.append(f' default "";') + + for locale_list, status_body in _aggregate_locales(rules): + for locale in locale_list: + if "-" in locale: + vars_lines.append(f' "{locale}" "{status_body}";') + else: + vars_lines.append(f' ~^{re.escape(locale)}- "{status_body}";') + vars_lines.append(f' "{locale}-" "{status_body}";') + + vars_lines.append("}") + vars_lines.append("") + + # ── Location block ──────────────────────────────────────────────────── + # Intercept // and any sub-paths. + # nginx location matching: we use a case-sensitive prefix match. + # Git HTTPS also accesses //.git — covered by the prefix. + loc_lines.append(f"# Geo-block for {path}") + loc_lines.append(f"location ^~ {path} {{") + loc_lines.append(f" if (${var} != \"\") {{") + # Split "status:body" at runtime using map — but nginx `if` can't do + # string splitting, so we embed status and body as separate variables. 
+ # We use a nested map approach: the decision var encodes both, and we + # resolve them with two additional map lookups. + loc_lines.append(f" set $__status ${var}_status;") + loc_lines.append(f" set $__body ${var}_body;") + loc_lines.append(f" return $__status \"$__body\";") + loc_lines.append(f" }}") + loc_lines.append(f" # No block — fall through to main proxy") + loc_lines.append(f" proxy_pass http://forgejo:3000;") + loc_lines.append(f" proxy_set_header Host $host;") + loc_lines.append(f" proxy_set_header X-Real-IP $remote_addr;") + loc_lines.append(f" proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;") + loc_lines.append(f" proxy_set_header X-Forwarded-Proto $scheme;") + loc_lines.append(f" client_max_body_size 512m;") + loc_lines.append(f" proxy_request_buffering off;") + loc_lines.append(f" proxy_buffering off;") + loc_lines.append(f" proxy_read_timeout 600s;") + loc_lines.append(f" proxy_send_timeout 600s;") + loc_lines.append(f"}}") + loc_lines.append("") + + # Status and body split maps for this repo + vars_lines.append(f"# Status / body split maps for {path}") + vars_lines.append(f"map ${var} ${var}_status {{") + vars_lines.append(f' default 403;') + for locale_list, status_body in _aggregate_locales(rules): + status = status_body.split(":", 1)[0] + for locale in locale_list: + if "-" in locale: + vars_lines.append(f' "{status_body}" {status};') + break + else: + vars_lines.append(f' "~^{re.escape(status_body)}" {status};') + # Simpler: map the encoded value directly + vars_lines = _replace_split_maps(vars_lines, var, rules) + vars_lines.append("") + + return ( + "\n".join(maps_lines), + "\n".join(vars_lines), + "\n".join(loc_lines), + ) + + +def _aggregate_locales(rules: list[dict]) -> list[tuple[list[str], str]]: + """Return [(locale_list, 'status:body'), …] for all rules.""" + result = [] + for rule in rules: + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) + result.append((rule.get("locales", []), 
f"{status}:{body}")) + return result + + +def _replace_split_maps(vars_lines: list[str], var: str, rules: list[dict]) -> list[str]: + """ + Replace the incomplete split-map stubs with correct status+body maps. + We rebuild the tail of vars_lines for the current repo. + """ + # Remove any partial split map lines we may have added above + while vars_lines and ( + vars_lines[-1].startswith(f"map ${var}_status") or + vars_lines[-1].startswith(f" ") or + vars_lines[-1] in ("}", "") + ): + last = vars_lines[-1] + vars_lines.pop() + if last.startswith(f"map ${var}_status"): + break + + # Status map + vars_lines.append(f"map ${var} ${var}_status {{") + vars_lines.append(f' default 403;') + seen_sv: set[str] = set() + for rule in rules: + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) + encoded = f"{status}:{body}" + if encoded not in seen_sv: + vars_lines.append(f' "{encoded}" {status};') + seen_sv.add(encoded) + vars_lines.append("}") + vars_lines.append("") + + # Body map + vars_lines.append(f"map ${var} ${var}_body {{") + vars_lines.append(f' default "Blocked";') + seen_bv: set[str] = set() + for rule in rules: + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) + encoded = f"{status}:{body}" + if encoded not in seen_bv: + vars_lines.append(f' "{encoded}" "{body}";') + seen_bv.add(encoded) + vars_lines.append("}") + + return vars_lines + + +# ── Clean renderer (replaces the incremental one above) ─────────────────────── + +def render_clean(rules_data: dict[str, Any]) -> tuple[str, str, str]: + """ + Cleanly render all three config files. + Returns (repo_maps_conf, repo_vars_conf, repo_locations_conf). 
+ """ + repos: list[dict] = rules_data.get("repos", []) + + header = "# Generated by geoblock_watcher — do not edit manually.\n\n" + + vars_blocks: list[str] = [] + loc_blocks: list[str] = [] + + for repo in repos: + path: str = repo["path"].rstrip("/") + var: str = _var_name(path) + rules: list[dict] = repo.get("rules", []) + + # ── Per-repo decision map ────────────────────────────────────────────── + # Maps the compound GeoIP region key to "status:escapedBody" or "". + vars_blocks.append(f"# Decision map for {path}") + vars_blocks.append(f"map $geoip2_region_key ${var} {{") + vars_blocks.append(f' default "";') + + # State-level rules first (more specific → rendered first) + for rule in rules: + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) + encoded = f"{status}:{body}" + for locale in rule.get("locales", []): + locale = locale.strip() + if "-" in locale: + vars_blocks.append(f' "{locale}" "{encoded}";') + + # Country-level rules second + for rule in rules: + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) + encoded = f"{status}:{body}" + for locale in rule.get("locales", []): + locale = locale.strip() + if "-" not in locale: + # nginx map supports regex; match "CC-" and "CC-" + vars_blocks.append(f' "~^{re.escape(locale)}(-|$)" "{encoded}";') + + vars_blocks.append("}") + vars_blocks.append("") + + # ── Status split map ─────────────────────────────────────────────────── + vars_blocks.append(f"map ${var} ${var}_status {{") + vars_blocks.append(f" default 403;") + seen: set[str] = set() + for rule in rules: + status = int(rule["status"]) + body = _escape_body(str(rule.get("body", "Blocked"))) + encoded = f"{status}:{body}" + if encoded not in seen: + vars_blocks.append(f' "{encoded}" {status};') + seen.add(encoded) + vars_blocks.append("}") + vars_blocks.append("") + + # ── Body split map ───────────────────────────────────────────────────── + vars_blocks.append(f"map ${var} ${var}_body 
{{")
+        vars_blocks.append(f' default "Blocked";')
+        seen = set()
+        for rule in rules:
+            status = int(rule["status"])
+            body = _escape_body(str(rule.get("body", "Blocked")))
+            encoded = f"{status}:{body}"
+            if encoded not in seen:
+                vars_blocks.append(f' "{encoded}" "{body}";')
+                seen.add(encoded)
+        vars_blocks.append("}")
+        vars_blocks.append("")
+
+        # ── Location block ─────────────────────────────────────────────────────
+        loc_blocks.append(f"# Geo-block for {path}")
+        loc_blocks.append(f"location ^~ {path} {{")
+        # nginx rejects a variable as the "return" code, so emit one literal-code return per status.
+        for code in dict.fromkeys(int(r["status"]) for r in rules):
+            loc_blocks.append(f' if (${var} ~ "^{code}:") {{ return {code} "${var}_body"; }}')
+        loc_blocks.append(f" proxy_pass http://forgejo:3000;")
+        loc_blocks.append(f" proxy_set_header Host $host;")
+        loc_blocks.append(f" proxy_set_header X-Real-IP $remote_addr;")
+        loc_blocks.append(f" proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;")
+        loc_blocks.append(f" proxy_set_header X-Forwarded-Proto $scheme;")
+        loc_blocks.append(f" client_max_body_size 512m;")
+        loc_blocks.append(f" proxy_request_buffering off;")
+        loc_blocks.append(f" proxy_buffering off;")
+        loc_blocks.append(f" proxy_read_timeout 600s;")
+        loc_blocks.append(f" proxy_send_timeout 600s;")
+        loc_blocks.append(f"}}")
+        loc_blocks.append("")
+
+    # repo_maps.conf is now empty (we use inline regex maps in repo_vars.conf)
+    maps_conf = header + "# (Region key mapping is now done inline in repo_vars.conf)\n"
+    vars_conf = header + "\n".join(vars_blocks)
+    locs_conf = header + "\n".join(loc_blocks)
+
+    return maps_conf, vars_conf, locs_conf
+
+
+# ── Writer & nginx reload ─────────────────────────────────────────────────────
+
+_last_hash: str = ""
+
+
+def _file_hash(path: Path) -> str:
+    return hashlib.sha256(path.read_bytes()).hexdigest()
+
+
+def apply_rules(force: bool = False) -> None:
+    global _last_hash
+
+    if not RULES_FILE.exists():
+        log.warning("Rules file not found: %s — skipping.", RULES_FILE)
+        return
+
+    current_hash = 
_file_hash(RULES_FILE)
+    if not force and current_hash == _last_hash:
+        log.debug("Rules file unchanged — nothing to do.")
+        return
+
+    log.info("Rules file changed — re-rendering nginx config snippets.")
+
+    try:
+        rules_data = yaml.safe_load(RULES_FILE.read_text()) or {}
+    except yaml.YAMLError as exc:
+        log.error("YAML parse error in %s: %s — skipping reload.", RULES_FILE, exc)
+        return
+
+    try:
+        maps_conf, vars_conf, locs_conf = render_clean(rules_data)
+    except Exception as exc:  # noqa: BLE001
+        log.error("Render error: %s — skipping reload.", exc, exc_info=True)
+        return
+
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    (OUTPUT_DIR / "repo_maps.conf").write_text(maps_conf)
+    (OUTPUT_DIR / "repo_vars.conf").write_text(vars_conf)
+    (OUTPUT_DIR / "repo_locations.conf").write_text(locs_conf)
+
+    log.info("Config snippets written to %s.", OUTPUT_DIR)
+
+    _reload_nginx()
+    _last_hash = current_hash
+
+
+def _reload_nginx() -> None:
+    """Send SIGHUP to the nginx container to trigger a graceful config reload."""
+    try:
+        client = docker.from_env()
+        containers = client.containers.list(filters={"name": NGINX_CONTAINER})
+        if not containers:
+            log.warning("nginx container '%s' not found — skipping reload.", NGINX_CONTAINER)
+            return
+        container = containers[0]
+        container.kill(signal="HUP")
+        log.info("Sent SIGHUP to nginx container '%s'.", container.name)
+    except Exception as exc:  # noqa: BLE001
+        log.error("Failed to reload nginx: %s", exc, exc_info=True)
+
+
+# ── Watchdog ──────────────────────────────────────────────────────────────────
+
+class RulesHandler(FileSystemEventHandler):
+    def on_modified(self, event):
+        ends = (event.src_path, getattr(event, "dest_path", ""))  # a rename reports the target as dest_path
+        if any(p and Path(p).resolve() == RULES_FILE.resolve() for p in ends):
+            log.info("Detected change in %s.", RULES_FILE)
+            time.sleep(0.2)  # debounce
+            apply_rules()
+
+    on_created = on_moved = on_modified  # atomic replace (temp file + rename) arrives as created/moved
+
+
+def main() -> None:
+    log.info("geoblock_watcher starting. 
Watching %s", RULES_FILE)
+
+    # Initial render on startup
+    apply_rules(force=True)
+
+    observer = Observer()
+    observer.schedule(RulesHandler(), str(RULES_FILE.parent), recursive=False)
+    observer.start()
+
+    def _shutdown(signum, frame):  # noqa: ANN001
+        log.info("Shutting down.")
+        sys.exit(0)  # SystemExit unwinds main(); its finally block stops the observer
+
+    signal.signal(signal.SIGTERM, _shutdown)
+    signal.signal(signal.SIGINT, _shutdown)
+
+    try:
+        while True:
+            time.sleep(60)
+            apply_rules()  # Periodic re-check (catches missed inotify events)
+    finally:
+        observer.stop()  # stop on every exit path; join() alone would block forever after a crash
+        observer.join()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/nginx/Dockerfile b/nginx/Dockerfile
new file mode 100644
index 0000000..6fe4f59
--- /dev/null
+++ b/nginx/Dockerfile
@@ -0,0 +1,56 @@
+# Build nginx with the ngx_http_geoip2_module baked in.
+# The official nginx image ships without GeoIP2 support; we compile the
+# dynamic module against the same nginx version used in the base image.
+
+ARG NGINX_VERSION=1.27.4
+
+FROM nginx:${NGINX_VERSION}-alpine AS builder
+
+ARG NGINX_VERSION
+
+RUN apk add --no-cache \
+    build-base \
+    git \
+    libmaxminddb-dev \
+    pcre2-dev \
+    openssl-dev \
+    zlib-dev \
+    linux-headers
+
+# Clone the GeoIP2 nginx module (default-branch HEAD; pin a tag or commit for reproducible builds)
+RUN git clone --depth 1 \
+    https://github.com/leev/ngx_http_geoip2_module.git \
+    /usr/src/ngx_http_geoip2_module
+
+# Fetch the nginx source matching the base image version (over HTTPS)
+RUN wget -q "https://nginx.org/download/nginx-${NGINX_VERSION}.tar.gz" \
+    -O /usr/src/nginx.tar.gz \
+    && tar -xzf /usr/src/nginx.tar.gz -C /usr/src
+
+WORKDIR /usr/src/nginx-${NGINX_VERSION}
+
+# Build only the dynamic module with the flags from `nginx -V`; eval keeps quoted flag values intact
+RUN CONFARGS="$(nginx -V 2>&1 | sed -n 's/^configure arguments: //p')" && \
+    eval "./configure \
+        ${CONFARGS} \
+        --add-dynamic-module=/usr/src/ngx_http_geoip2_module" \
+    && make modules
+
+# ── Runtime image ─────────────────────────────────────────────────────────────
+FROM nginx:${NGINX_VERSION}-alpine
+
+# Runtime dependency for MaxMind DB
+RUN apk add --no-cache libmaxminddb
+
+# Copy the compiled dynamic module
+COPY --from=builder \
+    /usr/src/nginx-${NGINX_VERSION}/objs/ngx_http_geoip2_module.so \
+    /usr/lib/nginx/modules/ngx_http_geoip2_module.so
+
+# Main nginx config (loads the dynamic module at the top level)
+COPY nginx.conf /etc/nginx/nginx.conf
+
+# Directory for the geo-block config snippets (populated by geoblock_watcher at runtime)
+RUN mkdir -p /etc/nginx/geoblock
+
+EXPOSE 80 443
diff --git a/nginx/conf.d/git.conf b/nginx/conf.d/git.conf
new file mode 100644
index 0000000..e74c84e
--- /dev/null
+++ b/nginx/conf.d/git.conf
@@ -0,0 +1,56 @@
+# HTTP → HTTPS redirect + ACME challenge. NOTE(review): nginx does not expand ${DOMAIN} itself — presumably substituted before load; confirm in docker-compose.yml.
+server {
+    listen 80;
+    listen [::]:80;
+    server_name ${DOMAIN};
+
+    # Let's Encrypt webroot challenge
+    location /.well-known/acme-challenge/ {
+        root /var/www/certbot;
+    }
+
+    location / {
+        return 301 https://$host$request_uri;
+    }
+}
+
+# HTTPS — main entry point
+server {
+    listen 443 ssl;
+    listen [::]:443 ssl;
+    http2 on;
+    server_name ${DOMAIN};
+
+    # ── TLS ───────────────────────────────────────────────────────────────────
+    ssl_certificate /etc/letsencrypt/live/${DOMAIN}/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/${DOMAIN}/privkey.pem;
+    include /etc/letsencrypt/options-ssl-nginx.conf;
+    ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
+
+    # ── Security headers ──────────────────────────────────────────────────────
+    add_header Strict-Transport-Security "max-age=63072000; includeSubDomains" always;
+    add_header X-Frame-Options SAMEORIGIN always;
+    add_header X-Content-Type-Options nosniff always;
+    add_header Referrer-Policy strict-origin-when-cross-origin always;
+
+    # ── Geo-block check ───────────────────────────────────────────────────────
+    # The watcher renders one "location ^~ <repo path>" block per repo into the file
+    # included next; each checks the per-repo variable and returns the configured status + body when blocked.
+    include /etc/nginx/geoblock/repo_locations.conf;
+
+    # ── Proxy to Forgejo ──────────────────────────────────────────────────────
+    location / {
+        proxy_pass http://forgejo:3000;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+
+        # Git push/pull can involve large objects
+        client_max_body_size 512m;
+        proxy_request_buffering off;
+        proxy_buffering off;
+        proxy_read_timeout 600s;
+        proxy_send_timeout 600s;
+    }
+}
diff --git a/nginx/geoblock/repo_locations.conf b/nginx/geoblock/repo_locations.conf
new file mode 100644
index 0000000..46b398f
--- /dev/null
+++ b/nginx/geoblock/repo_locations.conf
@@ -0,0 +1,3 @@
+# Managed by geoblock_watcher. Do not edit manually.
+# Per-repo location blocks that intercept requests and return the block
+# response when the geo decision variable is non-empty.
diff --git a/nginx/geoblock/repo_maps.conf b/nginx/geoblock/repo_maps.conf
new file mode 100644
index 0000000..4c28975
--- /dev/null
+++ b/nginx/geoblock/repo_maps.conf
@@ -0,0 +1,7 @@
+# This file is managed by geoblock_watcher. Do not edit manually.
+# nginx.conf includes it inside the map that builds $geoip2_region_key from
+# the compound "$geoip2_country-$geoip2_subdivision" value.
+#
+# The watcher renders this file with comments only. Per-repo decision
+# entries such as "US-CA" "403:Access denied from your region."; do not
+# belong here — they are rendered into repo_vars.conf.
diff --git a/nginx/geoblock/repo_vars.conf b/nginx/geoblock/repo_vars.conf
new file mode 100644
index 0000000..49a90cd
--- /dev/null
+++ b/nginx/geoblock/repo_vars.conf
@@ -0,0 +1,3 @@
+# Managed by geoblock_watcher. Do not edit manually.
+# Each enabled repo gets a map variable that resolves to "" (allow)
+# or "STATUS:BODY" (block) based on the compound region key.
diff --git a/nginx/nginx.conf b/nginx/nginx.conf
new file mode 100644
index 0000000..422f84a
--- /dev/null
+++ b/nginx/nginx.conf
@@ -0,0 +1,58 @@
+load_module /usr/lib/nginx/modules/ngx_http_geoip2_module.so;
+
+user nginx;
+worker_processes auto;
+
+error_log /var/log/nginx/error.log notice;
+pid /var/run/nginx.pid;
+
+events {
+    worker_connections 1024;
+}
+
+http {
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    # ── Logging ───────────────────────────────────────────────────────────────
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" country=$geoip2_country iso=$geoip2_subdivision';
+
+    access_log /var/log/nginx/access.log main;
+
+    sendfile on;
+    tcp_nopush on;
+    keepalive_timeout 65;
+
+    # ── GeoIP2 database ───────────────────────────────────────────────────────
+    # The GeoLite2-City database gives us country + subdivision (state/province)
+    geoip2 /usr/share/GeoIP/GeoLite2-City.mmdb {
+        # ISO 3166-1 alpha-2 country code (e.g. "US")
+        $geoip2_country country iso_code;
+        # Subdivision part of the ISO 3166-2 code only (e.g. "CA", not "US-CA");
+        # the country prefix is added when the compound key is built below.
+        $geoip2_subdivision subdivisions 0 iso_code;
+    }
+
+    # Compound key used in per-repo map blocks: "CC-SUBDIV" e.g. "US-CA".
+    # When the DB has no subdivision the variable is empty and the key becomes
+    # "CC-", which only country-level rules match.
+    #
+    # The default passes the compound key through unchanged. repo_maps.conf is
+    # rendered without entries, so a literal "" default would blank the key and
+    # no per-repo rule could ever match.
+    map "$geoip2_country-$geoip2_subdivision" $geoip2_region_key {
+        default "$geoip2_country-$geoip2_subdivision";
+        include /etc/nginx/geoblock/repo_maps.conf;
+    }
+
+    # ── Per-repo block decision variables ─────────────────────────────────────
+    # Loaded from the rendered snippet produced by geoblock_watcher.
+    # Each repo gets its own $geoblock_* decision variable
+    # with value "" (allow) or "STATUS:BODY" (block).
+    include /etc/nginx/geoblock/repo_vars.conf;
+
+    # ── Virtual hosts ─────────────────────────────────────────────────────────
+    include /etc/nginx/conf.d/*.conf;
+}