Initial commit, simple proxy implementation
This commit is contained in:
23
.gitignore
vendored
Normal file
23
.gitignore
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
# Editors
|
||||
.idea/
|
||||
.vscode/
|
||||
|
||||
# Python
|
||||
.venv*/
|
||||
venv*/
|
||||
__pycache__/
|
||||
dist/
|
||||
|
||||
# Unit tests
|
||||
.coverage*
|
||||
htmlcov/
|
||||
.tox/
|
||||
|
||||
# Docs
|
||||
docs/_build/
|
||||
|
||||
# Ours
|
||||
Dockerfile
|
||||
.env
|
||||
static/*.css
|
||||
static/*.css.map
|
265
app.py
Normal file
265
app.py
Normal file
@@ -0,0 +1,265 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import ipaddress
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
from functools import lru_cache
|
||||
from urllib.parse import quote, unquote, urljoin, urlparse, urlsplit, urlunsplit
|
||||
|
||||
import requests
|
||||
from flask import Flask, Response, abort, make_response, request
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
# -------- Config --------
# ALLOW_PRIVATE=1 disables the SSRF guard in _assert_public (allows
# private/loopback/reserved targets). Default: blocked.
ALLOW_PRIVATE = bool(int(os.getenv("ALLOW_PRIVATE", "0")))
# Per-request upstream timeout in seconds (applies to connect + read).
DEFAULT_TIMEOUT = int(os.getenv("DEFAULT_TIMEOUT", "15"))
# Hard cap on upstream body bytes downloaded/returned (default 10 MiB).
MAX_BYTES = int(os.getenv("MAX_BYTES", str(10 * 1024 * 1024)))
# Number of retries (after the first attempt) for retryable statuses and
# transient network errors, applied via the urllib3 Retry policy below.
MAX_RETRIES = int(os.getenv("MAX_RETRIES", "3"))
# Maximum redirect hops _fetch_final will follow before aborting with 502.
MAX_REDIRECTS = int(os.getenv("MAX_REDIRECTS", "5"))
# HTTP status codes that trigger a retry in the mounted Retry policy.
RETRYABLE_CODES = {429, 500, 502, 503, 504}
# User-Agent strings; one is chosen at random per upstream request.
UA_POOL = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
]
# Upstream response headers that are safe to echo back to the client verbatim.
SAFE_ECHO = (
    "Content-Type",
    "Cache-Control",
    "Expires",
    "Last-Modified",
    "ETag",
    "Content-Disposition",
)
|
||||
|
||||
# -------- App & session --------
app = Flask(__name__)

# One shared session; http:// and https:// get identical retry/backoff
# behavior. (The original duplicated the whole adapter construction verbatim
# for each scheme — build it in a loop instead so the policy cannot drift.)
_session = requests.Session()
for _scheme in ("http://", "https://"):
    _session.mount(
        _scheme,
        HTTPAdapter(
            max_retries=Retry(
                total=MAX_RETRIES,
                status_forcelist=RETRYABLE_CODES,
                allowed_methods=frozenset(["GET"]),  # proxy only ever issues GET
                backoff_factor=1.0,
                respect_retry_after_header=True,
                raise_on_status=False,  # deliver the final status, don't raise
            ),
            pool_maxsize=50,
        ),
    )
|
||||
|
||||
|
||||
# -------- Small helpers (compact but explicit) --------
|
||||
def _raw_site():
|
||||
# Preserve '+' by reading raw query string; take everything after first 'site='
|
||||
qs = request.query_string.decode("latin-1", "ignore")
|
||||
i = qs.find("site=")
|
||||
return None if i == -1 else unquote(qs[i + 5 :])
|
||||
|
||||
|
||||
def _normalize(u: str) -> str:
|
||||
p = urlsplit(u)
|
||||
if not p.scheme or not p.netloc:
|
||||
return u
|
||||
return urlunsplit(
|
||||
(
|
||||
p.scheme,
|
||||
p.netloc,
|
||||
quote(p.path or "/", safe="/%:@"),
|
||||
quote(p.query or "", safe="=&%+,:;@/?"),
|
||||
"",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@lru_cache(maxsize=512)
|
||||
def _resolves(host: str):
|
||||
try:
|
||||
return {ai[4][0] for ai in socket.getaddrinfo(host, None)}
|
||||
except socket.gaierror:
|
||||
return set()
|
||||
|
||||
|
||||
def _assert_public(u: str):
    """Abort (400) unless *u* is an http(s) URL whose host resolves only to
    public, globally routable addresses.

    With ALLOW_PRIVATE=1 the address check is skipped entirely (scheme and
    hostname are still required).
    """
    try:
        p = urlparse(u)
    except Exception:
        abort(400, "Malformed URL")
    if p.scheme not in ("http", "https"):
        abort(400, "URL must start with http:// or https://")
    if not p.hostname:
        abort(400, "URL must include a hostname")
    if ALLOW_PRIVATE:
        return
    addrs = _resolves(p.hostname)
    if not addrs:
        abort(400, "Hostname cannot be resolved")
    for ip_str in addrs:
        # getaddrinfo may return scoped IPv6 literals like 'fe80::1%eth0';
        # ip_address() rejects the zone id, so strip it before parsing
        # (previously this raised ValueError -> 500).
        try:
            ip = ipaddress.ip_address(ip_str.split("%", 1)[0])
        except ValueError:
            abort(400, "Host resolves to a non-public address (blocked)")
        if (
            ip.is_private
            or ip.is_loopback
            or ip.is_reserved
            or ip.is_link_local
            or ip.is_multicast
            # Catch-all: also blocks CGNAT 100.64/10, 0.0.0.0, and any other
            # range the explicit flags above miss.
            or not ip.is_global
        ):
            abort(400, "Host resolves to a non-public address (blocked)")
|
||||
|
||||
|
||||
def _headers():
    """Assemble the outbound request headers for one upstream fetch."""
    # Draw a fresh User-Agent each call so consecutive fetches rotate.
    chosen_ua = random.choice(UA_POOL)
    return {
        "User-Agent": chosen_ua,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        # 'identity': request an uncompressed body from upstream.
        "Accept-Encoding": "identity",
        "Connection": "close",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
|
||||
|
||||
|
||||
def _fetch_final(url: str):
    """Fetch *url*, following redirects by hand so every hop is SSRF-checked.

    Returns the final (non-redirect) streaming response.  Aborts with 502 on
    connection failures, a redirect without a Location header, or when more
    than MAX_REDIRECTS hops would be needed.
    """
    current = url
    for hop in range(MAX_REDIRECTS + 1):
        _assert_public(current)
        try:
            resp = _session.get(
                current,
                headers=_headers(),
                timeout=DEFAULT_TIMEOUT,
                allow_redirects=False,  # we validate each hop ourselves
                stream=True,
            )
        except requests.RequestException as e:
            abort(502, f"Upstream connection error: {e}")
        if not (resp.is_redirect or resp.is_permanent_redirect):
            return resp
        if hop >= MAX_REDIRECTS:
            resp.close()
            abort(502, "Too many redirects")
        location = resp.headers.get("Location")
        resp.close()
        if not location:
            abort(502, "Redirect without Location")
        # Location may be relative; resolve it against the current URL.
        current = urljoin(current, location)
        time.sleep(0.05)  # small politeness pause between hops
|
||||
|
||||
|
||||
# add this helper (compact, robust enough without full HTML parsing)
|
||||
def _inject_base(html_bytes: bytes, base_url: str) -> bytes:
|
||||
base_tag = b'<base href="' + base_url.encode("utf-8", "ignore") + b'">'
|
||||
# Find <head> ... </head>
|
||||
m_head_open = re.search(br"<head[^>]*>", html_bytes, flags=re.I)
|
||||
if m_head_open:
|
||||
head_end = re.search(br"</head\s*>", html_bytes, flags=re.I)
|
||||
end_idx = head_end.start() if head_end else m_head_open.end()
|
||||
# Check if <base> already inside <head>
|
||||
if re.search(br"<base\b", html_bytes[m_head_open.end():end_idx], flags=re.I):
|
||||
return html_bytes
|
||||
return html_bytes[:m_head_open.end()] + base_tag + html_bytes[m_head_open.end():]
|
||||
|
||||
# No <head> — try after <html>
|
||||
m_html_open = re.search(br"<html[^>]*>", html_bytes, flags=re.I)
|
||||
if m_html_open:
|
||||
return (html_bytes[:m_html_open.end()]
|
||||
+ b"<head>" + base_tag + b"</head>"
|
||||
+ html_bytes[m_html_open.end():])
|
||||
|
||||
# No <html> either — prepend (rare but safe)
|
||||
return b"<head>" + base_tag + b"</head>" + html_bytes
|
||||
|
||||
|
||||
# -------- Route --------
|
||||
@app.route("/", methods=["GET"])
def root():
    """Proxy a single GET: ``/?site=<url>`` returns the fetched upstream body.

    Validates the target (scheme, hostname, public address), follows
    redirects with per-hop checks, caps the body at MAX_BYTES, and echoes
    only the SAFE_ECHO response headers back to the client.
    """
    site = _raw_site()
    if not site:
        abort(400, "Missing required query parameter: site")
    site = _normalize(site)
    _assert_public(site)

    upstream = _fetch_final(site)

    # Early size guard via Content-Length: reject cheaply before streaming.
    cl = upstream.headers.get("Content-Length")
    if cl and cl.isdigit() and int(cl) > MAX_BYTES:
        upstream.close()
        abort(502, "Upstream response too large")

    # Stream & cap: never buffer more than MAX_BYTES.
    total, buf = 0, []
    try:
        for chunk in upstream.iter_content(64 * 1024):
            if not chunk:
                continue
            total += len(chunk)
            if total > MAX_BYTES:
                upstream.close()
                abort(502, "Upstream response too large")
            buf.append(chunk)
    finally:
        upstream.close()
    body = b"".join(buf)

    ctype = upstream.headers.get("Content-Type", "")
    if "text/html" in ctype.lower():
        # Use the final URL (post-redirect) so relative links resolve right.
        body = _inject_base(body, upstream.url)

    out = make_response(body, upstream.status_code)
    out.headers["X-Proxied-From"] = site
    for h in SAFE_ECHO:
        if h in upstream.headers:
            out.headers[h] = upstream.headers[h]
    # Flask pre-populates Content-Type (text/html), so setdefault() would
    # never fire; set the octet-stream fallback explicitly when upstream did
    # not declare a type.
    if "Content-Type" not in upstream.headers:
        out.headers["Content-Type"] = "application/octet-stream"
    out.headers["Content-Length"] = str(len(body))
    out.headers["X-Content-Type-Options"] = "nosniff"
    return out
|
||||
|
||||
|
||||
# Plain-text 4xx/5xx
|
||||
@app.errorhandler(400)
@app.errorhandler(502)
def _err(e):
    """Render 400/502 errors as a single plain-text line."""
    detail = getattr(e, "description", str(e))
    text = f"{e.code} {e.name}: {detail}\n"
    return Response(text, status=e.code, mimetype="text/plain; charset=utf-8")
|
||||
|
||||
|
||||
# -------- CLI --------
def main():
    """Parse CLI flags, announce the bind address, and run the dev server."""
    parser = argparse.ArgumentParser(description="Simple HTTPS proxy via Flask (compact)")
    parser.add_argument("--port", type=int, default=8888)
    parser.add_argument("--host", default="127.0.0.1")
    args = parser.parse_args()
    flag = "1" if ALLOW_PRIVATE else "0"
    print(f"MiniProxy (Flask) http://{args.host}:{args.port} (ALLOW_PRIVATE={flag})")
    app.run(host=args.host, port=args.port, threaded=True)


if __name__ == "__main__":
    main()
|
33
readme.md
Normal file
33
readme.md
Normal file
@@ -0,0 +1,33 @@
|
||||
# KolibriOS Mini Proxy
|
||||
|
||||
## Features
|
||||
- Blocks localhost/private/reserved IPs unless ALLOW_PRIVATE=1
|
||||
- Manual redirects with per-hop SSRF checks (MAX_REDIRECTS)
|
||||
- Retries/backoff for 429/5xx & transient errors
|
||||
- Streams and caps body to MAX_BYTES
|
||||
- Safe header passthrough + nosniff
|
||||
|
||||
|
||||
## How to Run
|
||||
```shell
|
||||
pip install Flask requests
|
||||
python3 app.py --host 127.0.0.1 --port 8888
|
||||
```
|
||||
|
||||
## Env Vars
|
||||
* **ALLOW\_PRIVATE** - `0` blocks hosts that resolve to private/loopback/reserved/link-local/multicast IPs. Set to `1` to allow them (disables that SSRF protection).
|
||||
* Default: `0`.
|
||||
* **DEFAULT\_TIMEOUT** - Per-request timeout in seconds for the upstream fetch (connect + read).
|
||||
* Default: `15`.
|
||||
* **MAX\_BYTES** - Hard cap on how many bytes the proxy will download/return from the upstream response.
|
||||
* Default: `10 MiB` (`10 * 1024 * 1024`).
|
||||
* **MAX\_RETRIES** - Max number of **retries** on 429/5xx and transient network errors (backoff applied). Up to this many retries after the first attempt.
|
||||
* Default: `3`.
|
||||
* **MAX\_REDIRECTS** - Maximum redirect hops the proxy will follow, validating each hop before fetching. Exceeding the limit returns a `502`.
|
||||
* Default: `5`.
|
||||
|
||||
## Example Usage
|
||||
```shell
|
||||
GET /?site=https://example.com -> returns the fetched response
|
||||
```
|
||||
|
13
requirements.txt
Normal file
13
requirements.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
blinker==1.9.0
|
||||
certifi==2025.8.3
|
||||
charset-normalizer==3.4.3
|
||||
click==8.2.1
|
||||
colorama==0.4.6
|
||||
Flask==3.1.2
|
||||
idna==3.10
|
||||
itsdangerous==2.2.0
|
||||
Jinja2==3.1.6
|
||||
MarkupSafe==3.0.2
|
||||
requests==2.32.5
|
||||
urllib3==2.5.0
|
||||
Werkzeug==3.1.3
|
23
shell.nix
Normal file
23
shell.nix
Normal file
@@ -0,0 +1,23 @@
|
||||
{ pkgs ? import <nixpkgs> {} }: let
  pypkgs = pkgs.python3Packages;
in pkgs.mkShell {
  name = "proxy.kolibrios.org";

  buildInputs = with pypkgs; [
    python
    virtualenv
    pkgs.nodePackages.sass
  ];

  shellHook = ''
    # Fix: the hook previously tested for "venv" but created ".venv", so the
    # virtualenv was re-created on every shell entry.  Test what we create.
    if [ ! -d ".venv" ]; then
      python -m venv .venv
    fi

    source .venv/bin/activate

    if [ -f "requirements.txt" ]; then
      pip install -r requirements.txt
    fi
  '';
}
|
Reference in New Issue
Block a user