Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 5 additions & 12 deletions .github/workflows/auto-copilot-functionality-docs-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,12 @@ jobs:

- name: Run Basic Functionality Tests
run: |
# Try to run tests if they exist
if [ -f "package.json" ] && grep -q '"test"' package.json; then
npm test || echo "Tests failed or not configured"
if [ -f "Makefile" ]; then
make pycheck
make unittest
elif [ -d "tests" ]; then
python3 -m unittest discover -s tests
fi

if [ -f "pytest.ini" ] || [ -d "tests" ]; then
pytest || echo "Pytest tests failed or not configured"
fi

if [ -f "go.mod" ]; then
go test ./... || echo "Go tests failed or not configured"
fi
continue-on-error: true

documentation-review:
runs-on: [self-hosted, linux, x64, big]
Expand Down
56 changes: 11 additions & 45 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ PODMAN ?= podman
PYTHON ?= python3
EGRESSD_IMAGE ?= localhost/hg-proxychains-egressd-validate:latest

.PHONY: deps smoke down logs health ready pycheck unittest test check preflight validate-config validate-image repo-scan repo-clean maintenance maintenance-fix bundle clean
.PHONY: deps smoke down logs health ready pycheck unittest test check preflight validate-config validate-image repo-scan repo-scan-json repo-clean maintenance maintenance-json maintenance-fix maintenance-all maintenance-all-json maintenance-baseline bundle clean

deps:
scripts/bootstrap-third-party.sh
Expand All @@ -24,7 +24,7 @@ ready:
curl -i http://localhost:9191/ready

pycheck:
$(PYTHON) -m py_compile egressd/supervisor.py egressd/chain.py egressd/readiness.py egressd/preflight.py egressd/test_supervisor.py egressd/test_supervisor_readiness.py client/test_client.py exitserver/echo_server.py funkydns-smoke/check_resolution.py funkydns-smoke/generate_cert.py funkydns-smoke/run_funkydns.py tests/test_chain.py tests/test_preflight.py tests/test_hop_connectivity.py
$(PYTHON) -m py_compile egressd/supervisor.py egressd/chain.py egressd/readiness.py egressd/preflight.py egressd/test_supervisor.py egressd/test_supervisor_readiness.py client/test_client.py exitserver/echo_server.py funkydns-smoke/check_resolution.py funkydns-smoke/generate_cert.py funkydns-smoke/run_funkydns.py tests/test_chain.py tests/test_preflight.py tests/test_hop_connectivity.py scripts/repo_hygiene.py scripts/repo_maintenance.py scripts/test_repo_hygiene.py

unittest:
$(PYTHON) -m unittest egressd/test_supervisor_readiness.py egressd/test_supervisor.py tests/test_readiness.py tests/test_supervisor.py tests/test_chain.py tests/test_preflight.py tests/test_hop_connectivity.py scripts/test_repo_hygiene.py
Expand All @@ -43,65 +43,31 @@ validate-config: validate-image
$(PODMAN) run --rm -e EGRESSD_VALIDATE_ONLY=1 $(EGRESSD_IMAGE) $(PYTHON) /opt/egressd/supervisor.py

repo-scan:
$(PYTHON) scripts/repo_hygiene.py scan --repo-root .
$(PYTHON) scripts/repo_hygiene.py scan --repo-root . --no-include-third-party
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The --no-include-third-party flag is redundant. scripts/repo_hygiene.py defaults to excluding third-party directories, so this flag can be removed to simplify the command.

	$(PYTHON) scripts/repo_hygiene.py scan --repo-root .


repo-clean:
$(PYTHON) scripts/repo_hygiene.py clean --repo-root .
$(PYTHON) scripts/repo_hygiene.py clean --repo-root . --no-include-third-party
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

As with repo-scan, the --no-include-third-party flag is redundant here because it reflects the default behavior of the repo_hygiene.py script.

	$(PYTHON) scripts/repo_hygiene.py clean --repo-root .


maintenance:
$(PYTHON) scripts/repo_maintenance.py
$(PYTHON) scripts/repo_maintenance.py --no-include-third-party

maintenance-fix:
$(PYTHON) scripts/repo_maintenance.py --fix
$(PYTHON) scripts/repo_maintenance.py --no-include-third-party --fix

repo-scan-json:
python3 scripts/repo_hygiene.py scan --repo-root . --json

maintenance:
python3 scripts/repo_maintenance.py --no-include-third-party

maintenance-fix:
python3 scripts/repo_maintenance.py --no-include-third-party --fix
$(PYTHON) scripts/repo_hygiene.py scan --repo-root . --no-include-third-party --json
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The --no-include-third-party flag is redundant here as it's the default for scripts/repo_hygiene.py. The command can be simplified by removing it.

	$(PYTHON) scripts/repo_hygiene.py scan --repo-root . --json


maintenance-json:
python3 scripts/repo_maintenance.py --no-include-third-party --json
$(PYTHON) scripts/repo_maintenance.py --no-include-third-party --json

maintenance-all:
python3 scripts/repo_maintenance.py
$(PYTHON) scripts/repo_maintenance.py --include-third-party

maintenance-all-json:
python3 scripts/repo_maintenance.py --json

maintenance:
python3 scripts/repo_hygiene.py scan --repo-root . --include-third-party

maintenance-fix:
python3 scripts/repo_hygiene.py clean --repo-root . --include-third-party
$(PYTHON) scripts/repo_maintenance.py --include-third-party --json

maintenance-baseline:
python3 scripts/repo_hygiene.py baseline --repo-root . --include-third-party

maintenance:
python3 scripts/repo_maintenance.py --no-include-third-party

maintenance-fix:
python3 scripts/repo_maintenance.py --no-include-third-party --fix

maintenance:
python3 scripts/repo_hygiene.py scan --repo-root . --json

maintenance-fix:
python3 scripts/repo_hygiene.py clean --repo-root . --json

maintenance: repo-scan

maintenance-fix: repo-clean

repo-scan:
$(MAKE) maintenance

repo-clean:
$(MAKE) maintenance-fix
$(PYTHON) scripts/repo_hygiene.py baseline --repo-root . --include-third-party

bundle:
tar -czf egressd-starter.tar.gz .
Expand Down
36 changes: 36 additions & 0 deletions QUICKSTART.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# QUICKSTART

Fastest path to verify the smoke harness end to end:

1. Initialize the private FunkyDNS submodule:
```bash
git submodule update --init --recursive third_party/FunkyDNS
```
If you prefer the helper script, run `make deps` instead.

2. Start the stack:
```bash
podman-compose up --build
```
Or run `make smoke`.

3. Wait for the one-shot `client` container to finish. A good run prints:
- `DNS OK` / `DoH OK` for `smoke.test`
- `DNS OK` / `DoH OK` for `hosts.smoke.internal`
- `DNS OK` / `DoH OK` for `printer`
- `CONNECT` followed by `OK from exit-server`

4. Spot-check health endpoints:
```bash
curl -sk https://localhost:18443/healthz
curl http://localhost:9191/health
curl -f http://localhost:9191/ready
```

5. Tear it down when finished:
```bash
podman-compose down -v
```
Or run `make down`.

If anything looks off, use `make logs` and then read `README.md` or `docs/USER-FLOW-REVIEW.md` for the deeper walkthrough.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ The design goal is intentionally boring:

## Quick start

Start with `QUICKSTART.md` for the shortest smoke-harness path.
For a reviewed walkthrough of the smoke-harness flow, host flow, and current
known breakpoints, see `docs/USER-FLOW-REVIEW.md`.

Expand Down Expand Up @@ -137,6 +138,7 @@ It does **not** prove host enforcement. For that, use the scripts in `scripts/`
- `pproxy` must be running
- if `dns.launch_funkydns=true`, FunkyDNS must also be running
- hop checks must be complete and successful by default
- auth-required or policy-denied CONNECT responses stay visible in `/health`, but keep `/ready` red

Readiness behavior can be tuned via:

Expand Down
2 changes: 1 addition & 1 deletion docs/BRINGUP-CHECKLIST.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
- [ ] `curl http://localhost:9191/health`
- [ ] `curl -i http://localhost:9191/ready` returns HTTP 200
- [ ] Confirm `searchdns` becomes healthy before `funky`
- [ ] Confirm hop probes are green or at least responding with expected policy/auth status
- [ ] Confirm hop probes show successful CONNECT responses for ready traffic; `403`/`407` should remain diagnostic-only failures in `/health`
- [ ] Confirm `client` starts only after `egressd` healthcheck is healthy

## Host deployment
Expand Down
2 changes: 1 addition & 1 deletion docs/HOST-DEPLOYMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ This repo's compose stack is only a smoke harness. Real enforcement happens on a

- `GET /live` on the configured health bind/port for liveness.
- `GET /health` for full state (process state + hop status details).
- `GET /ready` for gating dependent services and automation. This returns non-200 when `pproxy` is down, FunkyDNS is required but not running, or hop checks fail (by default).
- `GET /ready` for gating dependent services and automation. This returns non-200 when `pproxy` is down, FunkyDNS is required but not running, or hop checks fail (by default). CONNECT denials such as `403` and `407` stay visible in `/health`, but do not count as ready.

## Expected traffic model

Expand Down
16 changes: 15 additions & 1 deletion egressd/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,16 @@ def parse_proxy_url(url: str) -> Tuple[str, int, Optional[str]]:
return host, port, auth_header


def _parse_http_status_code(status_line: str) -> Optional[int]:
parts = status_line.split()
if len(parts) < 2:
return None
try:
return int(parts[1])
except ValueError:
return None


def check_hop_connectivity(hop_url: str, target: str, timeout: float = 3.0) -> Dict[str, Any]:
start = time.time()
checked_at = int(start)
Expand All @@ -254,11 +264,15 @@ def check_hop_connectivity(hop_url: str, target: str, timeout: float = 3.0) -> D
sock.sendall(request.encode("utf-8"))
response = sock.recv(4096).decode("utf-8", errors="ignore")
status_line = response.splitlines()[0] if response else "<no-response>"
ok = any(code in status_line for code in (" 200 ", " 403 ", " 407 "))
status_code = _parse_http_status_code(status_line)
reachable = status_code is not None
ok = status_code is not None and 200 <= status_code < 300
result = {
"ok": ok,
"reachable": reachable,
"proxy": proxy_label,
"status_line": status_line,
"status_code": status_code,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error path missing new reachable and status_code fields

Medium Severity

check_hop_connectivity returns reachable and status_code in the success path (when a response is received) but omits them from the except branch (connection failure, timeout, etc.). Since the hop statuses are exposed verbatim through the /health endpoint, API consumers and any future code that accesses result["reachable"] on an error result will get a KeyError. A connection-refused hop is definitively not reachable, so "reachable": False and "status_code": None belong in the error dict too.

Additional Locations (1)
Fix in Cursor Fix in Web

"elapsed_ms": int((time.time() - start) * 1000),
"checked_at": checked_at,
}
Expand Down
18 changes: 17 additions & 1 deletion egressd/test_supervisor_readiness.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,30 @@ def test_ready_when_pproxy_running_and_hops_are_healthy(self) -> None:
"funkydns": "disabled",
"hops": {
"hop_0": {"ok": True, "status_line": "HTTP/1.1 200 Connection Established"},
"hop_1": {"ok": True, "status_line": "HTTP/1.1 407 Proxy Authentication Required"},
"hop_1": {"ok": True, "status_line": "HTTP/1.1 200 Connection Established"},
},
"hops_last_update": now - 3,
}
readiness = supervisor.compute_readiness(state, cfg, now=now)
self.assertTrue(readiness["ready"])
self.assertEqual([], readiness["reasons"])

def test_not_ready_when_proxy_demands_auth(self) -> None:
cfg = sample_cfg()
now = 1_500
state = {
"pproxy": "running",
"funkydns": "disabled",
"hops": {
"hop_0": {"ok": True, "status_line": "HTTP/1.1 200 Connection Established"},
"hop_1": {"ok": False, "status_line": "HTTP/1.1 407 Proxy Authentication Required"},
},
"hops_last_update": now - 1,
}
readiness = supervisor.compute_readiness(state, cfg, now=now)
self.assertFalse(readiness["ready"])
self.assertIn("hop_1_down", readiness["reasons"])

def test_not_ready_when_hop_checks_are_stale(self) -> None:
cfg = sample_cfg()
now = 2_000
Expand Down
Loading
Loading