From 8fb33f6b2cc79c4c6b1004f608647a3ed1c38ec8 Mon Sep 17 00:00:00 2001 From: dp-web4 Date: Thu, 26 Mar 2026 11:27:19 -0700 Subject: [PATCH] feat(web4-governance): add governance plugin with R6 workflow (Tiers 1-4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Web4 governance plugin for Claude Code hooks — structured audit trails, trust tensors, entity witnessing, policy gating, and event streaming. Tiers: observational audit (T1), policy presets and rate limiting (T1.5), signing and persistent witnesses (T2), multi-target extraction (T3), event stream monitoring (T4). See plugins/web4-governance/README.md for full documentation. PR: anthropics/claude-code#20448 Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 6 + .../.claude-plugin/plugin.json | 12 + plugins/web4-governance/.gitattributes | 8 + plugins/web4-governance/EVENT_STREAM_API.md | 325 ++++++++ plugins/web4-governance/FEATURE_REQUEST.md | 43 + plugins/web4-governance/HOOK_STDERR_NOTE.md | 117 +++ plugins/web4-governance/LICENSE | 21 + plugins/web4-governance/PRESETS.md | 201 +++++ plugins/web4-governance/PR_DESCRIPTION.md | 42 + plugins/web4-governance/README.md | 659 ++++++++++++++++ plugins/web4-governance/commands/audit.md | 98 +++ plugins/web4-governance/deploy.sh | 92 +++ .../docs/RUST_CORE_PROPOSAL.md | 324 ++++++++ .../web4-governance/governance/__init__.py | 203 +++++ .../governance/agent_governance.py | 710 +++++++++++++++++ .../governance/entity_trust.py | 723 +++++++++++++++++ .../governance/event_stream.py | 510 ++++++++++++ plugins/web4-governance/governance/ledger.py | 737 ++++++++++++++++++ .../web4-governance/governance/matchers.py | 193 +++++ .../governance/persistent_rate_limiter.py | 270 +++++++ .../governance/policy_entity.py | 568 ++++++++++++++ .../web4-governance/governance/presence.py | 278 +++++++ plugins/web4-governance/governance/presets.py | 367 +++++++++ .../governance/rate_limiter.py | 150 ++++ 
.../web4-governance/governance/references.py | 572 ++++++++++++++ .../web4-governance/governance/reporter.py | 378 +++++++++ .../web4-governance/governance/role_trust.py | 590 ++++++++++++++ .../governance/session_manager.py | 303 +++++++ plugins/web4-governance/governance/signing.py | 123 +++ .../web4-governance/governance/soft_lct.py | 209 +++++ .../governance/target_extraction.py | 256 ++++++ plugins/web4-governance/governance/tensors.py | 486 ++++++++++++ .../governance/trust_backend.py | 231 ++++++ plugins/web4-governance/hooks/heartbeat.py | 236 ++++++ plugins/web4-governance/hooks/hooks.json | 40 + .../web4-governance/hooks/post_tool_use.py | 265 +++++++ plugins/web4-governance/hooks/pre_tool_use.py | 697 +++++++++++++++++ .../web4-governance/hooks/session_start.py | 183 +++++ .../web4-governance/settings.template.json | 36 + plugins/web4-governance/test_agent_flow.py | 99 +++ plugins/web4-governance/test_entity_trust.py | 191 +++++ .../test_governance_session.py | 392 ++++++++++ plugins/web4-governance/test_heartbeat.py | 57 ++ plugins/web4-governance/test_policy_entity.py | 406 ++++++++++ plugins/web4-governance/test_presence.py | 188 +++++ plugins/web4-governance/test_tier1_5.py | 425 ++++++++++ web4-governance-issue.md | 217 ++++++ 47 files changed, 13237 insertions(+) create mode 100644 plugins/web4-governance/.claude-plugin/plugin.json create mode 100644 plugins/web4-governance/.gitattributes create mode 100644 plugins/web4-governance/EVENT_STREAM_API.md create mode 100644 plugins/web4-governance/FEATURE_REQUEST.md create mode 100644 plugins/web4-governance/HOOK_STDERR_NOTE.md create mode 100644 plugins/web4-governance/LICENSE create mode 100644 plugins/web4-governance/PRESETS.md create mode 100644 plugins/web4-governance/PR_DESCRIPTION.md create mode 100644 plugins/web4-governance/README.md create mode 100644 plugins/web4-governance/commands/audit.md create mode 100644 plugins/web4-governance/deploy.sh create mode 100644 
plugins/web4-governance/docs/RUST_CORE_PROPOSAL.md create mode 100644 plugins/web4-governance/governance/__init__.py create mode 100644 plugins/web4-governance/governance/agent_governance.py create mode 100644 plugins/web4-governance/governance/entity_trust.py create mode 100644 plugins/web4-governance/governance/event_stream.py create mode 100644 plugins/web4-governance/governance/ledger.py create mode 100644 plugins/web4-governance/governance/matchers.py create mode 100644 plugins/web4-governance/governance/persistent_rate_limiter.py create mode 100644 plugins/web4-governance/governance/policy_entity.py create mode 100644 plugins/web4-governance/governance/presence.py create mode 100644 plugins/web4-governance/governance/presets.py create mode 100644 plugins/web4-governance/governance/rate_limiter.py create mode 100644 plugins/web4-governance/governance/references.py create mode 100644 plugins/web4-governance/governance/reporter.py create mode 100644 plugins/web4-governance/governance/role_trust.py create mode 100644 plugins/web4-governance/governance/session_manager.py create mode 100644 plugins/web4-governance/governance/signing.py create mode 100644 plugins/web4-governance/governance/soft_lct.py create mode 100644 plugins/web4-governance/governance/target_extraction.py create mode 100644 plugins/web4-governance/governance/tensors.py create mode 100644 plugins/web4-governance/governance/trust_backend.py create mode 100644 plugins/web4-governance/hooks/heartbeat.py create mode 100644 plugins/web4-governance/hooks/hooks.json create mode 100755 plugins/web4-governance/hooks/post_tool_use.py create mode 100755 plugins/web4-governance/hooks/pre_tool_use.py create mode 100755 plugins/web4-governance/hooks/session_start.py create mode 100644 plugins/web4-governance/settings.template.json create mode 100644 plugins/web4-governance/test_agent_flow.py create mode 100644 plugins/web4-governance/test_entity_trust.py create mode 100644 
plugins/web4-governance/test_governance_session.py create mode 100644 plugins/web4-governance/test_heartbeat.py create mode 100644 plugins/web4-governance/test_policy_entity.py create mode 100644 plugins/web4-governance/test_presence.py create mode 100644 plugins/web4-governance/test_tier1_5.py create mode 100644 web4-governance-issue.md diff --git a/.gitignore b/.gitignore index 5ca0973f8f..71d865688d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ .DS_Store +*.ffs_db +.gitnexus +.claude/skills/gitnexus/ +CLAUDE.md +AGENTS.md +__pycache__/ diff --git a/plugins/web4-governance/.claude-plugin/plugin.json b/plugins/web4-governance/.claude-plugin/plugin.json new file mode 100644 index 0000000000..5a93454cf9 --- /dev/null +++ b/plugins/web4-governance/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "web4-governance", + "version": "1.0.0", + "description": "Lightweight AI governance with R6 workflow formalism and audit trails. Every tool call becomes a structured request with verifiable provenance.", + "author": { + "name": "Web4 Contributors", + "email": "web4@metalinxx.io" + }, + "repository": "https://github.com/dp-web4/web4", + "keywords": ["governance", "audit", "trust", "r6", "provenance", "web4"], + "license": "MIT" +} diff --git a/plugins/web4-governance/.gitattributes b/plugins/web4-governance/.gitattributes new file mode 100644 index 0000000000..0c0e74e40b --- /dev/null +++ b/plugins/web4-governance/.gitattributes @@ -0,0 +1,8 @@ +# Force LF line endings for scripts (critical for shebang on Unix/WSL) +*.py text eol=lf +*.sh text eol=lf + +# Other text files can use native line endings +*.md text +*.json text +*.txt text diff --git a/plugins/web4-governance/EVENT_STREAM_API.md b/plugins/web4-governance/EVENT_STREAM_API.md new file mode 100644 index 0000000000..276eedc2b0 --- /dev/null +++ b/plugins/web4-governance/EVENT_STREAM_API.md @@ -0,0 +1,325 @@ +# Event Stream API + +Real-time monitoring endpoint for the Web4 Governance plugin. 
+ +## Overview + +The event stream provides a JSONL (JSON Lines) file that external clients can tail for real-time monitoring, alerting, and analytics. + +**Stream Location**: `~/.web4/events.jsonl` + +## Quick Start + +### Tail the stream (real-time) +```bash +tail -f ~/.web4/events.jsonl | jq . +``` + +### Filter by severity +```bash +tail -f ~/.web4/events.jsonl | jq -c 'select(.severity == "alert")' +``` + +### Filter by event type +```bash +grep '"type":"policy_decision"' ~/.web4/events.jsonl | jq . +``` + +### Python consumer +```python +import json +import os + +with open(os.path.expanduser("~/.web4/events.jsonl"), "r") as f: + for line in f: + event = json.loads(line) + if event.get("severity") == "alert": + print(f"ALERT: {event.get('reason')}") +``` + +--- + +## Event Schema + +Each line in the stream is a self-contained JSON object: + +```json +{ + "type": "policy_decision", + "timestamp": "2026-02-05T10:30:00.123456+00:00", + "severity": "alert", + "session_id": "sess-abc123", + "tool": "Bash", + "target": "rm -rf /tmp/test", + "category": "command", + "decision": "deny", + "reason": "Destructive command blocked by safety preset", + "rule_id": "deny-destructive-commands" +} +``` + +### Required Fields + +| Field | Type | Description | +|-------|------|-------------| +| `type` | string | Event type (see Event Types below) | +| `timestamp` | string | ISO 8601 UTC timestamp | +| `severity` | string | Severity level: `debug`, `info`, `warn`, `alert`, `error` | + +### Optional Context Fields + +| Field | Type | Description | +|-------|------|-------------| +| `session_id` | string | Session identifier | +| `agent_id` | string | Agent/role identifier | + +### Event-Specific Fields + +| Field | Type | Description | +|-------|------|-------------| +| `tool` | string | Tool name (Bash, Read, Edit, etc.) | +| `target` | string | Target path, URL, or command | +| `category` | string | Tool category (file_read, file_write, command, network, etc.)
| +| `decision` | string | Policy decision: `allow`, `deny`, `warn` | +| `reason` | string | Human-readable explanation | +| `rule_id` | string | ID of matched policy rule | + +### Metrics Fields + +| Field | Type | Description | +|-------|------|-------------| +| `duration_ms` | integer | Operation duration in milliseconds | +| `count` | integer | Generic count (rate limits, etc.) | + +### Trust Fields + +| Field | Type | Description | +|-------|------|-------------| +| `trust_before` | float | Trust value before update (0.0-1.0) | +| `trust_after` | float | Trust value after update (0.0-1.0) | +| `trust_delta` | float | Change in trust value | + +### Error Fields + +| Field | Type | Description | +|-------|------|-------------| +| `error` | string | Error message | +| `error_type` | string | Error class/type | + +### Extensible Metadata + +| Field | Type | Description | +|-------|------|-------------| +| `metadata` | object | Additional key-value data | + +--- + +## Event Types + +### Session Lifecycle + +| Type | Severity | Description | +|------|----------|-------------| +| `session_start` | info | New session started | +| `session_end` | info | Session ended | + +**Example:** +```json +{"type":"session_start","timestamp":"2026-02-05T10:00:00Z","severity":"info","session_id":"sess-abc123","metadata":{"project":"my-app","atp_budget":100}} +``` + +### Tool Execution + +| Type | Severity | Description | +|------|----------|-------------| +| `tool_call` | info | Tool invocation started | +| `tool_result` | info | Tool completed | + +**Example:** +```json +{"type":"tool_call","timestamp":"2026-02-05T10:01:00Z","severity":"info","session_id":"sess-abc123","tool":"Read","target":"/app/src/main.py","category":"file_read"} +``` + +### Policy Decisions + +| Type | Severity | Description | +|------|----------|-------------| +| `policy_decision` | varies | Policy evaluated (info=allow, warn=warn, alert=deny) | +| `policy_violation` | alert | Policy rule violated | + 
+**Example (deny):** +```json +{"type":"policy_decision","timestamp":"2026-02-05T10:02:00Z","severity":"alert","session_id":"sess-abc123","tool":"Bash","target":"rm -rf /","decision":"deny","reason":"Destructive command blocked by safety preset","rule_id":"deny-destructive-commands"} +``` + +**Example (warn):** +```json +{"type":"policy_decision","timestamp":"2026-02-05T10:03:00Z","severity":"warn","session_id":"sess-abc123","tool":"Bash","target":"rm temp.txt","decision":"warn","reason":"File deletion flagged - use with caution","rule_id":"warn-file-delete"} +``` + +### Rate Limiting + +| Type | Severity | Description | +|------|----------|-------------| +| `rate_limit_check` | debug | Rate limit checked | +| `rate_limit_exceeded` | alert | Rate limit exceeded | + +**Example:** +```json +{"type":"rate_limit_exceeded","timestamp":"2026-02-05T10:04:00Z","severity":"alert","session_id":"sess-abc123","target":"ratelimit:bash:tool","count":6,"metadata":{"max_count":5}} +``` + +### Trust Updates + +| Type | Severity | Description | +|------|----------|-------------| +| `trust_update` | info | Agent trust level changed | + +**Example:** +```json +{"type":"trust_update","timestamp":"2026-02-05T10:05:00Z","severity":"info","session_id":"sess-abc123","agent_id":"code-reviewer","trust_before":0.5,"trust_after":0.55,"trust_delta":0.05,"reason":"Successful code review"} +``` + +### Agent Lifecycle + +| Type | Severity | Description | +|------|----------|-------------| +| `agent_spawn` | info | Agent spawned | +| `agent_complete` | info | Agent completed | + +**Example:** +```json +{"type":"agent_spawn","timestamp":"2026-02-05T10:06:00Z","severity":"info","session_id":"sess-abc123","agent_id":"test-runner","metadata":{"capabilities":{"can_write":true,"can_execute":true}}} +``` + +### Audit Events + +| Type | Severity | Description | +|------|----------|-------------| +| `audit_record` | info | Standard audit record | +| `audit_alert` | alert | High-priority audit event 
(credential access, etc.) | + +**Example (credential access alert):** +```json +{"type":"audit_alert","timestamp":"2026-02-05T10:07:00Z","severity":"alert","session_id":"sess-abc123","tool":"Read","target":"/home/user/.aws/credentials","category":"credential_access","reason":"Credential file access detected"} +``` + +### System Events + +| Type | Severity | Description | +|------|----------|-------------| +| `system_info` | info | System information | +| `system_error` | error | System error | + +**Example:** +```json +{"type":"system_error","timestamp":"2026-02-05T10:08:00Z","severity":"error","error":"Database connection failed","error_type":"sqlite3.OperationalError"} +``` + +--- + +## Severity Levels + +| Level | When Used | Action | +|-------|-----------|--------| +| `debug` | Verbose debugging | Usually filtered | +| `info` | Normal operations | Log/monitor | +| `warn` | Potential issues | Review | +| `alert` | Security events, policy violations | Immediate attention | +| `error` | System errors | Investigate | + +--- + +## File Rotation + +The stream file automatically rotates at 100MB: +- Current: `~/.web4/events.jsonl` +- Rotated: `~/.web4/events.jsonl.1` + +Only one backup is kept. For long-term retention, configure an external log collector. + +--- + +## Integration Examples + +### Forward to syslog +```bash +tail -f ~/.web4/events.jsonl | while read line; do + logger -t web4-governance "$line" +done +``` + +### Send alerts to Slack +```python +import json +import requests + +WEBHOOK_URL = "https://hooks.slack.com/services/..." 
+import os +import time + +with open(os.path.expanduser("~/.web4/events.jsonl"), "r") as f: + f.seek(0, 2) # Seek to end + while True: + line = f.readline() + if not line: + time.sleep(0.5) # Avoid busy-waiting when no new events + continue + event = json.loads(line) + if event.get("severity") == "alert": + requests.post(WEBHOOK_URL, json={ + "text": f":warning: {event.get('type')}: {event.get('reason')}" + }) +``` + +### Prometheus metrics (conceptual) +```python +from prometheus_client import Counter + +policy_decisions = Counter('web4_policy_decisions', 'Policy decisions', ['decision']) + +# In your event consumer: +if event["type"] == "policy_decision": + policy_decisions.labels(decision=event["decision"]).inc() +``` + +### Structured logging (JSON to stdout) +```bash +tail -f ~/.web4/events.jsonl | jq -c '{ + time: .timestamp, + level: .severity, + msg: .reason // .type, + tool: .tool, + session: .session_id +}' +``` + +--- + +## Configuration + +The event stream can be configured when initializing: + +```python +from governance.event_stream import EventStream, Severity + +# Custom location and minimum severity +stream = EventStream( + storage_path="~/.my-app/governance", + filename="audit-events.jsonl", + min_severity=Severity.WARN # Only emit WARN and above +) +``` + +--- + +## Best Practices + +1. **Use `tail -f`** for real-time monitoring rather than polling +2. **Filter by severity** to reduce noise (`alert` for critical events) +3. **Use `jq`** for ad-hoc queries and formatting +4. **Configure external log rotation** for long-term retention +5. 
**Register callbacks** for in-process alerting (low latency) + +--- + +## Version History + +| Version | Changes | +|---------|---------| +| 0.4.0 | Initial event stream API | diff --git a/plugins/web4-governance/FEATURE_REQUEST.md b/plugins/web4-governance/FEATURE_REQUEST.md new file mode 100644 index 0000000000..369c7a4a86 --- /dev/null +++ b/plugins/web4-governance/FEATURE_REQUEST.md @@ -0,0 +1,43 @@ +# Feature Request: Agent Governance via Hooks + +## The Problem + +Agents execute tools, modify files, run commands — no standardized way to: +1. Know what happened +2. Prove the log wasn't tampered with +3. Block actions before they execute + +Current options: platform-locked (AWS), theory-heavy (academic), or lack agent semantics (enterprise audit). + +## Proposed Solution + +Governance plugin using existing `pre_tool_use` / `post_tool_use` hooks. + +**Capabilities:** +- Audit trail with structured records +- Hash-linked chain (tamper-evident) +- Pre-action gating: allow/deny/warn +- Policy presets: permissive, safety, strict, audit-only + +## Implementation + +Working code: [PR #20448](https://github.com/anthropics/claude-code/pull/20448) + +Already running in [Moltbot](https://github.com/dp-web4/moltbot/tree/main/extensions/web4-governance) — same framework, different runtime. Proves portability. + +## Scope Boundaries + +- **IS**: Inspectable, accountable, governable +- **IS NOT**: "Safe" or "correct" +- **IS**: Bounded by what hooks expose +- **IS NOT**: Complete coverage of all behaviors + +Trust metrics are operational heuristics for permissioning — not alignment signals. + +## Why Now + +Agents are acting autonomously at scale. The governance gap is no longer theoretical. 
+ +--- + +**Whitepaper**: https://dp-web4.github.io/web4/ diff --git a/plugins/web4-governance/HOOK_STDERR_NOTE.md b/plugins/web4-governance/HOOK_STDERR_NOTE.md new file mode 100644 index 0000000000..5f092180e2 --- /dev/null +++ b/plugins/web4-governance/HOOK_STDERR_NOTE.md @@ -0,0 +1,117 @@ +# Hook Errors in Claude Code - Diagnosis and Resolution + +**Date**: 2026-01-30 +**Affected Version**: Claude Code 2.1.25 +**Status**: RESOLVED + +## Issue + +Claude Code displayed "hook error" messages for every tool use: +``` +● Read 2 files (ctrl+o to expand) + ⎿ PreToolUse:Read hook error + ⎿ PostToolUse:Read hook error +``` + +The hooks were exiting with code 0 and appeared to function correctly when tested manually. + +## Root Causes (Multiple) + +### 1. CRLF Line Endings (Primary Cause) + +Hook files on Windows-mounted drives (`/mnt/c/...`) had Windows-style CRLF line endings. This caused the shebang to be parsed incorrectly: + +``` +/usr/bin/env: 'python3\r': No such file or directory +``` + +The `\r` (carriage return) was included in the interpreter name, making `python3\r` instead of `python3`. + +**Fix**: Convert all hook files to Unix LF line endings: +```bash +cd hooks/ +for f in *.py; do sed -i 's/\r$//' "$f"; done +``` + +### 2. Unresolved Environment Variable in Local Settings + +A local settings file (`ai-agents/.claude/settings.local.json`) used `$CLAUDE_PROJECT_DIR` in hook paths: +```json +"command": "$CLAUDE_PROJECT_DIR/web4/claude-code-plugin/hooks/pre_tool_use.py" +``` + +This variable wasn't set, causing path resolution to fail. + +**Fix**: Removed hooks section from local settings, letting global settings (`~/.claude/settings.json`) with absolute paths take effect. + +### 3. 
Stderr Output (Initial Suspicion - Partial) + +The hooks originally output informational messages to stderr: +```python +print(f"[Web4] Session recovered: {token_short}", file=sys.stderr) +print(f"[R6] {category}:{target} {coherence}", file=sys.stderr) +``` + +Claude Code displays any stderr output as "hook error" regardless of exit code. + +**Fix**: Removed informational stderr prints. Only actual errors/blocks now write to stderr. + +## Resolution Summary + +| Issue | Fix | File(s) | +|-------|-----|---------| +| CRLF line endings | `sed -i 's/\r$//'` | All `hooks/*.py` | +| Unresolved `$CLAUDE_PROJECT_DIR` | Removed hooks from local settings | `ai-agents/.claude/settings.local.json` | +| Informational stderr | Removed non-error prints | `hooks/pre_tool_use.py`, `hooks/session_start.py` | + +## Verification + +After fixes, hooks run silently and log correctly: + +```bash +# Test hook directly - should produce no output +echo '{"session_id":"test","tool_name":"Bash","tool_input":{}}' | \ + ./pre_tool_use.py 2>&1; echo "Exit: $?" +# Expected: Exit: 0 + +# Verify R6 logging +tail -1 ~/.web4/r6/$(date +%Y-%m-%d).jsonl | python3 -m json.tool + +# Verify audit logging +tail -1 ~/.web4/audit/*.jsonl | python3 -m json.tool +``` + +## Recommendations for WSL/Windows Development + +1. **Configure Git for LF line endings**: + ```bash + git config --global core.autocrlf input + ``` + +2. **Use `.gitattributes`** in the repo: + ``` + *.py text eol=lf + *.sh text eol=lf + ``` + +3. **Use absolute paths** in global hook settings, avoid environment variables + +4. **Test hooks from different directories** - working directory affects import resolution + +5. 
**Only use stderr for actual errors** that should alert the user + +## Affected Files + +- `hooks/pre_tool_use.py` - CRLF fix, removed informational prints +- `hooks/post_tool_use.py` - CRLF fix +- `hooks/session_start.py` - CRLF fix, removed informational prints +- `hooks/heartbeat.py` - CRLF fix +- `ai-agents/.claude/settings.local.json` - Removed duplicate hooks section + +## Notes + +The hooks continue to: +- Log R6 audit data to `~/.web4/r6/` +- Track audit trails in `~/.web4/audit/` +- Maintain session state in `~/.web4/sessions/` +- Enforce policy decisions (actual blocks still output to stderr as intended) diff --git a/plugins/web4-governance/LICENSE b/plugins/web4-governance/LICENSE new file mode 100644 index 0000000000..72201028a4 --- /dev/null +++ b/plugins/web4-governance/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Web4 Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/plugins/web4-governance/PRESETS.md b/plugins/web4-governance/PRESETS.md new file mode 100644 index 0000000000..ef839685e2 --- /dev/null +++ b/plugins/web4-governance/PRESETS.md @@ -0,0 +1,201 @@ +# Policy Presets Reference + +This document details exactly what each policy preset blocks, warns on, and allows. + +## Overview + +| Preset | Default | Enforce | Use Case | +| ------------ | ------- | ------- | ----------------------------------------- | +| `permissive` | allow | false | Pure observation, no blocking | +| `safety` | allow | true | Recommended for most users | +| `strict` | deny | true | High-security environments | +| `audit-only` | allow | false | Testing rules before enabling enforcement | + +--- + +## `permissive` + +**Purpose**: Pure observation mode. Logs everything, blocks nothing. + +| Action | What | +| --------- | ---------- | +| **Deny** | Nothing | +| **Warn** | Nothing | +| **Allow** | Everything | + +**Rules**: None + +**When to use**: Development, debugging, or when you want audit logs without any interference. + +--- + +## `safety` (Recommended) + +**Purpose**: Block dangerous operations while allowing normal development work. 
+ +### What Gets Denied + +| Rule ID | Trigger | Example Blocked | +| --------------------------- | ------------------------------------------ | ---------------------------------------- | +| `deny-destructive-commands` | `rm` with ANY flags, or `mkfs.*` | `rm -rf ./build`, `rm -f file`, `mkfs.*` | +| `deny-secret-files` | Reading files matching credential patterns | `.env`, `.aws/credentials`, `*.apikey` | + +**Why all `rm -` flags are blocked:** + +- `rm -f` - bypasses confirmation prompts +- `rm -r` - recursive deletion +- `rm -i` - interactive mode (agent can't respond to prompts) +- Any flag combination is risky for autonomous agents + +**Full list of blocked credential patterns:** + +Environment & secrets: +- `**/.env`, `**/.env.*` +- `**/credentials.*`, `**/*secret*` +- `**/token*.json`, `**/auth*.json`, `**/*apikey*` + +Cloud providers: +- `**/.aws/credentials`, `**/.aws/config` + +SSH: +- `**/.ssh/id_*`, `**/.ssh/config` + +Package managers: +- `**/.npmrc`, `**/.pypirc` + +Databases: +- `**/.netrc`, `**/.pgpass`, `**/.my.cnf` + +Containers & orchestration: +- `**/.docker/config.json` +- `**/.kube/config` + +Encryption keys: +- `**/.gnupg/*`, `**/.gpg/*` + +### What Gets Warned + +| Rule ID | Trigger | Example Flagged | +| ------------------- | ----------------------------- | -------------------------- | +| `warn-file-delete` | Plain `rm` (no flags) | `rm file.txt` | +| `warn-memory-write` | Writing to agent memory files | `MEMORY.md`, `memory/*.md` | +| `warn-network` | Any network access | `WebFetch`, `WebSearch` | + +**Memory file patterns warned:** + +- `**/MEMORY.md`, `**/memory.md` +- `**/memory/**/*.md` +- `**/.web4/**/memory*` +- `**/.claude/**/memory*` + +### What Gets Allowed + +Everything else, including: + +- Normal file reads (source code, configs without credentials) +- Normal file writes (code, docs) +- Safe Bash commands (`ls`, `git`, `npm`, etc.) +- Task delegation to sub-agents + +--- + +## `strict` + +**Purpose**: Maximum security. 
Denies everything by default, only allows read-only operations. + +### What Gets Denied + +| Action | What | +| ---------------- | --------------------------------- | +| **Default deny** | Everything not explicitly allowed | + +This means: + +- All Bash commands (including safe ones like `ls`) +- All file writes (`Write`, `Edit`, `NotebookEdit`) +- All network access (`WebFetch`, `WebSearch`) +- All Task delegation + +### What Gets Allowed + +| Rule ID | Tools Allowed | +| ------------------ | ----------------------------------- | +| `allow-read-tools` | `Read`, `Glob`, `Grep`, `TodoWrite` | + +**When to use**: High-security environments, compliance scenarios, or when you need strict control over what the agent can do. + +--- + +## `audit-only` + +**Purpose**: Same rules as `safety`, but enforcement is disabled (dry-run mode). + +### Behavior + +- Uses the exact same rules as `safety` +- Logs what **would** be blocked/warned +- Does **not** actually block anything + +| What safety would... | audit-only does... | +| -------------------- | ------------------ | +| Deny | Logs "would deny" | +| Warn | Logs warning | +| Allow | Allows | + +**When to use**: Testing policy rules before enabling `safety`, or when you want to see what would be blocked without actually blocking. + +--- + +## Choosing a Preset + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Do you need to block dangerous operations? │ +│ │ +│ NO ──────────────────────────┬──────────────────────────── │ +│ │ │ │ +│ │ Want audit logs? │ │ +│ │ YES → permissive │ │ +│ │ NO → (no plugin needed) │ │ +│ │ │ +│ YES ─────────────────────────┼──────────────────────────── │ +│ │ │ +│ │ Maximum security? │ │ +│ │ YES → strict │ │ +│ │ │ │ +│ │ NO ───────────────────────┘ │ +│ │ │ +│ │ Ready for enforcement? 
│ +│ │ YES → safety (recommended) │ +│ │ NO → audit-only (test first) │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Custom Rules + +All presets can be extended with custom rules via `resolve_preset()`: + +```python +from governance.presets import resolve_preset, PolicyRule, PolicyMatch + +# Add a custom rule to the safety preset +config = resolve_preset( + "safety", + additional_rules=[ + PolicyRule( + id="block-prod-writes", + name="Block writes to production config", + priority=1, + decision="deny", + reason="Production config is read-only", + match=PolicyMatch( + categories=["file_write"], + target_patterns=["**/config/production.*"], + ), + ), + ], +) +``` + +See [README.md](./README.md) for full API documentation. diff --git a/plugins/web4-governance/PR_DESCRIPTION.md b/plugins/web4-governance/PR_DESCRIPTION.md new file mode 100644 index 0000000000..bbfe2e7d94 --- /dev/null +++ b/plugins/web4-governance/PR_DESCRIPTION.md @@ -0,0 +1,42 @@ +# PR Description: Agent Governance Plugin + +## The Problem + +No way to answer "what did the agent do and why?" + +- No audit trail +- No pre-execution policy gates +- No tamper-evident provenance + +## The Solution + +Hook-based governance using `pre_tool_use` / `post_tool_use`. Zero core changes. + +**What you get:** +- Searchable audit trail (R6 structured records) +- Hash-linked chain (tamper-evident) +- Policy engine: allow/deny/warn before execution +- Presets: permissive, safety, strict, audit-only + +**Key files:** +- `hooks/pre_tool_use.py` — policy check, log intent +- `hooks/post_tool_use.py` — log result, extend chain +- `governance/` — ledger, policy, rate limiting + +## Scope + +This makes agents **inspectable, accountable, governable** — not "safe" or "correct." We can only govern what hooks expose. Honest constraint. 
+ +## Evidence + +- 75+ tests passing +- Same framework running in [Moltbot](https://github.com/dp-web4/moltbot/tree/main/extensions/web4-governance) +- Opt-in, observational by default + +## Background + +Implements concepts from [Web4](https://dp-web4.github.io/web4/) — R6 workflow formalism, T3 trust tensors (operational heuristics for permissioning, not alignment claims). + +--- + +Happy to address questions. diff --git a/plugins/web4-governance/README.md b/plugins/web4-governance/README.md new file mode 100644 index 0000000000..8bcd39e834 --- /dev/null +++ b/plugins/web4-governance/README.md @@ -0,0 +1,659 @@ +# Web4 Governance Plugin for Claude Code + +Lightweight AI governance with R6 workflow formalism, agent trust accumulation, and audit trails. + +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +## Overview + +This plugin adds structured governance to Claude Code sessions: + +- **R6 Workflow** - Every tool call follows a formal intent→action→result flow +- **Agent Trust** - T3/V3 tensors accumulate per agent role based on outcomes +- **Persistent References** - Agents learn patterns that persist across sessions +- **Heartbeat Coherence** - Timing-based session health tracking +- **Audit Trail** - Verifiable chain of actions with provenance + +No external dependencies. No network calls. Just structured, auditable AI actions. 
+ +## What's New in v0.2 + +- **Agent Governance** - Claude Code agents map to Web4 role entities +- **Trust Tensors** - Each agent accumulates trust independently (T3/V3) +- **Reference Store** - Learned patterns persist across sessions +- **Capability Modulation** - Higher trust = more permissions +- **SQLite Ledger** - Unified storage with WAL mode for concurrent access +- **Heartbeat Ledger** - Timing coherence tracking + +## Tier 1.5 Features (NEW) + +Four new features harmonized with the [moltbot implementation](https://github.com/dp-web4/moltbot): + +### Policy Presets + +Built-in rule sets for common governance postures: + +```python +from governance import resolve_preset, list_presets + +# List available presets +for p in list_presets(): + print(f"{p.name}: {p.description}") + +# Use a preset with overrides +config = resolve_preset("safety", enforce=False) +``` + +| Preset | Default | Enforce | Description | +|--------|---------|---------|-------------| +| `permissive` | allow | false | Pure observation, no rules | +| `safety` | allow | true | Block `rm -`/secrets, warn on file delete/memory/network | +| `strict` | deny | true | Only allow Read, Glob, Grep, TodoWrite | +| `audit-only` | allow | false | Same as safety but dry-run mode | + +For detailed information on what each preset blocks, warns, and allows, see **[PRESETS.md](./PRESETS.md)**. 
+ +**Safety preset highlights:** +- **Denies**: `rm` with any flags (not just `-rf`), `mkfs.*`, 24 credential file patterns +- **Warns**: Plain `rm`, memory file writes, network access + +### Rate Limiting + +Sliding window counters for policy enforcement: + +```python +from governance import RateLimiter + +limiter = RateLimiter() + +# Check if under limit (5 actions per 60 seconds) +result = limiter.check("ratelimit:bash:tool", max_count=5, window_ms=60000) +if result.allowed: + # Proceed and record + limiter.record("ratelimit:bash:tool") +``` + +### Audit Query + +Filter audit records by tool, category, status, target, and time: + +```python +# Query with filters +results = ledger.query_audit( + session_id="sess1", + tool="Bash", + status="error", + since="1h", # or ISO date: "2026-01-27T10:00:00Z" + limit=50 +) + +# Get aggregated stats +stats = ledger.get_audit_stats(session_id="sess1") +# Returns: {total, tool_counts, status_counts, category_counts} +``` + +### Audit Reporter + +Generate aggregated reports from audit data: + +```python +from governance import AuditReporter + +records = ledger.query_audit(session_id="sess1") +reporter = AuditReporter(records) + +# Text output +print(reporter.format_text()) + +# JSON-serializable dict +data = reporter.to_dict() +``` + +Report sections: +- **Tool Stats** — Invocations, success rate, avg duration per tool +- **Category Breakdown** — Counts and percentages per category +- **Policy Stats** — Allow/deny distribution, block rate +- **Errors** — Count by tool, top error messages +- **Timeline** — Actions per minute + +### Policy Entity (NEW) + +Policy is a first-class participant in the trust network — not just configuration, but "society's law" with identity, witnessing, and hash-tracking: + +```python +from governance import PolicyEntity, PolicyRegistry + +# Register a policy (creates hash-identified entity) +registry = PolicyRegistry() +entity = registry.register_policy("my-policy", preset="safety") + +# Entity ID 
follows: policy:<name>:<timestamp>:<content_hash>
+print(entity.entity_id) # policy:my-policy:20260128...:a1b2c3d4...
+
+# Evaluate a tool call against policy
+result = entity.evaluate("Bash", "command", "rm -rf ./temp_build")
+print(result.decision) # "deny"
+print(result.reason) # "Destructive command blocked by safety preset"
+
+# Session witnesses operating under policy
+registry.witness_session(entity.entity_id, session_id)
+
+# Policy witnesses decisions
+registry.witness_decision(entity.entity_id, session_id, "Read", "allow", success=True)
+```
+
+**Why Policy as Entity?**
+
+- **Immutable**: Once registered, the policy can't change (new version = new entity)
+- **Hash-tracked**: Content hash in entity ID ensures integrity
+- **Witnessable**: Sessions witness policy, policy witnesses decisions
+- **Auditable**: R6 records reference `policy_entity_id` in rules field
+- **Trust-integrated**: T3/V3 tensors track policy usage patterns
+
+**Hook Integration**:
+
+The PreToolUse hook automatically:
+1. Loads policy entity from session state
+2. Evaluates tool calls against policy rules
+3. Blocks denied actions (if `enforce=true`)
+4.
Witnesses decisions in the trust network + +## Tier 2 Features (NEW) + +Three new features for enhanced security and durability: + +### Ed25519 Cryptographic Signing + +Sign audit records with Ed25519 for non-repudiation: + +```python +from governance import generate_signing_keypair, sign_data, verify_signature + +# Generate keypair for a session +keypair = generate_signing_keypair() +print(f"Key ID: {keypair['key_id']}") + +# Sign audit record +import json +record = {"tool": "Bash", "target": "ls", "status": "success"} +data = json.dumps(record) +signature = sign_data(data, keypair['private_key_hex']) + +# Verify signature +is_valid = verify_signature(data, signature, keypair['public_key_hex']) +print(f"Valid: {is_valid}") # True +``` + +### Persistent Rate Limiting + +SQLite-backed rate limits that survive process restarts: + +```python +from governance import PersistentRateLimiter + +# Initialize with storage path +limiter = PersistentRateLimiter("~/.web4") + +# Check rate limit (5 actions per minute) +result = limiter.check("ratelimit:bash:tool", max_count=5, window_ms=60000) +if result.allowed: + # Action permitted + limiter.record("ratelimit:bash:tool") + +# Check if persistence is active +print(f"Persistent: {limiter.persistent}") # True if SQLite available +``` + +### Witness Persistence + +Policy witnessing relationships now persist to JSONL: + +```python +from governance import PolicyRegistry + +registry = PolicyRegistry("~/.web4") + +# Witness records are automatically persisted to ~/.web4/witnesses.jsonl +registry.witness_session(entity.entity_id, session_id) +registry.witness_decision(entity.entity_id, session_id, "Read", "allow", success=True) + +# Query witnesses +witnesses = registry.get_witnessed_by(entity.entity_id) +witnessed = registry.get_has_witnessed(f"session:{session_id}") +``` + +## Tier 3 Features (NEW) + +Advanced pattern matching, multi-target extraction, and temporal constraints: + +### Multi-Target Extraction + +Extract all file paths 
and targets from tool parameters: + +```python +from governance import extract_targets, extract_target, is_credential_target + +# Extract primary target +target = extract_target("Bash", {"command": "cat /etc/passwd"}) +# Returns: "cat /etc/passwd" + +# Extract all targets from multi-file operations +targets = extract_targets("Bash", {"command": "rm -rf /tmp/a /tmp/b ~/cache"}) +# Returns: ["/tmp/a", "/tmp/b", "~/cache"] + +# Check if target is a credential file +if is_credential_target("/home/user/.aws/credentials"): + print("Credential file detected!") + +# Classify tool with target context +from governance import classify_tool_with_target +category = classify_tool_with_target("Read", "/app/.env") +# Returns: "credential_access" (upgraded from "file_read") +``` + +### Pattern Matching + +Glob and regex pattern matching for policy rules: + +```python +from governance import matches_target, glob_to_regex, validate_regex_pattern + +# Glob pattern matching +if matches_target("/path/to/.env.local", ["**/.env*"], use_regex=False): + print("Matches credential pattern!") + +# Regex pattern matching +if matches_target("rm -rf /", [r"rm\s+-"], use_regex=True): + print("Destructive command detected!") + +# Validate regex patterns for ReDoS vulnerabilities +valid, reason = validate_regex_pattern(r"(a+)+$") +if not valid: + print(f"Unsafe pattern: {reason}") +``` + +### Temporal Constraints (TimeWindow) + +Time-based policy rules that only apply during specific windows: + +```python +from governance import TimeWindow, matches_time_window + +# Define business hours (9am-5pm, Monday-Friday, US Eastern) +window = TimeWindow( + allowed_hours=(9, 17), + allowed_days=[1, 2, 3, 4, 5], # 0=Sun, 1=Mon, ... 
6=Sat + timezone="America/New_York" +) + +# Check if current time is within the window +if matches_time_window(window): + print("Within business hours") + +# Overnight windows are supported (e.g., 10pm-6am) +overnight = TimeWindow(allowed_hours=(22, 6)) # 22:00 to 06:00 +``` + +**Use Cases:** +- Restrict dangerous operations to business hours only +- Allow maintenance windows during off-peak times +- Enforce compliance with operational schedules + +## Tier 4 Features (NEW) + +Real-time monitoring endpoint for external clients: + +### Event Stream + +JSONL-based event stream that external tools can consume for monitoring, alerting, and analytics. + +**Stream Location**: `~/.web4/events.jsonl` + +```python +from governance import EventStream, EventType, Severity + +# Initialize stream +stream = EventStream("~/.web4") + +# Emit events +stream.emit( + event_type=EventType.POLICY_DECISION, + severity=Severity.ALERT, + session_id="sess-123", + tool="Bash", + target="rm -rf /tmp/test", + decision="deny", + reason="Destructive command blocked" +) + +# Convenience methods +stream.policy_decision( + session_id="sess-123", + tool="Read", + target="/app/.env", + decision="deny", + reason="Credential file access denied" +) +``` + +**Consuming the stream:** +```bash +# Real-time tail +tail -f ~/.web4/events.jsonl | jq . + +# Filter alerts only +tail -f ~/.web4/events.jsonl | jq -c 'select(.severity == "alert")' + +# Filter by event type +grep '"type":"policy_decision"' ~/.web4/events.jsonl | jq . 
+``` + +**Event Types:** +- `session_start`, `session_end` - Session lifecycle +- `tool_call`, `tool_result` - Tool execution +- `policy_decision`, `policy_violation` - Policy enforcement +- `rate_limit_exceeded` - Rate limiting +- `trust_update` - Trust changes +- `agent_spawn`, `agent_complete` - Agent lifecycle +- `audit_alert` - High-priority audit events + +**Severity Levels:** `debug`, `info`, `warn`, `alert`, `error` + +For complete API documentation, see **[EVENT_STREAM_API.md](./EVENT_STREAM_API.md)**. + +## Installation + +### Option 1: Plugin Marketplace (Recommended) + +``` +/plugin install web4-governance +``` + +### Option 2: Manual Setup + +For standalone installation or development: + +```bash +# Run the deployment script +./deploy.sh + +# Then add hooks to your project's .claude/settings.local.json +# (The script will display the configuration) +``` + +### First Run Setup + +The plugin creates `~/.web4/` on first session: + +```bash +~/.web4/ +├── ledger.db # SQLite database (unified storage) +├── preferences.json # Your settings +├── sessions/ # Session state files +├── audit/ # Audit records (JSONL) +├── r6/ # R6 workflow logs +├── heartbeat/ # Timing coherence ledgers +└── governance/ + ├── roles/ # Per-agent trust tensors + ├── references/ # Persistent learned context + └── sessions/ # Governed session state +``` + +The SQLite ledger uses WAL mode for concurrent access, allowing multiple +parallel sessions to write simultaneously without conflicts. 
+ +## Agent Governance + +### How It Works + +``` +Agent Spawn (Task tool) Agent Complete + │ │ + ▼ ▼ +┌───────────────┐ ┌───────────────┐ +│ on_agent_spawn│ │on_agent_complete +│ - Load trust │ │ - Update trust │ +│ - Load refs │ │ - Record outcome +│ - Check caps │ └───────────────┘ +└───────────────┘ + │ + ▼ +┌───────────────────────────────────────┐ +│ Agent Runs │ +│ (with prior context injected) │ +└───────────────────────────────────────┘ +``` + +### Trust Accumulation + +Each agent role (e.g., `code-reviewer`, `test-generator`) accumulates trust independently: + +**T3 Trust Tensor (6 dimensions):** +| Dimension | What It Measures | +|-----------|------------------| +| competence | Can they do it? | +| reliability | Will they do it consistently? | +| consistency | Same quality over time? | +| witnesses | Corroborated by others? | +| lineage | Track record length | +| alignment | Values match context? | + +**Trust Updates:** +- Success: +5% (diminishing returns near 1.0) +- Failure: -10% (asymmetric - trust is hard to earn, easy to lose) + +### Persistent References + +Agents accumulate learned patterns: + +```python +# After code review +gov.extract_reference( + role_id="code-reviewer", + content="Pattern: Always check null before array access", + source="review of auth.py", + ref_type="pattern" +) +``` + +On next invocation, the agent receives prior context automatically. 
+ +### Capability Derivation + +Trust level determines capabilities: + +| Trust Level | can_write | can_execute | can_delegate | max_atp | +|-------------|-----------|-------------|--------------|---------| +| < 0.3 | ❌ | ❌ | ❌ | 37 | +| 0.3-0.4 | ✅ | ❌ | ❌ | 46 | +| 0.4-0.6 | ✅ | ✅ | ❌ | 64 | +| 0.6+ | ✅ | ✅ | ✅ | 82+ | + +## R6 Workflow + +Every tool call gets an R6 record: + +``` +R6 = Rules + Role + Request + Reference + Resource → Result +``` + +| Component | What It Captures | +|-----------|------------------| +| **Rules** | Preferences and constraints | +| **Role** | Session identity, action index, active agent | +| **Request** | Tool name, category, target | +| **Reference** | Chain position, previous R6 | +| **Resource** | ATP cost | +| **Result** | Status, output hash, trust update | + +## Heartbeat Coherence + +The plugin tracks timing between tool calls: + +- **on_time**: Within expected interval (good) +- **early**: Faster than expected (slight penalty) +- **late**: Slower but acceptable +- **gap**: Long pause detected + +Coherence score (0.0-1.0) indicates session health and can modulate trust application. + +### Heartbeat Tracking + +Every tool call records a timing heartbeat: + +```json +{ + "sequence": 47, + "timestamp": "2026-01-24T06:30:00Z", + "status": "on_time", + "delta_seconds": 45.2, + "tool_name": "Edit", + "entry_hash": "a1b2c3d4..." +} +``` + +**Timing status:** +- `on_time` - Normal interval (30-90 seconds) +- `early` - Faster than expected +- `late` - Slower than expected +- `gap` - Long pause (>3 minutes) + +**Timing coherence** score (0.0-1.0) indicates session regularity. Irregular patterns may indicate interruptions or context switches. 
+ +## Commands + +| Command | Description | +|---------|-------------| +| `/audit` | Show session audit summary | +| `/audit last 10` | Show last 10 actions | +| `/audit verify` | Verify chain integrity | +| `/audit export` | Export audit log | + +## Configuration + +Create `~/.web4/preferences.json`: + +```json +{ + "audit_level": "standard", + "show_r6_status": true, + "action_budget": null +} +``` + +**audit_level**: +- `minimal` - Just record, no output +- `standard` - Session start message +- `verbose` - Show each R6 request with coherence indicator + +## Testing + +```bash +# Test heartbeat system +python3 test_heartbeat.py + +# Test agent governance flow +python3 test_agent_flow.py + +# Test entity trust and witnessing +python3 test_entity_trust.py + +# Test Tier 1.5 features (presets, rate limiting, audit query, reporter) +python3 -m pytest test_tier1_5.py -v +``` + +Run all tests: +```bash +python3 -m pytest test_*.py -v +# 50 tests passing +``` + +## Governance Module + +The plugin includes a Python governance module (`governance/`): + +```python +from governance import Ledger, SoftLCT, SessionManager, AgentGovernance + +# Start a session with automatic numbering +sm = SessionManager() +session = sm.start_session(project='my-project', atp_budget=100) +print(f"Session #{session['session_number']}") + +# Record actions +sm.record_action('Edit', target='src/main.py', status='success') + +# Agent governance +gov = AgentGovernance() +ctx = gov.on_agent_spawn(session_id, "code-reviewer") +result = gov.on_agent_complete(session_id, "code-reviewer", success=True) + +# Get session summary +print(sm.get_session_summary()) +``` + +**ATP Accounting**: Each session has an action budget (default 100). Actions consume ATP, enabling cost tracking. 
+ +## Files + +``` +~/.web4/ +├── ledger.db # SQLite database (primary storage) +│ ├── identities # Soft LCT tokens +│ ├── sessions # Session tracking, ATP accounting +│ ├── session_sequence # Atomic session numbering per project +│ ├── heartbeats # Timing coherence records +│ ├── audit_trail # Tool use records +│ └── work_products # Files, commits registered +├── preferences.json # User preferences +├── sessions/ # Session state (JSON) +├── audit/ # Audit records (JSONL) +├── r6/ # R6 request logs +├── heartbeat/ # Timing coherence ledgers +└── governance/ + ├── roles/ # Trust tensors per agent + └── references/ # Learned context per agent +``` + +The SQLite ledger provides: +- **Unified storage** - All data in one file +- **Concurrent access** - WAL mode for parallel sessions +- **Atomic operations** - No duplicate session numbers +- **Cross-table queries** - Join heartbeat + audit data + +## Web4 Ecosystem + +This plugin implements Web4 governance concepts: + +| Concept | This Plugin | Full Web4 | +|---------|-------------|-----------| +| Identity | Soft LCT (software) | LCT (hardware-bound) | +| Workflow | R6 framework | R6 + Policy enforcement | +| Audit | SQLite + hash-linked chain | Distributed ledger | +| Timing | Heartbeat coherence | Grounding lifecycle | +| Trust | T3/V3 tensors per role | Full tensor calculus | +| Agent | Role trust + references | MRH + Witnessing | + +For enterprise features (hardware binding, TPM attestation, cross-machine verification), contact dp@metalinxx.io. + +## Contributing + +Contributions welcome! This plugin is MIT licensed. 
+ +Areas for contribution: +- Additional audit visualizations +- R6 analytics and insights +- Trust visualization +- Reference search improvements +- Cross-session analytics + +## License + +MIT License - see [LICENSE](LICENSE) + +## Links + +- [Web4 Specification](https://github.com/dp-web4/web4) +- [R6 Framework Spec](https://github.com/dp-web4/web4/blob/main/web4-standard/core-spec/r6-framework.md) +- [Trust Tensors Spec](https://github.com/dp-web4/web4/blob/main/web4-standard/core-spec/t3-v3-tensors.md) +- Enterprise inquiries: dp@metalinxx.io diff --git a/plugins/web4-governance/commands/audit.md b/plugins/web4-governance/commands/audit.md new file mode 100644 index 0000000000..891a1bd900 --- /dev/null +++ b/plugins/web4-governance/commands/audit.md @@ -0,0 +1,98 @@ +# /audit + +View the Web4 audit trail for the current session. + +## Usage + +``` +/audit [options] +``` + +## Options + +- `/audit` - Show summary of current session +- `/audit last N` - Show last N actions (default: 10) +- `/audit verify` - Verify provenance chain integrity +- `/audit export` - Export audit log as JSON + +## Behavior + +### Summary (`/audit`) + +Display session governance status: + +``` +[Web4 Audit Summary] +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Session: a1b2c3d4 (software-bound) +Started: 2025-01-23T10:30:00Z +Actions: 47 recorded +Chain: 47 links, verified ✓ + +By Category: + file_read: 23 (49%) + file_write: 12 (26%) + command: 8 (17%) + network: 4 (8%) +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` + +### Recent Actions (`/audit last N`) + +Show the last N actions with R6 details: + +``` +[Recent Actions] +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +#47 [10:45:23] file_write → src/main.rs + R6: r6:f8e9a1b2 Status: success + +#46 [10:45:01] command → cargo + R6: r6:c7d8e9f0 Status: success + +#45 [10:44:45] file_read → Cargo.toml + R6: r6:a1b2c3d4 Status: success +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` + +### Verify Chain (`/audit verify`) + 
+Verify the provenance chain integrity: + +``` +[Chain Verification] +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Checking 47 records... + +✓ All record hashes valid +✓ Chain links unbroken +✓ Timestamps monotonic + +Chain Integrity: VERIFIED +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +``` + +## Files + +Audit data is stored in `~/.web4/`: +- `sessions/{session_id}.json` - Session state +- `audit/{session_id}.jsonl` - Audit records +- `r6/{date}.jsonl` - R6 request log + +## R6 Framework + +Every action is recorded using the R6 workflow: + +1. **Rules** - Constraints and preferences +2. **Role** - Session identity and context +3. **Request** - Tool, category, target +4. **Reference** - History and chain position +5. **Resource** - Cost/budget (optional) +6. **Result** - Outcome and output hash + +This creates a verifiable record of intent → action → result. + +## Learn More + +- [Web4 Specification](https://github.com/dp-web4/web4) +- [R6 Framework](https://github.com/dp-web4/web4/blob/main/web4-standard/core-spec/r6-framework.md) diff --git a/plugins/web4-governance/deploy.sh b/plugins/web4-governance/deploy.sh new file mode 100644 index 0000000000..34e547dbf7 --- /dev/null +++ b/plugins/web4-governance/deploy.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# Deploy web4 governance plugin for Claude Code +# Run this on each machine in the collective + +set -e + +echo "=== Web4 Governance Plugin Deployment ===" +echo "" + +# Step 1: Create ~/.web4 directory structure +echo "[1/3] Creating ~/.web4 directory..." +mkdir -p ~/.web4/sessions ~/.web4/r6 ~/.web4/audit +chmod 700 ~/.web4 + +# Step 2: Create default preferences +echo "[2/3] Creating preferences..." +if [[ ! 
-f ~/.web4/preferences.json ]]; then + cat > ~/.web4/preferences.json << 'EOF' +{ + "audit_level": "standard", + "show_r6_status": true, + "action_budget": null +} +EOF + echo " Created ~/.web4/preferences.json" +else + echo " Preferences already exist, skipping" +fi + +# Step 3: Make hooks executable +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +echo "[3/3] Making hooks executable..." +chmod +x "$SCRIPT_DIR/hooks/"*.py +echo " Done" + +echo "" +echo "=== Hook Configuration ===" +echo "" +echo "Add this to your project's .claude/settings.local.json:" +echo "(Uses \$CLAUDE_PROJECT_DIR for portability across machines)" +echo "" +cat << 'EOF' +{ + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "$CLAUDE_PROJECT_DIR/web4/claude-code-plugin/hooks/session_start.py" + } + ] + } + ], + "PreToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "$CLAUDE_PROJECT_DIR/web4/claude-code-plugin/hooks/pre_tool_use.py" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "$CLAUDE_PROJECT_DIR/web4/claude-code-plugin/hooks/post_tool_use.py" + } + ] + } + ] + } +} +EOF + +echo "" +echo "=== Deployment complete ===" +echo "" +echo "Next steps:" +echo "1. Add the hook configuration above to .claude/settings.local.json" +echo "2. Restart Claude Code" +echo "3. 
Check ~/.web4/sessions/ for session state files" +echo "" +echo "Audit trail location:" +echo " Sessions: ~/.web4/sessions/" +echo " R6 Requests: ~/.web4/r6/" +echo " Audit Records: ~/.web4/audit/" diff --git a/plugins/web4-governance/docs/RUST_CORE_PROPOSAL.md b/plugins/web4-governance/docs/RUST_CORE_PROPOSAL.md new file mode 100644 index 0000000000..551f7efb12 --- /dev/null +++ b/plugins/web4-governance/docs/RUST_CORE_PROPOSAL.md @@ -0,0 +1,324 @@ +# Web4 Trust Core - Rust Implementation Proposal + +**Date:** 2025-01-24 +**Status:** Proposed +**Author:** Claude (with dp) + +## Summary + +Implement the core Web4 trust primitives in Rust as a portable library (`web4-trust-core`) that can be used from Python, JavaScript/TypeScript, and native applications via FFI bindings. + +## Motivation + +The current Python implementation works well for prototyping but has limitations: + +1. **Performance** - Trust calculations in hot paths (every tool call) add latency +2. **Portability** - Python can't run in browsers, edge devices, or embedded systems +3. **Safety** - Trust is security-critical; memory safety and type safety matter +4. **Consistency** - Multiple implementations risk divergence; one core ensures uniformity + +Web4 trust primitives are foundational infrastructure. 
They should be: +- Fast (sub-millisecond operations) +- Portable (run anywhere) +- Safe (no undefined behavior) +- Verifiable (deterministic, reproducible) + +## Architecture + +### Layer Diagram + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Applications │ +│ Claude Code │ Web Validators │ Mobile Apps │ IoT Devices │ +└──────────────┬───────────────┬─────────────┬────────────────┘ + │ │ │ +┌──────────────┴───┐ ┌────────┴────────┐ ┌──┴──────────────┐ +│ Python Bindings │ │ WASM Bindings │ │ C FFI / Native │ +│ (PyO3) │ │ (wasm-bindgen) │ │ │ +└──────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ +┌──────────────┴───────────────┴─────────────┴────────────────┐ +│ web4-trust-core │ +│ (Rust) │ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ T3 Tensor │ │ V3 Tensor │ │ Witnessing │ │ +│ │ Operations │ │ Operations │ │ Chains │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Entity │ │ Role │ │ Decay │ │ +│ │ Trust │ │ Trust │ │ Functions │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ LCT │ │ Storage │ │ Serialtic │ │ +│ │ Primitives │ │ Backend │ │ (serde) │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Core Types + +```rust +/// T3 Trust Tensor - 6 dimensions +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct T3Tensor { + pub competence: f64, // Can they do it? + pub reliability: f64, // Will they do it consistently? + pub consistency: f64, // Same quality over time? + pub witnesses: f64, // Corroborated by others? + pub lineage: f64, // Track record length? + pub alignment: f64, // Values match context? 
+}
+
+/// V3 Value Tensor - 6 dimensions
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct V3Tensor {
+    pub energy: f64,       // Effort/resources invested
+    pub contribution: f64, // Value added to ecosystem
+    pub stewardship: f64,  // Care for shared resources
+    pub network: f64,      // Connections / reach
+    pub reputation: f64,   // External perception
+    pub temporal: f64,     // Time-based value accumulation
+}
+
+/// Entity trust combining T3 and V3
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct EntityTrust {
+    pub entity_id: String,
+    pub entity_type: EntityType,
+    pub t3: T3Tensor,
+    pub v3: V3Tensor,
+    pub witnessed_by: Vec<String>,
+    pub has_witnessed: Vec<String>,
+    pub action_count: u64,
+    pub success_count: u64,
+    pub last_action: Option<DateTime<Utc>>,
+    pub created_at: DateTime<Utc>,
+}
+
+/// Entity types in the Web4 ecosystem
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub enum EntityType {
+    Mcp(String),       // MCP server
+    Role(String),      // Agent role
+    Session(String),   // Session identity
+    Reference(String), // Reference/context
+    Lct(String),       // Linked Context Token
+}
+```
+
+### Core Operations
+
+```rust
+impl T3Tensor {
+    /// Average trust score (0.0 - 1.0)
+    pub fn average(&self) -> f64;
+
+    /// Categorical trust level
+    pub fn level(&self) -> TrustLevel;
+
+    /// Update from outcome (success/failure)
+    pub fn update_from_outcome(&mut self, success: bool, magnitude: f64);
+
+    /// Apply temporal decay
+    pub fn apply_decay(&mut self, days_inactive: f64, decay_rate: f64);
+}
+
+impl EntityTrust {
+    /// Receive a witness event (another entity observed this one)
+    pub fn receive_witness(&mut self, witness_id: &str, success: bool, magnitude: f64);
+
+    /// Give a witness event (this entity observed another)
+    pub fn give_witness(&mut self, target_id: &str, success: bool, magnitude: f64);
+
+    /// Calculate days since last action
+    pub fn days_since_last_action(&self) -> f64;
+}
+
+/// Witnessing chain operations
+impl WitnessingChain {
+    pub fn trace_witnesses(&self,
entity_id: &str, depth: u32) -> Vec<String>;
+    pub fn trace_witnessed(&self, entity_id: &str, depth: u32) -> Vec<String>;
+    pub fn calculate_transitive_trust(&self, entity_id: &str) -> f64;
+}
+```
+
+### Storage Backend
+
+```rust
+pub trait TrustStore {
+    fn get(&self, entity_id: &str) -> Result<EntityTrust>;
+    fn save(&self, trust: &EntityTrust) -> Result<()>;
+    fn list(&self, entity_type: Option<EntityType>) -> Result<Vec<EntityTrust>>;
+    fn witness(&self, witness_id: &str, target_id: &str, success: bool) -> Result<(EntityTrust, EntityTrust)>;
+}
+
+// Implementations:
+// - InMemoryStore (for testing, WASM)
+// - SledStore (embedded, fast)
+// - FileStore (JSON files, compatible with Python impl)
+```
+
+## Binding Strategy
+
+### Python (PyO3)
+
+```python
+# Usage from Python
+from web4_trust import EntityTrust, TrustStore
+
+store = TrustStore.open("~/.web4/governance/entities")
+trust = store.get("mcp:filesystem")
+print(f"T3 average: {trust.t3_average()}")
+
+# Witness an event
+store.witness("session:abc", "mcp:filesystem", success=True)
+```
+
+### JavaScript/TypeScript (wasm-bindgen)
+
+```typescript
+// Usage from TypeScript
+import { EntityTrust, TrustStore } from 'web4-trust';
+
+const store = new TrustStore();
+const trust = store.get("mcp:filesystem");
+console.log(`T3 average: ${trust.t3Average()}`);
+
+// Witness an event
+store.witness("session:abc", "mcp:filesystem", true);
+```
+
+### C FFI
+
+```c
+// Usage from C
+#include "web4_trust.h"
+
+web4_trust_store_t* store = web4_trust_store_open("~/.web4/governance/entities");
+web4_entity_trust_t* trust = web4_trust_store_get(store, "mcp:filesystem");
+double t3_avg = web4_entity_trust_t3_average(trust);
+
+web4_trust_store_witness(store, "session:abc", "mcp:filesystem", true);
+```
+
+## Migration Path
+
+### Phase 1: Core Library (Week 1)
+- [ ] Scaffold `web4-trust-core` crate
+- [ ] Implement T3Tensor, V3Tensor, EntityTrust
+- [ ] Implement core operations (update, decay, witness)
+- [ ] Unit tests for all operations
+- [ ] FileStore backend (JSON
compatible with Python) + +### Phase 2: Python Bindings (Week 2) +- [ ] Add PyO3 feature flag +- [ ] Implement Python module +- [ ] Test compatibility with existing Python code +- [ ] Gradual migration of claude-code-plugin hooks + +### Phase 3: WASM Bindings (Week 3) +- [ ] Add wasm-bindgen feature flag +- [ ] Implement JS/TS bindings +- [ ] Test in browser environment +- [ ] npm package publishing + +### Phase 4: Advanced Features (Week 4+) +- [ ] LCT primitives (signing, verification) +- [ ] Sled storage backend +- [ ] Transitive trust calculations +- [ ] MRH boundary enforcement + +## File Structure + +``` +web4-trust-core/ +├── Cargo.toml +├── src/ +│ ├── lib.rs # Public API +│ ├── tensor/ +│ │ ├── mod.rs +│ │ ├── t3.rs # T3 Trust Tensor +│ │ └── v3.rs # V3 Value Tensor +│ ├── entity/ +│ │ ├── mod.rs +│ │ ├── trust.rs # EntityTrust +│ │ └── types.rs # EntityType enum +│ ├── witnessing/ +│ │ ├── mod.rs +│ │ ├── chain.rs # WitnessingChain +│ │ └── event.rs # WitnessEvent +│ ├── decay/ +│ │ ├── mod.rs +│ │ └── temporal.rs # Decay functions +│ ├── storage/ +│ │ ├── mod.rs +│ │ ├── traits.rs # TrustStore trait +│ │ ├── memory.rs # InMemoryStore +│ │ ├── file.rs # FileStore (JSON) +│ │ └── sled.rs # SledStore (optional) +│ └── bindings/ +│ ├── mod.rs +│ ├── python.rs # PyO3 bindings +│ └── wasm.rs # wasm-bindgen bindings +├── tests/ +│ ├── tensor_tests.rs +│ ├── entity_tests.rs +│ ├── witnessing_tests.rs +│ └── storage_tests.rs +└── benches/ + └── trust_benchmarks.rs +``` + +## Cargo.toml Features + +```toml +[package] +name = "web4-trust-core" +version = "0.1.0" +edition = "2021" + +[features] +default = ["file-store"] +file-store = ["serde_json"] +sled-store = ["sled"] +python = ["pyo3"] +wasm = ["wasm-bindgen", "js-sys"] + +[dependencies] +serde = { version = "1.0", features = ["derive"] } +chrono = { version = "0.4", features = ["serde"] } +thiserror = "1.0" + +# Optional dependencies +serde_json = { version = "1.0", optional = true } +sled = { version = "0.34", 
optional = true } +pyo3 = { version = "0.20", features = ["extension-module"], optional = true } +wasm-bindgen = { version = "0.2", optional = true } +js-sys = { version = "0.3", optional = true } +``` + +## Success Criteria + +1. **Correctness**: All Python tests pass with Rust backend +2. **Performance**: Trust operations < 100μs (currently ~1ms in Python) +3. **Compatibility**: JSON files readable by both Python and Rust +4. **Portability**: WASM build works in browser +5. **Safety**: No unsafe code outside FFI boundaries + +## Open Questions + +1. **Storage format**: Keep JSON for compatibility, or use binary (MessagePack, bincode)? +2. **Async**: Should storage operations be async-first? +3. **Versioning**: How to handle schema evolution in stored trust data? +4. **Caching**: Should the Rust layer cache entities in memory? + +## References + +- [PyO3 User Guide](https://pyo3.rs/) +- [wasm-bindgen Guide](https://rustwasm.github.io/wasm-bindgen/) +- [sled embedded database](https://sled.rs/) +- [Web4 Whitepaper](../../whitepaper/) diff --git a/plugins/web4-governance/governance/__init__.py b/plugins/web4-governance/governance/__init__.py new file mode 100644 index 0000000000..9d26453699 --- /dev/null +++ b/plugins/web4-governance/governance/__init__.py @@ -0,0 +1,203 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance +# https://github.com/dp-web4/web4 +""" +Lightweight governance for Claude Code plugin. 
+ +This is a software-only implementation of Web4 governance concepts: +- Soft LCT (software-bound identity, no TPM) +- Local SQLite ledger (session tracking, work products, ATP) +- R6 workflow (request → result with audit trail) +- Entity trust with witnessing (MCP servers, agents, references) +- Role trust accumulation (T3/V3 tensors per agent) +- Persistent references with self-curation (learned context per role) +- Agent lifecycle governance (spawn, complete, capability modulation) + +For hardware-bound identity and enterprise features, contact dp@metalinxx.io. + +Usage: + from governance import AgentGovernance, EntityTrustStore + + # Agent governance + gov = AgentGovernance() + ctx = gov.on_agent_spawn(session_id, "code-reviewer") + result = gov.on_agent_complete(session_id, "code-reviewer", success=True) + + # Entity trust (MCP servers, etc.) + store = EntityTrustStore() + mcp_trust = store.get("mcp:filesystem") + store.witness("session:abc", "mcp:filesystem", success=True) +""" + +from .ledger import Ledger +from .soft_lct import SoftLCT +from .session_manager import SessionManager +from .role_trust import RoleTrust, RoleTrustStore +from .references import Reference, ReferenceStore +from .agent_governance import AgentGovernance +from .entity_trust import EntityTrust, EntityTrustStore, get_mcp_trust, update_mcp_trust +from .presets import ( + PolicyConfig, + PolicyRule, + PolicyMatch, + RateLimitSpec, + PresetDefinition, + get_preset, + list_presets, + resolve_preset, + is_preset_name, + policy_config_to_dict, +) +from .rate_limiter import RateLimiter, RateLimitResult +from .persistent_rate_limiter import PersistentRateLimiter +from .reporter import AuditReporter, AuditReport +from .policy_entity import PolicyEntity, PolicyRegistry, PolicyEvaluation, WitnessRecord +from .signing import ( + SigningKeyPair, + generate_signing_keypair, + sign_data, + verify_signature, + key_id_from_public_key, +) +from .target_extraction import ( + extract_target, + 
extract_targets, + is_credential_target, + is_memory_target, + classify_tool, + classify_tool_with_target, + TOOL_CATEGORIES, +) +from .matchers import ( + matches_time_window, + glob_to_regex, + matches_target, + validate_regex_pattern, +) +from .presets import TimeWindow +from .event_stream import ( + EventStream, + Event, + EventType, + Severity, + get_default_stream, + emit, +) + +# Trust backend (Rust or Python fallback) +from .trust_backend import ( + get_backend_info, + verify_backend, + RUST_BACKEND, + TrustStore, + T3Tensor as BackendT3Tensor, # Legacy backend tensor + V3Tensor as BackendV3Tensor, # Legacy backend tensor +) + +# Canonical T3/V3 Tensors (fractal structure per Web4 spec) +from .tensors import ( + # Base 3D tensors + T3Base, + V3Base, + # Full fractal tensors with subdimensions + T3Tensor, + V3Tensor, + # Subdimension classes + TalentSubdims, + TrainingSubdims, + TemperamentSubdims, + ValuationSubdims, + VeracitySubdims, + ValiditySubdims, + # Migration helpers + migrate_legacy_t3, + migrate_legacy_v3, +) + +__all__ = [ + 'Ledger', + 'SoftLCT', + 'SessionManager', + 'RoleTrust', + 'RoleTrustStore', + 'Reference', + 'ReferenceStore', + 'AgentGovernance', + 'EntityTrust', + 'EntityTrustStore', + 'get_mcp_trust', + 'update_mcp_trust', + # Trust backend (legacy) + 'get_backend_info', + 'verify_backend', + 'RUST_BACKEND', + 'TrustStore', + 'BackendT3Tensor', + 'BackendV3Tensor', + # Canonical T3/V3 Tensors (fractal per Web4 spec) + 'T3Base', + 'V3Base', + 'T3Tensor', + 'V3Tensor', + 'TalentSubdims', + 'TrainingSubdims', + 'TemperamentSubdims', + 'ValuationSubdims', + 'VeracitySubdims', + 'ValiditySubdims', + 'migrate_legacy_t3', + 'migrate_legacy_v3', + # Tier 1.5: Presets + 'PolicyConfig', + 'PolicyRule', + 'PolicyMatch', + 'RateLimitSpec', + 'PresetDefinition', + 'get_preset', + 'list_presets', + 'resolve_preset', + 'is_preset_name', + 'policy_config_to_dict', + # Tier 1.5: Rate Limiter + 'RateLimiter', + 'RateLimitResult', + # Tier 2: 
Persistent Rate Limiter + 'PersistentRateLimiter', + # Tier 1.5: Reporter + 'AuditReporter', + 'AuditReport', + # Tier 1.5: Policy Entity + 'PolicyEntity', + 'PolicyRegistry', + 'PolicyEvaluation', + 'WitnessRecord', + # Tier 2: Ed25519 Signing + 'SigningKeyPair', + 'generate_signing_keypair', + 'sign_data', + 'verify_signature', + 'key_id_from_public_key', + # Tier 3: Multi-target Extraction + 'extract_target', + 'extract_targets', + 'is_credential_target', + 'is_memory_target', + 'classify_tool', + 'classify_tool_with_target', + 'TOOL_CATEGORIES', + # Tier 3: Pattern Matchers + 'matches_time_window', + 'glob_to_regex', + 'matches_target', + 'validate_regex_pattern', + 'TimeWindow', + # Tier 4: Event Stream + 'EventStream', + 'Event', + 'EventType', + 'Severity', + 'get_default_stream', + 'emit', +] diff --git a/plugins/web4-governance/governance/agent_governance.py b/plugins/web4-governance/governance/agent_governance.py new file mode 100644 index 0000000000..bb0bf69e06 --- /dev/null +++ b/plugins/web4-governance/governance/agent_governance.py @@ -0,0 +1,710 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Agent Governance +# https://github.com/dp-web4/web4 +""" +High-level API for Web4 agent governance. 
+ +Maps Claude Code agents to Web4 role entities: + + Agent (claude-code) → Role Entity (web4) + +Provides: +- Role trust accumulation (T3/V3 tensors) +- Persistent references (learned context) +- Capability modulation based on trust +- Session integration with R6 audit trail + +Usage in hooks: + from governance import AgentGovernance + + gov = AgentGovernance() + + # When agent spawns + ctx = gov.on_agent_spawn(session_id, "code-reviewer") + + # When agent completes + result = gov.on_agent_complete(session_id, "code-reviewer", success=True) + + # Extract reference from agent work + gov.extract_reference(session_id, "code-reviewer", + content="Pattern: null checks before array access", + source="analysis of auth.py") +""" + +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional, Dict, List, Any + +from .ledger import Ledger +from .soft_lct import SoftLCT +from .session_manager import SessionManager +from .role_trust import RoleTrust, RoleTrustStore +from .references import Reference, ReferenceStore +from .entity_trust import EntityTrustStore + + +class AgentGovernance: + """ + High-level API for agent governance integration. + + Coordinates between: + - Session management (current session context) + - Role trust (per-agent trust tensors) + - References (persistent learned context) + - Audit ledger (R6 action trail) + """ + + def __init__( + self, + ledger: Optional[Ledger] = None, + lct: Optional[SoftLCT] = None + ): + """ + Initialize agent governance. 
+ + Args: + ledger: Ledger instance for persistence + lct: SoftLCT for identity + """ + self.ledger = ledger or Ledger() + self.lct = lct or SoftLCT(self.ledger) + self.session_manager = SessionManager(self.ledger, self.lct) + self.role_trust = RoleTrustStore(self.ledger) + self.references = ReferenceStore(self.ledger) + + # Track active role per session + self._active_roles: Dict[str, str] = {} + + # Track references used per session (for witnessing on complete) + self._session_refs: Dict[str, List[str]] = {} + + # Track work attribution per session (which agent did what) + # Format: {session_id: [(agent_name, work_type, work_id), ...]} + self._work_attribution: Dict[str, List[tuple]] = {} + + # Track recent agents per session (for inter-agent witnessing) + # Format: {session_id: [agent_name, ...]} (most recent first) + self._recent_agents: Dict[str, List[str]] = {} + + # Entity trust store for inter-agent witnessing + self.entity_trust = EntityTrustStore() + + def get_role_context(self, role_id: str) -> dict: + """ + Get full context for a role (agent). + + Returns trust, references, and derived capabilities. + Useful for understanding an agent's current standing. + """ + trust = self.role_trust.get(role_id) + refs = self.references.get_for_role(role_id, limit=30) + capabilities = self.role_trust.derive_capabilities(role_id) + context_str, used_ref_ids = self.references.get_context_for_role(role_id) + + return { + "role_id": role_id, + "trust": trust.to_dict(), + "trust_level": trust.trust_level(), + "t3_average": trust.t3_average(), + "v3_average": trust.v3_average(), + "capabilities": capabilities, + "reference_count": len(refs), + "references": [r.to_dict() for r in refs[:10]], # Sample + "context_summary": context_str[:500] if context_str else None, + "context_refs_used": len(used_ref_ids) + } + + def on_agent_spawn(self, session_id: str, agent_name: str) -> dict: + """ + Called when an agent is spawned. 
+ + Records the agent activation in the ledger and returns + role context including trust and references. + + References used are tracked for witnessing on completion. + + Args: + session_id: Current session ID + agent_name: Agent/role identifier (e.g., "code-reviewer") + + Returns: + Role context with trust, references, capabilities + """ + # Track active role + self._active_roles[session_id] = agent_name + + # Get role context + trust = self.role_trust.get(agent_name) + refs = self.references.get_for_role(agent_name, limit=20) + capabilities = self.role_trust.derive_capabilities(agent_name) + + # Get context with self-curation (returns used ref IDs) + context_str, used_ref_ids = self.references.get_context_for_role( + agent_name, max_tokens=1000 + ) + + # Track which refs were used (for witnessing on complete) + self._session_refs[session_id] = used_ref_ids + + # Record in ledger + self.ledger.record_audit( + session_id=session_id, + action_type="agent_spawn", + tool_name=agent_name, + target=f"t3={trust.t3_average():.2f}", + input_hash=None, + output_hash=f"refs={len(used_ref_ids)}", + status="success" + ) + + return { + "role_id": agent_name, + "session_id": session_id, + "trust": { + "t3_average": trust.t3_average(), + "v3_average": trust.v3_average(), + "trust_level": trust.trust_level(), + "action_count": trust.action_count, + "success_rate": trust.success_count / max(1, trust.action_count) + }, + "capabilities": capabilities, + "references_loaded": len(refs), + "references_used": len(used_ref_ids), + "context": context_str + } + + def on_agent_complete( + self, + session_id: str, + agent_name: str, + success: bool, + magnitude: float = 0.1, + validates_previous: bool = True + ) -> dict: + """ + Called when an agent completes its work. + + Updates trust based on outcome, witnesses used references, + and performs inter-agent witnessing. 
+ + Reference witnessing enables self-curation: + - References used in successful tasks gain trust + - References used in failed tasks lose trust + - Over time, helpful references rise, unhelpful ones fade + + Inter-agent witnessing: + - When agent B completes successfully after agent A + - B's success validates A's work + - B witnesses A (A gains trust through validation) + + Args: + session_id: Current session ID + agent_name: Agent/role identifier + success: Whether agent completed successfully + magnitude: Update magnitude (0.0-1.0) + validates_previous: Whether to witness previous agent + + Returns: + Updated trust information including reference witnessing + """ + # Update agent trust + trust = self.role_trust.update(agent_name, success, magnitude) + + # Witness references that were used (self-curation) + refs_witnessed = 0 + if session_id in self._session_refs: + used_refs = self._session_refs[session_id] + if used_refs: + updated_refs = self.references.witness_references( + role_id=agent_name, + ref_ids=used_refs, + success=success, + magnitude=magnitude * 0.5 # Refs update more slowly + ) + refs_witnessed = len(updated_refs) + + # Clear tracked refs + del self._session_refs[session_id] + + # Inter-agent witnessing + # If this agent succeeded, it validates the previous agent's work + agents_witnessed = [] + if success and validates_previous and session_id in self._recent_agents: + recent = self._recent_agents[session_id] + for prev_agent in recent[:2]: # Witness up to 2 previous agents + if prev_agent != agent_name: + try: + witness_result = self.witness_agent( + session_id=session_id, + witness_agent=agent_name, + target_agent=prev_agent, + success=True, # Successful completion = positive witness + magnitude=magnitude * 0.3 # Inter-agent witnessing is gentler + ) + agents_witnessed.append(witness_result) + except Exception: + pass # Don't fail on witnessing errors + + # Track this agent in recent agents + if session_id not in self._recent_agents: + 
self._recent_agents[session_id] = [] + if agent_name not in self._recent_agents[session_id]: + self._recent_agents[session_id].insert(0, agent_name) + # Keep only last 5 agents + self._recent_agents[session_id] = self._recent_agents[session_id][:5] + + # Clear active role + if session_id in self._active_roles: + del self._active_roles[session_id] + + # Record in ledger + self.ledger.record_audit( + session_id=session_id, + action_type="agent_complete", + tool_name=agent_name, + target=f"success={success}", + input_hash=f"refs={refs_witnessed},agents={len(agents_witnessed)}", + output_hash=f"t3={trust.t3_average():.3f}", + status="success" if success else "failure" + ) + + return { + "role_id": agent_name, + "session_id": session_id, + "success": success, + "trust_updated": { + "t3_average": trust.t3_average(), + "v3_average": trust.v3_average(), + "trust_level": trust.trust_level(), + "reliability": trust.reliability, + "competence": trust.competence + }, + "references_witnessed": refs_witnessed, + "agents_witnessed": agents_witnessed + } + + def on_tool_use( + self, + session_id: str, + role_id: str, + tool_name: str, + tool_input: dict, + atp_cost: int = 1 + ) -> dict: + """ + Called before tool use by an agent. + + Checks capabilities and records the action. 
+ + Args: + session_id: Current session + role_id: Active role (agent) + tool_name: Tool being used + tool_input: Tool input parameters + atp_cost: ATP cost for this action + + Returns: + Action record with remaining budget + """ + # Check capabilities + caps = self.role_trust.derive_capabilities(role_id) + + # Verify tool is allowed + tool_caps = { + "Read": "can_read", + "Glob": "can_read", + "Grep": "can_read", + "Write": "can_write", + "Edit": "can_write", + "Bash": "can_execute", + "WebFetch": "can_network", + "WebSearch": "can_network", + "Task": "can_delegate" + } + + required_cap = tool_caps.get(tool_name, "can_read") + if not caps.get(required_cap, False): + return { + "allowed": False, + "error": f"Insufficient trust for {tool_name}", + "required": required_cap, + "trust_level": caps["trust_level"] + } + + # Record action + try: + action = self.session_manager.record_action( + tool_name=tool_name, + target=self._extract_target(tool_name, tool_input), + input_data=tool_input, + atp_cost=atp_cost + ) + return { + "allowed": True, + "action": action, + "role_id": role_id + } + except RuntimeError: + # No active session + return { + "allowed": True, + "action": None, + "role_id": role_id, + "note": "No active session" + } + + def _extract_target(self, tool_name: str, tool_input: dict) -> str: + """Extract target from tool input for audit.""" + if tool_name in ["Read", "Write", "Edit", "Glob"]: + return tool_input.get("file_path", tool_input.get("path", ""))[:100] + elif tool_name == "Bash": + cmd = tool_input.get("command", "") + return cmd.split()[0] if cmd.split() else cmd[:50] + elif tool_name == "Grep": + return f"pattern:{tool_input.get('pattern', '')[:30]}" + elif tool_name == "WebFetch": + return tool_input.get("url", "")[:100] + elif tool_name == "Task": + return tool_input.get("description", "")[:50] + return "" + + def extract_reference( + self, + session_id: str, + role_id: str, + content: str, + source: str, + ref_type: str = "pattern", + 
confidence: float = 0.5, + tags: Optional[List[str]] = None + ) -> dict: + """ + Extract and store a reference from agent work. + + Call this when an agent learns something that should persist: + - Patterns observed in code + - Facts extracted from docs + - User preferences inferred + + Args: + session_id: Current session + role_id: Role that learned this + content: The reference content + source: Where it came from + ref_type: pattern, fact, preference, context, summary + confidence: How confident in this reference + tags: Optional categorization tags + + Returns: + Created reference info + """ + ref = self.references.add( + role_id=role_id, + content=content, + source=source, + ref_type=ref_type, + confidence=confidence, + tags=tags + ) + + # Record in ledger + self.ledger.record_audit( + session_id=session_id, + action_type="reference_extract", + tool_name=role_id, + target=ref_type, + input_hash=None, + output_hash=ref.ref_id, + status="success" + ) + + return { + "ref_id": ref.ref_id, + "role_id": role_id, + "ref_type": ref_type, + "content_preview": content[:100] + } + + def search_references( + self, + role_id: str, + query: str, + ref_type: Optional[str] = None, + limit: int = 10 + ) -> List[dict]: + """Search references for a role.""" + refs = self.references.search( + role_id=role_id, + query=query, + ref_type=ref_type, + limit=limit + ) + return [r.to_dict() for r in refs] + + def get_active_role(self, session_id: str) -> Optional[str]: + """Get currently active role for a session.""" + return self._active_roles.get(session_id) + + def witness_agent( + self, + session_id: str, + witness_agent: str, + target_agent: str, + success: bool, + magnitude: float = 0.1 + ) -> dict: + """ + Record inter-agent witnessing. + + When one agent's work validates another's, the witnessing agent + can attest to the target agent's quality. 
+ + Example: + - code-reviewer reviews code + - test-runner runs tests, they pass + - test-runner witnesses code-reviewer (tests validate the review) + + Args: + session_id: Current session + witness_agent: Agent doing the witnessing (e.g., test-runner) + target_agent: Agent being witnessed (e.g., code-reviewer) + success: Whether the witnessed work was validated + magnitude: Update magnitude + + Returns: + Witnessing result with updated trust + """ + witness_id = f"role:{witness_agent}" + target_id = f"role:{target_agent}" + + # Record the witnessing in entity trust + witness_trust, target_trust = self.entity_trust.witness( + witness_id, target_id, success, magnitude + ) + + # Also update the role trust store (T3 witnesses dimension) + target_role = self.role_trust.get(target_agent) + if success: + # Being validated increases witnesses dimension + delta = magnitude * 0.05 * (1 - target_role.witnesses) + else: + delta = -magnitude * 0.08 * target_role.witnesses + target_role.witnesses = max(0, min(1, target_role.witnesses + delta)) + self.role_trust.save(target_role) + + # Record in ledger + self.ledger.record_audit( + session_id=session_id, + action_type="agent_witness", + tool_name=f"{witness_agent}->{target_agent}", + target=f"success={success}", + input_hash=None, + output_hash=f"t3={target_trust.t3_average():.3f}", + status="success" + ) + + return { + "witness": witness_agent, + "target": target_agent, + "success": success, + "target_trust": { + "t3_average": target_trust.t3_average(), + "witnesses_dim": round(target_role.witnesses, 3), + "trust_level": target_role.trust_level() + } + } + + def get_witnessing_chain(self, agent_name: str) -> dict: + """ + Get the witnessing chain for an agent. + + Shows which agents have witnessed this one and vice versa. 
+ """ + entity_id = f"role:{agent_name}" + return self.entity_trust.get_witnessing_chain(entity_id) + + def get_all_roles(self) -> List[dict]: + """Get summary of all known roles.""" + roles = [] + for role_id in self.role_trust.list_roles(): + trust = self.role_trust.get(role_id) + ref_stats = self.references.get_stats(role_id) + + roles.append({ + "role_id": role_id, + "t3_average": trust.t3_average(), + "trust_level": trust.trust_level(), + "action_count": trust.action_count, + "reference_count": ref_stats["total_references"] + }) + + return sorted(roles, key=lambda r: r["t3_average"], reverse=True) + + def prune_stale_references(self, max_age_days: int = 90) -> dict: + """Prune stale references across all roles.""" + results = {} + for role_id in self.role_trust.list_roles(): + pruned = self.references.prune_stale(role_id, max_age_days) + if pruned > 0: + results[role_id] = pruned + return results + + def auto_extract_references( + self, + session_id: str, + role_id: str, + content: str, + source: str = "auto-extracted", + min_confidence: float = 0.4 + ) -> List[dict]: + """ + Automatically extract references from task output. + + Analyzes content for extractable patterns, facts, and learnings. + Called after successful task completion to capture knowledge. + + Extraction patterns: + - "Pattern: ..." or "Always ..." → pattern reference + - "Note: ..." or "Important: ..." → fact reference + - "The ... uses/contains/is ..." → fact reference + - "Prefer ... over ..." or "Use ... instead of ..." 
→ preference + - Code patterns (function signatures, imports) → pattern reference + + Args: + session_id: Current session + role_id: Role to attribute references to + content: Content to analyze + source: Source attribution + min_confidence: Minimum confidence to extract (0.0-1.0) + + Returns: + List of extracted references + """ + import re + + extracted = [] + + # Pattern indicators (high confidence) + pattern_markers = [ + (r'[Pp]attern:\s*(.+?)(?:\n|$)', 'pattern', 0.8), + (r'[Aa]lways\s+(.+?)(?:\.|$)', 'pattern', 0.7), + (r'[Nn]ever\s+(.+?)(?:\.|$)', 'pattern', 0.7), + (r'[Bb]est practice:\s*(.+?)(?:\n|$)', 'pattern', 0.8), + ] + + # Fact indicators (medium confidence) + fact_markers = [ + (r'[Nn]ote:\s*(.+?)(?:\n|$)', 'fact', 0.6), + (r'[Ii]mportant:\s*(.+?)(?:\n|$)', 'fact', 0.7), + (r'[Tt]he (\w+) (?:uses|contains|is|has) (.+?)(?:\.|$)', 'fact', 0.5), + (r'[Ff]ound that (.+?)(?:\.|$)', 'fact', 0.6), + ] + + # Preference indicators (medium confidence) + pref_markers = [ + (r'[Pp]refer (.+?) over (.+?)(?:\.|$)', 'preference', 0.6), + (r'[Uu]se (.+?) instead of (.+?)(?:\.|$)', 'preference', 0.6), + (r'[Rr]ecommend(?:ed|s)? 
(.+?)(?:\.|$)', 'preference', 0.5), + ] + + # Summary indicators (lower confidence, longer content) + summary_markers = [ + (r'[Ss]ummary:\s*(.+?)(?:\n\n|$)', 'summary', 0.5), + (r'[Oo]verview:\s*(.+?)(?:\n\n|$)', 'summary', 0.5), + ] + + all_markers = pattern_markers + fact_markers + pref_markers + summary_markers + + for regex, ref_type, base_confidence in all_markers: + if base_confidence < min_confidence: + continue + + matches = re.findall(regex, content, re.MULTILINE | re.DOTALL) + for match in matches: + # Handle tuple matches (multiple groups) + if isinstance(match, tuple): + text = " ".join(m.strip() for m in match if m) + else: + text = match.strip() + + # Skip very short or very long extractions + if len(text) < 10 or len(text) > 500: + continue + + # Adjust confidence based on content quality + confidence = base_confidence + if len(text) > 100: + confidence *= 0.9 # Longer = slightly less certain + if any(word in text.lower() for word in ['might', 'maybe', 'possibly']): + confidence *= 0.8 # Uncertainty markers reduce confidence + + if confidence >= min_confidence: + ref = self.references.add( + role_id=role_id, + content=text, + source=source, + ref_type=ref_type, + confidence=confidence, + tags=["auto-extracted"] + ) + extracted.append({ + "ref_id": ref.ref_id, + "ref_type": ref_type, + "content": text[:100], + "confidence": confidence + }) + + # Record extraction in ledger + if extracted: + self.ledger.record_audit( + session_id=session_id, + action_type="auto_extract", + tool_name=role_id, + target=f"extracted={len(extracted)}", + input_hash=None, + output_hash=None, + status="success" + ) + + return extracted + + def on_task_output( + self, + session_id: str, + role_id: str, + output: str, + success: bool + ) -> dict: + """ + Process task output for reference extraction. + + Called when a task completes with output that might contain + learnings worth persisting. 
+ + Args: + session_id: Current session + role_id: Agent role + output: Task output content + success: Whether task succeeded + + Returns: + Extraction results + """ + if not success or not output or len(output) < 50: + return {"extracted": 0, "reason": "No extractable content"} + + # Only extract from successful tasks + extracted = self.auto_extract_references( + session_id=session_id, + role_id=role_id, + content=output, + source=f"task-output:{session_id}", + min_confidence=0.5 # Higher threshold for auto-extraction + ) + + return { + "extracted": len(extracted), + "references": extracted + } diff --git a/plugins/web4-governance/governance/entity_trust.py b/plugins/web4-governance/governance/entity_trust.py new file mode 100644 index 0000000000..8e2597a7d9 --- /dev/null +++ b/plugins/web4-governance/governance/entity_trust.py @@ -0,0 +1,723 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Entity Trust +# https://github.com/dp-web4/web4 +""" +Entity trust with witnessing. + +Any Web4 entity can accumulate trust through witnessing: +- MCP servers → tool calls witnessed by Claude +- Agent roles → task completions witnessed by session +- References → usage in successful tasks witnessed by agent +- Context blocks → helpfulness witnessed by outcomes + +Key concept: Trust flows through witnessing relationships. 
+When entity A witnesses entity B succeed, both accumulate trust: +- B gains reliability (it worked) +- A gains alignment (its judgment was validated) + +Entity Types: +- mcp:{server_name} - MCP server +- role:{agent_name} - Agent role +- ref:{reference_id} - Reference/context +- session:{session_id} - Session identity +""" + +import json +import hashlib +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional, Tuple +from dataclasses import dataclass, field, asdict + +from .tensors import ( + T3Tensor, V3Tensor, + TalentSubdims, TrainingSubdims, TemperamentSubdims, + ValuationSubdims, VeracitySubdims, ValiditySubdims, + migrate_legacy_t3, migrate_legacy_v3, +) + +# Storage location +ENTITIES_DIR = Path.home() / ".web4" / "governance" / "entities" + + +@dataclass +class EntityTrust: + """ + Trust tensors for any Web4 entity. + + Uses fractal T3/V3 tensor structure per Web4 spec: + + T3 Trust Tensor (base 3D, each with 2 subdimensions): + - Talent → (competence, alignment) + - Training → (lineage, witnesses) + - Temperament → (reliability, consistency) + + V3 Value Tensor (base 3D, each with 2 subdimensions): + - Valuation → (reputation, contribution) + - Veracity → (stewardship, energy) + - Validity → (network, temporal) + + Trust is ROLE-CONTEXTUAL: an entity's T3/V3 exists only within + a role context. Full implementation binds to RDF/LCT entities. 
+ """ + entity_id: str # Format: type:name (e.g., mcp:filesystem, role:code-reviewer) + entity_type: str = "" # Parsed from entity_id + entity_name: str = "" # Parsed from entity_id + + # Fractal T3 Trust Tensor + t3: T3Tensor = field(default_factory=T3Tensor) + + # Fractal V3 Value Tensor + v3: V3Tensor = field(default_factory=V3Tensor) + + # Witnessing relationships + witnessed_by: List[str] = field(default_factory=list) # Entities that witnessed this + has_witnessed: List[str] = field(default_factory=list) # Entities this has witnessed + + # Metadata + action_count: int = 0 + success_count: int = 0 + witness_count: int = 0 # Times witnessed by others + last_action: Optional[str] = None + created_at: Optional[str] = None + + def __post_init__(self): + # Parse entity_id into type and name + if ":" in self.entity_id and not self.entity_type: + parts = self.entity_id.split(":", 1) + self.entity_type = parts[0] + self.entity_name = parts[1] if len(parts) > 1 else "" + + # ========================================================================= + # Backward-compatible property accessors for subdimensions + # ========================================================================= + + @property + def competence(self) -> float: + return self.t3.competence + + @competence.setter + def competence(self, value: float): + self.t3.talent_sub.competence = value + + @property + def alignment(self) -> float: + return self.t3.alignment + + @alignment.setter + def alignment(self, value: float): + self.t3.talent_sub.alignment = value + + @property + def lineage(self) -> float: + return self.t3.lineage + + @lineage.setter + def lineage(self, value: float): + self.t3.training_sub.lineage = value + + @property + def witnesses(self) -> float: + return self.t3.witnesses + + @witnesses.setter + def witnesses(self, value: float): + self.t3.training_sub.witnesses = value + + @property + def reliability(self) -> float: + return self.t3.reliability + + @reliability.setter + def 
reliability(self, value: float):
        self.t3.temperament_sub.reliability = value

    @property
    def consistency(self) -> float:
        # Read-through to the fractal T3 temperament subdimension.
        return self.t3.consistency

    @consistency.setter
    def consistency(self, value: float):
        self.t3.temperament_sub.consistency = value

    # V3 subdimension accessors
    # Flat properties delegating into the fractal V3 tensor
    # (valuation / veracity / validity sub-tensors).
    @property
    def reputation(self) -> float:
        return self.v3.reputation

    @reputation.setter
    def reputation(self, value: float):
        self.v3.valuation_sub.reputation = value

    @property
    def contribution(self) -> float:
        return self.v3.contribution

    @contribution.setter
    def contribution(self, value: float):
        self.v3.valuation_sub.contribution = value

    @property
    def stewardship(self) -> float:
        return self.v3.stewardship

    @stewardship.setter
    def stewardship(self, value: float):
        self.v3.veracity_sub.stewardship = value

    @property
    def energy(self) -> float:
        return self.v3.energy

    @energy.setter
    def energy(self, value: float):
        self.v3.veracity_sub.energy = value

    @property
    def network(self) -> float:
        return self.v3.network

    @network.setter
    def network(self, value: float):
        self.v3.validity_sub.network = value

    @property
    def temporal(self) -> float:
        return self.v3.temporal

    @temporal.setter
    def temporal(self, value: float):
        self.v3.validity_sub.temporal = value

    # =========================================================================
    # Tensor aggregate methods
    # =========================================================================

    def t3_composite(self) -> float:
        """
        Weighted composite T3 trust score per Web4 spec.

        Formula: talent * 0.3 + training * 0.4 + temperament * 0.3
        (delegates to T3Tensor.composite()).
        """
        return self.t3.composite()

    def t3_average(self) -> float:
        """
        @deprecated Use t3_composite() for spec-compliant scoring.
        Unweighted mean of all 6 legacy subdimensions, kept for
        backward compatibility with pre-fractal callers.
        """
        return (self.competence + self.reliability + self.consistency +
                self.witnesses + self.lineage + self.alignment) / 6

    def v3_composite(self) -> float:
        """Composite V3 value score (delegates to V3Tensor.composite())."""
        return self.v3.composite()

    def v3_average(self) -> float:
        """
        @deprecated Use v3_composite() for spec-compliant scoring.
        Unweighted mean of all 6 legacy V3 subdimensions.
        """
        return (self.energy + self.contribution + self.stewardship +
                self.network + self.reputation + self.temporal) / 6

    def update_from_outcome(self, success: bool, is_novel: bool = False):
        """
        Update trust based on a direct action outcome per Web4 spec.

        Impact table (applied by T3Tensor.update_from_outcome):

        | Outcome         | Talent Impact  | Training Impact | Temperament Impact |
        |-----------------|----------------|-----------------|--------------------|
        | Novel Success   | +0.02 to +0.05 | +0.01 to +0.02  | +0.01              |
        | Standard Success| 0              | +0.005 to +0.01 | +0.005             |
        | Failure         | -0.02          | -0.01           | -0.02              |

        Args:
            success: Whether the action succeeded.
            is_novel: Whether the action was novel (larger positive impact).

        Side effects: bumps action/success counters, nudges V3
        contribution/energy, and stamps last_action with current UTC time.
        """
        self.action_count += 1
        if success:
            self.success_count += 1

        # Use the spec-compliant T3Tensor update
        self.t3.update_from_outcome(success, is_novel)

        # Update V3 contribution based on outcome; clamp keeps values in [0, 1].
        clamp = lambda v: max(0.0, min(1.0, v))
        if success:
            self.v3.valuation_sub.contribution = clamp(self.contribution + 0.01)
            self.v3.veracity_sub.energy = clamp(self.energy + 0.005)
        else:
            # Failures dent contribution but (by design, it appears) not energy.
            self.v3.valuation_sub.contribution = clamp(self.contribution - 0.005)

        self.last_action = datetime.now(timezone.utc).isoformat()

    def receive_witness(self, witness_id: str, success: bool, magnitude: float = 0.05):
        """
        Another entity witnessed this entity's action.

        Being witnessed builds:
        - witnesses score (Training subdim - more observers = more validated)
        - reputation (V3 Valuation subdim) - external perception
        - network (V3 Validity subdim) - connection to other entities

        Note: the positive delta shrinks as self.witnesses approaches 1
        (diminishing returns), while the negative delta grows with it.
        """
        self.witness_count += 1

        if witness_id not in self.witnessed_by:
            self.witnessed_by.append(witness_id)

        clamp = lambda v: max(0.0, min(1.0, v))

        # Witnessing has a smaller effect than direct outcomes
        if success:
            delta = magnitude * 0.03 * (1 - self.witnesses)
        else:
            delta = -magnitude * 0.05 * self.witnesses

        # Update Training.witnesses subdimension
        self.t3.training_sub.witnesses = clamp(self.witnesses + delta)
        # Update V3 Valuation.reputation
        self.v3.valuation_sub.reputation = clamp(self.reputation + delta * 0.8)
        # Update V3 Validity.network (grows with connections)
        self.v3.validity_sub.network = clamp(self.network + 0.01)

    def give_witness(self, target_id: str, success: bool, magnitude: float = 0.02):
        """
        This entity witnessed another entity's action.

        Being a witness builds:
        - alignment (Talent subdim - if judgment was correct, entity is aligned)
        - contribution (V3 Valuation subdim) - value added through validation
        """
        if target_id not in self.has_witnessed:
            self.has_witnessed.append(target_id)

        clamp = lambda v: max(0.0, min(1.0, v))

        # Witnessing others builds own credibility slightly
        if success:
            delta = magnitude * 0.02 * (1 - self.alignment)
        else:
            # Witnessing failures doesn't hurt the witness
            delta = magnitude * 0.01 * (1 - self.alignment)

        # Update Talent.alignment subdimension
        self.t3.talent_sub.alignment = clamp(self.alignment + delta)
        # Update V3 Valuation.contribution
        self.v3.valuation_sub.contribution = clamp(self.contribution + 0.005)

    def trust_level(self) -> str:
        """
        Categorical trust level based on T3 composite score.

        Uses weighted composite per Web4 spec, not simple average.
+ """ + return self.t3.level() + + def apply_decay(self, days_inactive: float, decay_rate: float = 0.01) -> bool: + """ + Apply trust decay based on inactivity. + + Trust decays slowly over time if not used. + Primarily affects Temperament (reliability, consistency) and temporal. + + Args: + days_inactive: Days since last action + decay_rate: Decay rate per day + + Returns: + True if decay was applied + """ + if days_inactive <= 0: + return False + + decay_factor = (1 - decay_rate) ** days_inactive + floor = 0.3 + + def decay_value(current: float) -> float: + return max(floor, floor + (current - floor) * decay_factor) + + old_reliability = self.reliability + + # Decay Temperament subdimensions (reliability and consistency) + self.t3.temperament_sub.reliability = decay_value(self.reliability) + self.t3.temperament_sub.consistency = decay_value(self.consistency * 0.98) + + # Decay V3 Validity.temporal + self.v3.validity_sub.temporal = decay_value(self.temporal) + + return abs(old_reliability - self.reliability) > 0.001 + + def apply_silence_penalty(self, severity: str = "overdue") -> bool: + """ + Apply trust penalty for unexpected silence (absence when expected). + + Silence is a signal: entities that go quiet when expected to be active + should see trust impact. This implements "the dog that didn't bark". 
+ + Severity levels: + - "expected": Minor - entity should check in soon (no penalty yet) + - "overdue": Moderate - past grace period, warrants attention + - "missing": Significant - well past expected, may indicate problem + + Args: + severity: One of "expected", "overdue", "missing" + + Returns: + True if penalty was applied + """ + if severity == "expected": + # No penalty yet, just tracking + return False + + elif severity == "overdue": + # Moderate impact on reliability and consistency + penalty = 0.02 + self.reliability = max(0.1, self.reliability - penalty) + self.consistency = max(0.1, self.consistency - penalty * 0.5) + return True + + elif severity == "missing": + # Significant impact - entity may be unreliable + penalty = 0.05 + self.reliability = max(0.1, self.reliability - penalty) + self.consistency = max(0.1, self.consistency - penalty) + self.temporal = max(0.1, self.temporal - penalty * 0.5) + return True + + return False + + def days_since_last_action(self) -> float: + """Calculate days since last action.""" + if not self.last_action: + if self.created_at: + try: + created = datetime.fromisoformat( + self.created_at.replace("Z", "+00:00") + ) + return (datetime.now(timezone.utc) - created).days + except (ValueError, TypeError): + return 0 + return 0 + + try: + last = datetime.fromisoformat( + self.last_action.replace("Z", "+00:00") + ) + delta = datetime.now(timezone.utc) - last + return delta.total_seconds() / 86400 + except (ValueError, TypeError): + return 0 + + def to_dict(self) -> dict: + """ + Serialize to dict. + + Includes both fractal structure (t3/v3) and flattened subdimensions + for backward compatibility. 
+ """ + return { + "entity_id": self.entity_id, + "entity_type": self.entity_type, + "entity_name": self.entity_name, + # Fractal T3 tensor + "t3": self.t3.to_dict(), + # Fractal V3 tensor (simplified) + "v3": { + "valuation": self.v3.valuation, + "veracity": self.v3.veracity, + "validity": self.v3.validity, + "reputation": self.reputation, + "contribution": self.contribution, + "stewardship": self.stewardship, + "energy": self.energy, + "network": self.network, + "temporal": self.temporal, + "composite": self.v3.composite(), + }, + # Legacy 6D flattened view (backward compatibility) + "competence": self.competence, + "reliability": self.reliability, + "consistency": self.consistency, + "witnesses": self.witnesses, + "lineage": self.lineage, + "alignment": self.alignment, + "energy": self.energy, + "contribution": self.contribution, + "stewardship": self.stewardship, + "network": self.network, + "reputation": self.reputation, + "temporal": self.temporal, + # Witnessing relationships + "witnessed_by": self.witnessed_by, + "has_witnessed": self.has_witnessed, + # Metadata + "action_count": self.action_count, + "success_count": self.success_count, + "witness_count": self.witness_count, + "last_action": self.last_action, + "created_at": self.created_at, + } + + @classmethod + def from_dict(cls, data: dict) -> 'EntityTrust': + """ + Deserialize from dict. + + Handles both new fractal format and legacy 6D flat format. 
+ """ + # Handle list fields that might be missing + if 'witnessed_by' not in data: + data['witnessed_by'] = [] + if 'has_witnessed' not in data: + data['has_witnessed'] = [] + + entity = cls( + entity_id=data.get("entity_id", ""), + entity_type=data.get("entity_type", ""), + entity_name=data.get("entity_name", ""), + witnessed_by=data.get("witnessed_by", []), + has_witnessed=data.get("has_witnessed", []), + action_count=data.get("action_count", 0), + success_count=data.get("success_count", 0), + witness_count=data.get("witness_count", 0), + last_action=data.get("last_action"), + created_at=data.get("created_at"), + ) + + # Check if data has new fractal t3 structure + if "t3" in data and isinstance(data["t3"], dict): + entity.t3 = T3Tensor.from_dict(data["t3"]) + else: + # Migrate from legacy 6D flat format + entity.t3 = migrate_legacy_t3({ + "competence": data.get("competence", 0.5), + "reliability": data.get("reliability", 0.5), + "consistency": data.get("consistency", 0.5), + "witnesses": data.get("witnesses", 0.5), + "lineage": data.get("lineage", 0.5), + "alignment": data.get("alignment", 0.5), + }) + + # Check if data has new fractal v3 structure + if "v3" in data and isinstance(data["v3"], dict): + entity.v3 = migrate_legacy_v3({ + "reputation": data["v3"].get("reputation", 0.5), + "contribution": data["v3"].get("contribution", 0.5), + "stewardship": data["v3"].get("stewardship", 0.5), + "energy": data["v3"].get("energy", 0.5), + "network": data["v3"].get("network", 0.5), + "temporal": data["v3"].get("temporal", 0.5), + }) + else: + # Migrate from legacy 6D flat format + entity.v3 = migrate_legacy_v3({ + "reputation": data.get("reputation", 0.5), + "contribution": data.get("contribution", 0.5), + "stewardship": data.get("stewardship", 0.5), + "energy": data.get("energy", 0.5), + "network": data.get("network", 0.5), + "temporal": data.get("temporal", 0.5), + }) + + return entity + + +class EntityTrustStore: + """ + Persistent storage for entity trust with 
witnessing. + + Supports any entity type: MCP servers, agent roles, references, etc. + """ + + def __init__(self): + ENTITIES_DIR.mkdir(parents=True, exist_ok=True) + + def _entity_file(self, entity_id: str) -> Path: + """Get file path for entity trust data.""" + safe_name = hashlib.sha256(entity_id.encode()).hexdigest()[:16] + return ENTITIES_DIR / f"{safe_name}.json" + + def get(self, entity_id: str) -> EntityTrust: + """Get trust for entity, creating with defaults if new.""" + entity_file = self._entity_file(entity_id) + + if entity_file.exists(): + with open(entity_file) as f: + data = json.load(f) + return EntityTrust.from_dict(data) + + # New entity with default trust + trust = EntityTrust( + entity_id=entity_id, + created_at=datetime.now(timezone.utc).isoformat() + ) + self.save(trust) + return trust + + def save(self, trust: EntityTrust): + """Save entity trust to disk.""" + entity_file = self._entity_file(trust.entity_id) + with open(entity_file, "w") as f: + json.dump(trust.to_dict(), f, indent=2) + + def update(self, entity_id: str, success: bool, magnitude: float = 0.1) -> EntityTrust: + """Update entity trust based on action outcome.""" + trust = self.get(entity_id) + trust.update_from_outcome(success, magnitude) + self.save(trust) + return trust + + def witness(self, witness_id: str, target_id: str, success: bool, + magnitude: float = 0.1) -> Tuple[EntityTrust, EntityTrust]: + """ + Record a witnessing event. + + witness_id observes target_id succeed or fail. + Both entities' trust is updated. 

        Returns: (witness_trust, target_trust)
        """
        # Update target (being witnessed)
        target = self.get(target_id)
        target.receive_witness(witness_id, success, magnitude)
        self.save(target)

        # Update witness (doing the witnessing)
        witness = self.get(witness_id)
        witness.give_witness(target_id, success, magnitude)
        self.save(witness)

        return witness, target

    def list_entities(self, entity_type: Optional[str] = None) -> List[str]:
        """
        List all known entity IDs, optionally filtered by type.

        Unreadable/corrupt JSON files are silently skipped (best-effort scan).
        NOTE(review): a stored record without an "entity_id" key would append
        "" to the result — confirm records always carry the key.
        """
        entities = []
        for f in ENTITIES_DIR.glob("*.json"):
            try:
                with open(f) as file:
                    data = json.load(file)
                    eid = data.get("entity_id", "")
                    if entity_type is None or data.get("entity_type") == entity_type:
                        entities.append(eid)
            except Exception:
                pass
        return entities

    def get_by_type(self, entity_type: str) -> Dict[str, EntityTrust]:
        """Get all entities of a specific type, keyed by entity ID."""
        return {
            eid: self.get(eid)
            for eid in self.list_entities(entity_type)
        }

    def get_mcp_servers(self) -> Dict[str, EntityTrust]:
        """Get all MCP server entities (entity_type == "mcp")."""
        return self.get_by_type("mcp")

    def get_witnessing_chain(self, entity_id: str, depth: int = 3) -> dict:
        """
        Get the witnessing chain for an entity.

        Shows who has witnessed this entity and who it has witnessed.
+ """ + entity = self.get(entity_id) + + chain = { + "entity_id": entity_id, + "t3_average": entity.t3_average(), + "trust_level": entity.trust_level(), + "witnessed_by": [], + "has_witnessed": [] + } + + if depth > 0: + for witness_id in entity.witnessed_by[:10]: # Limit for performance + witness = self.get(witness_id) + chain["witnessed_by"].append({ + "entity_id": witness_id, + "t3_average": witness.t3_average(), + "trust_level": witness.trust_level() + }) + + for target_id in entity.has_witnessed[:10]: + target = self.get(target_id) + chain["has_witnessed"].append({ + "entity_id": target_id, + "t3_average": target.t3_average(), + "trust_level": target.trust_level() + }) + + return chain + + def apply_decay_all(self, decay_rate: float = 0.01) -> Dict[str, dict]: + """ + Apply trust decay to all entities based on inactivity. + + Should be called periodically (e.g., at session start) to + ensure trust reflects recency. + + Args: + decay_rate: Decay rate per day (default 1% per day) + + Returns: + Dict of {entity_id: {decayed, days_inactive, t3_before, t3_after}} + """ + results = {} + + for entity_id in self.list_entities(): + trust = self.get(entity_id) + days_inactive = trust.days_since_last_action() + + if days_inactive > 1: # Only decay if > 1 day inactive + t3_before = trust.t3_average() + decayed = trust.apply_decay(days_inactive, decay_rate) + + if decayed: + self.save(trust) + results[entity_id] = { + "decayed": True, + "days_inactive": round(days_inactive, 1), + "t3_before": round(t3_before, 3), + "t3_after": round(trust.t3_average(), 3), + "entity_type": trust.entity_type + } + + return results + + def get_with_decay(self, entity_id: str, decay_rate: float = 0.01) -> EntityTrust: + """ + Get trust for entity, applying decay if needed. + + Convenience method that applies decay before returning trust. 
+ """ + trust = self.get(entity_id) + days_inactive = trust.days_since_last_action() + + if days_inactive > 1: + if trust.apply_decay(days_inactive, decay_rate): + self.save(trust) + + return trust + + +# Convenience functions for common entity types +def get_mcp_trust(server_name: str) -> EntityTrust: + """Get trust for an MCP server.""" + store = EntityTrustStore() + return store.get(f"mcp:{server_name}") + + +def update_mcp_trust(server_name: str, success: bool, witness_id: str = "session:current") -> EntityTrust: + """ + Update MCP server trust after a tool call. + + The session witnesses the MCP's action. + """ + store = EntityTrustStore() + + # Direct outcome update + mcp_trust = store.update(f"mcp:{server_name}", success) + + # Session witnesses the MCP + store.witness(witness_id, f"mcp:{server_name}", success, magnitude=0.05) + + return mcp_trust diff --git a/plugins/web4-governance/governance/event_stream.py b/plugins/web4-governance/governance/event_stream.py new file mode 100644 index 0000000000..62dcb3d9d5 --- /dev/null +++ b/plugins/web4-governance/governance/event_stream.py @@ -0,0 +1,510 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Event Stream +# https://github.com/dp-web4/web4 +""" +Event Stream for Real-Time Monitoring. + +Provides a JSONL-based event stream that external clients can consume +for real-time monitoring, alerting, and analytics. + +Stream Location: ~/.web4/events.jsonl (configurable) + +Event Format: + Each line is a self-contained JSON object with consistent schema. + See EVENT_SCHEMA below for field definitions. 
+ +Usage: + from governance.event_stream import EventStream, EventType + + # Initialize stream + stream = EventStream("~/.web4") + + # Emit events (typically called by hooks) + stream.emit( + event_type=EventType.TOOL_CALL, + session_id="sess-123", + tool="Bash", + target="rm -rf /tmp/test", + decision="deny", + reason="Destructive command blocked" + ) + + # External clients can tail the stream file: + # tail -f ~/.web4/events.jsonl | jq . + +Consuming the Stream: + Python: + import json + with open("~/.web4/events.jsonl", "r") as f: + for line in f: + event = json.loads(line) + print(event) + + Shell (real-time): + tail -f ~/.web4/events.jsonl | jq -c 'select(.severity == "alert")' + + Shell (filter by type): + grep '"type":"policy_decision"' ~/.web4/events.jsonl | jq . +""" + +import json +import os +import threading +from dataclasses import dataclass, field, asdict +from datetime import datetime, timezone +from enum import Enum +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + + +class EventType(str, Enum): + """Event types emitted by the governance system.""" + + # Session lifecycle + SESSION_START = "session_start" + SESSION_END = "session_end" + + # Tool execution + TOOL_CALL = "tool_call" + TOOL_RESULT = "tool_result" + + # Policy decisions + POLICY_DECISION = "policy_decision" + POLICY_VIOLATION = "policy_violation" + + # Rate limiting + RATE_LIMIT_CHECK = "rate_limit_check" + RATE_LIMIT_EXCEEDED = "rate_limit_exceeded" + + # Trust updates + TRUST_UPDATE = "trust_update" + + # Agent lifecycle + AGENT_SPAWN = "agent_spawn" + AGENT_COMPLETE = "agent_complete" + + # Audit + AUDIT_RECORD = "audit_record" + AUDIT_ALERT = "audit_alert" + + # System + SYSTEM_INFO = "system_info" + SYSTEM_ERROR = "system_error" + + +class Severity(str, Enum): + """Event severity levels.""" + DEBUG = "debug" # Verbose debugging info + INFO = "info" # Normal operations + WARN = "warn" # Potential issues, policy warnings + ALERT = "alert" # Policy 
violations, security events + ERROR = "error" # System errors + + +@dataclass +class Event: + """ + Standard event structure for the monitoring stream. + + All events follow this schema for consistent parsing by clients. + """ + # Required fields + type: EventType # Event type enum + timestamp: str # ISO 8601 UTC timestamp + severity: Severity # Severity level + + # Context fields (optional but recommended) + session_id: Optional[str] = None # Session identifier + agent_id: Optional[str] = None # Agent/role identifier + + # Event-specific payload + tool: Optional[str] = None # Tool name (for tool events) + target: Optional[str] = None # Target path/URL/command + category: Optional[str] = None # Tool category + decision: Optional[str] = None # Policy decision (allow/deny/warn) + reason: Optional[str] = None # Human-readable reason + rule_id: Optional[str] = None # Matched policy rule ID + + # Metrics (optional) + duration_ms: Optional[int] = None # Operation duration + count: Optional[int] = None # Count (for rate limits, etc.) 
+ + # Trust (optional) + trust_before: Optional[float] = None + trust_after: Optional[float] = None + trust_delta: Optional[float] = None + + # Error details (optional) + error: Optional[str] = None # Error message + error_type: Optional[str] = None # Error class/type + + # Extensible metadata + metadata: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary, excluding None values.""" + result = {} + for key, value in asdict(self).items(): + if value is not None: + if isinstance(value, Enum): + result[key] = value.value + elif isinstance(value, dict) and not value: + continue # Skip empty dicts + else: + result[key] = value + return result + + def to_json(self) -> str: + """Convert to JSON string (single line).""" + return json.dumps(self.to_dict(), separators=(',', ':')) + + +# Type alias for event callbacks +EventCallback = Callable[[Event], None] + + +class EventStream: + """ + JSONL event stream for real-time monitoring. + + Writes events to a file that external clients can tail. + Supports optional in-process callbacks for direct integration. + """ + + DEFAULT_FILENAME = "events.jsonl" + MAX_FILE_SIZE_MB = 100 # Rotate at 100MB + + def __init__( + self, + storage_path: Optional[str] = None, + filename: str = DEFAULT_FILENAME, + min_severity: Severity = Severity.INFO, + ): + """ + Initialize the event stream. 
+ + Args: + storage_path: Base directory for stream file (default: ~/.web4) + filename: Stream filename (default: events.jsonl) + min_severity: Minimum severity to emit (default: INFO) + """ + if storage_path: + self.storage_path = Path(os.path.expanduser(storage_path)) + else: + self.storage_path = Path.home() / ".web4" + + self.storage_path.mkdir(parents=True, exist_ok=True) + self.stream_file = self.storage_path / filename + self.min_severity = min_severity + + # Thread safety + self._lock = threading.Lock() + + # In-process callbacks (optional) + self._callbacks: List[EventCallback] = [] + + # Severity ordering for filtering + self._severity_order = { + Severity.DEBUG: 0, + Severity.INFO: 1, + Severity.WARN: 2, + Severity.ALERT: 3, + Severity.ERROR: 4, + } + + @property + def stream_path(self) -> str: + """Get the full path to the stream file.""" + return str(self.stream_file) + + def register_callback(self, callback: EventCallback) -> None: + """Register an in-process callback for events.""" + with self._lock: + self._callbacks.append(callback) + + def unregister_callback(self, callback: EventCallback) -> None: + """Unregister an in-process callback.""" + with self._lock: + if callback in self._callbacks: + self._callbacks.remove(callback) + + def _should_emit(self, severity: Severity) -> bool: + """Check if event meets minimum severity threshold.""" + return self._severity_order[severity] >= self._severity_order[self.min_severity] + + def _check_rotation(self) -> None: + """Check if file needs rotation (called under lock).""" + try: + if self.stream_file.exists(): + size_mb = self.stream_file.stat().st_size / (1024 * 1024) + if size_mb >= self.MAX_FILE_SIZE_MB: + # Rotate: rename current to .1, start fresh + rotated = self.stream_file.with_suffix(".jsonl.1") + if rotated.exists(): + rotated.unlink() + self.stream_file.rename(rotated) + except Exception: + pass # Best effort rotation + + def emit( + self, + event_type: EventType, + severity: Severity = 
Severity.INFO, + session_id: Optional[str] = None, + agent_id: Optional[str] = None, + tool: Optional[str] = None, + target: Optional[str] = None, + category: Optional[str] = None, + decision: Optional[str] = None, + reason: Optional[str] = None, + rule_id: Optional[str] = None, + duration_ms: Optional[int] = None, + count: Optional[int] = None, + trust_before: Optional[float] = None, + trust_after: Optional[float] = None, + error: Optional[str] = None, + error_type: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> Optional[Event]: + """ + Emit an event to the stream. + + Args: + event_type: Type of event + severity: Event severity (default: INFO) + session_id: Session identifier + agent_id: Agent/role identifier + tool: Tool name + target: Target path/URL/command + category: Tool category + decision: Policy decision + reason: Human-readable reason + rule_id: Matched policy rule ID + duration_ms: Operation duration in milliseconds + count: Generic count field + trust_before: Trust value before update + trust_after: Trust value after update + error: Error message + error_type: Error class/type + metadata: Additional key-value data + + Returns: + The emitted Event object, or None if filtered + """ + if not self._should_emit(severity): + return None + + # Calculate trust delta if both values provided + trust_delta = None + if trust_before is not None and trust_after is not None: + trust_delta = trust_after - trust_before + + event = Event( + type=event_type, + timestamp=datetime.now(timezone.utc).isoformat(), + severity=severity, + session_id=session_id, + agent_id=agent_id, + tool=tool, + target=target, + category=category, + decision=decision, + reason=reason, + rule_id=rule_id, + duration_ms=duration_ms, + count=count, + trust_before=trust_before, + trust_after=trust_after, + trust_delta=trust_delta, + error=error, + error_type=error_type, + metadata=metadata or {}, + ) + + self._write_event(event) + self._notify_callbacks(event) + + return 
event + + def emit_event(self, event: Event) -> None: + """Emit a pre-constructed Event object.""" + if not self._should_emit(event.severity): + return + self._write_event(event) + self._notify_callbacks(event) + + def _write_event(self, event: Event) -> None: + """Write event to the stream file.""" + with self._lock: + self._check_rotation() + try: + with open(self.stream_file, "a", encoding="utf-8") as f: + f.write(event.to_json() + "\n") + f.flush() + except Exception as e: + # Log to stderr if stream write fails + import sys + print(f"[web4-event-stream] Write error: {e}", file=sys.stderr) + + def _notify_callbacks(self, event: Event) -> None: + """Notify registered callbacks.""" + with self._lock: + callbacks = self._callbacks.copy() + + for callback in callbacks: + try: + callback(event) + except Exception: + pass # Don't let callback errors break the stream + + # Convenience methods for common event types + + def session_start( + self, + session_id: str, + metadata: Optional[Dict[str, Any]] = None, + ) -> Event: + """Emit session start event.""" + return self.emit( + EventType.SESSION_START, + Severity.INFO, + session_id=session_id, + metadata=metadata, + ) + + def session_end( + self, + session_id: str, + duration_ms: Optional[int] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> Event: + """Emit session end event.""" + return self.emit( + EventType.SESSION_END, + Severity.INFO, + session_id=session_id, + duration_ms=duration_ms, + metadata=metadata, + ) + + def policy_decision( + self, + session_id: str, + tool: str, + target: Optional[str], + decision: str, + reason: Optional[str] = None, + rule_id: Optional[str] = None, + category: Optional[str] = None, + ) -> Event: + """Emit policy decision event.""" + # Determine severity based on decision + if decision == "deny": + severity = Severity.ALERT + elif decision == "warn": + severity = Severity.WARN + else: + severity = Severity.INFO + + return self.emit( + EventType.POLICY_DECISION, + severity, 
+ session_id=session_id, + tool=tool, + target=target, + decision=decision, + reason=reason, + rule_id=rule_id, + category=category, + ) + + def rate_limit_exceeded( + self, + session_id: str, + key: str, + count: int, + max_count: int, + ) -> Event: + """Emit rate limit exceeded event.""" + return self.emit( + EventType.RATE_LIMIT_EXCEEDED, + Severity.ALERT, + session_id=session_id, + target=key, + count=count, + metadata={"max_count": max_count}, + ) + + def trust_update( + self, + session_id: str, + agent_id: str, + trust_before: float, + trust_after: float, + reason: Optional[str] = None, + ) -> Event: + """Emit trust update event.""" + return self.emit( + EventType.TRUST_UPDATE, + Severity.INFO, + session_id=session_id, + agent_id=agent_id, + trust_before=trust_before, + trust_after=trust_after, + reason=reason, + ) + + def audit_alert( + self, + session_id: str, + tool: str, + target: Optional[str], + reason: str, + category: Optional[str] = None, + ) -> Event: + """Emit audit alert (credential access, memory write, etc.).""" + return self.emit( + EventType.AUDIT_ALERT, + Severity.ALERT, + session_id=session_id, + tool=tool, + target=target, + reason=reason, + category=category, + ) + + def system_error( + self, + error: str, + error_type: Optional[str] = None, + session_id: Optional[str] = None, + ) -> Event: + """Emit system error event.""" + return self.emit( + EventType.SYSTEM_ERROR, + Severity.ERROR, + session_id=session_id, + error=error, + error_type=error_type, + ) + + +# Module-level default stream instance +_default_stream: Optional[EventStream] = None +_default_lock = threading.Lock() + + +def get_default_stream() -> EventStream: + """Get or create the default event stream instance.""" + global _default_stream + with _default_lock: + if _default_stream is None: + _default_stream = EventStream() + return _default_stream + + +def emit(event_type: EventType, **kwargs) -> Optional[Event]: + """Emit an event using the default stream.""" + return 
get_default_stream().emit(event_type, **kwargs) diff --git a/plugins/web4-governance/governance/ledger.py b/plugins/web4-governance/governance/ledger.py new file mode 100644 index 0000000000..1af0b972b9 --- /dev/null +++ b/plugins/web4-governance/governance/ledger.py @@ -0,0 +1,737 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - SQLite Ledger +# https://github.com/dp-web4/web4 +""" +SQLite-based ledger for lightweight governance. + +Provides: +- Identity registration (soft LCT) +- Session tracking with sequential numbering +- Work product registration +- ATP (Allocation Transfer Packet) accounting +- Audit trail + +This is the local persistence layer. For distributed consensus, +see the full Web4 blockchain implementation. +""" + +import sqlite3 +import json +import hashlib +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional, Dict, Any, List + + +class Ledger: + """SQLite-based ledger for session tracking and governance.""" + + def __init__(self, db_path: Optional[Path] = None): + """ + Initialize ledger with SQLite database. + + Args: + db_path: Path to SQLite database. 
Defaults to ~/.web4/ledger.db
        """
        if db_path is None:
            db_path = Path.home() / ".web4" / "ledger.db"

        self.db_path = Path(db_path)
        # Create parent directory so first connect can create the DB file.
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        self._init_db()

    def _get_connection(self):
        """
        Get a database connection with proper settings for concurrency.

        NOTE(review): the CRUD methods in this class open connections via
        sqlite3.connect(self.db_path) directly, without this helper's
        timeout/busy_timeout settings — confirm they should be routed
        through _get_connection() for consistent lock handling.
        """
        conn = sqlite3.connect(self.db_path, timeout=30.0)
        conn.execute("PRAGMA busy_timeout = 30000")  # 30 second wait on locks
        return conn

    def _init_db(self):
        """Initialize database schema with concurrency support (idempotent)."""
        with sqlite3.connect(self.db_path) as conn:
            # Enable WAL mode for better concurrent read/write access
            # This allows multiple readers and one writer simultaneously
            conn.execute("PRAGMA journal_mode=WAL")
            conn.execute("PRAGMA busy_timeout = 30000")

            conn.executescript("""
                -- Identities (soft LCT)
                CREATE TABLE IF NOT EXISTS identities (
                    lct_id TEXT PRIMARY KEY,
                    machine_hash TEXT NOT NULL,
                    user_hash TEXT NOT NULL,
                    binding TEXT DEFAULT 'software',
                    created_at TEXT NOT NULL,
                    metadata TEXT
                );

                -- Session sequence tracking per project
                CREATE TABLE IF NOT EXISTS session_sequence (
                    project TEXT PRIMARY KEY,
                    last_session_number INTEGER DEFAULT 0,
                    updated_at TEXT NOT NULL
                );

                -- Sessions
                CREATE TABLE IF NOT EXISTS sessions (
                    session_id TEXT PRIMARY KEY,
                    lct_id TEXT NOT NULL,
                    project TEXT,
                    session_number INTEGER,
                    started_at TEXT NOT NULL,
                    ended_at TEXT,
                    status TEXT DEFAULT 'active',
                    atp_budget INTEGER DEFAULT 100,
                    atp_consumed INTEGER DEFAULT 0,
                    metadata TEXT,
                    FOREIGN KEY (lct_id) REFERENCES identities(lct_id)
                );

                -- Work products (files, commits, etc.)
+ CREATE TABLE IF NOT EXISTS work_products ( + product_id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + product_type TEXT NOT NULL, + path TEXT, + content_hash TEXT, + created_at TEXT NOT NULL, + metadata TEXT, + FOREIGN KEY (session_id) REFERENCES sessions(session_id) + ); + + -- Audit trail (with witnessing chain) + CREATE TABLE IF NOT EXISTS audit_trail ( + audit_id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + sequence INTEGER NOT NULL, + action_type TEXT NOT NULL, + tool_name TEXT, + target TEXT, + input_hash TEXT, + output_hash TEXT, + status TEXT, + timestamp TEXT NOT NULL, + r6_data TEXT, + previous_hash TEXT, + record_hash TEXT NOT NULL, + FOREIGN KEY (session_id) REFERENCES sessions(session_id) + ); + + -- Heartbeats (timing coherence tracking) + CREATE TABLE IF NOT EXISTS heartbeats ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT NOT NULL, + sequence INTEGER NOT NULL, + timestamp TEXT NOT NULL, + status TEXT NOT NULL, + delta_seconds REAL, + tool_name TEXT, + action_index INTEGER, + previous_hash TEXT, + entry_hash TEXT NOT NULL, + FOREIGN KEY (session_id) REFERENCES sessions(session_id) + ); + + -- Indexes for common queries + CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project); + CREATE INDEX IF NOT EXISTS idx_sessions_lct ON sessions(lct_id); + CREATE INDEX IF NOT EXISTS idx_audit_session ON audit_trail(session_id); + CREATE INDEX IF NOT EXISTS idx_work_session ON work_products(session_id); + CREATE INDEX IF NOT EXISTS idx_heartbeat_session ON heartbeats(session_id); + """) + + # --- Identity Management --- + + def register_identity(self, lct_id: str, machine_hash: str, user_hash: str, + binding: str = "software", metadata: Optional[Dict] = None) -> bool: + """Register a new identity (soft LCT).""" + now = datetime.now(timezone.utc).isoformat() + "Z" + + with sqlite3.connect(self.db_path) as conn: + try: + conn.execute(""" + INSERT INTO identities (lct_id, machine_hash, user_hash, binding, created_at, metadata) + 
VALUES (?, ?, ?, ?, ?, ?) + """, (lct_id, machine_hash, user_hash, binding, now, + json.dumps(metadata) if metadata else None)) + return True + except sqlite3.IntegrityError: + # Already exists + return False + + def get_identity(self, lct_id: str) -> Optional[Dict]: + """Get identity by LCT ID.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT * FROM identities WHERE lct_id = ?", (lct_id,) + ).fetchone() + return dict(row) if row else None + + # --- Session Sequence Management --- + + def get_next_session_number(self, project: str) -> int: + """ + Get next session number for a project (atomic increment). + + This is the authoritative source for session numbering. + """ + now = datetime.now(timezone.utc).isoformat() + "Z" + + with sqlite3.connect(self.db_path) as conn: + # Try to increment existing + cursor = conn.execute(""" + UPDATE session_sequence + SET last_session_number = last_session_number + 1, updated_at = ? + WHERE project = ? + """, (now, project)) + + if cursor.rowcount == 0: + # First session for this project + conn.execute(""" + INSERT INTO session_sequence (project, last_session_number, updated_at) + VALUES (?, 1, ?) + """, (project, now)) + return 1 + + # Get the new value + row = conn.execute( + "SELECT last_session_number FROM session_sequence WHERE project = ?", + (project,) + ).fetchone() + return row[0] + + def sync_session_number(self, project: str, known_max: int) -> int: + """ + Sync session number with filesystem reality. + + If ledger is behind filesystem (e.g., after recovery), update it. + Returns the current max session number. 
+ """ + now = datetime.now(timezone.utc).isoformat() + "Z" + + with sqlite3.connect(self.db_path) as conn: + row = conn.execute( + "SELECT last_session_number FROM session_sequence WHERE project = ?", + (project,) + ).fetchone() + + current = row[0] if row else 0 + + if known_max > current: + if row: + conn.execute(""" + UPDATE session_sequence + SET last_session_number = ?, updated_at = ? + WHERE project = ? + """, (known_max, now, project)) + else: + conn.execute(""" + INSERT INTO session_sequence (project, last_session_number, updated_at) + VALUES (?, ?, ?) + """, (project, known_max, now)) + return known_max + + return current + + # --- Session Management --- + + def start_session(self, session_id: str, lct_id: str, project: Optional[str] = None, + session_number: Optional[int] = None, atp_budget: int = 100, + metadata: Optional[Dict] = None) -> Dict: + """Start a new session.""" + now = datetime.now(timezone.utc).isoformat() + "Z" + + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + INSERT INTO sessions + (session_id, lct_id, project, session_number, started_at, atp_budget, metadata) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, (session_id, lct_id, project, session_number, now, atp_budget, + json.dumps(metadata) if metadata else None)) + + return { + "session_id": session_id, + "lct_id": lct_id, + "project": project, + "session_number": session_number, + "started_at": now, + "atp_budget": atp_budget, + "atp_remaining": atp_budget + } + + def end_session(self, session_id: str, status: str = "completed") -> bool: + """End a session.""" + now = datetime.now(timezone.utc).isoformat() + "Z" + + with sqlite3.connect(self.db_path) as conn: + cursor = conn.execute(""" + UPDATE sessions SET ended_at = ?, status = ? + WHERE session_id = ? 
+ """, (now, status, session_id)) + return cursor.rowcount > 0 + + def get_session(self, session_id: str) -> Optional[Dict]: + """Get session by ID.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT * FROM sessions WHERE session_id = ?", (session_id,) + ).fetchone() + if row: + session = dict(row) + session["atp_remaining"] = session["atp_budget"] - session["atp_consumed"] + return session + return None + + # --- ATP Accounting --- + + def consume_atp(self, session_id: str, amount: int = 1) -> int: + """ + Consume ATP for an action. Returns remaining ATP. + + ATP = Allocation Transfer Packet (action budget) + """ + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + UPDATE sessions SET atp_consumed = atp_consumed + ? + WHERE session_id = ? + """, (amount, session_id)) + + row = conn.execute( + "SELECT atp_budget - atp_consumed FROM sessions WHERE session_id = ?", + (session_id,) + ).fetchone() + return row[0] if row else 0 + + # --- Work Products --- + + def register_work_product(self, session_id: str, product_type: str, + path: Optional[str] = None, content: Optional[str] = None, + metadata: Optional[Dict] = None) -> str: + """Register a work product (file, commit, etc.).""" + now = datetime.now(timezone.utc).isoformat() + "Z" + content_hash = hashlib.sha256(content.encode()).hexdigest()[:16] if content else None + product_id = f"wp:{hashlib.sha256(f'{session_id}:{now}'.encode()).hexdigest()[:12]}" + + with sqlite3.connect(self.db_path) as conn: + conn.execute(""" + INSERT INTO work_products + (product_id, session_id, product_type, path, content_hash, created_at, metadata) + VALUES (?, ?, ?, ?, ?, ?, ?) 
+ """, (product_id, session_id, product_type, path, content_hash, now, + json.dumps(metadata) if metadata else None)) + + return product_id + + def get_session_work_products(self, session_id: str) -> List[Dict]: + """Get all work products for a session.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT * FROM work_products WHERE session_id = ? ORDER BY created_at", + (session_id,) + ).fetchall() + return [dict(row) for row in rows] + + # --- Audit Trail (with witnessing chain) --- + + def record_audit(self, session_id: str, action_type: str, tool_name: Optional[str] = None, + target: Optional[str] = None, input_hash: Optional[str] = None, + output_hash: Optional[str] = None, status: str = "success", + r6_data: Optional[Dict] = None) -> Dict: + """ + Record an audit entry with hash-linked witnessing chain. + + Each record witnesses the previous, creating an unforgeable sequence. + Breaking the chain would require recomputing all subsequent hashes. + + Returns: + Dict with audit_id, sequence, and record_hash + """ + now = datetime.now(timezone.utc).isoformat() + "Z" + + with sqlite3.connect(self.db_path, timeout=30.0) as conn: + conn.execute("PRAGMA busy_timeout = 30000") + + # Get previous record for chain linking + prev = conn.execute(""" + SELECT sequence, record_hash FROM audit_trail + WHERE session_id = ? 
+ ORDER BY sequence DESC LIMIT 1 + """, (session_id,)).fetchone() + + if prev: + sequence = prev[0] + 1 + previous_hash = prev[1] + else: + sequence = 1 + previous_hash = "" # Genesis record + + # Compute record hash (includes previous_hash for chain) + hash_input = f"{session_id}:{sequence}:{now}:{tool_name}:{target}:{input_hash}:{output_hash}:{previous_hash}" + record_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:32] + + audit_id = f"audit:{record_hash[:12]}" + + conn.execute(""" + INSERT INTO audit_trail + (audit_id, session_id, sequence, action_type, tool_name, target, + input_hash, output_hash, status, timestamp, r6_data, previous_hash, record_hash) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, (audit_id, session_id, sequence, action_type, tool_name, target, + input_hash, output_hash, status, now, + json.dumps(r6_data) if r6_data else None, + previous_hash, record_hash)) + + return { + "audit_id": audit_id, + "sequence": sequence, + "record_hash": record_hash, + "previous_hash": previous_hash + } + + def get_last_audit_record(self, session_id: str) -> Optional[Dict]: + """Get the most recent audit record for chain verification.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute(""" + SELECT * FROM audit_trail + WHERE session_id = ? + ORDER BY sequence DESC LIMIT 1 + """, (session_id,)).fetchone() + return dict(row) if row else None + + def verify_audit_chain(self, session_id: str) -> tuple: + """ + Verify the audit trail hash chain integrity. 
+ + Returns: + (is_valid: bool, error_message: Optional[str]) + """ + records = self.get_session_audit_trail(session_id) + + if not records: + return (True, None) + + for i, record in enumerate(records): + if i == 0: + # Genesis record should have empty previous_hash + if record.get('previous_hash'): + return (False, f"Genesis record has non-empty previous_hash") + continue + + prev = records[i - 1] + + # Check sequence continuity + if record['sequence'] != prev['sequence'] + 1: + return (False, f"Sequence gap at {i}: {prev['sequence']} -> {record['sequence']}") + + # Check hash chain + if record['previous_hash'] != prev['record_hash']: + return (False, f"Hash chain broken at sequence {record['sequence']}") + + return (True, None) + + def get_session_audit_trail(self, session_id: str) -> List[Dict]: + """Get audit trail for a session, ordered by sequence (witnessing order).""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT * FROM audit_trail WHERE session_id = ? 
ORDER BY sequence ASC", + (session_id,) + ).fetchall() + return [dict(row) for row in rows] + + # --- Summary --- + + def get_session_summary(self, session_id: str) -> Optional[Dict]: + """Get summary of a session.""" + session = self.get_session(session_id) + if not session: + return None + + with sqlite3.connect(self.db_path) as conn: + audit_count = conn.execute( + "SELECT COUNT(*) FROM audit_trail WHERE session_id = ?", (session_id,) + ).fetchone()[0] + + work_count = conn.execute( + "SELECT COUNT(*) FROM work_products WHERE session_id = ?", (session_id,) + ).fetchone()[0] + + return { + "session_id": session_id, + "session_number": session["session_number"], + "project": session["project"], + "status": session["status"], + "started_at": session["started_at"], + "ended_at": session["ended_at"], + "action_count": audit_count, + "atp_consumed": session["atp_consumed"], + "atp_remaining": session["atp_remaining"], + "work_products": work_count, + "audit_records": audit_count + } + + # --- Heartbeat Tracking --- + + def record_heartbeat(self, session_id: str, sequence: int, timestamp: str, + status: str, delta_seconds: float, tool_name: str, + action_index: int, previous_hash: str, entry_hash: str) -> int: + """ + Record a heartbeat entry. + + Args: + session_id: Session this heartbeat belongs to + sequence: Sequential heartbeat number + timestamp: ISO timestamp + status: Timing status (initial, on_time, early, late, gap) + delta_seconds: Seconds since last heartbeat + tool_name: Tool that triggered this heartbeat + action_index: Action index in session + previous_hash: Hash of previous entry (for chain) + entry_hash: Hash of this entry + + Returns: + Row ID of inserted heartbeat + """ + with sqlite3.connect(self.db_path) as conn: + cursor = conn.execute(""" + INSERT INTO heartbeats + (session_id, sequence, timestamp, status, delta_seconds, + tool_name, action_index, previous_hash, entry_hash) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, (session_id, sequence, timestamp, status, delta_seconds, + tool_name, action_index, previous_hash, entry_hash)) + return cursor.lastrowid + + def get_last_heartbeat(self, session_id: str) -> Optional[Dict]: + """Get the most recent heartbeat for a session.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute(""" + SELECT * FROM heartbeats + WHERE session_id = ? + ORDER BY sequence DESC LIMIT 1 + """, (session_id,)).fetchone() + return dict(row) if row else None + + def get_heartbeats(self, session_id: str, limit: Optional[int] = None) -> List[Dict]: + """Get heartbeats for a session.""" + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + if limit: + rows = conn.execute(""" + SELECT * FROM heartbeats + WHERE session_id = ? + ORDER BY sequence DESC LIMIT ? + """, (session_id, limit)).fetchall() + # Reverse to get chronological order + return [dict(row) for row in reversed(rows)] + else: + rows = conn.execute(""" + SELECT * FROM heartbeats + WHERE session_id = ? + ORDER BY sequence ASC + """, (session_id,)).fetchall() + return [dict(row) for row in rows] + + def get_heartbeat_count(self, session_id: str) -> int: + """Get total heartbeat count for a session.""" + with sqlite3.connect(self.db_path) as conn: + row = conn.execute( + "SELECT COUNT(*) FROM heartbeats WHERE session_id = ?", + (session_id,) + ).fetchone() + return row[0] if row else 0 + + def get_heartbeat_status_distribution(self, session_id: str) -> Dict[str, int]: + """Get distribution of heartbeat statuses for a session.""" + with sqlite3.connect(self.db_path) as conn: + rows = conn.execute(""" + SELECT status, COUNT(*) as count + FROM heartbeats + WHERE session_id = ? 
+ GROUP BY status + """, (session_id,)).fetchall() + return {row[0]: row[1] for row in rows} + + # --- Audit Query & Filtering --- + + def query_audit( + self, + session_id: Optional[str] = None, + tool: Optional[str] = None, + category: Optional[str] = None, + status: Optional[str] = None, + target_pattern: Optional[str] = None, + since: Optional[str] = None, + limit: int = 50, + ) -> List[Dict]: + """ + Query and filter audit records. + + Args: + session_id: Filter by session (optional, queries all if not set) + tool: Filter by tool name + category: Filter by category (from r6_data) + status: Filter by status (success, error, blocked) + target_pattern: Glob pattern for target filtering + since: ISO date or relative duration (1h, 30m, 2d) + limit: Max results (default 50) + + Returns: + List of matching audit records + """ + conditions = [] + params: List[Any] = [] + + if session_id: + conditions.append("session_id = ?") + params.append(session_id) + + if tool: + conditions.append("tool_name = ?") + params.append(tool) + + if status: + conditions.append("status = ?") + params.append(status) + + if since: + since_dt = self._parse_since(since) + if since_dt: + conditions.append("timestamp >= ?") + params.append(since_dt.isoformat() + "Z") + + where_clause = " AND ".join(conditions) if conditions else "1=1" + + with sqlite3.connect(self.db_path) as conn: + conn.row_factory = sqlite3.Row + rows = conn.execute(f""" + SELECT * FROM audit_trail + WHERE {where_clause} + ORDER BY timestamp DESC + LIMIT ? 
+ """, (*params, limit * 10)).fetchall() # Fetch extra for post-filtering + + results = [dict(row) for row in rows] + + # Post-filter by category (from r6_data) + if category: + filtered = [] + for r in results: + if r.get("r6_data"): + try: + r6 = json.loads(r["r6_data"]) + if r6.get("request", {}).get("category") == category: + filtered.append(r) + except (json.JSONDecodeError, TypeError): + pass + results = filtered + + # Post-filter by target pattern (glob) + if target_pattern: + import fnmatch + filtered = [] + for r in results: + target = r.get("target") + if target and fnmatch.fnmatch(target, target_pattern): + filtered.append(r) + results = filtered + + return results[:limit] + + def _parse_since(self, since: str) -> Optional[datetime]: + """ + Parse a 'since' value: ISO date string or relative duration. + + Args: + since: ISO date or relative (e.g., "1h", "30m", "2d") + + Returns: + datetime or None if unparseable + """ + import re + + # Try relative duration first + match = re.match(r"^(\d+)\s*(s|m|h|d)$", since) + if match: + amount = int(match.group(1)) + unit = match.group(2) + multipliers = {"s": 1, "m": 60, "h": 3600, "d": 86400} + seconds = amount * multipliers[unit] + return datetime.now(timezone.utc) - __import__("datetime").timedelta(seconds=seconds) + + # Try ISO date + try: + # Handle both with and without Z suffix + if since.endswith("Z"): + since = since[:-1] + "+00:00" + return datetime.fromisoformat(since) + except ValueError: + return None + + def get_audit_stats(self, session_id: Optional[str] = None) -> Dict[str, Any]: + """ + Get aggregated audit statistics. + + Args: + session_id: Optional session to scope stats to + + Returns: + Dict with tool_counts, category_counts, status_counts, total + """ + condition = "WHERE session_id = ?" 
if session_id else "" + params = (session_id,) if session_id else () + + with sqlite3.connect(self.db_path) as conn: + # Tool counts + tool_rows = conn.execute(f""" + SELECT tool_name, COUNT(*) as count + FROM audit_trail {condition} + GROUP BY tool_name + ORDER BY count DESC + """, params).fetchall() + tool_counts = {row[0]: row[1] for row in tool_rows if row[0]} + + # Status counts + status_rows = conn.execute(f""" + SELECT status, COUNT(*) as count + FROM audit_trail {condition} + GROUP BY status + """, params).fetchall() + status_counts = {row[0]: row[1] for row in status_rows if row[0]} + + # Total + total_row = conn.execute(f""" + SELECT COUNT(*) FROM audit_trail {condition} + """, params).fetchone() + total = total_row[0] if total_row else 0 + + # Category counts require parsing r6_data + category_counts: Dict[str, int] = {} + with sqlite3.connect(self.db_path) as conn: + rows = conn.execute(f""" + SELECT r6_data FROM audit_trail {condition} + """, params).fetchall() + for row in rows: + if row[0]: + try: + r6 = json.loads(row[0]) + cat = r6.get("request", {}).get("category") + if cat: + category_counts[cat] = category_counts.get(cat, 0) + 1 + except (json.JSONDecodeError, TypeError): + pass + + return { + "total": total, + "tool_counts": tool_counts, + "status_counts": status_counts, + "category_counts": category_counts, + } diff --git a/plugins/web4-governance/governance/matchers.py b/plugins/web4-governance/governance/matchers.py new file mode 100644 index 0000000000..52d31cd9fc --- /dev/null +++ b/plugins/web4-governance/governance/matchers.py @@ -0,0 +1,193 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Pattern Matchers +# https://github.com/dp-web4/web4 +""" +Pattern Matchers for Policy Rules. + +Provides glob/regex matching for targets and temporal matching for time windows. 
+ +Usage: + from governance.matchers import matches_time_window, matches_target, glob_to_regex + + # Check if current time is within business hours + window = TimeWindow(allowed_hours=(9, 17), allowed_days=[1,2,3,4,5]) + if matches_time_window(window): + print("Within business hours") + + # Check if target matches patterns + if matches_target("/path/.env", ["**/.env*"], use_regex=False): + print("Matches credential pattern") +""" + +import re +import fnmatch +from datetime import datetime +from typing import List, Optional +from zoneinfo import ZoneInfo + +from .presets import TimeWindow + + +def matches_time_window(time_window: TimeWindow, now: Optional[datetime] = None) -> bool: + """ + Check if the current time falls within a time window. + + Args: + time_window: TimeWindow with allowed_hours, allowed_days, timezone + now: Current time (defaults to now) + + Returns: + True if within the allowed window, False otherwise + """ + if now is None: + now = datetime.now() + + # Convert to specified timezone if provided + if time_window.timezone: + try: + tz = ZoneInfo(time_window.timezone) + now = now.astimezone(tz) + except Exception: + # Invalid timezone, use local time + pass + + hours = now.hour + day_of_week = now.weekday() # Monday=0, Sunday=6 + # Convert to JS-style: Sunday=0, Monday=1, ... Saturday=6 + day_of_week = (day_of_week + 1) % 7 + + # Check allowed hours + if time_window.allowed_hours: + start_hour, end_hour = time_window.allowed_hours + # Handle overnight windows (e.g., [22, 6] for 10pm-6am) + if start_hour <= end_hour: + if hours < start_hour or hours >= end_hour: + return False + else: + # Overnight: valid if >= start OR < end + if hours < start_hour and hours >= end_hour: + return False + + # Check allowed days + if time_window.allowed_days: + if day_of_week not in time_window.allowed_days: + return False + + return True + + +def glob_to_regex(pattern: str) -> re.Pattern: + """ + Convert a glob pattern to a regex. 
+ + Supports: + - * (any chars except /) + - ** (any chars including /) + - ? (single char) + + Args: + pattern: Glob pattern string + + Returns: + Compiled regex pattern + """ + result = "" + i = 0 + while i < len(pattern): + ch = pattern[i] + if ch == "*": + if i + 1 < len(pattern) and pattern[i + 1] == "*": + result += ".*" + i += 2 + # Skip trailing slash after ** + if i < len(pattern) and pattern[i] == "/": + i += 1 + else: + result += "[^/]*" + i += 1 + elif ch == "?": + result += "[^/]" + i += 1 + elif ch in ".+^${}()|[]\\": + result += "\\" + ch + i += 1 + else: + result += ch + i += 1 + + return re.compile("^" + result + "$") + + +def matches_target( + target: Optional[str], + patterns: List[str], + use_regex: bool = False +) -> bool: + """ + Check if a target string matches any of the given patterns. + + Args: + target: Target string to match + patterns: List of glob or regex patterns + use_regex: If True, treat patterns as regex; if False, treat as glob + + Returns: + True if target matches any pattern + """ + if target is None: + return False + + for pattern in patterns: + if use_regex: + if re.search(pattern, target): + return True + else: + if glob_to_regex(pattern).search(target): + return True + + return False + + +def validate_regex_pattern(pattern: str) -> tuple: + """ + Validate a regex pattern for potential ReDoS vulnerabilities. 
+ + Checks for common ReDoS patterns: + - Nested quantifiers: (a+)+, (a*)* + - Overlapping alternations with quantifiers + - Excessive quantifier chains + + Args: + pattern: Regex pattern to validate + + Returns: + Tuple of (valid: bool, reason: str or None) + """ + # Check for nested quantifiers + if re.search(r"\([^)]*[*+]\)[*+?]|\([^)]*[*+?]\)\{", pattern): + return (False, "Nested quantifiers detected (potential ReDoS)") + + # Check for overlapping alternations with wildcards + match = re.search(r"\(([^|)]+)\|([^|)]+)\)[*+]", pattern) + if match: + alt1, alt2 = match.group(1), match.group(2) + if alt1 in (".*", ".+") or alt2 in (".*", ".+"): + return (False, "Overlapping alternations with wildcards (potential ReDoS)") + + # Check for quantifier chains + if re.search(r"\{[^}]+\}\s*\{", pattern): + return (False, "Chained quantifiers detected (potential ReDoS)") + + # Check pattern length + if len(pattern) > 500: + return (False, "Pattern too long (max 500 characters)") + + # Try to compile the regex + try: + re.compile(pattern) + except re.error as e: + return (False, f"Invalid regex: {e}") + + return (True, None) diff --git a/plugins/web4-governance/governance/persistent_rate_limiter.py b/plugins/web4-governance/governance/persistent_rate_limiter.py new file mode 100644 index 0000000000..e4ae05572f --- /dev/null +++ b/plugins/web4-governance/governance/persistent_rate_limiter.py @@ -0,0 +1,270 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Persistent Rate Limiter +# https://github.com/dp-web4/web4 +""" +Persistent Rate Limiter - SQLite-backed sliding window counters. + +Persists rate limit state across process restarts using SQLite WAL mode. +Falls back to memory-only operation if SQLite is unavailable. 
+ +Usage: + from governance.persistent_rate_limiter import PersistentRateLimiter + + limiter = PersistentRateLimiter("~/.web4") + + # Check if under limit + result = limiter.check("ratelimit:bash-rate:Bash", max_count=5, window_ms=60000) + if result.allowed: + # Proceed with action + limiter.record("ratelimit:bash-rate:Bash") + + # Check if persistence is active + if limiter.persistent: + print("Using SQLite storage") +""" + +import os +import time +import sqlite3 +from dataclasses import dataclass +from typing import Dict, List, Optional +from pathlib import Path + + +@dataclass +class RateLimitResult: + """Result of a rate limit check.""" + allowed: bool + current: int + limit: int + + +class PersistentRateLimiter: + """ + Persistent rate limiter with SQLite storage. + + Maintains the same interface as the memory-only RateLimiter. + Falls back to memory if SQLite initialization fails. + """ + + def __init__(self, storage_path: str): + """ + Initialize the persistent rate limiter. + + Args: + storage_path: Base path for storage (e.g., ~/.web4) + """ + self._db: Optional[sqlite3.Connection] = None + self._memory_fallback: Dict[str, List[float]] = {} + self._is_persistent: bool = False + self._init_database(storage_path) + + def _init_database(self, storage_path: str) -> None: + """Initialize SQLite database with WAL mode.""" + try: + # Expand path and create data directory + base_path = Path(os.path.expanduser(storage_path)) + data_dir = base_path / "data" + data_dir.mkdir(parents=True, exist_ok=True) + + db_path = data_dir / "rate-limits.db" + self._db = sqlite3.connect(str(db_path), check_same_thread=False) + + # Enable WAL mode for better concurrent access + self._db.execute("PRAGMA journal_mode = WAL") + self._db.execute("PRAGMA synchronous = NORMAL") + + # Create rate limits table + self._db.execute(""" + CREATE TABLE IF NOT EXISTS rate_limits ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + key TEXT NOT NULL, + timestamp INTEGER NOT NULL + ) + """) + + # Create 
index for efficient key lookups + self._db.execute(""" + CREATE INDEX IF NOT EXISTS idx_rate_limits_key_ts + ON rate_limits(key, timestamp) + """) + + self._db.commit() + self._is_persistent = True + + except Exception: + # SQLite not available or failed, use memory fallback + self._is_persistent = False + + def check(self, key: str, max_count: int, window_ms: int) -> RateLimitResult: + """ + Check whether a key is under its rate limit. Prunes expired entries. + + Args: + key: Rate limit key (e.g., "ratelimit:rule-id:tool:Bash") + max_count: Maximum allowed actions within window + window_ms: Window duration in milliseconds + + Returns: + RateLimitResult with allowed flag, current count, and limit + """ + now = int(time.time() * 1000) + cutoff = now - window_ms + + if self._is_persistent and self._db: + # Prune old entries for this key + self._db.execute( + "DELETE FROM rate_limits WHERE key = ? AND timestamp <= ?", + (key, cutoff) + ) + + # Count remaining entries + cursor = self._db.execute( + "SELECT COUNT(*) FROM rate_limits WHERE key = ? AND timestamp > ?", + (key, cutoff) + ) + current = cursor.fetchone()[0] + + return RateLimitResult( + allowed=current < max_count, + current=current, + limit=max_count + ) + + # Memory fallback + timestamps = self._memory_fallback.get(key) + if not timestamps: + return RateLimitResult(allowed=True, current=0, limit=max_count) + + pruned = [t for t in timestamps if t > cutoff] + self._memory_fallback[key] = pruned + + return RateLimitResult( + allowed=len(pruned) < max_count, + current=len(pruned), + limit=max_count + ) + + def record(self, key: str) -> None: + """ + Record a new action for the given key. 
+ + Args: + key: Rate limit key + """ + now = int(time.time() * 1000) + + if self._is_persistent and self._db: + self._db.execute( + "INSERT INTO rate_limits (key, timestamp) VALUES (?, ?)", + (key, now) + ) + self._db.commit() + return + + # Memory fallback + if key in self._memory_fallback: + self._memory_fallback[key].append(now) + else: + self._memory_fallback[key] = [now] + + def prune(self, window_ms: int) -> int: + """ + Prune all expired entries across all keys. + + Args: + window_ms: Window duration to use for pruning + + Returns: + Number of entries pruned + """ + now = int(time.time() * 1000) + cutoff = now - window_ms + + if self._is_persistent and self._db: + cursor = self._db.execute( + "DELETE FROM rate_limits WHERE timestamp <= ?", + (cutoff,) + ) + self._db.commit() + return cursor.rowcount + + # Memory fallback + pruned = 0 + keys_to_remove = [] + + for key, timestamps in self._memory_fallback.items(): + before = len(timestamps) + filtered = [t for t in timestamps if t > cutoff] + pruned += before - len(filtered) + + if not filtered: + keys_to_remove.append(key) + else: + self._memory_fallback[key] = filtered + + for key in keys_to_remove: + del self._memory_fallback[key] + + return pruned + + def count(self, key: str, window_ms: int = 3_600_000) -> int: + """ + Get current count for a key within a window. + + Args: + key: Rate limit key + window_ms: Window duration (default 1 hour) + + Returns: + Number of entries for key within window + """ + cutoff = int(time.time() * 1000) - window_ms + + if self._is_persistent and self._db: + cursor = self._db.execute( + "SELECT COUNT(*) FROM rate_limits WHERE key = ? 
AND timestamp > ?", + (key, cutoff) + ) + return cursor.fetchone()[0] + + # Memory fallback + timestamps = self._memory_fallback.get(key) + if not timestamps: + return 0 + return len([t for t in timestamps if t > cutoff]) + + @property + def key_count(self) -> int: + """Number of tracked keys.""" + if self._is_persistent and self._db: + cursor = self._db.execute("SELECT COUNT(DISTINCT key) FROM rate_limits") + return cursor.fetchone()[0] + return len(self._memory_fallback) + + @property + def persistent(self) -> bool: + """Whether persistence is active.""" + return self._is_persistent + + def close(self) -> None: + """Close the database connection.""" + if self._db: + self._db.close() + self._db = None + + @staticmethod + def make_key(rule_id: str, tool_or_category: str) -> str: + """ + Build a rate limit key from rule context. + + Args: + rule_id: Policy rule ID + tool_or_category: Tool name or category string + + Returns: + Namespaced key string + """ + return f"ratelimit:{rule_id}:{tool_or_category}" diff --git a/plugins/web4-governance/governance/policy_entity.py b/plugins/web4-governance/governance/policy_entity.py new file mode 100644 index 0000000000..5f9e77739c --- /dev/null +++ b/plugins/web4-governance/governance/policy_entity.py @@ -0,0 +1,568 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Policy Entity +# https://github.com/dp-web4/web4 +""" +Policy Entity - Policy as a first-class participant in the trust network. + +Policy isn't just configuration — it's society's law. It has identity, +can be witnessed, and is hash-tracked in the audit chain. 
+ +Key concepts: +- Policy is immutable once registered (changing = new entity) +- Sessions witness operating under a policy +- Policy witnesses agent decisions (allow/deny) +- R6 records reference the policy_hash in effect + +Usage: + from governance.policy_entity import PolicyEntity, PolicyRegistry + + # Register a policy (creates hash-identified entity) + registry = PolicyRegistry() + entity = registry.register_policy("safety", config) + + # Evaluate a tool call + decision = entity.evaluate(tool_name, category, target) + + # Witness a decision + entity.witness_decision(session_id, tool_name, decision, success=True) +""" + +import hashlib +import json +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional, Any, Literal +import re +import fnmatch + +from .presets import ( + PolicyConfig, + PolicyRule, + PolicyMatch, + get_preset, + resolve_preset, + policy_config_to_dict, +) +from .entity_trust import EntityTrustStore, EntityTrust + + +PolicyDecision = Literal["allow", "deny", "warn"] + + +@dataclass +class PolicyEvaluation: + """Result of evaluating a tool call against policy.""" + decision: PolicyDecision + rule_id: Optional[str] + rule_name: Optional[str] + reason: str + enforced: bool + constraints: List[str] + + +@dataclass +class PolicyEntity: + """ + A policy as a first-class entity in the trust network. 
+ + Properties: + - entity_id: Unique identifier (policy:::) + - content_hash: SHA-256 of the policy document (first 16 chars) + - config: The actual policy configuration + - created_at: When this version was created + - trust: T3/V3 tensors (via EntityTrustStore) + """ + name: str + version: str + config: PolicyConfig + content_hash: str + entity_id: str + created_at: str + source: str = "preset" # "preset", "custom", "file" + + # Sorted rules for evaluation (lower priority = evaluated first) + _sorted_rules: List[PolicyRule] = field(default_factory=list, repr=False) + + def __post_init__(self): + """Sort rules by priority after initialization.""" + self._sorted_rules = sorted(self.config.rules, key=lambda r: r.priority) + + def evaluate( + self, + tool_name: str, + category: str, + target: Optional[str] = None, + rate_limiter: Optional[Any] = None, + full_command: Optional[str] = None, + ) -> PolicyEvaluation: + """ + Evaluate a tool call against this policy. + + Args: + tool_name: Name of the tool (e.g., "Bash", "Write") + category: Tool category (e.g., "command", "file_write") + target: Target of the operation (file path, command, URL) + rate_limiter: Optional RateLimiter for rate-based rules + full_command: For Bash tools, the full command string (enables command_patterns matching) + + Returns: + PolicyEvaluation with decision and context + """ + for rule in self._sorted_rules: + if self._matches_rule(tool_name, category, target, rule.match, full_command): + # Check rate limit if specified + if rule.match.rate_limit and rate_limiter: + key = self._rate_limit_key(rule, tool_name, category) + result = rate_limiter.check( + key, + rule.match.rate_limit.max_count, + rule.match.rate_limit.window_ms, + ) + if result.allowed: + continue # Under limit, rule doesn't fire + + enforced = rule.decision != "deny" or self.config.enforce + return PolicyEvaluation( + decision=rule.decision, + rule_id=rule.id, + rule_name=rule.name, + reason=rule.reason or f"Matched rule: 
{rule.name}", + enforced=enforced, + constraints=[ + f"policy:{self.entity_id}", + f"decision:{rule.decision}", + f"rule:{rule.id}", + ], + ) + + # No rule matched — default policy + return PolicyEvaluation( + decision=self.config.default_policy, + rule_id=None, + rule_name=None, + reason=f"Default policy: {self.config.default_policy}", + enforced=True, + constraints=[ + f"policy:{self.entity_id}", + f"decision:{self.config.default_policy}", + "rule:default", + ], + ) + + def _matches_rule( + self, + tool_name: str, + category: str, + target: Optional[str], + match: PolicyMatch, + full_command: Optional[str] = None, + ) -> bool: + """Check if a tool call matches a rule's criteria (AND logic).""" + # Tool match + if match.tools and tool_name not in match.tools: + return False + + # Category match + if match.categories and category not in match.categories: + return False + + # Target pattern match + if match.target_patterns: + if target is None: + return False + matched = False + for pattern in match.target_patterns: + if match.target_patterns_are_regex: + if re.search(pattern, target): + matched = True + break + else: + # Glob pattern + if fnmatch.fnmatch(target, pattern): + matched = True + break + if not matched: + return False + + # Full command pattern match (for Bash commands) + if match.command_patterns: + if full_command is None: + return False + matched = False + for pattern in match.command_patterns: + if match.command_patterns_are_regex: + if re.search(pattern, full_command): + matched = True + break + else: + if pattern in full_command: + matched = True + break + if not matched: + return False + + # Negative match: command must NOT contain these patterns + # (for rules like "git push without PAT") + if match.command_must_not_contain: + if full_command is None: + return False + # If ANY of the patterns are found, rule does NOT match + for pattern in match.command_must_not_contain: + if pattern in full_command: + return False # Found exclusion pattern, rule 
doesn't apply + + return True + + def _rate_limit_key(self, rule: PolicyRule, tool_name: str, category: str) -> str: + """Build rate limit key from rule context.""" + if rule.match.tools: + return f"ratelimit:{rule.id}:tool:{tool_name}" + if rule.match.categories: + return f"ratelimit:{rule.id}:category:{category}" + return f"ratelimit:{rule.id}:global" + + def to_dict(self) -> Dict[str, Any]: + """Convert to JSON-serializable dict.""" + return { + "entity_id": self.entity_id, + "name": self.name, + "version": self.version, + "content_hash": self.content_hash, + "created_at": self.created_at, + "source": self.source, + "config": policy_config_to_dict(self.config), + } + + +@dataclass +class WitnessRecord: + """Record of a witnessing relationship (persisted to JSONL).""" + type: str # "session_witness" or "decision_witness" + entity: str + witness: str + timestamp: str + tool: Optional[str] = None # For decision witnesses + decision: Optional[str] = None # For decision witnesses + + +class PolicyRegistry: + """ + Registry of policy entities with hash-tracking and witnessing. + + Policies are registered once and become immutable. Changing a policy + creates a new entity with a new hash. + + Witnessing relationships are persisted to JSONL for durability across restarts. + """ + + def __init__(self, storage_path: Optional[Path] = None): + """ + Initialize registry. + + Args: + storage_path: Base path for storage. 
Defaults to ~/.web4 + """ + if storage_path is None: + storage_path = Path.home() / ".web4" + self.storage_path = Path(storage_path) + self.policies_path = self.storage_path / "policies" + self.policies_path.mkdir(parents=True, exist_ok=True) + + # Entity trust store for witnessing + self._trust_store = EntityTrustStore() + + # In-memory cache of loaded policies + self._cache: Dict[str, PolicyEntity] = {} + + # Witnessing records: entity -> set of witnesses + self._witnessed_by: Dict[str, set] = {} + + # Witnessing records: entity -> set of entities witnessed + self._has_witnessed: Dict[str, set] = {} + + # Load existing witness records + self._load_witness_records() + + @property + def _witness_file_path(self) -> Path: + """Path to the witnesses JSONL file.""" + return self.storage_path / "witnesses.jsonl" + + def _load_witness_records(self) -> None: + """Load existing witness records from disk.""" + if not self._witness_file_path.exists(): + return + + try: + content = self._witness_file_path.read_text().strip() + if not content: + return + + for line in content.split("\n"): + if not line.strip(): + continue + try: + data = json.loads(line) + record = WitnessRecord(**data) + self._apply_witness_record(record) + except (json.JSONDecodeError, TypeError): + # Skip malformed lines + pass + except Exception: + # File doesn't exist or can't be read, start fresh + pass + + def _apply_witness_record(self, record: WitnessRecord) -> None: + """Apply a witness record to in-memory state.""" + # Entity is witnessed by witness + if record.entity not in self._witnessed_by: + self._witnessed_by[record.entity] = set() + self._witnessed_by[record.entity].add(record.witness) + + # Witness has witnessed entity + if record.witness not in self._has_witnessed: + self._has_witnessed[record.witness] = set() + self._has_witnessed[record.witness].add(record.entity) + + def _persist_witness_record(self, record: WitnessRecord) -> None: + """Persist a witness record to disk.""" + try: + # 
Ensure directory exists + self.storage_path.mkdir(parents=True, exist_ok=True) + + # Append to JSONL file + record_dict = { + "type": record.type, + "entity": record.entity, + "witness": record.witness, + "timestamp": record.timestamp, + } + if record.tool: + record_dict["tool"] = record.tool + if record.decision: + record_dict["decision"] = record.decision + + with open(self._witness_file_path, "a") as f: + f.write(json.dumps(record_dict) + "\n") + except Exception: + # Persistence failure is non-fatal + pass + + def get_witnessed_by(self, entity_id: str) -> List[str]: + """Get list of entities that have witnessed this entity.""" + return list(self._witnessed_by.get(entity_id, set())) + + def get_has_witnessed(self, entity_id: str) -> List[str]: + """Get list of entities that this entity has witnessed.""" + return list(self._has_witnessed.get(entity_id, set())) + + def register_policy( + self, + name: str, + config: Optional[PolicyConfig] = None, + preset: Optional[str] = None, + version: Optional[str] = None, + ) -> PolicyEntity: + """ + Register a policy and create its entity. 
+ + Args: + name: Policy name (e.g., "safety", "my-custom-policy") + config: PolicyConfig to register (mutually exclusive with preset) + preset: Preset name to use as base (mutually exclusive with config) + version: Version string (auto-generated if not provided) + + Returns: + PolicyEntity with hash-identified entity_id + """ + if config is None and preset is None: + raise ValueError("Must provide either config or preset") + if config is not None and preset is not None: + raise ValueError("Cannot provide both config and preset") + + # Resolve config + if preset: + config = resolve_preset(preset) + source = "preset" + else: + source = "custom" + + # Generate version if not provided + if version is None: + version = datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S") + + # Compute content hash + config_dict = policy_config_to_dict(config) + content_str = json.dumps(config_dict, sort_keys=True) + content_hash = hashlib.sha256(content_str.encode()).hexdigest()[:16] + + # Build entity ID + entity_id = f"policy:{name}:{version}:{content_hash}" + + # Check cache + if entity_id in self._cache: + return self._cache[entity_id] + + # Create entity + now = datetime.now(timezone.utc).isoformat() + "Z" + entity = PolicyEntity( + name=name, + version=version, + config=config, + content_hash=content_hash, + entity_id=entity_id, + created_at=now, + source=source, + ) + + # Persist policy document + policy_file = self.policies_path / f"{content_hash}.json" + if not policy_file.exists(): + policy_file.write_text(json.dumps(entity.to_dict(), indent=2)) + + # Register in entity trust store (creates T3/V3 tensors) + self._trust_store.get(entity_id) + + # Cache + self._cache[entity_id] = entity + + return entity + + def get_policy(self, entity_id: str) -> Optional[PolicyEntity]: + """Get a policy by entity ID.""" + if entity_id in self._cache: + return self._cache[entity_id] + + # Try to load from disk by hash + parts = entity_id.split(":") + if len(parts) >= 4: + content_hash = 
parts[3] + policy_file = self.policies_path / f"{content_hash}.json" + if policy_file.exists(): + data = json.loads(policy_file.read_text()) + entity = self._entity_from_dict(data) + self._cache[entity_id] = entity + return entity + + return None + + def get_policy_by_hash(self, content_hash: str) -> Optional[PolicyEntity]: + """Get a policy by content hash.""" + policy_file = self.policies_path / f"{content_hash}.json" + if policy_file.exists(): + data = json.loads(policy_file.read_text()) + entity = self._entity_from_dict(data) + self._cache[entity.entity_id] = entity + return entity + return None + + def _entity_from_dict(self, data: Dict[str, Any]) -> PolicyEntity: + """Reconstruct PolicyEntity from dict.""" + config_data = data["config"] + rules = [ + PolicyRule( + id=r["id"], + name=r["name"], + priority=r["priority"], + decision=r["decision"], + reason=r.get("reason"), + match=PolicyMatch( + tools=r["match"].get("tools"), + categories=r["match"].get("categories"), + target_patterns=r["match"].get("target_patterns"), + target_patterns_are_regex=r["match"].get("target_patterns_are_regex", False), + ), + ) + for r in config_data.get("rules", []) + ] + config = PolicyConfig( + default_policy=config_data["default_policy"], + enforce=config_data["enforce"], + rules=rules, + preset=config_data.get("preset"), + ) + return PolicyEntity( + name=data["name"], + version=data["version"], + config=config, + content_hash=data["content_hash"], + entity_id=data["entity_id"], + created_at=data["created_at"], + source=data.get("source", "custom"), + ) + + def witness_session(self, policy_entity_id: str, session_id: str) -> None: + """ + Record that a session is operating under this policy. + + Creates bidirectional witnessing: + - Session witnesses the policy (I operate under these rules) + - Policy witnesses the session (this session uses me) + + Persists to JSONL for durability. 
+ """ + session_entity = f"session:{session_id}" + self._trust_store.witness(session_entity, policy_entity_id, success=True) + + # Create and persist witness record + now = datetime.now(timezone.utc).isoformat() + "Z" + record = WitnessRecord( + type="session_witness", + entity=policy_entity_id, + witness=session_entity, + timestamp=now, + ) + self._apply_witness_record(record) + self._persist_witness_record(record) + + def witness_decision( + self, + policy_entity_id: str, + session_id: str, + tool_name: str, + decision: PolicyDecision, + success: bool, + ) -> None: + """ + Record a policy decision in the witnessing chain. + + The policy witnesses the tool use, and the outcome (success/failure) + affects trust in both directions. + + Persists to JSONL for durability. + """ + session_entity = f"session:{session_id}" + # Policy witnesses the decision + self._trust_store.witness(policy_entity_id, session_entity, success=success) + + # Create and persist witness record + now = datetime.now(timezone.utc).isoformat() + "Z" + record = WitnessRecord( + type="decision_witness", + entity=session_entity, + witness=policy_entity_id, + timestamp=now, + tool=tool_name, + decision=decision, + ) + self._apply_witness_record(record) + self._persist_witness_record(record) + + def get_policy_trust(self, policy_entity_id: str) -> EntityTrust: + """Get trust tensors for a policy entity.""" + return self._trust_store.get(policy_entity_id) + + def list_policies(self) -> List[PolicyEntity]: + """List all registered policies.""" + policies = [] + for policy_file in self.policies_path.glob("*.json"): + try: + data = json.loads(policy_file.read_text()) + entity = self._entity_from_dict(data) + policies.append(entity) + except (json.JSONDecodeError, KeyError): + pass + return policies diff --git a/plugins/web4-governance/governance/presence.py b/plugins/web4-governance/governance/presence.py new file mode 100644 index 0000000000..444b5c2902 --- /dev/null +++ 
b/plugins/web4-governance/governance/presence.py @@ -0,0 +1,278 @@ +""" +Entity Presence Tracking for Web4 Governance + +Implements "silence as signal" - tracking expected heartbeats and detecting +when entities go quiet unexpectedly. Absence of expected activity is itself +trust-relevant information. + +Key concepts: +- presence_interval: How often entity is expected to check in +- last_seen: When entity was last active +- presence_status: "active", "expected", "overdue", "missing" +- grace_factor: Multiplier before "expected" becomes "overdue" (default 1.5) + +Integration with T3: +- Overdue entities see reliability/consistency decay +- Missing entities trigger witness notifications +- Return after absence can rebuild trust through demonstrated activity + +Usage: + from governance.presence import PresenceTracker, PresenceStatus + + tracker = PresenceTracker() + tracker.register("agent:claude", interval_hours=6) + tracker.heartbeat("agent:claude") # Called on activity + + status = tracker.check("agent:claude") + if status == PresenceStatus.OVERDUE: + # Investigate or notify + pass +""" + +import json +from datetime import datetime, timedelta, timezone +from enum import Enum +from pathlib import Path +from typing import Dict, List, Optional, Any +from dataclasses import dataclass, field, asdict + + +class PresenceStatus(Enum): + """Entity presence status.""" + ACTIVE = "active" # Seen within expected interval + EXPECTED = "expected" # Within grace period, should check in soon + OVERDUE = "overdue" # Past grace period, warrants attention + MISSING = "missing" # Significantly past expected, may need intervention + UNKNOWN = "unknown" # No expectation registered + + +@dataclass +class PresenceRecord: + """Presence tracking for a single entity.""" + entity_id: str + interval_seconds: float # Expected check-in interval + grace_factor: float = 1.5 # Multiplier for grace period + missing_factor: float = 3.0 # Multiplier for "missing" threshold + last_seen: Optional[str] = 
None # ISO timestamp + last_status: str = "unknown" + check_count: int = 0 # Total heartbeats received + overdue_count: int = 0 # Times found overdue + missing_count: int = 0 # Times found missing + created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) + + def heartbeat(self) -> None: + """Record entity activity.""" + self.last_seen = datetime.now(timezone.utc).isoformat() + self.check_count += 1 + self.last_status = PresenceStatus.ACTIVE.value + + def get_status(self) -> PresenceStatus: + """Determine current presence status.""" + if self.last_seen is None: + return PresenceStatus.UNKNOWN + + last = datetime.fromisoformat(self.last_seen) + now = datetime.now(timezone.utc) + elapsed = (now - last).total_seconds() + + expected = self.interval_seconds + grace = expected * self.grace_factor + missing = expected * self.missing_factor + + if elapsed <= expected: + return PresenceStatus.ACTIVE + elif elapsed <= grace: + return PresenceStatus.EXPECTED + elif elapsed <= missing: + return PresenceStatus.OVERDUE + else: + return PresenceStatus.MISSING + + def seconds_until_expected(self) -> float: + """Seconds until entity becomes 'expected' (negative if already past).""" + if self.last_seen is None: + return 0.0 + last = datetime.fromisoformat(self.last_seen) + now = datetime.now(timezone.utc) + elapsed = (now - last).total_seconds() + return self.interval_seconds - elapsed + + def seconds_until_overdue(self) -> float: + """Seconds until entity becomes 'overdue' (negative if already past).""" + if self.last_seen is None: + return 0.0 + last = datetime.fromisoformat(self.last_seen) + now = datetime.now(timezone.utc) + elapsed = (now - last).total_seconds() + grace = self.interval_seconds * self.grace_factor + return grace - elapsed + + +class PresenceTracker: + """ + Track entity presence expectations and detect silence. + + Provides situational awareness for distributed Web4 systems. 
+ """ + + def __init__(self, storage_path: Optional[Path] = None): + """Initialize tracker with optional persistent storage.""" + self.storage_path = storage_path or Path.home() / ".web4" / "governance" / "presence.json" + self.records: Dict[str, PresenceRecord] = {} + self._load() + + def _load(self) -> None: + """Load presence records from storage.""" + if self.storage_path.exists(): + try: + with open(self.storage_path, 'r') as f: + data = json.load(f) + for entity_id, record_data in data.items(): + self.records[entity_id] = PresenceRecord(**record_data) + except Exception: + pass # Start fresh on error + + def _save(self) -> None: + """Persist presence records to storage.""" + self.storage_path.parent.mkdir(parents=True, exist_ok=True) + with open(self.storage_path, 'w') as f: + data = {eid: asdict(rec) for eid, rec in self.records.items()} + json.dump(data, f, indent=2) + + def register( + self, + entity_id: str, + interval_hours: float = 6.0, + grace_factor: float = 1.5, + missing_factor: float = 3.0 + ) -> PresenceRecord: + """ + Register an entity with expected presence interval. + + Args: + entity_id: Unique entity identifier (e.g., "agent:claude", "track:web4") + interval_hours: Expected hours between check-ins + grace_factor: Multiplier for grace period (default 1.5x) + missing_factor: Multiplier for missing threshold (default 3x) + + Returns: + PresenceRecord for the entity + """ + if entity_id not in self.records: + self.records[entity_id] = PresenceRecord( + entity_id=entity_id, + interval_seconds=interval_hours * 3600, + grace_factor=grace_factor, + missing_factor=missing_factor + ) + self._save() + return self.records[entity_id] + + def heartbeat(self, entity_id: str, auto_register: bool = True) -> PresenceStatus: + """ + Record entity activity (heartbeat). 
+ + Args: + entity_id: Entity checking in + auto_register: If True, register unknown entities with default interval + + Returns: + Previous status before heartbeat (useful for detecting returns) + """ + if entity_id not in self.records: + if auto_register: + self.register(entity_id) + else: + return PresenceStatus.UNKNOWN + + record = self.records[entity_id] + prev_status = record.get_status() + + # Track overdue/missing events + if prev_status == PresenceStatus.OVERDUE: + record.overdue_count += 1 + elif prev_status == PresenceStatus.MISSING: + record.missing_count += 1 + + record.heartbeat() + self._save() + return prev_status + + def check(self, entity_id: str) -> PresenceStatus: + """Check current presence status of an entity.""" + if entity_id not in self.records: + return PresenceStatus.UNKNOWN + status = self.records[entity_id].get_status() + self.records[entity_id].last_status = status.value + return status + + def check_all(self) -> Dict[str, PresenceStatus]: + """Check status of all registered entities.""" + return {eid: self.check(eid) for eid in self.records} + + def get_overdue(self) -> List[str]: + """Get list of entities that are overdue or missing.""" + return [ + eid for eid, status in self.check_all().items() + if status in (PresenceStatus.OVERDUE, PresenceStatus.MISSING) + ] + + def get_expected_soon(self, within_hours: float = 1.0) -> List[str]: + """Get entities expected to check in within the specified hours.""" + result = [] + for eid, record in self.records.items(): + if 0 < record.seconds_until_expected() <= within_hours * 3600: + result.append(eid) + return result + + def get_record(self, entity_id: str) -> Optional[PresenceRecord]: + """Get presence record for an entity.""" + return self.records.get(entity_id) + + def unregister(self, entity_id: str) -> bool: + """Remove entity from tracking.""" + if entity_id in self.records: + del self.records[entity_id] + self._save() + return True + return False + + def summary(self) -> Dict[str, 
Any]: + """Get summary of all tracked entities.""" + statuses = self.check_all() + return { + "total_tracked": len(self.records), + "active": sum(1 for s in statuses.values() if s == PresenceStatus.ACTIVE), + "expected": sum(1 for s in statuses.values() if s == PresenceStatus.EXPECTED), + "overdue": sum(1 for s in statuses.values() if s == PresenceStatus.OVERDUE), + "missing": sum(1 for s in statuses.values() if s == PresenceStatus.MISSING), + "entities": { + eid: { + "status": status.value, + "last_seen": self.records[eid].last_seen, + "check_count": self.records[eid].check_count, + "overdue_count": self.records[eid].overdue_count, + } + for eid, status in statuses.items() + } + } + + +# Convenience function for integration with existing governance +def check_presence(entity_id: str, tracker: Optional[PresenceTracker] = None) -> PresenceStatus: + """Quick check of entity presence status.""" + if tracker is None: + tracker = PresenceTracker() + return tracker.check(entity_id) + + +# Default tracker instance for simple usage +_default_tracker: Optional[PresenceTracker] = None + + +def get_tracker() -> PresenceTracker: + """Get or create the default presence tracker.""" + global _default_tracker + if _default_tracker is None: + _default_tracker = PresenceTracker() + return _default_tracker diff --git a/plugins/web4-governance/governance/presets.py b/plugins/web4-governance/governance/presets.py new file mode 100644 index 0000000000..6778dbe3df --- /dev/null +++ b/plugins/web4-governance/governance/presets.py @@ -0,0 +1,367 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Policy Presets +# https://github.com/dp-web4/web4 +""" +Policy Presets - Built-in rule sets users can reference by name. + +Presets provide sensible defaults for common governance postures. +Users can override individual fields and append additional rules. 
+ +Usage: + from governance.presets import get_preset, resolve_preset, list_presets + + # Get a preset + config = get_preset("safety") + + # Resolve with overrides + config = resolve_preset("safety", enforce=False) + + # List all presets + for preset in list_presets(): + print(preset.name, preset.description) +""" + +from dataclasses import dataclass, field +from typing import Dict, List, Optional, Any, Literal + +PolicyDecision = Literal["allow", "deny", "warn"] +PresetName = Literal["permissive", "safety", "strict", "audit-only"] + + +@dataclass +class RateLimitSpec: + """Rate limit specification for a policy rule.""" + max_count: int + window_ms: int + + +@dataclass +class TimeWindow: + """ + Temporal constraints for policy rules. + Rule only matches during specified time windows. + """ + # Allowed hours [start, end] in 24h format. E.g., [9, 17] = 9am-5pm + allowed_hours: Optional[tuple] = None + # Allowed days of week. 0=Sunday, 1=Monday, ... 6=Saturday + allowed_days: Optional[List[int]] = None + # Timezone for time calculations. Defaults to system timezone. 
+ timezone: Optional[str] = None + + +@dataclass +class PolicyMatch: + """Match criteria for a policy rule.""" + tools: Optional[List[str]] = None + categories: Optional[List[str]] = None + target_patterns: Optional[List[str]] = None + target_patterns_are_regex: bool = False + rate_limit: Optional[RateLimitSpec] = None + time_window: Optional[TimeWindow] = None + # For Bash: match against full command (not just first word) + command_patterns: Optional[List[str]] = None + command_patterns_are_regex: bool = False + # Negative match: rule fires if pattern is NOT found (for "git push without PAT") + command_must_not_contain: Optional[List[str]] = None + + +@dataclass +class PolicyRule: + """A single policy rule.""" + id: str + name: str + priority: int + decision: PolicyDecision + match: PolicyMatch + reason: Optional[str] = None + + +@dataclass +class PolicyConfig: + """Complete policy configuration.""" + default_policy: PolicyDecision + enforce: bool + rules: List[PolicyRule] = field(default_factory=list) + preset: Optional[str] = None + + +@dataclass +class PresetDefinition: + """A named preset with description and config.""" + name: PresetName + description: str + config: PolicyConfig + + +# Safety rules shared between 'safety' and 'audit-only' presets +SAFETY_RULES = [ + PolicyRule( + id="deny-destructive-commands", + name="Block destructive shell commands", + priority=1, + decision="deny", + reason="Destructive command blocked by safety preset", + match=PolicyMatch( + tools=["Bash"], + # Block: rm with ANY flags, mkfs.* (filesystem format) + # Rationale: rm -f bypasses prompts, rm -r is recursive, all flags are risky for agents + target_patterns=[r"rm\s+-", r"mkfs\."], + target_patterns_are_regex=True, + ), + ), + PolicyRule( + id="warn-file-delete", + name="Warn on file deletion", + priority=2, + decision="warn", + reason="File deletion flagged - use with caution", + match=PolicyMatch( + tools=["Bash"], + # Warn on plain rm (no flags) - less dangerous but still 
destructive + # Matches "rm file" or "rm ./path" but not "rm -rf" (caught by deny rule above) + target_patterns=[r"rm\s+[^-]"], + target_patterns_are_regex=True, + ), + ), + PolicyRule( + id="deny-secret-files", + name="Block reading secret/credential files", + priority=3, + decision="deny", + reason="Credential/secret file access denied by safety preset", + match=PolicyMatch( + categories=["file_read", "credential_access"], + target_patterns=[ + # Environment and general secrets + "**/.env", + "**/.env.*", + "**/credentials.*", + "**/*secret*", + "**/token*.json", + "**/auth*.json", + "**/*apikey*", + # Cloud provider credentials + "**/.aws/credentials", + "**/.aws/config", + # SSH keys + "**/.ssh/id_*", + "**/.ssh/config", + # Package manager auth + "**/.npmrc", + "**/.pypirc", + # Database/service credentials + "**/.netrc", + "**/.pgpass", + "**/.my.cnf", + # Container/orchestration credentials + "**/.docker/config.json", + "**/.kube/config", + # Encryption keys + "**/.gnupg/*", + "**/.gpg/*", + ], + ), + ), + PolicyRule( + id="warn-memory-write", + name="Warn on agent memory file modifications", + priority=4, + decision="warn", + reason="Memory file modification flagged - potential memory poisoning", + match=PolicyMatch( + categories=["file_write"], + target_patterns=[ + "**/MEMORY.md", + "**/memory.md", + "**/memory/**/*.md", + "**/.web4/**/memory*", + "**/.claude/**/memory*", + ], + ), + ), + PolicyRule( + id="warn-network", + name="Warn on network access", + priority=10, + decision="warn", + reason="Network access flagged by safety preset", + match=PolicyMatch(categories=["network"]), + ), + # Git push without PAT will fail on WSL - warn to save token burn + PolicyRule( + id="warn-git-push-no-pat", + name="Warn on git push without PAT authentication", + priority=8, + decision="warn", + reason="git push without PAT will fail on WSL. 
Use: grep GITHUB_PAT ../.env | cut -d= -f2 | xargs -I {} git push https://user:{}@github.com/...", + match=PolicyMatch( + tools=["Bash"], + command_patterns=[r"git\s+push"], + command_patterns_are_regex=True, + command_must_not_contain=["GITHUB_PAT", "@github.com"], + ), + ), +] + + +# All available presets +PRESETS: Dict[PresetName, PresetDefinition] = { + "permissive": PresetDefinition( + name="permissive", + description="Pure observation — no rules, all actions allowed", + config=PolicyConfig( + default_policy="allow", + enforce=False, + rules=[], + ), + ), + "safety": PresetDefinition( + name="safety", + description="Deny destructive bash, deny secret file reads, warn on network", + config=PolicyConfig( + default_policy="allow", + enforce=True, + rules=SAFETY_RULES.copy(), + ), + ), + "strict": PresetDefinition( + name="strict", + description="Deny everything except Read, Glob, Grep, and TodoWrite", + config=PolicyConfig( + default_policy="deny", + enforce=True, + rules=[ + PolicyRule( + id="allow-read-tools", + name="Allow read-only tools", + priority=1, + decision="allow", + reason="Read-only tool permitted by strict preset", + match=PolicyMatch(tools=["Read", "Glob", "Grep", "TodoWrite"]), + ), + ], + ), + ), + "audit-only": PresetDefinition( + name="audit-only", + description="Same rules as safety but enforce=false (dry-run, logs what would be blocked)", + config=PolicyConfig( + default_policy="allow", + enforce=False, + rules=SAFETY_RULES.copy(), + ), + ), +} + + +def get_preset(name: str) -> Optional[PresetDefinition]: + """Get a preset by name, or None if not found.""" + return PRESETS.get(name) + + +def list_presets() -> List[PresetDefinition]: + """List all available presets.""" + return list(PRESETS.values()) + + +def is_preset_name(name: str) -> bool: + """Check if a name is a valid preset name.""" + return name in PRESETS + + +def resolve_preset( + preset_name: str, + default_policy: Optional[PolicyDecision] = None, + enforce: 
Optional[bool] = None, + additional_rules: Optional[List[PolicyRule]] = None, +) -> PolicyConfig: + """ + Resolve a policy config from preset + overrides. + + Merge order: + 1. Preset defaults (default_policy, enforce, rules) + 2. Top-level overrides (default_policy, enforce) from kwargs + 3. Additional rules are appended after preset rules + + Args: + preset_name: Name of the preset to use as base + default_policy: Override for default policy + enforce: Override for enforce flag + additional_rules: Rules to append after preset rules + + Returns: + PolicyConfig with merged settings + + Raises: + ValueError: If preset_name is not recognized + """ + preset = get_preset(preset_name) + if not preset: + available = ", ".join(PRESETS.keys()) + raise ValueError(f'Unknown policy preset: "{preset_name}". Available: {available}') + + # Start with preset config + config = PolicyConfig( + default_policy=preset.config.default_policy, + enforce=preset.config.enforce, + rules=preset.config.rules.copy(), + preset=preset_name, + ) + + # Apply overrides + if default_policy is not None: + config.default_policy = default_policy + if enforce is not None: + config.enforce = enforce + if additional_rules: + config.rules = config.rules + additional_rules + + return config + + +def policy_config_to_dict(config: PolicyConfig) -> Dict[str, Any]: + """Convert PolicyConfig to JSON-serializable dict.""" + def match_to_dict(match: PolicyMatch) -> Dict[str, Any]: + result = { + "tools": match.tools, + "categories": match.categories, + "target_patterns": match.target_patterns, + "target_patterns_are_regex": match.target_patterns_are_regex, + "rate_limit": ( + {"max_count": match.rate_limit.max_count, "window_ms": match.rate_limit.window_ms} + if match.rate_limit + else None + ), + "time_window": ( + { + "allowed_hours": list(match.time_window.allowed_hours) if match.time_window.allowed_hours else None, + "allowed_days": match.time_window.allowed_days, + "timezone": match.time_window.timezone, + } 
+ if match.time_window + else None + ), + "command_patterns": match.command_patterns, + "command_patterns_are_regex": match.command_patterns_are_regex, + "command_must_not_contain": match.command_must_not_contain, + } + return result + + return { + "default_policy": config.default_policy, + "enforce": config.enforce, + "preset": config.preset, + "rules": [ + { + "id": r.id, + "name": r.name, + "priority": r.priority, + "decision": r.decision, + "reason": r.reason, + "match": match_to_dict(r.match), + } + for r in config.rules + ], + } diff --git a/plugins/web4-governance/governance/rate_limiter.py b/plugins/web4-governance/governance/rate_limiter.py new file mode 100644 index 0000000000..ce6085182f --- /dev/null +++ b/plugins/web4-governance/governance/rate_limiter.py @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Rate Limiter +# https://github.com/dp-web4/web4 +""" +Rate Limiter - Sliding window counters for policy rate limiting. + +Memory-only (no persistence). Resets on session restart. +Keys are derived from rule context: e.g. 
import time
from dataclasses import dataclass
from typing import Dict, List


@dataclass
class RateLimitResult:
    """Result of a rate limit check."""
    allowed: bool   # True when current count is below the limit
    current: int    # actions currently inside the window (after pruning)
    limit: int      # configured maximum for the window


class RateLimiter:
    """Sliding window rate limiter for policy enforcement.

    Memory-only: state is a map of key -> list of action timestamps in
    epoch milliseconds, and resets on session restart.  Timestamps are
    added via record() and pruned lazily during check() / prune().
    """

    def __init__(self) -> None:
        # key -> timestamps (epoch ms) of recorded actions
        self._windows: Dict[str, List[float]] = {}

    def check(self, key: str, max_count: int, window_ms: int) -> RateLimitResult:
        """
        Check whether a key is under its rate limit. Prunes expired entries.

        Args:
            key: Rate limit key (e.g., "ratelimit:rule-id:tool:Bash")
            max_count: Maximum allowed actions within window
            window_ms: Window duration in milliseconds

        Returns:
            RateLimitResult with allowed flag, current count, and limit
        """
        cutoff = time.time() * 1000 - window_ms
        timestamps = self._windows.get(key)
        if not timestamps:
            return RateLimitResult(allowed=True, current=0, limit=max_count)

        live = [t for t in timestamps if t > cutoff]
        if live:
            self._windows[key] = live
        else:
            # Fix: previously a fully-expired key kept an empty list forever,
            # so _windows grew without bound unless callers ran prune().
            del self._windows[key]

        return RateLimitResult(
            allowed=len(live) < max_count,
            current=len(live),
            limit=max_count,
        )

    def record(self, key: str) -> None:
        """
        Record a new action for the given key.

        Call this after a successful action to track the rate.

        Args:
            key: Rate limit key
        """
        self._windows.setdefault(key, []).append(time.time() * 1000)

    def prune(self, window_ms: int) -> int:
        """
        Prune all expired entries across all keys.

        Args:
            window_ms: Window duration to use for pruning

        Returns:
            Number of entries pruned
        """
        cutoff = time.time() * 1000 - window_ms
        removed = 0

        # Iterate over a snapshot of keys so we can delete while looping.
        for key in list(self._windows):
            timestamps = self._windows[key]
            live = [t for t in timestamps if t > cutoff]
            removed += len(timestamps) - len(live)
            if live:
                self._windows[key] = live
            else:
                del self._windows[key]

        return removed

    def count(self, key: str) -> int:
        """
        Get current count for a key (without pruning).

        Args:
            key: Rate limit key

        Returns:
            Number of timestamps recorded for key
        """
        return len(self._windows.get(key, []))

    @property
    def key_count(self) -> int:
        """Number of tracked keys."""
        return len(self._windows)

    @staticmethod
    def make_key(rule_id: str, tool_or_category: str) -> str:
        """
        Build a rate limit key from rule context.

        Args:
            rule_id: Policy rule ID
            tool_or_category: Tool name or category string

        Returns:
            Namespaced key string
        """
        return f"ratelimit:{rule_id}:{tool_or_category}"
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Web4 Contributors
#
# Web4 Reference Store
# https://github.com/dp-web4/web4
"""
Persistent reference store for agent roles with witnessing.

References are learned patterns, facts, and context extractions that
persist across sessions for a given role. This enables agents to:

- Remember patterns they've learned
- Recall facts extracted from previous sessions
- Build on prior context without re-reading everything
- Accumulate role-specific knowledge

References are Web4 entities with trust:
- Each reference has a trust score that evolves through witnessing
- When a reference is used and the task succeeds, its trust increases
- When a reference is used and the task fails, its trust decreases
- High-trust references are prioritized in context injection
- Low-trust references fade out (self-curation)

This is the "accumulating reference" part of Web4 agent governance.
"""

import json
import hashlib
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Optional, Dict, Tuple, TYPE_CHECKING
from dataclasses import dataclass, field, asdict, fields as dataclass_fields

if TYPE_CHECKING:
    # Type-only import: avoids a hard runtime dependency on the ledger
    # module (the store never calls it; it is held for API compatibility).
    from .ledger import Ledger


# Default storage location (overridable per-store via base_dir)
REFERENCES_DIR = Path.home() / ".web4" / "governance" / "references"


@dataclass
class Reference:
    """
    A piece of learned/extracted context for a role.

    Represents information that persists across sessions:
    - Patterns observed in code/data
    - Facts extracted from documents
    - User preferences learned over time
    - Context summaries from previous sessions

    References are Web4 entities with witnessed trust:
    - trust_score evolves through witnessing (usage outcomes)
    - success_count/failure_count track correlation with task outcomes
    - High trust = more likely to be included in context
    - Low trust = self-curated out over time
    """
    ref_id: str
    role_id: str
    content: str
    source: str    # Where it came from (file, session, user input)
    ref_type: str  # pattern, fact, preference, context, summary

    # Quality indicators
    confidence: float = 0.5  # Initial confidence when created
    relevance: float = 0.5   # How relevant to current work

    # Trust through witnessing (evolves over time)
    trust_score: float = 0.5   # Current trust (0.0-1.0)
    success_count: int = 0     # Times used in successful tasks
    failure_count: int = 0     # Times used in failed tasks

    # Lifecycle
    created_at: Optional[str] = None
    last_used: Optional[str] = None
    use_count: int = 0

    # Metadata
    tags: List[str] = field(default_factory=list)
    expires_at: Optional[str] = None  # Optional expiration

    def witness_outcome(self, success: bool, magnitude: float = 0.1) -> None:
        """
        Update trust based on task outcome when this reference was used.

        Same asymmetric pattern as other trust: easier to lose than gain.
        """
        self.use_count += 1
        self.last_used = datetime.now(timezone.utc).isoformat()

        if success:
            self.success_count += 1
            # Diminishing returns as trust approaches 1.0
            delta = magnitude * 0.05 * (1 - self.trust_score)
        else:
            self.failure_count += 1
            # Bigger fall from height
            delta = -magnitude * 0.10 * self.trust_score

        self.trust_score = max(0.0, min(1.0, self.trust_score + delta))

    def effective_trust(self) -> float:
        """
        Combined trust score considering initial confidence and witnessed trust.

        New references rely more on confidence, mature ones on trust_score.
        """
        if self.use_count == 0:
            return self.confidence
        if self.use_count < 5:
            # Blend: more weight to confidence for new refs
            weight = self.use_count / 5
            return (1 - weight) * self.confidence + weight * self.trust_score
        # Mature: trust_score dominates
        return self.trust_score

    def trust_level(self) -> str:
        """Categorical trust level."""
        t = self.effective_trust()
        if t >= 0.8:
            return "high"
        if t >= 0.6:
            return "medium-high"
        if t >= 0.4:
            return "medium"
        if t >= 0.2:
            return "low"
        return "minimal"

    def to_dict(self) -> dict:
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> 'Reference':
        """
        Deserialize a reference, tolerating missing AND unknown keys.

        Fix: unknown keys (e.g. written by a newer plugin version) previously
        raised TypeError in cls(**data); every caller swallows TypeError, so
        the whole record was silently dropped. Unknown keys are now ignored.
        """
        known = {f.name for f in dataclass_fields(cls)}
        clean = {k: v for k, v in data.items() if k in known}
        clean.setdefault('tags', [])
        clean.setdefault('trust_score', clean.get('confidence', 0.5))
        clean.setdefault('success_count', 0)
        clean.setdefault('failure_count', 0)
        return cls(**clean)


class ReferenceStore:
    """
    Persistent storage for role-specific references.

    Each role (agent type) has its own JSONL file (name derived from a
    hash of the role id). References persist across sessions and can be:
    - Added during work
    - Queried for context
    - Updated with usage stats
    - Pruned when stale
    """

    def __init__(self, ledger: Optional["Ledger"] = None,
                 base_dir: Optional[Path] = None):
        """
        Args:
            ledger: Optional ledger handle (kept for API compatibility).
            base_dir: Storage directory; defaults to REFERENCES_DIR.
                Parameterized so tests and embedders can avoid writing
                under the user's home directory.
        """
        self._base_dir = Path(base_dir) if base_dir else REFERENCES_DIR
        self._base_dir.mkdir(parents=True, exist_ok=True)
        self.ledger = ledger

    def _role_refs_file(self, role_id: str) -> Path:
        """Get file path for role references (hashed, filesystem-safe)."""
        safe_name = hashlib.sha256(role_id.encode()).hexdigest()[:16]
        return self._base_dir / f"{safe_name}.jsonl"

    # -------------------------------------------------------------------
    # Internal JSONL helpers (shared by every read-update-rewrite method)
    # -------------------------------------------------------------------

    def _read_entries(self, role_id: str) -> List[Tuple[Optional[Reference], str]]:
        """
        Read (reference, raw_line) pairs from the role's file.

        reference is None when a line cannot be parsed; the raw line is
        kept so rewrites no longer silently discard records they cannot
        parse (previously a data-loss bug in every rewrite path).
        """
        refs_file = self._role_refs_file(role_id)
        entries: List[Tuple[Optional[Reference], str]] = []
        if not refs_file.exists():
            return entries
        with open(refs_file) as f:
            for line in f:
                if not line.strip():
                    continue
                try:
                    entries.append((Reference.from_dict(json.loads(line)), line))
                except (json.JSONDecodeError, TypeError):
                    entries.append((None, line))
        return entries

    def _write_entries(self, role_id: str,
                       entries: List[Tuple[Optional[Reference], str]]) -> None:
        """
        Rewrite the role's file atomically (temp file + rename) so a crash
        mid-write cannot destroy the existing reference store.
        """
        refs_file = self._role_refs_file(role_id)
        tmp = refs_file.with_suffix(".jsonl.tmp")
        with open(tmp, "w") as f:
            for ref, raw in entries:
                if ref is not None:
                    f.write(json.dumps(ref.to_dict()) + "\n")
                else:
                    f.write(raw if raw.endswith("\n") else raw + "\n")
        tmp.replace(refs_file)

    # -------------------------------------------------------------------
    # Public API
    # -------------------------------------------------------------------

    def add(
        self,
        role_id: str,
        content: str,
        source: str,
        ref_type: str = "context",
        confidence: float = 0.5,
        tags: Optional[List[str]] = None
    ) -> Reference:
        """
        Add a reference for a role.

        Args:
            role_id: The agent role this reference belongs to
            content: The reference content (pattern, fact, etc.)
            source: Where this came from (file path, session ID, etc.)
            ref_type: Type of reference (pattern, fact, preference, context, summary)
            confidence: Confidence in this reference (0.0-1.0)
            tags: Optional tags for categorization

        Returns:
            The created Reference
        """
        ref = Reference(
            ref_id=f"ref:{uuid.uuid4().hex[:12]}",
            role_id=role_id,
            content=content,
            source=source,
            ref_type=ref_type,
            confidence=confidence,
            tags=tags or [],
            created_at=datetime.now(timezone.utc).isoformat()
        )

        # Append-only write; no rewrite needed for adds
        refs_file = self._role_refs_file(role_id)
        with open(refs_file, "a") as f:
            f.write(json.dumps(ref.to_dict()) + "\n")

        return ref

    def get_for_role(self, role_id: str, limit: int = 50) -> List[Reference]:
        """
        Get recent references for a role.

        Returns most recent references up to limit.
        """
        refs = [ref for ref, _ in self._read_entries(role_id) if ref is not None]
        return refs[-limit:]

    def search(
        self,
        role_id: str,
        query: str,
        ref_type: Optional[str] = None,
        tags: Optional[List[str]] = None,
        limit: int = 10
    ) -> List[Reference]:
        """
        Search references by content, type, or tags.

        Args:
            role_id: Role to search
            query: Text to search in content (case-insensitive substring)
            ref_type: Filter by reference type
            tags: Filter by tags (any match)
            limit: Maximum results

        Returns:
            Matching references sorted by confidence, use count, recency
        """
        refs = self.get_for_role(role_id, limit=500)
        query_lower = query.lower()

        matches = []
        for ref in refs:
            if query_lower not in ref.content.lower():
                continue
            if ref_type and ref.ref_type != ref_type:
                continue
            if tags and not any(t in ref.tags for t in tags):
                continue
            matches.append(ref)

        matches.sort(key=lambda r: (
            r.confidence,
            r.use_count,
            r.created_at or ""
        ), reverse=True)

        return matches[:limit]

    def mark_used(self, ref_id: str, role_id: str):
        """
        Mark a reference as used (update stats).

        This helps track which references are valuable.
        """
        entries = self._read_entries(role_id)
        changed = False
        for ref, _ in entries:
            if ref is not None and ref.ref_id == ref_id:
                ref.use_count += 1
                ref.last_used = datetime.now(timezone.utc).isoformat()
                changed = True
        # Only rewrite when something actually changed
        if changed:
            self._write_entries(role_id, entries)

    def witness_references(
        self,
        role_id: str,
        ref_ids: List[str],
        success: bool,
        magnitude: float = 0.1
    ) -> List[Reference]:
        """
        Witness multiple references based on task outcome.

        When a task completes, all references that were used get their
        trust updated based on whether the task succeeded or failed.

        This enables self-curation: helpful references gain trust,
        unhelpful ones lose it and eventually fade out.

        Args:
            role_id: The role whose references to update
            ref_ids: List of reference IDs that were used
            success: Whether the task succeeded
            magnitude: How much to adjust trust (default 0.1)

        Returns:
            List of updated references
        """
        entries = self._read_entries(role_id)
        ref_ids_set = set(ref_ids)
        updated: List[Reference] = []

        for ref, _ in entries:
            if ref is not None and ref.ref_id in ref_ids_set:
                ref.witness_outcome(success, magnitude)
                updated.append(ref)

        if updated:
            self._write_entries(role_id, entries)
        return updated

    def witness_all_for_role(
        self,
        role_id: str,
        success: bool,
        magnitude: float = 0.05
    ) -> int:
        """
        Witness all recently-used references for a role.

        Simpler version: if a role's task succeeds/fails, all references
        used within the last hour get a trust update.

        Returns: Number of references updated
        """
        entries = self._read_entries(role_id)
        now = datetime.now(timezone.utc)
        updated_count = 0

        for ref, _ in entries:
            if ref is None or not ref.last_used:
                continue
            try:
                last = datetime.fromisoformat(ref.last_used.replace("Z", "+00:00"))
            except ValueError:
                continue  # Unparseable timestamp: leave the record untouched
            if (now - last).total_seconds() < 3600:
                ref.witness_outcome(success, magnitude)
                updated_count += 1

        if updated_count:
            self._write_entries(role_id, entries)
        return updated_count

    def get_context_for_role(
        self,
        role_id: str,
        max_tokens: int = 2000,
        min_trust: float = 0.2
    ) -> Tuple[str, List[str]]:
        """
        Get consolidated context string for a role, prioritized by trust.

        Self-curation: High-trust references are included first.
        Low-trust references (below min_trust) are excluded.

        Fix: this previously loaded only the 50 most recent references and
        rewrote the file with just those, silently deleting every older
        record. It now updates usage stats without truncating the file.

        Args:
            role_id: Role to get context for
            max_tokens: Approximate token limit (chars / 4)
            min_trust: Minimum trust score to include (default 0.2)

        Returns:
            Tuple of (formatted context string, list of ref_ids used)
        """
        entries = self._read_entries(role_id)
        all_refs = [ref for ref, _ in entries if ref is not None]
        recent = all_refs[-50:]  # Consider only the 50 most recent

        if not recent:
            return "", []

        # Filter by minimum trust and sort by effective trust
        trusted = [r for r in recent if r.effective_trust() >= min_trust]
        trusted.sort(key=lambda r: r.effective_trust(), reverse=True)

        # Group by type
        by_type: Dict[str, List[Reference]] = {}
        for ref in trusted:
            by_type.setdefault(ref.ref_type, []).append(ref)

        lines = ["## Prior Context for This Role\n"]
        char_count = 0
        char_limit = max_tokens * 4
        used_ref_ids: List[str] = []

        for ref_type in ["pattern", "fact", "preference", "context", "summary"]:
            if ref_type not in by_type:
                continue

            lines.append(f"\n### {ref_type.title()}s\n")

            # Already sorted by trust within type; cap at 5 per type
            for ref in by_type[ref_type][:5]:
                if char_count > char_limit:
                    break

                entry = f"- {ref.content[:200]}"

                # Show trust indicator
                trust_lvl = ref.trust_level()
                if trust_lvl == "high":
                    entry += " ★"
                elif trust_lvl == "medium-high":
                    entry += " ☆"

                lines.append(entry)
                char_count += len(entry)
                used_ref_ids.append(ref.ref_id)

                # Mark as used
                ref.use_count += 1
                ref.last_used = datetime.now(timezone.utc).isoformat()

        # Persist updated use counts — writing ALL entries, not a truncation
        if used_ref_ids:
            self._write_entries(role_id, entries)

        return "\n".join(lines), used_ref_ids

    def _save_refs(self, role_id: str, refs: List[Reference]):
        """Overwrite the role's file with exactly these references (legacy helper)."""
        self._write_entries(role_id, [(r, "") for r in refs])

    def prune_stale(self, role_id: str, max_age_days: int = 90) -> int:
        """
        Remove old, unused references.

        Unparseable lines are preserved (previously they were silently
        deleted here without being counted as pruned).

        Returns number of references pruned.
        """
        entries = self._read_entries(role_id)
        if not entries:
            return 0

        now = datetime.now(timezone.utc)
        kept: List[Tuple[Optional[Reference], str]] = []
        pruned = 0

        for ref, raw in entries:
            if ref is None:
                kept.append((ref, raw))  # Preserve unparseable lines
                continue

            # Keep if used recently or high confidence
            if ref.use_count > 0 or ref.confidence >= 0.8:
                kept.append((ref, raw))
                continue

            # Check age; keep when young or when the timestamp is unreadable
            if ref.created_at:
                try:
                    created = datetime.fromisoformat(ref.created_at.replace("Z", "+00:00"))
                except ValueError:
                    kept.append((ref, raw))
                    continue
                if (now - created).days <= max_age_days:
                    kept.append((ref, raw))
                    continue

            pruned += 1

        self._write_entries(role_id, kept)
        return pruned

    def get_stats(self, role_id: str) -> dict:
        """Get statistics about references for a role."""
        refs = self.get_for_role(role_id, limit=1000)

        if not refs:
            return {
                "role_id": role_id,
                "total_references": 0,
                "by_type": {},
                "avg_confidence": 0.0,
                "total_uses": 0
            }

        by_type: Dict[str, int] = {}
        total_confidence = 0.0
        total_uses = 0

        for ref in refs:
            by_type[ref.ref_type] = by_type.get(ref.ref_type, 0) + 1
            total_confidence += ref.confidence
            total_uses += ref.use_count

        return {
            "role_id": role_id,
            "total_references": len(refs),
            "by_type": by_type,
            "avg_confidence": total_confidence / len(refs),
            "total_uses": total_uses
        }
import json
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Any


@dataclass
class ToolStats:
    """Statistics for a single tool."""
    tool: str
    invocations: int                  # total audit records seen for this tool
    success_count: int
    error_count: int
    blocked_count: int
    success_rate: float               # success_count / invocations
    avg_duration_ms: Optional[float]  # None when no durations were recorded


@dataclass
class CategoryBreakdown:
    """Breakdown for a category."""
    category: str
    count: int
    percentage: float


@dataclass
class PolicyStats:
    """Policy decision statistics."""
    total_evaluated: int
    allow_count: int
    deny_count: int
    warn_count: int
    block_rate: float


@dataclass
class ErrorSummary:
    """Error summary for a tool."""
    tool: str
    count: int
    top_messages: List[str]


@dataclass
class TimelineBucket:
    """Activity in a time bucket."""
    minute: str
    count: int


@dataclass
class AuditReport:
    """Complete audit report."""
    total_records: int
    time_range: Optional[Dict[str, str]]
    tool_stats: List[ToolStats]
    category_breakdown: List[CategoryBreakdown]
    policy_stats: PolicyStats
    errors: List[ErrorSummary]
    timeline: List[TimelineBucket]


class AuditReporter:
    """Generate aggregated reports from audit records."""

    def __init__(self, records: List[Dict[str, Any]]):
        """
        Initialize reporter with audit records.

        Args:
            records: List of audit record dicts (from ledger.get_session_audit_trail
                or ledger.query_audit)
        """
        self._records = records

    def generate(self) -> AuditReport:
        """Generate complete audit report."""
        return AuditReport(
            total_records=len(self._records),
            time_range=self._compute_time_range(),
            tool_stats=self._compute_tool_stats(),
            category_breakdown=self._compute_category_breakdown(),
            policy_stats=self._compute_policy_stats(),
            errors=self._compute_errors(),
            timeline=self._compute_timeline(),
        )

    @staticmethod
    def _parse_r6(record: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Decode a record's r6_data payload (stored as JSON string or dict).

        Returns None on missing/undecodable/non-dict payloads so callers
        never raise while digging into the structure (previously a null
        "request"/"result" field caused an uncaught AttributeError).
        """
        raw = record.get("r6_data")
        if not raw:
            return None
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except json.JSONDecodeError:
                return None
        return raw if isinstance(raw, dict) else None

    def _compute_time_range(self) -> Optional[Dict[str, str]]:
        """Compute time range of records (lexicographic on ISO timestamps)."""
        timestamps = [r.get("timestamp") for r in self._records if r.get("timestamp")]
        if not timestamps:
            return None
        sorted_ts = sorted(timestamps)
        return {"from": sorted_ts[0], "to": sorted_ts[-1]}

    def _compute_tool_stats(self) -> List[ToolStats]:
        """
        Compute per-tool statistics.

        Fix: every record now counts toward its tool's invocation total.
        Previously only success/error/blocked statuses were counted, so
        records with any other status vanished from invocations and
        inflated success_rate.
        """
        tool_data: Dict[str, Dict[str, Any]] = {}

        for r in self._records:
            tool = r.get("tool_name")
            if not tool:
                continue

            entry = tool_data.setdefault(tool, {
                "total": 0,
                "success": 0,
                "error": 0,
                "blocked": 0,
                "durations": [],
            })

            entry["total"] += 1
            status = r.get("status", "")
            if status in ("success", "error", "blocked"):
                entry[status] += 1

            # Extract duration from r6_data when present
            r6 = self._parse_r6(r)
            if r6:
                result = r6.get("result")
                if isinstance(result, dict) and result.get("duration_ms") is not None:
                    entry["durations"].append(result["duration_ms"])

        stats = []
        for tool, data in tool_data.items():
            total = data["total"]
            durations = data["durations"]
            avg_dur = sum(durations) / len(durations) if durations else None
            stats.append(ToolStats(
                tool=tool,
                invocations=total,
                success_count=data["success"],
                error_count=data["error"],
                blocked_count=data["blocked"],
                success_rate=data["success"] / total if total > 0 else 0.0,
                avg_duration_ms=avg_dur,
            ))

        return sorted(stats, key=lambda s: s.invocations, reverse=True)

    def _compute_category_breakdown(self) -> List[CategoryBreakdown]:
        """Compute category breakdown from r6_data request categories."""
        counts: Dict[str, int] = {}

        for r in self._records:
            r6 = self._parse_r6(r)
            if not r6:
                continue
            request = r6.get("request")
            category = request.get("category") if isinstance(request, dict) else None
            if category:
                counts[category] = counts.get(category, 0) + 1

        total = sum(counts.values())
        breakdown = [
            CategoryBreakdown(
                category=cat,
                count=count,
                percentage=(count / total * 100) if total > 0 else 0.0,
            )
            for cat, count in counts.items()
        ]
        return sorted(breakdown, key=lambda b: b.count, reverse=True)

    def _compute_policy_stats(self) -> PolicyStats:
        """Compute policy decision statistics (blocked → deny, else allow)."""
        deny_count = sum(1 for r in self._records if r.get("status", "") == "blocked")
        total = len(self._records)
        return PolicyStats(
            total_evaluated=total,
            allow_count=total - deny_count,
            deny_count=deny_count,
            warn_count=0,  # Warn doesn't appear in status
            block_rate=deny_count / total if total > 0 else 0.0,
        )

    def _compute_errors(self) -> List[ErrorSummary]:
        """Compute error summaries by tool with most-frequent messages."""
        error_data: Dict[str, Dict[str, Any]] = {}

        for r in self._records:
            if r.get("status", "") != "error":
                continue

            tool = r.get("tool_name", "unknown")
            entry = error_data.setdefault(tool, {"count": 0, "messages": {}})
            entry["count"] += 1

            r6 = self._parse_r6(r)
            if r6:
                result = r6.get("result")
                msg = result.get("error_message") if isinstance(result, dict) else None
                if msg:
                    entry["messages"][msg] = entry["messages"].get(msg, 0) + 1

        summaries = []
        for tool, data in error_data.items():
            sorted_msgs = sorted(data["messages"].items(), key=lambda x: x[1], reverse=True)
            summaries.append(ErrorSummary(
                tool=tool,
                count=data["count"],
                top_messages=[msg for msg, _ in sorted_msgs[:5]],
            ))

        return sorted(summaries, key=lambda s: s.count, reverse=True)

    def _compute_timeline(self) -> List[TimelineBucket]:
        """Compute activity timeline bucketed by minute."""
        buckets: Dict[str, int] = {}

        for r in self._records:
            ts = r.get("timestamp")
            if not ts:
                continue
            try:
                # Normalize trailing Z for fromisoformat, truncate to minute
                if ts.endswith("Z"):
                    ts = ts[:-1] + "+00:00"
                dt = datetime.fromisoformat(ts)
                minute = dt.strftime("%Y-%m-%dT%H:%M")
                buckets[minute] = buckets.get(minute, 0) + 1
            except (ValueError, TypeError):
                pass

        timeline = [TimelineBucket(minute=m, count=c) for m, c in buckets.items()]
        return sorted(timeline, key=lambda b: b.minute)

    def format_text(self) -> str:
        """Format report as structured text."""
        report = self.generate()
        lines: List[str] = []

        lines.append("=== Audit Report ===")
        lines.append(f"Total records: {report.total_records}")
        if report.time_range:
            lines.append(f"Time range: {report.time_range['from']} → {report.time_range['to']}")
        lines.append("")

        lines.append("--- Tool Stats ---")
        if not report.tool_stats:
            lines.append("  (no data)")
        for ts in report.tool_stats:
            dur = f"{ts.avg_duration_ms:.0f}ms avg" if ts.avg_duration_ms is not None else "n/a"
            lines.append(
                f"  {ts.tool}: {ts.invocations} calls, "
                f"{ts.success_rate * 100:.0f}% success, {dur}"
            )
        lines.append("")

        lines.append("--- Categories ---")
        for cb in report.category_breakdown:
            lines.append(f"  {cb.category}: {cb.count} ({cb.percentage:.1f}%)")
        lines.append("")

        lines.append("--- Policy ---")
        lines.append(f"  Evaluated: {report.policy_stats.total_evaluated}")
        lines.append(f"  Allowed: {report.policy_stats.allow_count}")
        lines.append(f"  Denied: {report.policy_stats.deny_count}")
        lines.append(f"  Block rate: {report.policy_stats.block_rate * 100:.1f}%")
        lines.append("")

        if report.errors:
            lines.append("--- Errors ---")
            for err in report.errors:
                lines.append(f"  {err.tool}: {err.count} errors")
                for msg in err.top_messages:
                    lines.append(f"    - {msg}")
            lines.append("")

        if report.timeline:
            lines.append("--- Timeline (actions/min) ---")
            for bucket in report.timeline:
                lines.append(f"  {bucket.minute}: {bucket.count}")

        return "\n".join(lines)

    def to_dict(self) -> Dict[str, Any]:
        """Convert report to JSON-serializable dict."""
        report = self.generate()
        return {
            "total_records": report.total_records,
            "time_range": report.time_range,
            "tool_stats": [
                {
                    "tool": ts.tool,
                    "invocations": ts.invocations,
                    "success_count": ts.success_count,
                    "error_count": ts.error_count,
                    "blocked_count": ts.blocked_count,
                    "success_rate": ts.success_rate,
                    "avg_duration_ms": ts.avg_duration_ms,
                }
                for ts in report.tool_stats
            ],
            "category_breakdown": [
                {"category": cb.category, "count": cb.count, "percentage": cb.percentage}
                for cb in report.category_breakdown
            ],
            "policy_stats": {
                "total_evaluated": report.policy_stats.total_evaluated,
                "allow_count": report.policy_stats.allow_count,
                "deny_count": report.policy_stats.deny_count,
                "warn_count": report.policy_stats.warn_count,
                "block_rate": report.policy_stats.block_rate,
            },
            "errors": [
                {"tool": err.tool, "count": err.count, "top_messages": err.top_messages}
                for err in report.errors
            ],
            "timeline": [
                {"minute": b.minute, "count": b.count}
                for b in report.timeline
            ],
        }
err.tool, "count": err.count, "top_messages": err.top_messages} + for err in report.errors + ], + "timeline": [ + {"minute": b.minute, "count": b.count} + for b in report.timeline + ], + } diff --git a/plugins/web4-governance/governance/role_trust.py b/plugins/web4-governance/governance/role_trust.py new file mode 100644 index 0000000000..3df4b15809 --- /dev/null +++ b/plugins/web4-governance/governance/role_trust.py @@ -0,0 +1,590 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Role Trust Store +# https://github.com/dp-web4/web4 +""" +Role-specific trust accumulation. + +Maps Claude Code agents to Web4 role entities with: +- T3 Trust Tensor (fractal 3D: Talent/Training/Temperament) +- V3 Value Tensor (fractal 3D: Valuation/Veracity/Validity) +- Action history and success rates +- Trust-based capability modulation + +Key concept: Trust is NEVER global. Each role (agent) accumulates +its own trust independently. A highly trusted code-reviewer may +have low trust as a test-generator (and vice versa). + +## Fractal Tensor Structure + +T3 (base 3D) with subdimensions: + Talent → (competence, alignment) + Training → (lineage, witnesses) + Temperament → (reliability, consistency) + +V3 (base 3D) with subdimensions: + Valuation → (reputation, contribution) + Veracity → (stewardship, energy) + Validity → (network, temporal) +""" + +import json +import hashlib +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional +from dataclasses import dataclass, field, asdict + +from .ledger import Ledger +from .tensors import ( + T3Tensor, V3Tensor, + migrate_legacy_t3, migrate_legacy_v3, +) + + +# Storage location +ROLES_DIR = Path.home() / ".web4" / "governance" / "roles" + + +@dataclass +class RoleTrust: + """ + Trust tensors for a specific role (agent type). 
+ + Uses fractal T3/V3 tensor structure per Web4 spec: + + T3 Trust Tensor (base 3D, each with 2 subdimensions): + - Talent → (competence, alignment) + - Training → (lineage, witnesses) + - Temperament → (reliability, consistency) + + V3 Value Tensor (base 3D, each with 2 subdimensions): + - Valuation → (reputation, contribution) + - Veracity → (stewardship, energy) + - Validity → (network, temporal) + + Trust is ROLE-CONTEXTUAL: an entity's T3/V3 exists only within + a role context. + """ + role_id: str + + # Fractal T3 Trust Tensor + t3: T3Tensor = field(default_factory=T3Tensor) + + # Fractal V3 Value Tensor + v3: V3Tensor = field(default_factory=V3Tensor) + + # Metadata + action_count: int = 0 + success_count: int = 0 + last_action: Optional[str] = None + created_at: Optional[str] = None + + # ========================================================================= + # Backward-compatible property accessors for subdimensions + # ========================================================================= + + @property + def competence(self) -> float: + return self.t3.competence + + @competence.setter + def competence(self, value: float): + self.t3.talent_sub.competence = value + + @property + def alignment(self) -> float: + return self.t3.alignment + + @alignment.setter + def alignment(self, value: float): + self.t3.talent_sub.alignment = value + + @property + def lineage(self) -> float: + return self.t3.lineage + + @lineage.setter + def lineage(self, value: float): + self.t3.training_sub.lineage = value + + @property + def witnesses(self) -> float: + return self.t3.witnesses + + @witnesses.setter + def witnesses(self, value: float): + self.t3.training_sub.witnesses = value + + @property + def reliability(self) -> float: + return self.t3.reliability + + @reliability.setter + def reliability(self, value: float): + self.t3.temperament_sub.reliability = value + + @property + def consistency(self) -> float: + return self.t3.consistency + + @consistency.setter + def 
consistency(self, value: float): + self.t3.temperament_sub.consistency = value + + # V3 subdimension accessors + @property + def reputation(self) -> float: + return self.v3.reputation + + @reputation.setter + def reputation(self, value: float): + self.v3.valuation_sub.reputation = value + + @property + def contribution(self) -> float: + return self.v3.contribution + + @contribution.setter + def contribution(self, value: float): + self.v3.valuation_sub.contribution = value + + @property + def stewardship(self) -> float: + return self.v3.stewardship + + @stewardship.setter + def stewardship(self, value: float): + self.v3.veracity_sub.stewardship = value + + @property + def energy(self) -> float: + return self.v3.energy + + @energy.setter + def energy(self, value: float): + self.v3.veracity_sub.energy = value + + @property + def network(self) -> float: + return self.v3.network + + @network.setter + def network(self, value: float): + self.v3.validity_sub.network = value + + @property + def temporal(self) -> float: + return self.v3.temporal + + @temporal.setter + def temporal(self, value: float): + self.v3.validity_sub.temporal = value + + # ========================================================================= + # Tensor aggregate methods + # ========================================================================= + + def t3_composite(self) -> float: + """ + Weighted composite T3 trust score per Web4 spec. + + Formula: talent * 0.3 + training * 0.4 + temperament * 0.3 + """ + return self.t3.composite() + + def t3_average(self) -> float: + """ + @deprecated Use t3_composite() for spec-compliant scoring. + Average of all 6 subdimensions for backward compatibility. 
+ """ + return (self.competence + self.reliability + self.consistency + + self.witnesses + self.lineage + self.alignment) / 6 + + def v3_composite(self) -> float: + """Composite V3 value score.""" + return self.v3.composite() + + def v3_average(self) -> float: + """ + @deprecated Use v3_composite() for spec-compliant scoring. + Average of all 6 subdimensions for backward compatibility. + """ + return (self.energy + self.contribution + self.stewardship + + self.network + self.reputation + self.temporal) / 6 + + def update_from_outcome(self, success: bool, is_novel: bool = False): + """ + Update trust based on action outcome per Web4 spec. + + | Outcome | Talent Impact | Training Impact | Temperament Impact | + |-----------------|---------------|-----------------|-------------------| + | Novel Success | +0.02 to +0.05| +0.01 to +0.02 | +0.01 | + | Standard Success| 0 | +0.005 to +0.01 | +0.005 | + | Failure | -0.02 | -0.01 | -0.02 | + """ + self.action_count += 1 + if success: + self.success_count += 1 + + # Use the spec-compliant T3Tensor update + self.t3.update_from_outcome(success, is_novel) + + # Update V3 contribution and energy based on outcome + clamp = lambda v: max(0.0, min(1.0, v)) + if success: + self.v3.valuation_sub.contribution = clamp(self.contribution + 0.01) + self.v3.veracity_sub.energy = clamp(self.energy + 0.01) + else: + self.v3.valuation_sub.contribution = clamp(self.contribution - 0.005) + + self.last_action = datetime.now(timezone.utc).isoformat() + + def trust_level(self) -> str: + """ + Categorical trust level based on T3 composite score. + + Uses weighted composite per Web4 spec, not simple average. + """ + return self.t3.level() + + def apply_decay(self, days_inactive: float, decay_rate: float = 0.01) -> bool: + """ + Apply trust decay based on inactivity. + + Trust decays slowly over time if not used. This prevents + stale trust from persisting indefinitely. 
+ + Decay primarily affects Temperament (reliability, consistency) + and V3 temporal/energy dimensions. + + Decay is asymptotic to 0.3 (never fully decays to 0). + + Args: + days_inactive: Days since last action + decay_rate: Decay rate per day (default 1% per day) + + Returns: + True if decay was applied, False if no decay needed + """ + if days_inactive <= 0: + return False + + # Calculate decay factor (exponential decay) + # After 30 days: ~74% remaining, 60 days: ~55%, 90 days: ~41% + decay_factor = (1 - decay_rate) ** days_inactive + + # Apply decay with floor at 0.3 (minimum trust) + floor = 0.3 + + def decay_value(current: float) -> float: + decayed = floor + (current - floor) * decay_factor + return max(floor, decayed) + + old_reliability = self.reliability + + # Decay Temperament subdimensions (reliability most affected) + self.t3.temperament_sub.reliability = decay_value(self.reliability) + self.t3.temperament_sub.consistency = decay_value(self.consistency * 0.98) + + # Talent.competence decays slower (skills don't fade as fast) + self.t3.talent_sub.competence = decay_value(self.competence * 0.995) + + # Decay V3 Validity.temporal (time-based value) + self.v3.validity_sub.temporal = decay_value(self.temporal) + + # Decay V3 Veracity.energy (effort fades) + self.v3.veracity_sub.energy = decay_value(self.energy * 0.99) + + # Return whether meaningful decay occurred + return abs(old_reliability - self.reliability) > 0.001 + + def days_since_last_action(self) -> float: + """Calculate days since last action.""" + if not self.last_action: + if self.created_at: + # Use creation time if never acted + try: + created = datetime.fromisoformat( + self.created_at.replace("Z", "+00:00") + ) + return (datetime.now(timezone.utc) - created).days + except (ValueError, TypeError): + return 0 + return 0 + + try: + last = datetime.fromisoformat( + self.last_action.replace("Z", "+00:00") + ) + delta = datetime.now(timezone.utc) - last + return delta.total_seconds() / 86400 # 
Convert to days + except (ValueError, TypeError): + return 0 + + def to_dict(self) -> dict: + """ + Serialize to dict. + + Includes both fractal structure (t3/v3) and flattened subdimensions + for backward compatibility. + """ + return { + "role_id": self.role_id, + # Fractal T3 tensor + "t3": self.t3.to_dict(), + # Fractal V3 tensor (simplified) + "v3": { + "valuation": self.v3.valuation, + "veracity": self.v3.veracity, + "validity": self.v3.validity, + "reputation": self.reputation, + "contribution": self.contribution, + "stewardship": self.stewardship, + "energy": self.energy, + "network": self.network, + "temporal": self.temporal, + "composite": self.v3.composite(), + }, + # Legacy 6D flattened view (backward compatibility) + "competence": self.competence, + "reliability": self.reliability, + "consistency": self.consistency, + "witnesses": self.witnesses, + "lineage": self.lineage, + "alignment": self.alignment, + "energy": self.energy, + "contribution": self.contribution, + "stewardship": self.stewardship, + "network": self.network, + "reputation": self.reputation, + "temporal": self.temporal, + # Metadata + "action_count": self.action_count, + "success_count": self.success_count, + "last_action": self.last_action, + "created_at": self.created_at, + } + + @classmethod + def from_dict(cls, data: dict) -> 'RoleTrust': + """ + Deserialize from dict. + + Handles both new fractal format and legacy 6D flat format. 
+ """ + role = cls( + role_id=data.get("role_id", ""), + action_count=data.get("action_count", 0), + success_count=data.get("success_count", 0), + last_action=data.get("last_action"), + created_at=data.get("created_at"), + ) + + # Check if data has new fractal t3 structure + if "t3" in data and isinstance(data["t3"], dict): + role.t3 = T3Tensor.from_dict(data["t3"]) + else: + # Migrate from legacy 6D flat format + role.t3 = migrate_legacy_t3({ + "competence": data.get("competence", 0.5), + "reliability": data.get("reliability", 0.5), + "consistency": data.get("consistency", 0.5), + "witnesses": data.get("witnesses", 0.5), + "lineage": data.get("lineage", 0.5), + "alignment": data.get("alignment", 0.5), + }) + + # Check if data has new fractal v3 structure + if "v3" in data and isinstance(data["v3"], dict): + role.v3 = migrate_legacy_v3({ + "reputation": data["v3"].get("reputation", 0.5), + "contribution": data["v3"].get("contribution", 0.5), + "stewardship": data["v3"].get("stewardship", 0.5), + "energy": data["v3"].get("energy", 0.5), + "network": data["v3"].get("network", 0.5), + "temporal": data["v3"].get("temporal", 0.5), + }) + else: + # Migrate from legacy 6D flat format + role.v3 = migrate_legacy_v3({ + "reputation": data.get("reputation", 0.5), + "contribution": data.get("contribution", 0.5), + "stewardship": data.get("stewardship", 0.5), + "energy": data.get("energy", 0.5), + "network": data.get("network", 0.5), + "temporal": data.get("temporal", 0.5), + }) + + return role + + +class RoleTrustStore: + """ + Persistent storage for role trust tensors. + + Each agent type (role) accumulates trust independently. + Trust persists across sessions. 
+ """ + + def __init__(self, ledger: Optional[Ledger] = None): + ROLES_DIR.mkdir(parents=True, exist_ok=True) + self.ledger = ledger + + def _role_file(self, role_id: str) -> Path: + """Get file path for role trust data.""" + safe_name = hashlib.sha256(role_id.encode()).hexdigest()[:16] + return ROLES_DIR / f"{safe_name}.json" + + def get(self, role_id: str) -> RoleTrust: + """Get trust for role, creating with defaults if new.""" + role_file = self._role_file(role_id) + + if role_file.exists(): + with open(role_file) as f: + data = json.load(f) + return RoleTrust.from_dict(data) + + # New role with default trust (neutral starting point) + trust = RoleTrust( + role_id=role_id, + created_at=datetime.now(timezone.utc).isoformat() + ) + self.save(trust) + return trust + + def save(self, trust: RoleTrust): + """Save role trust to disk.""" + role_file = self._role_file(trust.role_id) + with open(role_file, "w") as f: + json.dump(trust.to_dict(), f, indent=2) + + def update(self, role_id: str, success: bool, magnitude: float = 0.1) -> RoleTrust: + """Update role trust based on action outcome.""" + trust = self.get(role_id) + trust.update_from_outcome(success, magnitude) + self.save(trust) + + # Record in ledger if available + if self.ledger: + try: + self.ledger.record_audit( + session_id="role_trust", + action_type="trust_update", + tool_name=role_id, + target=f"success={success}", + input_hash=None, + output_hash=hashlib.sha256( + f"{trust.t3_average():.3f}".encode() + ).hexdigest()[:8], + status="success" + ) + except Exception: + pass # Don't fail on audit issues + + return trust + + def list_roles(self) -> List[str]: + """List all known role IDs.""" + roles = [] + for f in ROLES_DIR.glob("*.json"): + try: + with open(f) as file: + data = json.load(file) + roles.append(data.get("role_id", f.stem)) + except Exception: + pass + return roles + + def get_all(self) -> Dict[str, RoleTrust]: + """Get all role trusts.""" + return {role_id: self.get(role_id) for role_id in 
self.list_roles()} + + def derive_capabilities(self, role_id: str) -> dict: + """ + Derive capabilities from trust level. + + Higher trust = more permissions. + Uses spec-compliant T3 composite score. + """ + trust = self.get(role_id) + t3_composite = trust.t3_composite() + + return { + "can_read": True, # Always allowed + "can_write": t3_composite >= 0.3, + "can_execute": t3_composite >= 0.4, + "can_network": t3_composite >= 0.5, + "can_delegate": t3_composite >= 0.6, + "max_atp_per_action": int(10 + 90 * t3_composite), + "trust_level": trust.trust_level(), + "t3_composite": round(t3_composite, 3), + "t3_average": round(trust.t3_average(), 3), # Legacy compatibility + "action_count": trust.action_count, + "success_rate": trust.success_count / max(1, trust.action_count) + } + + def apply_decay_all(self, decay_rate: float = 0.01) -> Dict[str, dict]: + """ + Apply trust decay to all roles based on inactivity. + + Should be called periodically (e.g., at session start) to + ensure trust reflects recency. 
+ + Args: + decay_rate: Decay rate per day (default 1% per day) + + Returns: + Dict of {role_id: {"decayed": bool, "days_inactive": float, "t3_before": float, "t3_after": float}} + """ + results = {} + + for role_id in self.list_roles(): + trust = self.get(role_id) + days_inactive = trust.days_since_last_action() + + if days_inactive > 1: # Only decay if > 1 day inactive + t3_before = trust.t3_average() + decayed = trust.apply_decay(days_inactive, decay_rate) + + if decayed: + self.save(trust) + results[role_id] = { + "decayed": True, + "days_inactive": round(days_inactive, 1), + "t3_before": round(t3_before, 3), + "t3_after": round(trust.t3_average(), 3) + } + + # Record in ledger if available + if self.ledger: + try: + self.ledger.record_audit( + session_id="trust_decay", + action_type="decay", + tool_name=role_id, + target=f"days={days_inactive:.1f}", + input_hash=f"t3={t3_before:.3f}", + output_hash=f"t3={trust.t3_average():.3f}", + status="success" + ) + except Exception: + pass + + return results + + def get_with_decay(self, role_id: str, decay_rate: float = 0.01) -> RoleTrust: + """ + Get trust for role, applying decay if needed. + + Convenience method that applies decay before returning trust. + """ + trust = self.get(role_id) + days_inactive = trust.days_since_last_action() + + if days_inactive > 1: + if trust.apply_decay(days_inactive, decay_rate): + self.save(trust) + + return trust diff --git a/plugins/web4-governance/governance/session_manager.py b/plugins/web4-governance/governance/session_manager.py new file mode 100644 index 0000000000..65e15b9526 --- /dev/null +++ b/plugins/web4-governance/governance/session_manager.py @@ -0,0 +1,303 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Session Manager +# https://github.com/dp-web4/web4 +""" +Session Manager for lightweight governance. 

This is the main API for hooks to interact with governance:
- Start/end sessions with automatic numbering
- Track actions with ATP accounting
- Register work products
- Maintain audit trail

The session manager coordinates between:
- Soft LCT (identity)
- Ledger (persistence)
- Filesystem (session number sync)
"""

import os
import re
import hashlib
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Dict, List

from .ledger import Ledger
from .soft_lct import SoftLCT


class SessionManager:
    """
    High-level session management API.

    Provides the main interface for hooks to use governance features.
    Holds at most one active session at a time (`_current_session`);
    ledger persistence means sessions survive process restarts, but this
    in-memory handle does not.
    """

    def __init__(self, ledger: Optional[Ledger] = None, lct: Optional[SoftLCT] = None):
        """
        Initialize session manager.

        Args:
            ledger: Ledger instance. Creates one if not provided.
            lct: SoftLCT instance. Creates one if not provided.
        """
        self.ledger = ledger or Ledger()
        # Identity provider shares the same ledger so identity records land
        # in the same store as sessions and audit rows.
        self.lct = lct or SoftLCT(self.ledger)
        self._current_session: Optional[Dict] = None

    def start_session(self, project: Optional[str] = None, atp_budget: int = 100,
                      sync_with_filesystem: bool = True, fs_pattern: Optional[str] = None,
                      fs_path: Optional[Path] = None) -> Dict:
        """
        Start a new session with automatic numbering.

        Args:
            project: Project name for session grouping
            atp_budget: Action budget for this session
            sync_with_filesystem: Whether to sync session number with filesystem
            fs_pattern: Regex pattern for session files (default: Session(\\d+)_.*.md)
            fs_path: Path to scan for existing sessions

        Returns:
            Session information dict
        """
        # Get or create identity
        identity = self.lct.get_or_create()

        # Sync with filesystem if requested
        # (filesystem is treated as source of truth for the max session number)
        if sync_with_filesystem and project:
            fs_max = self._scan_existing_sessions(project, fs_pattern, fs_path)
            if fs_max > 0:
                self.ledger.sync_session_number(project, fs_max)

        # Get next session number from ledger
        session_number = self.ledger.get_next_session_number(project) if project else None

        # Generate session ID
        timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
        session_id = f"{project or 'default'}-{session_number or 'x'}-{timestamp}"

        # Start session in ledger
        session = self.ledger.start_session(
            session_id=session_id,
            lct_id=identity["lct_id"],
            project=project,
            session_number=session_number,
            atp_budget=atp_budget
        )

        self._current_session = session
        return session

    def _scan_existing_sessions(self, project: str, pattern: Optional[str] = None,
                                path: Optional[Path] = None) -> int:
        """
        Scan filesystem for existing session files to determine max session number.

        This ensures ledger stays in sync with filesystem reality.
        Returns 0 when no matching files (or no candidate directory) exist.
        """
        # Default pattern: Session###_*.md
        if pattern is None:
            pattern = r"Session(\d+)_.*\.md"

        # Default paths based on project
        if path is None:
            # Try common locations relative to current working directory
            candidates = [
                Path.cwd() / "Research",
                Path.cwd(),
            ]
            for candidate in candidates:
                if candidate.exists():
                    path = candidate
                    break
            else:
                return 0

        if not path.exists():
            return 0

        max_num = 0
        regex = re.compile(pattern)

        # Non-recursive scan: only direct children of `path` are considered.
        for file in path.iterdir():
            if file.is_file():
                match = regex.match(file.name)
                if match:
                    try:
                        num = int(match.group(1))
                        max_num = max(max_num, num)
                    except (ValueError, IndexError):
                        pass

        return max_num

    def end_session(self, status: str = "completed") -> Optional[Dict]:
        """
        End the current session.

        Args:
            status: Session end status (completed, aborted, error)

        Returns:
            Session summary or None if no current session
        """
        if not self._current_session:
            return None

        session_id = self._current_session["session_id"]
        self.ledger.end_session(session_id, status)

        summary = self.ledger.get_session_summary(session_id)
        self._current_session = None

        return summary

    def record_action(self, tool_name: str, target: Optional[str] = None,
                      input_data: Optional[Dict] = None, output_data: Optional[str] = None,
                      status: str = "success", atp_cost: int = 1) -> Dict:
        """
        Record an action (tool use) in the current session.

        Args:
            tool_name: Name of the tool used
            target: Target of the action (file path, URL, etc.)
            input_data: Tool input (will be hashed)
            output_data: Tool output (will be hashed)
            status: Action status
            atp_cost: ATP to consume for this action

        Returns:
            Action record

        Raises:
            RuntimeError: If no session is active.
        """
        if not self._current_session:
            raise RuntimeError("No active session. Call start_session first.")

        session_id = self._current_session["session_id"]

        # Hash inputs/outputs for audit (don't store raw data)
        input_hash = None
        if input_data:
            input_hash = hashlib.sha256(
                str(input_data).encode()
            ).hexdigest()[:16]

        output_hash = None
        if output_data:
            output_hash = hashlib.sha256(
                str(output_data).encode()
            ).hexdigest()[:16]

        # Record in audit trail (returns chain info)
        audit_record = self.ledger.record_audit(
            session_id=session_id,
            action_type="tool_use",
            tool_name=tool_name,
            target=target,
            input_hash=input_hash,
            output_hash=output_hash,
            status=status
        )

        # Consume ATP
        atp_remaining = self.ledger.consume_atp(session_id, atp_cost)

        return {
            "audit_id": audit_record["audit_id"],
            "sequence": audit_record["sequence"],
            "record_hash": audit_record["record_hash"],
            "previous_hash": audit_record["previous_hash"],
            "tool_name": tool_name,
            "target": target,
            "status": status,
            "atp_remaining": atp_remaining
        }

    def register_work_product(self, product_type: str, path: Optional[str] = None,
                              content: Optional[str] = None,
                              metadata: Optional[Dict] = None) -> str:
        """
        Register a work product created in this session.

        Args:
            product_type: Type of product (file, commit, session_doc, etc.)
            path: Path to the product
            content: Content to hash (optional)
            metadata: Additional metadata

        Returns:
            Product ID

        Raises:
            RuntimeError: If no session is active.
        """
        if not self._current_session:
            raise RuntimeError("No active session. Call start_session first.")

        return self.ledger.register_work_product(
            session_id=self._current_session["session_id"],
            product_type=product_type,
            path=path,
            content=content,
            metadata=metadata
        )

    def get_current_session(self) -> Optional[Dict]:
        """Get current session info (fresh read from the ledger)."""
        if not self._current_session:
            return None

        return self.ledger.get_session(self._current_session["session_id"])

    def get_atp_remaining(self) -> int:
        """Get remaining ATP for current session (0 when no session)."""
        session = self.get_current_session()
        return session["atp_remaining"] if session else 0

    def get_session_summary(self) -> Optional[Dict]:
        """Get summary of current session."""
        if not self._current_session:
            return None

        return self.ledger.get_session_summary(self._current_session["session_id"])

    def get_session_number(self) -> Optional[int]:
        """Get current session number."""
        if not self._current_session:
            return None
        return self._current_session.get("session_number")

    def verify_witnessing_chain(self) -> tuple:
        """
        Verify the audit trail witnessing chain for current session.

        Each record witnesses (hashes) the previous, creating an unforgeable
        sequence. This is the chain of witnessing - proof of ordered actions.

        Returns:
            (is_valid: bool, error_message: Optional[str])
        """
        if not self._current_session:
            return (False, "No active session")

        return self.ledger.verify_audit_chain(self._current_session["session_id"])

    # --- Convenience methods for common patterns ---

    def quick_start(self, project: str) -> int:
        """
        Quick start for autonomous sessions.

        Returns session number for use in filenames.
        """
        session = self.start_session(project=project, sync_with_filesystem=True)
        return session["session_number"]

    def quick_end(self) -> Dict:
        """
        Quick end with summary.

        Returns session summary.
+ """ + summary = self.end_session(status="completed") + return summary or {"error": "no_session"} diff --git a/plugins/web4-governance/governance/signing.py b/plugins/web4-governance/governance/signing.py new file mode 100644 index 0000000000..35717cc00e --- /dev/null +++ b/plugins/web4-governance/governance/signing.py @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Ed25519 Cryptographic Signing +# https://github.com/dp-web4/web4 +""" +Ed25519 Cryptographic Signing for Audit Records. + +Each session generates a unique Ed25519 keypair. The private key is stored +in session state (hex-encoded), and the public key is used as the keyId. +Signatures provide non-repudiation: even if an attacker gains file access, +they cannot forge valid signatures without the private key. + +Usage: + from governance.signing import generate_signing_keypair, sign_data, verify_signature + + # Generate keypair for a session + keypair = generate_signing_keypair() + print(f"Key ID: {keypair['key_id']}") + + # Sign audit record + data = json.dumps(record) + signature = sign_data(data, keypair['private_key_hex']) + + # Verify signature + is_valid = verify_signature(data, signature, keypair['public_key_hex']) +""" + +from typing import TypedDict +from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey, Ed25519PublicKey +from cryptography.hazmat.primitives import serialization + + +class SigningKeyPair(TypedDict): + """Ed25519 keypair for signing audit records.""" + private_key_hex: str # Hex-encoded private key (keep secret) + public_key_hex: str # Hex-encoded public key (safe to expose) + key_id: str # Short key ID (last 32 hex chars of public key) + + +def generate_signing_keypair() -> SigningKeyPair: + """ + Generate a new Ed25519 keypair for signing audit records. 
+ + Returns: + SigningKeyPair with private_key_hex, public_key_hex, and key_id + """ + private_key = Ed25519PrivateKey.generate() + public_key = private_key.public_key() + + # Export as raw bytes then hex-encode + private_bytes = private_key.private_bytes( + encoding=serialization.Encoding.Raw, + format=serialization.PrivateFormat.Raw, + encryption_algorithm=serialization.NoEncryption() + ) + public_bytes = public_key.public_bytes( + encoding=serialization.Encoding.Raw, + format=serialization.PublicFormat.Raw + ) + + private_key_hex = private_bytes.hex() + public_key_hex = public_bytes.hex() + + return SigningKeyPair( + private_key_hex=private_key_hex, + public_key_hex=public_key_hex, + key_id=public_key_hex[-32:] # Last 32 hex chars (16 bytes) as short ID + ) + + +def sign_data(data: str, private_key_hex: str) -> str: + """ + Sign data with a private key. + + Args: + data: The data to sign (will be UTF-8 encoded) + private_key_hex: Hex-encoded private key from generate_signing_keypair + + Returns: + Hex-encoded signature + """ + private_bytes = bytes.fromhex(private_key_hex) + private_key = Ed25519PrivateKey.from_private_bytes(private_bytes) + + signature = private_key.sign(data.encode('utf-8')) + return signature.hex() + + +def verify_signature(data: str, signature_hex: str, public_key_hex: str) -> bool: + """ + Verify a signature against data and public key. + + Args: + data: The original data that was signed + signature_hex: Hex-encoded signature + public_key_hex: Hex-encoded public key + + Returns: + True if signature is valid, False otherwise + """ + try: + public_bytes = bytes.fromhex(public_key_hex) + public_key = Ed25519PublicKey.from_public_bytes(public_bytes) + + signature = bytes.fromhex(signature_hex) + public_key.verify(signature, data.encode('utf-8')) + return True + except Exception: + return False + + +def key_id_from_public_key(public_key_hex: str) -> str: + """ + Extract the keyId from a public key hex string. 
+ + Args: + public_key_hex: Hex-encoded public key + + Returns: + Short key ID (last 32 hex chars) + """ + return public_key_hex[-32:] diff --git a/plugins/web4-governance/governance/soft_lct.py b/plugins/web4-governance/governance/soft_lct.py new file mode 100644 index 0000000000..6c93607d6a --- /dev/null +++ b/plugins/web4-governance/governance/soft_lct.py @@ -0,0 +1,209 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Soft LCT +# https://github.com/dp-web4/web4 +""" +Software-only Linked Context Token (Soft LCT). + +This is a lightweight implementation of LCT for environments without +TPM or hardware security modules. It provides: +- Machine + user derived identity +- Stable identity across sessions on same machine +- Clear indication of binding type (software vs hardware) + +Trust interpretation is up to the relying party. A soft LCT indicates +the session was initiated from a particular machine/user combination, +but without hardware attestation. + +For hardware-bound LCTs with TPM attestation, see Hardbound. +""" + +import os +import hashlib +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional, Dict + +from .ledger import Ledger + + +class SoftLCT: + """Software-bound Linked Context Token.""" + + def __init__(self, ledger: Optional[Ledger] = None): + """ + Initialize soft LCT manager. + + Args: + ledger: Ledger instance for persistence. Creates one if not provided. 
+ """ + self.ledger = ledger or Ledger() + self._lct_cache: Optional[Dict] = None + + def _get_machine_hash(self) -> str: + """Get stable machine identifier hash.""" + # Use nodename as primary identifier + # In full implementation, could include more hardware identifiers + try: + nodename = os.uname().nodename + except AttributeError: + # Windows fallback + import socket + nodename = socket.gethostname() + + return hashlib.sha256(nodename.encode()).hexdigest()[:16] + + def _get_user_hash(self) -> str: + """Get stable user identifier hash.""" + try: + uid = str(os.getuid()) + except AttributeError: + # Windows fallback + uid = os.environ.get("USERNAME", "unknown") + + return hashlib.sha256(uid.encode()).hexdigest()[:16] + + def _generate_lct_id(self) -> str: + """Generate a new soft LCT ID.""" + machine = self._get_machine_hash() + user = self._get_user_hash() + timestamp = datetime.now(timezone.utc).isoformat() + + # Create unique token combining machine, user, and time + seed = f"{machine}:{user}:{timestamp}" + token_hash = hashlib.sha256(seed.encode()).hexdigest()[:12] + + # LCT format: web4:soft::: + return f"web4:soft:{machine[:8]}:{user[:8]}:{token_hash}" + + def get_or_create(self) -> Dict: + """ + Get existing LCT for this machine/user or create new one. + + Returns the same LCT for the same machine/user combination, + providing stable identity across sessions. 
+ """ + if self._lct_cache: + return self._lct_cache + + machine_hash = self._get_machine_hash() + user_hash = self._get_user_hash() + + # Check for existing identity + existing = self._find_existing_identity(machine_hash, user_hash) + if existing: + self._lct_cache = existing + return existing + + # Create new identity + lct_id = self._generate_lct_id() + now = datetime.now(timezone.utc).isoformat() + "Z" + + identity = { + "lct_id": lct_id, + "machine_hash": machine_hash, + "user_hash": user_hash, + "binding": "software", + "created_at": now, + "trust_note": "Software-bound identity without hardware attestation" + } + + # Register in ledger + self.ledger.register_identity( + lct_id=lct_id, + machine_hash=machine_hash, + user_hash=user_hash, + binding="software", + metadata={"trust_note": identity["trust_note"]} + ) + + self._lct_cache = identity + return identity + + def _find_existing_identity(self, machine_hash: str, user_hash: str) -> Optional[Dict]: + """Find existing identity for machine/user combination.""" + # Query ledger for matching identity + # In SQLite, we need to search - this is a simplified implementation + import sqlite3 + + with sqlite3.connect(self.ledger.db_path) as conn: + conn.row_factory = sqlite3.Row + row = conn.execute(""" + SELECT * FROM identities + WHERE machine_hash = ? AND user_hash = ? 
AND binding = 'software' + ORDER BY created_at DESC LIMIT 1 + """, (machine_hash, user_hash)).fetchone() + + if row: + identity = dict(row) + identity["trust_note"] = "Software-bound identity without hardware attestation" + return identity + + return None + + def get_current(self) -> Optional[Dict]: + """Get current LCT without creating if none exists.""" + if self._lct_cache: + return self._lct_cache + + machine_hash = self._get_machine_hash() + user_hash = self._get_user_hash() + + return self._find_existing_identity(machine_hash, user_hash) + + def get_token_id(self) -> str: + """Get the LCT ID string.""" + identity = self.get_or_create() + return identity["lct_id"] + + def verify_local(self) -> Dict: + """ + Verify this is the expected machine/user. + + Returns verification result with confidence indicators. + """ + identity = self.get_current() + if not identity: + return { + "verified": False, + "reason": "no_identity", + "confidence": 0.0 + } + + current_machine = self._get_machine_hash() + current_user = self._get_user_hash() + + machine_match = identity["machine_hash"] == current_machine + user_match = identity["user_hash"] == current_user + + if machine_match and user_match: + return { + "verified": True, + "lct_id": identity["lct_id"], + "binding": "software", + "confidence": 0.7, # Software binding = moderate confidence + "note": "Verified against local machine/user. No hardware attestation." + } + else: + return { + "verified": False, + "reason": "mismatch", + "machine_match": machine_match, + "user_match": user_match, + "confidence": 0.0 + } + + def to_header(self) -> str: + """ + Get LCT formatted for use in headers or logging. 
+ + Format: web4:soft:XXXXXXXX (shortened for display) + """ + lct_id = self.get_token_id() + # Return shortened version for display + parts = lct_id.split(":") + if len(parts) >= 5: + return f"{parts[0]}:{parts[1]}:{parts[4]}" + return lct_id diff --git a/plugins/web4-governance/governance/target_extraction.py b/plugins/web4-governance/governance/target_extraction.py new file mode 100644 index 0000000000..ed420fd5e5 --- /dev/null +++ b/plugins/web4-governance/governance/target_extraction.py @@ -0,0 +1,256 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Lightweight Governance - Multi-Target Extraction +# https://github.com/dp-web4/web4 +""" +Multi-Target Extraction for Bash Commands and Task Prompts. + +Extracts all identifiable targets (file paths, URLs, patterns) from +tool parameters for comprehensive audit trails and policy evaluation. + +Usage: + from governance.target_extraction import extract_targets, extract_target + + # Extract primary target + target = extract_target("Bash", {"command": "cat /etc/passwd"}) + + # Extract all targets (for multi-file operations) + targets = extract_targets("Bash", {"command": "rm -rf /tmp/a /tmp/b"}) + # Returns: ["/tmp/a", "/tmp/b"] +""" + +import re +from typing import Dict, Any, List, Optional + + +# Credential file patterns +CREDENTIAL_PATTERNS = [ + re.compile(r"\.env$", re.IGNORECASE), + re.compile(r"\.env\.[^/]+$", re.IGNORECASE), + re.compile(r"credentials\.[^/]+$", re.IGNORECASE), + re.compile(r"secrets?\.[^/]+$", re.IGNORECASE), + re.compile(r"\.aws/credentials$", re.IGNORECASE), + re.compile(r"\.ssh/id_[^/]+$", re.IGNORECASE), + re.compile(r"\.ssh/known_hosts$", re.IGNORECASE), + re.compile(r"\.netrc$", re.IGNORECASE), + re.compile(r"\.pgpass$", re.IGNORECASE), + re.compile(r"\.npmrc$", re.IGNORECASE), + re.compile(r"\.pypirc$", re.IGNORECASE), + re.compile(r"token[^/]*\.json$", re.IGNORECASE), + re.compile(r"auth[^/]*\.json$", re.IGNORECASE), + 
re.compile(r"apikey[^/]*$", re.IGNORECASE), + re.compile(r"\.docker/config\.json$", re.IGNORECASE), + re.compile(r"\.kube/config$", re.IGNORECASE), + re.compile(r"\.gnupg/", re.IGNORECASE), + re.compile(r"\.gpg/", re.IGNORECASE), +] + +# Memory file patterns +MEMORY_FILE_PATTERNS = [ + re.compile(r"MEMORY\.md$", re.IGNORECASE), + re.compile(r"memory\.md$", re.IGNORECASE), + re.compile(r"/memory/[^/]+\.md$", re.IGNORECASE), + re.compile(r"\.web4/.*memory", re.IGNORECASE), + re.compile(r"\.claude/.*memory", re.IGNORECASE), +] + + +def is_credential_target(target: Optional[str]) -> bool: + """ + Check if a target path matches credential file patterns. + + Args: + target: File path or target string + + Returns: + True if target matches a credential pattern + """ + if not target: + return False + return any(pattern.search(target) for pattern in CREDENTIAL_PATTERNS) + + +def is_memory_target(target: Optional[str]) -> bool: + """ + Check if a target path matches memory file patterns. + + Args: + target: File path or target string + + Returns: + True if target matches a memory file pattern + """ + if not target: + return False + return any(pattern.search(target) for pattern in MEMORY_FILE_PATTERNS) + + +def extract_target(tool_name: str, params: Dict[str, Any]) -> Optional[str]: + """ + Extract the primary target from tool parameters. + + Args: + tool_name: Name of the tool (e.g., "Bash", "Read") + params: Tool parameters dictionary + + Returns: + Primary target string, or None if not identifiable + """ + if params.get("file_path"): + return str(params["file_path"]) + if params.get("path"): + return str(params["path"]) + if params.get("pattern"): + return str(params["pattern"]) + if params.get("command"): + cmd = str(params["command"]) + return cmd[:80] + "..." 
if len(cmd) > 80 else cmd + if params.get("url"): + return str(params["url"]) + return None + + +def extract_targets(tool_name: str, params: Dict[str, Any]) -> List[str]: + """ + Extract all targets from tool parameters for multi-file operations. + + Returns an array of all identifiable targets (paths, patterns, URLs). + Useful for comprehensive audit trails and policy evaluation. + + Args: + tool_name: Name of the tool + params: Tool parameters dictionary + + Returns: + List of unique target strings + """ + targets: List[str] = [] + + # Direct file paths + if params.get("file_path"): + targets.append(str(params["file_path"])) + if params.get("path"): + targets.append(str(params["path"])) + + # Glob patterns (may match multiple files) + if params.get("pattern"): + targets.append(str(params["pattern"])) + + # URLs + if params.get("url"): + targets.append(str(params["url"])) + + # Bash commands - extract file paths from command string + if params.get("command") and tool_name == "Bash": + cmd = str(params["command"]) + extracted = _extract_paths_from_command(cmd) + targets.extend(extracted) + + # Task tool - check for file references in prompt + if params.get("prompt") and tool_name == "Task": + prompt = str(params["prompt"]) + extracted = _extract_paths_from_text(prompt) + targets.extend(extracted) + + # Grep tool - may have additional glob context + if params.get("glob") and tool_name == "Grep": + targets.append(str(params["glob"])) + + # Deduplicate and return + return list(dict.fromkeys(targets)) # Preserves order, removes duplicates + + +def _extract_paths_from_command(cmd: str) -> List[str]: + """ + Extract file paths from a bash command string. + + Identifies common path patterns in commands. 
+ """ + paths: List[str] = [] + + # Match absolute paths + for match in re.finditer(r"(?:^|\s)(/[^\s;|&<>'\"]+)", cmd): + path = match.group(1) + # Filter out common non-file paths + if not any(path.startswith(prefix) for prefix in ["/dev/", "/proc/", "/sys/"]): + paths.append(path) + + # Match relative paths with common extensions + for match in re.finditer(r"(?:^|\s)(\.{0,2}/[^\s;|&<>'\"]+\.[a-zA-Z0-9]+)", cmd): + paths.append(match.group(1)) + + # Match home directory paths + for match in re.finditer(r"(?:^|\s)(~/[^\s;|&<>'\"]+)", cmd): + paths.append(match.group(1)) + + return paths + + +def _extract_paths_from_text(text: str) -> List[str]: + """ + Extract file paths mentioned in text (e.g., Task prompts). + + Looks for path-like patterns in quotes, backticks, or standalone. + """ + paths: List[str] = [] + + # Match paths in backticks or quotes + for match in re.finditer(r"[`\"']([/~][^`\"'\s]+)[`\"']", text): + paths.append(match.group(1)) + + # Match standalone absolute paths with extensions + for match in re.finditer(r"\s(/[^\s,;:]+\.[a-zA-Z0-9]+)", text): + paths.append(match.group(1)) + + return paths + + +# Tool category mapping +TOOL_CATEGORIES = { + "Read": "file_read", + "Glob": "file_read", + "Grep": "file_read", + "Write": "file_write", + "Edit": "file_write", + "NotebookEdit": "file_write", + "Bash": "command", + "WebFetch": "network", + "WebSearch": "network", + "Task": "delegation", + "TodoWrite": "state", +} + + +def classify_tool(tool_name: str) -> str: + """ + Classify a tool into a category. + + Args: + tool_name: Name of the tool + + Returns: + Category string (file_read, file_write, command, network, delegation, state, unknown) + """ + return TOOL_CATEGORIES.get(tool_name, "unknown") + + +def classify_tool_with_target(tool_name: str, target: Optional[str]) -> str: + """ + Classify tool with target context - may upgrade to credential_access + if the target matches credential file patterns. 
+ + Args: + tool_name: Name of the tool + target: Target file path or string + + Returns: + Category string, possibly upgraded to "credential_access" + """ + base_category = TOOL_CATEGORIES.get(tool_name, "unknown") + + # Upgrade file_read/file_write to credential_access if target matches patterns + if base_category in ("file_read", "file_write") and is_credential_target(target): + return "credential_access" + + return base_category diff --git a/plugins/web4-governance/governance/tensors.py b/plugins/web4-governance/governance/tensors.py new file mode 100644 index 0000000000..26a3a84f9e --- /dev/null +++ b/plugins/web4-governance/governance/tensors.py @@ -0,0 +1,486 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Tensor Definitions +# https://github.com/dp-web4/web4 +""" +Canonical T3/V3 Tensor Structure. + +The tensors are FRACTAL: +- Base: 3 dimensions each +- Each dimension expands to implementation-specific subdimensions +- Full implementation: RDF-bound links to LCTs for roles, entities + +## T3 Trust Tensor (Base 3D) + +Per spec (t3-v3-tensors.md): +- Talent: Role-specific capability, natural aptitude +- Training: Role-specific expertise, learned skills +- Temperament: Role-contextual reliability, consistency + +## V3 Value Tensor (Base 3D) + +Per spec: +- Valuation: Subjective worth, perceived value +- Veracity: Objective accuracy, truthfulness +- Validity: Confirmed transfer, actual delivery + +## Subdimension Mapping + +This implementation expands each base dimension into 2 subdimensions +for finer-grained tracking: + +T3: + Talent → competence (can do), alignment (values fit) + Training → lineage (history), witnesses (validation) + Temperament → reliability (consistency), consistency (quality) + +V3: + Valuation → reputation (perception), contribution (value added) + Veracity → stewardship (care), energy (effort) + Validity → network (reach), temporal (time-based) + +## Role Context + +CRITICAL: T3/V3 tensors 
from dataclasses import dataclass, field
from typing import Optional, Dict, List
from datetime import datetime, timezone


# =============================================================================
# Base 3D Tensors (Canonical)
# =============================================================================

@dataclass
class T3Base:
    """
    T3 Trust Tensor - Base 3 Dimensions.

    Per Web4 spec, these are the canonical dimensions:
    - Talent: Role-specific capability
    - Training: Role-specific expertise
    - Temperament: Role-contextual reliability
    """
    talent: float = 0.5
    training: float = 0.5
    temperament: float = 0.5

    def composite(self) -> float:
        """
        Weighted composite score per spec.
        Formula: talent * 0.3 + training * 0.4 + temperament * 0.3
        """
        return self.talent * 0.3 + self.training * 0.4 + self.temperament * 0.3

    def level(self) -> str:
        """Categorical trust level derived from the composite score."""
        score = self.composite()
        if score >= 0.8:
            return "high"
        elif score >= 0.6:
            return "medium-high"
        elif score >= 0.4:
            return "medium"
        elif score >= 0.2:
            return "low"
        return "minimal"


@dataclass
class V3Base:
    """
    V3 Value Tensor - Base 3 Dimensions.

    Per Web4 spec, these are the canonical dimensions:
    - Valuation: Subjective worth
    - Veracity: Objective accuracy
    - Validity: Confirmed transfer
    """
    valuation: float = 0.5
    veracity: float = 0.5
    validity: float = 0.5

    def composite(self) -> float:
        """Unweighted mean of the three base dimensions."""
        return (self.valuation + self.veracity + self.validity) / 3


# =============================================================================
# Subdimension Expansions
# =============================================================================

@dataclass
class TalentSubdims:
    """Subdimensions of Talent."""
    competence: float = 0.5  # Can they do it?
    alignment: float = 0.5   # Values match context?

    def aggregate(self) -> float:
        """Mean of the two subdimensions."""
        return (self.competence + self.alignment) / 2


@dataclass
class TrainingSubdims:
    """Subdimensions of Training."""
    lineage: float = 0.5    # Track record / history
    witnesses: float = 0.5  # Corroborated by others?

    def aggregate(self) -> float:
        """Mean of the two subdimensions."""
        return (self.lineage + self.witnesses) / 2


@dataclass
class TemperamentSubdims:
    """Subdimensions of Temperament."""
    reliability: float = 0.5  # Will they do it consistently?
    consistency: float = 0.5  # Same quality over time?

    def aggregate(self) -> float:
        """Mean of the two subdimensions."""
        return (self.reliability + self.consistency) / 2


@dataclass
class ValuationSubdims:
    """Subdimensions of Valuation."""
    reputation: float = 0.5    # External perception
    contribution: float = 0.5  # Value added

    def aggregate(self) -> float:
        """Mean of the two subdimensions."""
        return (self.reputation + self.contribution) / 2


@dataclass
class VeracitySubdims:
    """Subdimensions of Veracity."""
    stewardship: float = 0.5  # Care for resources
    energy: float = 0.5       # Effort invested

    def aggregate(self) -> float:
        """Mean of the two subdimensions."""
        return (self.stewardship + self.energy) / 2


@dataclass
class ValiditySubdims:
    """Subdimensions of Validity."""
    network: float = 0.5   # Connections / reach
    temporal: float = 0.5  # Time-based accumulation

    def aggregate(self) -> float:
        """Mean of the two subdimensions."""
        return (self.network + self.temporal) / 2


# =============================================================================
# Full Fractal Tensors
# =============================================================================

@dataclass
class T3Tensor:
    """
    Full T3 Trust Tensor with subdimensions.

    Structure:
        T3 (base 3D)
        ├── Talent
        │   ├── competence
        │   └── alignment
        ├── Training
        │   ├── lineage
        │   └── witnesses
        └── Temperament
            ├── reliability
            └── consistency

    Base dimensions are computed as aggregates of their subdimensions;
    only the subdimensions are stored.
    """
    # Subdimensions (the 6D flattened view)
    talent_sub: TalentSubdims = field(default_factory=TalentSubdims)
    training_sub: TrainingSubdims = field(default_factory=TrainingSubdims)
    temperament_sub: TemperamentSubdims = field(default_factory=TemperamentSubdims)

    # Role context (for full implementation)
    role: Optional[str] = None
    entity: Optional[str] = None

    @property
    def talent(self) -> float:
        """Base Talent dimension (aggregate of subdimensions)."""
        return self.talent_sub.aggregate()

    @property
    def training(self) -> float:
        """Base Training dimension (aggregate of subdimensions)."""
        return self.training_sub.aggregate()

    @property
    def temperament(self) -> float:
        """Base Temperament dimension (aggregate of subdimensions)."""
        return self.temperament_sub.aggregate()

    def base(self) -> T3Base:
        """Get base 3D tensor."""
        return T3Base(
            talent=self.talent,
            training=self.training,
            temperament=self.temperament
        )

    def composite(self) -> float:
        """Weighted composite per spec (same weights as T3Base)."""
        return self.talent * 0.3 + self.training * 0.4 + self.temperament * 0.3

    def level(self) -> str:
        """Categorical trust level."""
        return self.base().level()

    # Convenience accessors for 6D flattened view
    @property
    def competence(self) -> float:
        return self.talent_sub.competence

    @property
    def alignment(self) -> float:
        return self.talent_sub.alignment

    @property
    def lineage(self) -> float:
        return self.training_sub.lineage

    @property
    def witnesses(self) -> float:
        return self.training_sub.witnesses

    @property
    def reliability(self) -> float:
        return self.temperament_sub.reliability

    @property
    def consistency(self) -> float:
        return self.temperament_sub.consistency

    def update_from_outcome(self, success: bool, is_novel: bool = False):
        """
        Update tensor from outcome per Web4 spec.

        | Outcome         | Talent Impact | Training Impact | Temperament Impact |
        |-----------------|---------------|-----------------|--------------------|
        | Novel Success   | +0.02 to +0.05| +0.01 to +0.02  | +0.01              |
        | Standard Success| 0             | +0.005 to +0.01 | +0.005             |
        | Failure         | -0.02         | -0.01           | -0.02              |
        """
        # PEP 8 (E731): a named helper instead of assigning a lambda.
        def _clamp(v: float) -> float:
            """Clamp a score into [0.0, 1.0]."""
            return max(0.0, min(1.0, v))

        if success:
            if is_novel:
                # Novel success: all dimensions improve
                self.talent_sub.competence = _clamp(self.talent_sub.competence + 0.03)
                self.talent_sub.alignment = _clamp(self.talent_sub.alignment + 0.02)
                self.training_sub.lineage = _clamp(self.training_sub.lineage + 0.015)
                self.training_sub.witnesses = _clamp(self.training_sub.witnesses + 0.01)
                self.temperament_sub.reliability = _clamp(self.temperament_sub.reliability + 0.01)
                self.temperament_sub.consistency = _clamp(self.temperament_sub.consistency + 0.01)
            else:
                # Standard success: training and temperament improve
                self.training_sub.lineage = _clamp(self.training_sub.lineage + 0.008)
                self.training_sub.witnesses = _clamp(self.training_sub.witnesses + 0.005)
                self.temperament_sub.reliability = _clamp(self.temperament_sub.reliability + 0.005)
                self.temperament_sub.consistency = _clamp(self.temperament_sub.consistency + 0.005)
        else:
            # Failure: all dimensions decrease
            self.talent_sub.competence = _clamp(self.talent_sub.competence - 0.02)
            self.talent_sub.alignment = _clamp(self.talent_sub.alignment - 0.01)
            self.training_sub.lineage = _clamp(self.training_sub.lineage - 0.01)
            self.training_sub.witnesses = _clamp(self.training_sub.witnesses - 0.01)
            self.temperament_sub.reliability = _clamp(self.temperament_sub.reliability - 0.02)
            self.temperament_sub.consistency = _clamp(self.temperament_sub.consistency - 0.02)

    def to_dict(self) -> dict:
        """Serialize to dict (6D flattened view for compatibility)."""
        return {
            # Base dimensions (computed)
            "talent": self.talent,
            "training": self.training,
            "temperament": self.temperament,
            # Subdimensions (stored)
            "competence": self.competence,
            "alignment": self.alignment,
            "lineage": self.lineage,
            "witnesses": self.witnesses,
            "reliability": self.reliability,
            "consistency": self.consistency,
            # Context
            "role": self.role,
            "entity": self.entity,
            # Computed
            "composite": self.composite(),
            "level": self.level(),
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'T3Tensor':
        """Deserialize from dict (only subdimension keys are read back)."""
        tensor = cls(
            role=data.get("role"),
            entity=data.get("entity"),
        )
        # Load subdimensions
        if "competence" in data:
            tensor.talent_sub.competence = data["competence"]
        if "alignment" in data:
            tensor.talent_sub.alignment = data["alignment"]
        if "lineage" in data:
            tensor.training_sub.lineage = data["lineage"]
        if "witnesses" in data:
            tensor.training_sub.witnesses = data["witnesses"]
        if "reliability" in data:
            tensor.temperament_sub.reliability = data["reliability"]
        if "consistency" in data:
            tensor.temperament_sub.consistency = data["consistency"]
        return tensor

    @classmethod
    def from_6d(cls, competence: float = 0.5, reliability: float = 0.5,
                consistency: float = 0.5, witnesses: float = 0.5,
                lineage: float = 0.5, alignment: float = 0.5,
                role: Optional[str] = None, entity: Optional[str] = None) -> 'T3Tensor':
        """Create from legacy 6D format."""
        tensor = cls(role=role, entity=entity)
        tensor.talent_sub.competence = competence
        tensor.talent_sub.alignment = alignment
        tensor.training_sub.lineage = lineage
        tensor.training_sub.witnesses = witnesses
        tensor.temperament_sub.reliability = reliability
        tensor.temperament_sub.consistency = consistency
        return tensor
@dataclass
class V3Tensor:
    """
    Full V3 Value Tensor with subdimensions.

    Structure:
        V3 (base 3D)
        ├── Valuation
        │   ├── reputation
        │   └── contribution
        ├── Veracity
        │   ├── stewardship
        │   └── energy
        └── Validity
            ├── network
            └── temporal
    """
    valuation_sub: ValuationSubdims = field(default_factory=ValuationSubdims)
    veracity_sub: VeracitySubdims = field(default_factory=VeracitySubdims)
    validity_sub: ValiditySubdims = field(default_factory=ValiditySubdims)

    @property
    def valuation(self) -> float:
        """Base Valuation dimension (aggregate of subdimensions)."""
        return self.valuation_sub.aggregate()

    @property
    def veracity(self) -> float:
        """Base Veracity dimension (aggregate of subdimensions)."""
        return self.veracity_sub.aggregate()

    @property
    def validity(self) -> float:
        """Base Validity dimension (aggregate of subdimensions)."""
        return self.validity_sub.aggregate()

    def base(self) -> V3Base:
        """Collapse to the canonical base 3D tensor."""
        return V3Base(
            valuation=self.valuation,
            veracity=self.veracity,
            validity=self.validity
        )

    def composite(self) -> float:
        """Unweighted mean of the three base dimensions."""
        return (self.valuation + self.veracity + self.validity) / 3

    # Convenience accessors for the 6D flattened view
    @property
    def reputation(self) -> float:
        return self.valuation_sub.reputation

    @property
    def contribution(self) -> float:
        return self.valuation_sub.contribution

    @property
    def stewardship(self) -> float:
        return self.veracity_sub.stewardship

    @property
    def energy(self) -> float:
        return self.veracity_sub.energy

    @property
    def network(self) -> float:
        return self.validity_sub.network

    @property
    def temporal(self) -> float:
        return self.validity_sub.temporal


# =============================================================================
# Migration Helpers
# =============================================================================

def migrate_legacy_t3(legacy: dict) -> T3Tensor:
    """
    Migrate a legacy 6D T3 dict to the fractal tensor.

    Legacy keys: competence, reliability, consistency, witnesses,
    lineage, alignment (each defaults to 0.5 when absent).

    Mapping:
        Talent      ← competence, alignment
        Training    ← lineage, witnesses
        Temperament ← reliability, consistency
    """
    keys = ("competence", "reliability", "consistency",
            "witnesses", "lineage", "alignment")
    return T3Tensor.from_6d(**{k: legacy.get(k, 0.5) for k in keys})


def migrate_legacy_v3(legacy: dict) -> V3Tensor:
    """
    Migrate a legacy 6D V3 dict to the fractal tensor.

    Legacy keys: energy, contribution, stewardship, network,
    reputation, temporal (each defaults to 0.5 when absent).

    Mapping:
        Valuation ← reputation, contribution
        Veracity  ← stewardship, energy
        Validity  ← network, temporal
    """
    tensor = V3Tensor()
    slots = (
        (tensor.valuation_sub, "reputation"),
        (tensor.valuation_sub, "contribution"),
        (tensor.veracity_sub, "stewardship"),
        (tensor.veracity_sub, "energy"),
        (tensor.validity_sub, "network"),
        (tensor.validity_sub, "temporal"),
    )
    for sub, name in slots:
        setattr(sub, name, legacy.get(name, 0.5))
    return tensor
import os
import json
import hashlib
from typing import Optional, Dict, Any, List

# Environment override: force the pure-Python backend even when the Rust
# extension is importable.
FORCE_PYTHON_BACKEND = os.environ.get('WEB4_FORCE_PYTHON_BACKEND', '').lower() in ('true', '1', 'yes')

# Try to import Rust backend
RUST_BACKEND = False
_backend_error = None

if not FORCE_PYTHON_BACKEND:
    try:
        from web4_trust import (
            EntityTrust as _RustEntityTrust,
            T3Tensor as _RustT3Tensor,
            V3Tensor as _RustV3Tensor,
            TrustStore as _RustTrustStore,
            create_memory_store as _rust_create_memory_store,
        )
        RUST_BACKEND = True
    except ImportError as e:
        _backend_error = str(e)

# Import Python fallback
from .entity_trust import EntityTrust as _PyEntityTrust


# The Python implementation doesn't have separate tensor classes;
# these thin wrappers exist only for API compatibility with the Rust backend.
class _PyT3Tensor:
    """Python fallback T3 Tensor wrapper (flat 6D view)."""

    def __init__(self, competence=0.5, reliability=0.5, consistency=0.5,
                 witnesses=0.5, lineage=0.5, alignment=0.5):
        self.competence = competence
        self.reliability = reliability
        self.consistency = consistency
        self.witnesses = witnesses
        self.lineage = lineage
        self.alignment = alignment

    def average(self):
        """Unweighted mean of the six dimensions."""
        return (self.competence + self.reliability + self.consistency +
                self.witnesses + self.lineage + self.alignment) / 6.0

    @classmethod
    def neutral(cls):
        """All dimensions at the 0.5 neutral prior."""
        return cls()


class _PyV3Tensor:
    """Python fallback V3 Tensor wrapper (flat 6D view)."""

    def __init__(self, energy=0.5, contribution=0.5, stewardship=0.5,
                 network=0.5, reputation=0.5, temporal=0.5):
        self.energy = energy
        self.contribution = contribution
        self.stewardship = stewardship
        self.network = network
        self.reputation = reputation
        self.temporal = temporal

    def average(self):
        """Unweighted mean of the six dimensions."""
        return (self.energy + self.contribution + self.stewardship +
                self.network + self.reputation + self.temporal) / 6.0

    @classmethod
    def neutral(cls):
        """All dimensions at the 0.5 neutral prior."""
        return cls()


# Select backend: export the Rust classes when available, else the fallbacks.
if RUST_BACKEND:
    EntityTrust = _RustEntityTrust
    T3Tensor = _RustT3Tensor
    V3Tensor = _RustV3Tensor
    TrustStore = _RustTrustStore

    def create_memory_store():
        """Create an in-memory trust store (Rust backend)."""
        return _rust_create_memory_store()
else:
    EntityTrust = _PyEntityTrust
    T3Tensor = _PyT3Tensor
    V3Tensor = _PyV3Tensor

    # Python fallback for TrustStore
    class TrustStore:
        """Simple file-based trust store (Python fallback).

        Entities are cached in memory and persisted as one JSON file per
        entity under ``base_dir``, named by a truncated SHA-256 of the id.
        """

        def __init__(self, base_dir: str):
            self.base_dir = base_dir
            self._entities: Dict[str, Any] = {}
            os.makedirs(base_dir, exist_ok=True)
            self._load_existing()

        def _load_existing(self):
            """Load existing entities from disk (best effort)."""
            if not os.path.exists(self.base_dir):
                return

            for filename in os.listdir(self.base_dir):
                if not filename.endswith('.json'):
                    continue
                filepath = os.path.join(self.base_dir, filename)
                try:
                    with open(filepath, 'r') as f:
                        data = json.load(f)
                    entity_id = data.get('entity_id')
                    if entity_id:
                        entity = _PyEntityTrust(entity_id)
                        # NOTE(review): assumes from_dict mutates the
                        # instance in place — confirm against entity_trust.
                        entity.from_dict(data)
                        self._entities[entity_id] = entity
                except Exception:
                    # Deliberate best-effort: one corrupt file must not
                    # prevent the rest of the store from loading.
                    pass

        def _entity_path(self, entity_id: str) -> str:
            """Get the persistence file path for an entity id."""
            hash_id = hashlib.sha256(entity_id.encode()).hexdigest()[:16]
            return os.path.join(self.base_dir, f"{hash_id}.json")

        def get(self, entity_id: str) -> Any:
            """Get entity by ID; raises KeyError when absent."""
            if entity_id in self._entities:
                return self._entities[entity_id]
            raise KeyError(f"Entity not found: {entity_id}")

        def get_or_create(self, entity_id: str) -> Any:
            """Get entity or create (in memory only) if not exists."""
            if entity_id not in self._entities:
                self._entities[entity_id] = _PyEntityTrust(entity_id)
            return self._entities[entity_id]

        def save(self, entity: Any):
            """Save entity to the in-memory cache and to disk."""
            self._entities[entity.entity_id] = entity
            filepath = self._entity_path(entity.entity_id)
            with open(filepath, 'w') as f:
                json.dump(entity.to_dict(), f, indent=2, default=str)

        def delete(self, entity_id: str) -> bool:
            """Delete entity from store; returns True when it existed."""
            if entity_id in self._entities:
                del self._entities[entity_id]
                filepath = self._entity_path(entity_id)
                if os.path.exists(filepath):
                    os.remove(filepath)
                return True
            return False

        def exists(self, entity_id: str) -> bool:
            """Check if entity exists."""
            return entity_id in self._entities

        def list_entities(self) -> List[str]:
            """List all entity IDs."""
            return list(self._entities.keys())

        def count(self) -> int:
            """Count entities in store."""
            return len(self._entities)

    def create_memory_store():
        """Create a throwaway trust store in a temp dir (Python fallback)."""
        import tempfile
        return TrustStore(tempfile.mkdtemp())


def get_backend_info() -> Dict[str, Any]:
    """Get information about the active backend."""
    return {
        "backend": "rust" if RUST_BACKEND else "python",
        "version": "0.1.0",
        "rust_available": RUST_BACKEND,
        "forced_python": FORCE_PYTHON_BACKEND,
        "error": _backend_error if not RUST_BACKEND else None,
    }


def verify_backend() -> bool:
    """Smoke-test the selected backend's basic operations."""
    try:
        # Create test entity
        e = EntityTrust("test:verify")
        e.update_from_outcome(True, 0.1)

        # Verify basic operations
        assert e.entity_id == "test:verify", f"entity_id mismatch: {e.entity_id}"

        # t3_average may be < 0.5 due to decay or update mechanics
        t3_avg = e.t3_average()
        assert 0.0 <= t3_avg <= 1.0, f"t3_average out of range: {t3_avg}"

        # trust_level returns lowercase string
        level = e.trust_level()
        valid_levels = ["very_low", "low", "medium", "high", "very_high"]
        assert level.lower() in valid_levels, f"trust_level invalid: {level}"

        return True
    except Exception as ex:
        print(f"Backend verification failed: {ex}")
        return False


# Export all
__all__ = [
    'EntityTrust',
    'T3Tensor',
    'V3Tensor',
    'TrustStore',
    'create_memory_store',
    'get_backend_info',
    'verify_backend',
    'RUST_BACKEND',
]
+ """ + + def __init__(self, session_id: str, ledger: Optional[Ledger] = None): + self.session_id = session_id + self.ledger = ledger or Ledger() + self._last_entry: Optional[dict] = None + self._loaded = False + + def _load(self): + """Load last entry from database.""" + if self._loaded: + return + + self._last_entry = self.ledger.get_last_heartbeat(self.session_id) + self._loaded = True + + def record(self, tool_name: str, action_index: int) -> dict: + """ + Record a heartbeat for a tool call. + + Args: + tool_name: Name of the tool being called + action_index: Sequential action number + + Returns: + Heartbeat entry dict + """ + self._load() + + now = datetime.now() + timestamp = now.isoformat() + + if self._last_entry is None: + # First heartbeat + status = "initial" + delta_seconds = 0.0 + previous_hash = "" + sequence = 1 + else: + # Calculate timing + last_time = datetime.fromisoformat(self._last_entry['timestamp']) + elapsed = now - last_time + delta_seconds = elapsed.total_seconds() + + # Classify timing + expected = EXPECTED_INTERVAL.total_seconds() + min_interval = expected * (1 - JITTER_TOLERANCE) + max_interval = expected * (1 + JITTER_TOLERANCE) + + if delta_seconds < min_interval: + status = "early" + elif delta_seconds <= max_interval: + status = "on_time" + elif delta_seconds <= expected * 3: + status = "late" + else: + status = "gap" + + previous_hash = self._last_entry['entry_hash'] + sequence = self._last_entry['sequence'] + 1 + + # Compute entry hash + hash_input = f"{self.session_id}:{timestamp}:{previous_hash}:{sequence}" + entry_hash = hashlib.sha256(hash_input.encode()).hexdigest()[:32] + + entry = { + 'sequence': sequence, + 'timestamp': timestamp, + 'status': status, + 'delta_seconds': round(delta_seconds, 2), + 'tool_name': tool_name, + 'action_index': action_index, + 'previous_hash': previous_hash, + 'entry_hash': entry_hash + } + + # Store in database + self.ledger.record_heartbeat( + session_id=self.session_id, + sequence=sequence, + 
timestamp=timestamp, + status=status, + delta_seconds=round(delta_seconds, 2), + tool_name=tool_name, + action_index=action_index, + previous_hash=previous_hash, + entry_hash=entry_hash + ) + + # Record presence heartbeat for session entity + # This enables "silence as signal" detection + try: + tracker = get_tracker() + tracker.heartbeat(f"session:{self.session_id}") + except Exception: + pass # Don't fail heartbeat if presence tracking fails + + self._last_entry = entry + return entry + + def timing_coherence(self, window: int = 10) -> float: + """ + Compute timing coherence score. + + Returns [0.0, 1.0] based on how regular the heartbeats are. + """ + entries = self.ledger.get_heartbeats(self.session_id, limit=window) + + if len(entries) < 2: + return 1.0 + + # Score each entry + scores = [] + for entry in entries: + status = entry['status'] + if status == 'initial': + scores.append(1.0) + elif status == 'on_time': + scores.append(1.0) + elif status == 'early': + scores.append(0.8) # Slightly suspicious + elif status == 'late': + scores.append(0.7) + elif status == 'gap': + scores.append(0.3) + else: + scores.append(0.5) + + # Weighted average (recent scores weighted higher) + total_weight = 0.0 + weighted_sum = 0.0 + for i, score in enumerate(scores): + weight = (i + 1) / len(scores) + weighted_sum += score * weight + total_weight += weight + + return round(weighted_sum / total_weight, 3) if total_weight > 0 else 1.0 + + def verify_chain(self) -> Tuple[bool, Optional[str]]: + """Verify hash chain integrity.""" + entries = self.ledger.get_heartbeats(self.session_id) + + if not entries: + return (True, None) + + for i, entry in enumerate(entries): + if i == 0: + if entry.get('previous_hash'): + return (False, f"First entry has non-empty previous_hash") + continue + + prev = entries[i - 1] + + # Check sequence + if entry['sequence'] != prev['sequence'] + 1: + return (False, f"Sequence gap at {i}") + + # Check hash chain + if entry['previous_hash'] != 
prev['entry_hash']: + return (False, f"Hash chain broken at {i}") + + # Check timestamp order + if entry['timestamp'] <= prev['timestamp']: + return (False, f"Timestamp order violation at {i}") + + return (True, None) + + def summary(self) -> dict: + """Get heartbeat summary.""" + total = self.ledger.get_heartbeat_count(self.session_id) + status_counts = self.ledger.get_heartbeat_status_distribution(self.session_id) + last_entry = self.ledger.get_last_heartbeat(self.session_id) + + valid, error = self.verify_chain() + + return { + 'session_id': self.session_id, + 'total_heartbeats': total, + 'timing_coherence': self.timing_coherence(), + 'chain_valid': valid, + 'chain_error': error, + 'status_distribution': status_counts, + 'last_heartbeat': last_entry['timestamp'] if last_entry else None + } + + def get_recent(self, count: int = 10) -> List[dict]: + """Get recent heartbeat entries.""" + return self.ledger.get_heartbeats(self.session_id, limit=count) + + +def get_session_heartbeat(session_id: str) -> SessionHeartbeat: + """Get or create heartbeat tracker for session.""" + return SessionHeartbeat(session_id) diff --git a/plugins/web4-governance/hooks/hooks.json b/plugins/web4-governance/hooks/hooks.json new file mode 100644 index 0000000000..e23535b157 --- /dev/null +++ b/plugins/web4-governance/hooks/hooks.json @@ -0,0 +1,40 @@ +{ + "description": "Web4 Governance - R6 Workflow and Audit Trails", + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks/session_start.py", + "timeout": 10 + } + ] + } + ], + "PreToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks/pre_tool_use.py", + "timeout": 5 + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "${CLAUDE_PLUGIN_ROOT}/hooks/post_tool_use.py", + "timeout": 5 + } + ] + } + ] + } +} diff --git a/plugins/web4-governance/hooks/post_tool_use.py 
#!/usr/bin/env python3
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Web4 Contributors
#
# Web4 Governance Plugin - Post-Tool-Use Hook
# https://github.com/dp-web4/web4

"""
Web4 Post-Tool-Use Hook

Completes the R6 workflow with Result:

    R6 = Rules + Role + Request + Reference + Resource → **Result**

Creates an audit record that:
- Links to the R6 request (intent)
- Records outcome (success/error)
- Maintains provenance chain
- Enables after-the-fact verification

## Audit Record Schema

Each action produces a record with:
- request_id: Links to R6 request
- result_status: success/error
- result_hash: Hash of output (not output itself)
- timestamp: When completed
- chain_link: Hash of previous record (provenance)

This creates a verifiable chain of actions with structured intent.
"""

import json
import os
import sys
import hashlib
from datetime import datetime, timezone
from pathlib import Path

# Import agent governance (optional — hook degrades gracefully without it)
sys.path.insert(0, str(Path(__file__).parent.parent))
try:
    from governance import AgentGovernance, EntityTrustStore, PolicyRegistry
    GOVERNANCE_AVAILABLE = True
except ImportError:
    GOVERNANCE_AVAILABLE = False
    EntityTrustStore = None
    PolicyRegistry = None

WEB4_DIR = Path.home() / ".web4"
SESSION_DIR = WEB4_DIR / "sessions"
AUDIT_DIR = WEB4_DIR / "audit"


def load_session(session_id):
    """Load session state; returns None when no session file exists."""
    session_file = SESSION_DIR / f"{session_id}.json"
    if not session_file.exists():
        return None
    with open(session_file) as f:
        return json.load(f)


def save_session(session):
    """Save session state, creating the sessions directory if needed."""
    SESSION_DIR.mkdir(parents=True, exist_ok=True)
    session_file = SESSION_DIR / f"{session['session_id']}.json"
    with open(session_file, "w") as f:
        json.dump(session, f, indent=2)


def hash_content(content):
    """Create a short (16 hex char) SHA-256 hash of content for audit.

    Dicts are canonicalized via sorted-key JSON so equal dicts hash equally;
    None hashes to the literal string "null".
    """
    if content is None:
        return "null"
    if isinstance(content, dict):
        content = json.dumps(content, sort_keys=True)
    elif not isinstance(content, str):
        content = str(content)
    return hashlib.sha256(content.encode()).hexdigest()[:16]


def create_audit_record(session, r6_request, tool_output, tool_error):
    """
    Create audit record completing the R6 workflow.

    The audit record links intent (R6 request) to outcome (result).
    """
    # Determine result
    if tool_error:
        status = "error"
        result_hash = hash_content(str(tool_error))
    else:
        status = "success"
        result_hash = hash_content(tool_output)

    # Chain link for provenance
    prev_hash = session["audit_chain"][-1] if session["audit_chain"] else "genesis"

    record = {
        "record_id": r6_request["id"].replace("r6:", "audit:"),
        # BUGFIX: isoformat() on an aware datetime already emits "+00:00";
        # appending "Z" produced malformed stamps like "...+00:00Z".
        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),

        # Link to intent
        "r6_request_id": r6_request["id"],
        "tool": r6_request["request"]["tool"],
        "category": r6_request["request"]["category"],
        "target": r6_request["request"]["target"],

        # Result (R6 completion)
        "result": {
            "status": status,
            "output_hash": result_hash,
        },

        # Heartbeat timing (from R6 request)
        "heartbeat": r6_request.get("heartbeat", {}),

        # Provenance chain
        "provenance": {
            "session_id": session["session_id"],
            "session_token": session["token"]["token_id"],
            "action_index": r6_request["role"]["action_index"],
            "prev_record_hash": prev_hash
        }
    }

    # Compute this record's hash for the chain
    record["record_hash"] = hash_content(record)

    return record


def store_audit_record(session, record):
    """Append the audit record to the session's JSONL audit log."""
    AUDIT_DIR.mkdir(parents=True, exist_ok=True)

    # Session-specific audit log
    audit_file = AUDIT_DIR / f"{session['session_id']}.jsonl"

    with open(audit_file, "a") as f:
        f.write(json.dumps(record) + "\n")
Create audit record + record = create_audit_record(session, r6_request, tool_output, tool_error) + + # Handle agent completion (Task tool = agent delegation) + if r6_request["request"]["tool"] == "Task" and GOVERNANCE_AVAILABLE: + agent_name = session.get("active_agent") + if agent_name: + try: + gov = AgentGovernance() + success = tool_error is None + trust_update = gov.on_agent_complete(session_id, agent_name, success) + + # Add trust update to audit record + record["agent_completion"] = { + "agent_name": agent_name, + "success": success, + "trust_updated": trust_update.get("trust_updated", {}) + } + + # Clear active agent + session["active_agent"] = None + + except Exception as e: + record["agent_completion"] = {"error": str(e)} + + # Handle MCP tool completion - witness the MCP server + pending_mcp = session.get("pending_mcp") + if pending_mcp and GOVERNANCE_AVAILABLE and EntityTrustStore: + try: + store = EntityTrustStore() + success = tool_error is None + + # Session witnesses the MCP server + session_entity = f"session:{session_id}" + mcp_entity = pending_mcp["entity_id"] + + witness_trust, target_trust = store.witness( + session_entity, mcp_entity, success, magnitude=0.1 + ) + + # Add MCP witnessing to audit record + record["mcp_witnessed"] = { + "server": pending_mcp["server"], + "tool": pending_mcp["tool"], + "success": success, + "t3_after": round(target_trust.t3_average(), 3), + "trust_level": target_trust.trust_level(), + "action_count": target_trust.action_count + } + + # Clear pending MCP + session["pending_mcp"] = None + + except Exception as e: + record["mcp_witnessed"] = {"error": str(e)} + + # Handle policy witnessing - policy witnesses the allowed decision outcome + policy_entity_id = session.get("policy_entity_id") + policy_eval = r6_request.get("policy") + if policy_entity_id and GOVERNANCE_AVAILABLE and PolicyRegistry: + try: + registry = PolicyRegistry() + tool_name = r6_request["request"]["tool"] + success = tool_error is None + decision = 
policy_eval.get("decision", "allow") if policy_eval else "allow" + + # Policy witnesses this decision's outcome + registry.witness_decision( + policy_entity_id, + session["session_id"], + tool_name, + decision, + success=success, + ) + + # Add policy witnessing to audit record + record["policy_witnessed"] = { + "policy_entity_id": policy_entity_id, + "decision": decision, + "success": success, + "rule_id": policy_eval.get("rule_id") if policy_eval else None, + } + + except Exception as e: + record["policy_witnessed"] = {"error": str(e)} + + # Store audit record + store_audit_record(session, record) + + # Update session + session["r6_requests"].append(r6_request["id"]) + session["audit_chain"].append(record["record_hash"]) + session["pending_r6"] = None + save_session(session) + + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/plugins/web4-governance/hooks/pre_tool_use.py b/plugins/web4-governance/hooks/pre_tool_use.py new file mode 100755 index 0000000000..7d98cd3ecd --- /dev/null +++ b/plugins/web4-governance/hooks/pre_tool_use.py @@ -0,0 +1,697 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Governance Plugin - Pre-Tool-Use Hook +# https://github.com/dp-web4/web4 + +""" +Web4 Pre-Tool-Use Hook + +Implements R6 workflow formalism for every tool call: + + R6 = Rules + Role + Request + Reference + Resource → Result + +This creates a structured, auditable record of intent before execution. + +## R6 Framework + +1. **Rules** - What constraints apply to this action? +2. **Role** - Who is requesting? What's their context? +3. **Request** - What action is being requested? +4. **Reference** - What's the relevant history? +5. **Resource** - What resources are needed/consumed? +6. 
# ---------------------------------------------------------------------------
# pre_tool_use.py module body, part 1: imports, policy evaluation, git-push
# divergence check, session token / policy registration, lazy session init.
# (SPDX header and module docstring precede this section.)
# ---------------------------------------------------------------------------

import json
import os
import sys
import uuid
import hashlib
from datetime import datetime, timezone
from pathlib import Path

# Heartbeat tracker lives next to this hook; the hooks directory is on
# sys.path because Claude Code executes the hook script directly.
from heartbeat import get_session_heartbeat

# Make the plugin root importable so the `governance` package resolves.
sys.path.insert(0, str(Path(__file__).parent.parent))
try:
    from governance import (
        AgentGovernance,
        EntityTrustStore,
        PolicyRegistry,
        PolicyEntity,
        resolve_preset,
        is_preset_name,
        RateLimiter,
    )
    GOVERNANCE_AVAILABLE = True
except ImportError:
    # Governance package missing: the hook still records R6 intent, but all
    # policy / trust gating degrades to "allow".
    GOVERNANCE_AVAILABLE = False
    EntityTrustStore = None
    PolicyRegistry = None
    PolicyEntity = None
    RateLimiter = None

# Session-level rate limiter (memory-only, resets on interpreter restart).
_rate_limiter = None


def get_rate_limiter():
    """Return the process-wide RateLimiter, creating it lazily.

    Returns None when the governance package is unavailable.
    """
    global _rate_limiter
    if _rate_limiter is None and RateLimiter is not None:
        _rate_limiter = RateLimiter()
    return _rate_limiter


def _utc_now() -> str:
    """Current UTC time, ISO-8601 with a 'Z' suffix.

    Bug fix: an aware datetime's isoformat() already ends in '+00:00'; the
    previous code appended a literal 'Z' on top of that, producing invalid
    timestamps like '2025-01-01T00:00:00+00:00Z'.
    """
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")


def evaluate_policy(session, tool_name: str, category: str, target: str, full_command: str = None):
    """Evaluate a tool call against the session's policy entity.

    Args:
        session: Session dict carrying "policy_entity_id".
        tool_name: Name of the tool (e.g., "Bash", "Write").
        category: Tool category (e.g., "command", "file_write").
        target: Target of the operation (file path, command, URL).
        full_command: For Bash tools, the full command string (enables
            command_patterns matching); None otherwise.

    Returns:
        Tuple (decision, evaluation_dict) where decision is "allow", "deny"
        or "warn". Returns ("allow", None) if no policy applies or the
        governance package is unavailable; evaluation failures degrade to
        allow with the error recorded in the dict.
    """
    if not GOVERNANCE_AVAILABLE or PolicyRegistry is None:
        return "allow", None

    policy_entity_id = session.get("policy_entity_id")
    if not policy_entity_id:
        return "allow", None

    try:
        registry = PolicyRegistry()
        policy_entity = registry.get_policy(policy_entity_id)
        if not policy_entity:
            return "allow", None

        # Evaluate with the shared rate limiter so per-rule quotas apply.
        rate_limiter = get_rate_limiter()
        evaluation = policy_entity.evaluate(tool_name, category, target, rate_limiter, full_command)

        eval_dict = {
            "decision": evaluation.decision,
            "rule_id": evaluation.rule_id,
            "rule_name": evaluation.rule_name,
            "reason": evaluation.reason,
            "enforced": evaluation.enforced,
            "constraints": evaluation.constraints,
        }

        return evaluation.decision, eval_dict

    except Exception as e:
        # Fail open: governance must never break tool execution.
        return "allow", {"error": str(e)}


WEB4_DIR = Path.home() / ".web4"      # root of all Web4 state
SESSION_DIR = WEB4_DIR / "sessions"   # per-session JSON state files
R6_LOG_DIR = WEB4_DIR / "r6"          # daily JSONL logs of R6 requests


def check_git_push_divergence(command: str) -> tuple:
    """Heuristically check whether a `git push` would fail from divergence.

    Runs before git push to catch the common case where the remote has
    commits the local branch lacks. Future: this will be augmented with
    model-based reasoning for more sophisticated governance decisions.

    Args:
        command: The bash command being executed.

    Returns:
        (should_block, reason) - (True, "reason") to block, (False, None)
        to allow. All errors and timeouts fall through to allow, letting
        git itself report the problem.
    """
    import re
    import subprocess

    # Only inspect git push commands.
    if not re.search(r'\bgit\s+push\b', command):
        return False, None

    # Don't block force pushes - the user explicitly wants to override.
    # Fixed: match -f / --force / --force-with-lease as whole flags. The old
    # substring pattern "(-f|--force)" also matched flags like --follow-tags
    # and silently skipped the divergence check for those pushes.
    if re.search(r'(^|\s)(-f|--force(-with-lease)?)(=\S+)?(\s|$)', command):
        return False, None

    try:
        # Bail out quietly if we're not inside a git repo.
        result = subprocess.run(
            ["git", "rev-parse", "--git-dir"],
            capture_output=True,
            text=True,
            timeout=5
        )
        if result.returncode != 0:
            # Not in a git repo - let git push fail naturally.
            return False, None

        # Fetch to refresh remote state (quiet; failures fall through).
        subprocess.run(
            ["git", "fetch", "--quiet"],
            capture_output=True,
            timeout=30
        )

        # Local HEAD.
        local = subprocess.run(
            ["git", "rev-parse", "@"],
            capture_output=True,
            text=True,
            timeout=5
        )
        if local.returncode != 0:
            return False, None
        local_ref = local.stdout.strip()

        # Upstream HEAD.
        remote = subprocess.run(
            ["git", "rev-parse", "@{u}"],
            capture_output=True,
            text=True,
            timeout=5
        )
        if remote.returncode != 0:
            # No upstream configured - allow the push to set it.
            return False, None
        remote_ref = remote.stdout.strip()

        # Merge base of local and upstream.
        base = subprocess.run(
            ["git", "merge-base", "@", "@{u}"],
            capture_output=True,
            text=True,
            timeout=5
        )
        if base.returncode != 0:
            return False, None
        base_ref = base.stdout.strip()

        # Classify the divergence state.
        if local_ref == remote_ref:
            # Already up to date - push will be a no-op but that's fine.
            return False, None
        elif local_ref == base_ref:
            # Local is strictly behind remote - the push would be rejected.
            return True, "Remote has commits you don't have. Run 'git pull --rebase' first."
        elif remote_ref == base_ref:
            # Local is strictly ahead - normal push, allow.
            return False, None
        else:
            # Diverged - both sides have unique commits.
            return True, "Local and remote have diverged. Run 'git pull --rebase' to sync before pushing."

    except subprocess.TimeoutExpired:
        # Don't block on slowness - let git handle it.
        return False, None
    except Exception:
        # Don't block on unexpected errors - let git handle it.
        return False, None


def create_session_token():
    """Create a software-bound session token (mirrors session_start.py)."""
    seed = f"{os.uname().nodename}:{os.getuid()}:{datetime.now(timezone.utc).isoformat()}"
    token_hash = hashlib.sha256(seed.encode()).hexdigest()[:12]
    return {
        "token_id": f"web4:session:{token_hash}",
        "binding": "software",
        "created_at": _utc_now(),  # fixed: was isoformat() + "Z" (invalid)
        "machine_hint": hashlib.sha256(os.uname().nodename.encode()).hexdigest()[:8]
    }


def register_policy_for_session(session_id: str, prefs: dict):
    """Register a policy entity for a session (used in lazy init).

    Returns (policy_entity_id, policy_entity_dict) or (None, None) when the
    governance package is unavailable or registration fails.
    """
    if not GOVERNANCE_AVAILABLE or PolicyRegistry is None:
        return None, None

    preset_name = prefs.get("policy_preset", "safety")
    if not is_preset_name(preset_name):
        preset_name = "safety"  # fall back on invalid preset names

    try:
        registry = PolicyRegistry()
        policy_entity = registry.register_policy(name=preset_name, preset=preset_name)
        # Bidirectional witnessing: the session operates under this policy.
        registry.witness_session(policy_entity.entity_id, session_id)
        return policy_entity.entity_id, policy_entity.to_dict()
    except Exception as e:
        print(f"[Web4] Policy registration failed: {e}", file=sys.stderr)
        return None, None


def load_or_create_session(session_id):
    """Load session state, or create one if missing (lazy initialization).

    Handles context-compaction continuations where the SessionStart hook
    doesn't fire but PreToolUse does.
    """
    SESSION_DIR.mkdir(parents=True, exist_ok=True)
    session_file = SESSION_DIR / f"{session_id}.json"

    if session_file.exists():
        with open(session_file) as f:
            return json.load(f)

    # Lazy initialization for continued/recovered sessions: defaults mirror
    # session_start.load_preferences().
    prefs = {
        "audit_level": "standard",
        "show_r6_status": True,
        "action_budget": None,
        "policy_preset": "safety",
    }

    # Register policy as a first-class entity (hash-tracked).
    policy_entity_id, policy_entity_dict = register_policy_for_session(session_id, prefs)

    now = _utc_now()  # fixed: was isoformat() + "Z" (invalid)
    session = {
        "session_id": session_id,
        "token": create_session_token(),
        "preferences": prefs,
        "started_at": now,
        "recovered_at": now,  # mark as recovered (SessionStart never fired)
        "action_count": 0,
        "r6_requests": [],
        "audit_chain": [],
        "active_agent": None,
        "agents_used": [],
        "governance_available": GOVERNANCE_AVAILABLE,
        # Policy entity (society's law)
        "policy_entity_id": policy_entity_id,
        "policy_entity": policy_entity_dict,
    }

    # Save immediately so concurrent hook invocations see the session.
    with open(session_file, "w") as f:
        json.dump(session, f, indent=2)

    # Initialize heartbeat for the recovered session.
    heartbeat = get_session_heartbeat(session_id)
    heartbeat.record("session_recovered", 0)

    # NOTE: no stderr logging here - Claude Code surfaces any stderr from a
    # hook as a "hook error", even for informational messages.

    return session
def load_session(session_id):
    """Load session state (compatibility wrapper over lazy initialization)."""
    return load_or_create_session(session_id)


def save_session(session):
    """Persist session state back to its JSON file."""
    session_file = SESSION_DIR / f"{session['session_id']}.json"
    with open(session_file, "w") as f:
        json.dump(session, f, indent=2)


def detect_mcp_tool(tool_name: str) -> tuple:
    """Detect whether a tool name belongs to an MCP server.

    Recognized patterns:
    - mcp__servername__toolname (double underscore)
    - mcp_servername_toolname (single underscore)
    - web4.io/namespace/tool (URI style)
    - servername.toolname (dot notation, excluding common file extensions)

    Returns:
        (is_mcp, server_name, tool_name) or (False, None, None).

    NOTE(review): the dot-notation fallback also matches strings like
    "file.txt" (only .py/.js/.ts/.json are excluded). Harmless for real
    Claude Code tool names, but worth confirming against actual MCP naming.
    """
    # Pattern 1: mcp__server__tool
    if tool_name.startswith("mcp__"):
        parts = tool_name.split("__")
        if len(parts) >= 3:
            return True, parts[1], "__".join(parts[2:])

    # Pattern 2: mcp_server_tool (but not native tools)
    if tool_name.startswith("mcp_"):
        parts = tool_name[4:].split("_", 1)
        if len(parts) >= 2:
            return True, parts[0], parts[1]

    # Pattern 3: web4.io/... or other.io/...
    if ".io/" in tool_name:
        parts = tool_name.split("/")
        if len(parts) >= 2:
            server = parts[0].replace(".io", "")
            tool = "/".join(parts[1:])
            return True, server, tool

    # Pattern 4: server.tool (dot notation, but not file extensions)
    if "." in tool_name and not tool_name.endswith((".py", ".js", ".ts", ".json")):
        parts = tool_name.split(".", 1)
        if len(parts) == 2 and parts[0].isalnum():
            return True, parts[0], parts[1]

    return False, None, None


def classify_action(tool_name):
    """Classify a tool into an action category for R6 bookkeeping."""
    # MCP tools form their own category regardless of what they do.
    is_mcp, server, _ = detect_mcp_tool(tool_name)
    if is_mcp:
        return "mcp"

    categories = {
        "file_read": ["Read", "Glob", "Grep"],
        "file_write": ["Write", "Edit", "NotebookEdit"],
        "command": ["Bash"],
        "network": ["WebFetch", "WebSearch"],
        "delegation": ["Task"],
        "state": ["TodoWrite"],
    }
    for category, tools in categories.items():
        if tool_name in tools:
            return category
    return "other"


def extract_target(tool_name, tool_input):
    """Extract the primary target (path, command, URL, ...) from tool input."""
    if tool_name in ["Read", "Write", "Edit", "Glob"]:
        return tool_input.get("file_path", tool_input.get("path", ""))
    elif tool_name == "Bash":
        cmd = tool_input.get("command", "")
        words = cmd.split()  # fixed: was computed twice
        # First word of the command, or (for empty/whitespace-only commands)
        # the first 50 chars of the raw string.
        return words[0] if words else cmd[:50]
    elif tool_name == "Grep":
        return f"pattern:{tool_input.get('pattern', '')[:30]}"
    elif tool_name == "WebFetch":
        return tool_input.get("url", "")[:100]
    elif tool_name == "WebSearch":
        return f"search:{tool_input.get('query', '')[:50]}"
    elif tool_name == "Task":
        return tool_input.get("description", "")[:50]
    return ""


def create_r6_request(session, tool_name, tool_input):
    """Create an R6 request capturing intent before execution.

    This is the core of the R6 framework - structured intent capture.
    The Result component (R6) is filled in later by post_tool_use.
    """
    r6_id = str(uuid.uuid4())[:8]
    action_category = classify_action(tool_name)
    target = extract_target(tool_name, tool_input)

    r6 = {
        "id": f"r6:{r6_id}",
        # Fixed: was isoformat() + "Z", which yielded invalid '...+00:00Z'.
        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),

        # R1: Rules - constraints (policy entity is society's law)
        "rules": {
            "audit_level": session["preferences"]["audit_level"],
            "budget_remaining": session["preferences"].get("action_budget"),
            "policy_entity_id": session.get("policy_entity_id"),
        },

        # R2: Role - who's asking
        "role": {
            "session_token": session["token"]["token_id"],
            "binding": session["token"]["binding"],
            "action_index": session["action_count"]
        },

        # R3: Request - what's being asked
        "request": {
            "tool": tool_name,
            "category": action_category,
            "target": target,
            "input_hash": hashlib.sha256(
                json.dumps(tool_input, sort_keys=True).encode()
            ).hexdigest()[:16]
        },

        # R4: Reference - history context
        "reference": {
            "session_id": session["session_id"],
            "prev_r6": session["r6_requests"][-1] if session["r6_requests"] else None,
            "chain_length": len(session["r6_requests"])
        },

        # R5: Resource - what's needed (extensible)
        "resource": {
            "estimated_tokens": None,   # could be estimated in the future
            "requires_approval": False  # could be policy-driven
        }

        # R6: Result - filled in by post_tool_use
    }

    return r6


def log_r6(r6_request):
    """Append the R6 request to the day's global audit log (JSONL)."""
    R6_LOG_DIR.mkdir(parents=True, exist_ok=True)

    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    log_file = R6_LOG_DIR / f"{today}.jsonl"

    with open(log_file, "a") as f:
        f.write(json.dumps(r6_request) + "\n")


def main():
    """Pre-tool-use hook entry point.

    Builds an R6 intent record for the incoming tool call, applies the
    git-push divergence check, policy evaluation, agent capability gating,
    agent-spawn and MCP tracking, then logs and stores the pending R6.
    Deny decisions are emitted as {"decision": "deny", ...} JSON on stdout
    with exit code 0 (the Claude Code hook protocol).
    """
    try:
        raw_input = sys.stdin.read()
        input_data = json.loads(raw_input) if raw_input.strip() else {}
    except json.JSONDecodeError:
        sys.exit(0)  # malformed payload: let the tool proceed untracked

    session_id = input_data.get("session_id", "default")
    tool_name = input_data.get("tool_name", "unknown")
    tool_input = input_data.get("tool_input", {})

    # Load (or lazily create) the session.
    session = load_session(session_id)
    if not session:
        # No session - allow the tool to proceed without R6 tracking.
        sys.exit(0)

    # Create the R6 request capturing intent.
    r6 = create_r6_request(session, tool_name, tool_input)

    # Evaluate policy - society's law.
    category = r6["request"]["category"]
    target = r6["request"]["target"]
    # For Bash tools, pass the full command to enable command_patterns matching.
    full_command = tool_input.get("command") if tool_name == "Bash" else None

    # Git push divergence check (heuristic - will be model-augmented later).
    if tool_name == "Bash" and full_command:
        should_block, divergence_reason = check_git_push_divergence(full_command)
        if should_block:
            r6["git_check"] = {
                "blocked": True,
                "reason": divergence_reason,
            }
            r6["result"] = {
                "status": "blocked",
                "reason": divergence_reason,
                "rule_id": "git-divergence-check",
            }
            log_r6(r6)
            print(f"[Web4/Git] BLOCKED: {divergence_reason}", file=sys.stderr)
            print(json.dumps({"decision": "deny", "reason": divergence_reason}))
            sys.exit(0)

    decision, policy_eval = evaluate_policy(session, tool_name, category, target, full_command)

    # Add policy evaluation to the R6 record.
    if policy_eval:
        r6["policy"] = policy_eval

    # Handle policy decision.
    if decision == "deny" and policy_eval and policy_eval.get("enforced", True):
        # Policy blocks this action.
        r6["result"] = {
            "status": "blocked",
            "reason": policy_eval.get("reason", "Blocked by policy"),
            "rule_id": policy_eval.get("rule_id"),
        }
        log_r6(r6)

        # Witness the policy decision (the policy witnesses a deny).
        if GOVERNANCE_AVAILABLE and PolicyRegistry:
            try:
                registry = PolicyRegistry()
                policy_entity_id = session.get("policy_entity_id")
                if policy_entity_id:
                    registry.witness_decision(
                        policy_entity_id, session["session_id"], tool_name, "deny", success=False
                    )
            except Exception:
                pass  # don't fail the hook on a witnessing error

        # Output block message.
        print(f"[Web4/Policy] BLOCKED: {policy_eval.get('reason', 'Blocked by policy')}", file=sys.stderr)

        # Claude Code hook protocol: exit 0 with the deny decision on stdout.
        print(json.dumps({"decision": "deny", "reason": policy_eval.get("reason")}))
        sys.exit(0)

    elif decision == "warn":
        # Log-only warning - don't print to stderr (Claude Code treats any
        # stderr output as a "hook error").
        pass

    # Check trust-based capabilities if an agent is active.
    active_agent = session.get("active_agent")
    if active_agent and GOVERNANCE_AVAILABLE and tool_name != "Task":
        try:
            gov = AgentGovernance()
            cap_check = gov.on_tool_use(
                session_id=session_id,
                role_id=active_agent,
                tool_name=tool_name,
                tool_input=tool_input,
                atp_cost=1
            )

            if not cap_check.get("allowed", True):
                # The agent lacks trust for this tool.
                r6["capability"] = {
                    "blocked": True,
                    "agent": active_agent,
                    "required": cap_check.get("required"),
                    "trust_level": cap_check.get("trust_level"),
                    "error": cap_check.get("error"),
                }
                r6["result"] = {
                    "status": "blocked",
                    "reason": cap_check.get("error", "Insufficient trust"),
                }
                log_r6(r6)

                print(f"[Web4/Trust] BLOCKED: {cap_check.get('error')} (agent: {active_agent})", file=sys.stderr)
                print(json.dumps({"decision": "deny", "reason": cap_check.get("error")}))
                sys.exit(0)

            r6["capability"] = {
                "allowed": True,
                "agent": active_agent,
                "trust_level": cap_check.get("trust_level", "unknown"),
            }
        except Exception as e:
            r6["capability"] = {"error": str(e)}

    # Handle agent spawn (Task tool = agent delegation).
    agent_context = None
    if tool_name == "Task" and GOVERNANCE_AVAILABLE:
        agent_name = tool_input.get("subagent_type", tool_input.get("description", "unknown"))
        try:
            gov = AgentGovernance()
            agent_context = gov.on_agent_spawn(session_id, agent_name)

            # Add agent context to the R6 request.
            r6["agent"] = {
                "name": agent_name,
                "trust_level": agent_context.get("trust", {}).get("trust_level", "unknown"),
                "t3_average": agent_context.get("trust", {}).get("t3_average", 0.5),
                "references_loaded": agent_context.get("references_loaded", 0),
                "capabilities": agent_context.get("capabilities", {})
            }

            # Track the active agent in the session.
            session["active_agent"] = agent_name

        except Exception as e:
            # Don't fail the hook on governance errors.
            r6["agent"] = {"name": agent_name, "error": str(e)}

    # Handle MCP tool calls - track for witnessing in post_tool_use.
    is_mcp, mcp_server, mcp_tool = detect_mcp_tool(tool_name)
    if is_mcp and GOVERNANCE_AVAILABLE and EntityTrustStore:
        try:
            store = EntityTrustStore()
            mcp_entity_id = f"mcp:{mcp_server}"
            mcp_trust = store.get(mcp_entity_id)

            # Add MCP context to the R6 request.
            r6["mcp"] = {
                "server": mcp_server,
                "tool": mcp_tool,
                "entity_id": mcp_entity_id,
                "t3_average": mcp_trust.t3_average(),
                "trust_level": mcp_trust.trust_level(),
                "action_count": mcp_trust.action_count
            }

            # Track the pending MCP call for witnessing on completion.
            session["pending_mcp"] = {
                "server": mcp_server,
                "entity_id": mcp_entity_id,
                "tool": mcp_tool
            }

        except Exception as e:
            r6["mcp"] = {"server": mcp_server, "error": str(e)}

    # Log for audit.
    log_r6(r6)

    # Record heartbeat for timing-coherence tracking.
    heartbeat = get_session_heartbeat(session_id)
    hb_entry = heartbeat.record(tool_name, session["action_count"])
    timing_coherence = heartbeat.timing_coherence()

    # Attach heartbeat info to the R6 request.
    r6["heartbeat"] = {
        "sequence": hb_entry["sequence"],
        "status": hb_entry["status"],
        "delta_seconds": hb_entry["delta_seconds"],
        "timing_coherence": timing_coherence
    }

    # Update the session; post_tool_use completes this pending R6.
    session["pending_r6"] = r6
    session["action_count"] += 1
    session["timing_coherence"] = timing_coherence
    save_session(session)

    # Verbose R6 status intentionally not printed: stderr output causes
    # Claude Code "hook error" warnings. R6 data is logged for audit above.

    # Record rate-limit usage for allowed actions.
    if decision == "allow" and policy_eval and policy_eval.get("rule_id"):
        rate_limiter = get_rate_limiter()
        if rate_limiter:
            key = f"ratelimit:{policy_eval['rule_id']}:{tool_name}"
            rate_limiter.record(key)

    # Allow the tool to proceed.
    sys.exit(0)


if __name__ == "__main__":
    main()
# ---------------------------------------------------------------------------
# session_start.py module body (SPDX header and module docstring precede)
# ---------------------------------------------------------------------------

import json
import os
import sys
import uuid
import hashlib
from datetime import datetime, timezone
from pathlib import Path

# Heartbeat tracker lives next to this hook; the hooks directory is on
# sys.path because Claude Code executes the hook script directly.
from heartbeat import get_session_heartbeat

# Make the plugin root importable so the `governance` package resolves.
sys.path.insert(0, str(Path(__file__).parent.parent))
try:
    from governance import AgentGovernance, RoleTrustStore, PolicyRegistry, resolve_preset, is_preset_name
    GOVERNANCE_AVAILABLE = True
except ImportError:
    GOVERNANCE_AVAILABLE = False
    PolicyRegistry = None

# Web4 state directory
WEB4_DIR = Path.home() / ".web4"
SESSION_DIR = WEB4_DIR / "sessions"


def _utc_now() -> str:
    """Current UTC time, ISO-8601 with a 'Z' suffix.

    Bug fix: an aware datetime's isoformat() already ends in '+00:00'; the
    previous code appended a literal 'Z' on top of that, producing invalid
    timestamps like '2025-01-01T00:00:00+00:00Z'.
    """
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")


def create_session_token():
    """Create a software-bound session token.

    In the full Web4 spec this would be an LCT (Linked Context Token)
    bound to hardware. This light version uses a software-derived token;
    trust interpretation is up to the relying party.
    """
    # Derive from machine + user + timestamp for uniqueness.
    seed = f"{os.uname().nodename}:{os.getuid()}:{datetime.now(timezone.utc).isoformat()}"
    token_hash = hashlib.sha256(seed.encode()).hexdigest()[:12]

    return {
        "token_id": f"web4:session:{token_hash}",
        "binding": "software",  # explicit: not hardware-bound
        "created_at": _utc_now(),  # fixed: was isoformat() + "Z" (invalid)
        "machine_hint": hashlib.sha256(os.uname().nodename.encode()).hexdigest()[:8]
    }


def load_preferences():
    """Load user governance preferences, falling back to safe defaults."""
    prefs_file = WEB4_DIR / "preferences.json"

    if prefs_file.exists():
        with open(prefs_file) as f:
            return json.load(f)

    # Default preferences.
    return {
        "audit_level": "standard",  # minimal, standard, verbose
        "show_r6_status": True,
        "action_budget": None,      # no limit by default
        "policy_preset": "safety",  # permissive, safety, strict, audit-only
    }


def register_policy_entity(session_id: str, prefs: dict):
    """Register policy as a first-class entity in the trust network.

    Policy is society's law - immutable once registered, hash-tracked in
    the chain. Creates bidirectional witnessing: the session witnesses
    operating under the policy.

    Returns:
        Tuple (policy_entity_id, policy_entity_dict) or (None, None) when
        the governance package is unavailable or registration fails.
    """
    if not GOVERNANCE_AVAILABLE or PolicyRegistry is None:
        return None, None

    preset_name = prefs.get("policy_preset", "safety")

    # Validate the preset name; fall back to safety if invalid.
    if not is_preset_name(preset_name):
        preset_name = "safety"

    try:
        registry = PolicyRegistry()

        # Register policy (creates a hash-identified entity, persists to disk).
        policy_entity = registry.register_policy(
            name=preset_name,
            preset=preset_name,
        )

        # Session witnesses operating under this policy.
        registry.witness_session(policy_entity.entity_id, session_id)

        return policy_entity.entity_id, policy_entity.to_dict()
    except Exception as e:
        # Policy registration failed - continue without a policy entity;
        # session start must never break.
        print(f"[Web4] Policy registration failed: {e}", file=sys.stderr)
        return None, None


def initialize_session(session_id):
    """Initialize Web4 session state and persist it to disk."""
    SESSION_DIR.mkdir(parents=True, exist_ok=True)

    token = create_session_token()
    prefs = load_preferences()

    # Register policy as a first-class entity (hash-tracked, witnessable).
    policy_entity_id, policy_entity_dict = register_policy_entity(session_id, prefs)

    session = {
        "session_id": session_id,
        "token": token,
        "preferences": prefs,
        "started_at": _utc_now(),  # fixed: was isoformat() + "Z" (invalid)
        "action_count": 0,
        "r6_requests": [],
        "audit_chain": [],
        # Agent governance tracking
        "active_agent": None,
        "agents_used": [],
        "governance_available": GOVERNANCE_AVAILABLE,
        # Policy entity (society's law - hash-tracked in chain)
        "policy_entity_id": policy_entity_id,
        "policy_entity": policy_entity_dict,
    }

    session_file = SESSION_DIR / f"{session_id}.json"
    with open(session_file, "w") as f:
        json.dump(session, f, indent=2)

    # Initialize the heartbeat tracker and record session start.
    heartbeat = get_session_heartbeat(session_id)
    heartbeat.record("session_start", 0)

    return session
+ json.dump(session, f, indent=2) + + # Initialize heartbeat tracker and record session start + heartbeat = get_session_heartbeat(session_id) + heartbeat.record("session_start", 0) + + return session + + +def main(): + """Session start hook entry point.""" + try: + raw_input = sys.stdin.read() + input_data = json.loads(raw_input) if raw_input.strip() else {} + except json.JSONDecodeError: + input_data = {} + + session_id = input_data.get("session_id", str(uuid.uuid4())[:8]) + + # Initialize session + session = initialize_session(session_id) + + # Show status if preference enabled + if session["preferences"]["show_r6_status"]: + # Session info logging removed - stderr output causes Claude Code "hook error" warnings + # Session data is still tracked in state files + pass + + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/plugins/web4-governance/settings.template.json b/plugins/web4-governance/settings.template.json new file mode 100644 index 0000000000..9e738e5594 --- /dev/null +++ b/plugins/web4-governance/settings.template.json @@ -0,0 +1,36 @@ +{ + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "$CLAUDE_PROJECT_DIR/web4/claude-code-plugin/hooks/session_start.py" + } + ] + } + ], + "PreToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "$CLAUDE_PROJECT_DIR/web4/claude-code-plugin/hooks/pre_tool_use.py" + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": "$CLAUDE_PROJECT_DIR/web4/claude-code-plugin/hooks/post_tool_use.py" + } + ] + } + ] + } +} diff --git a/plugins/web4-governance/test_agent_flow.py b/plugins/web4-governance/test_agent_flow.py new file mode 100644 index 0000000000..1ae6c5c044 --- /dev/null +++ b/plugins/web4-governance/test_agent_flow.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +"""Test the full agent governance flow through hooks.""" + +import json +import sys +import uuid +from pathlib import Path + 
# Make the plugin root and hooks/ importable when running this file directly.
# Assumes the `governance` package sits next to this script - part of the
# plugin layout shown in the patch.
sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, str(Path(__file__).parent / "hooks"))

from governance import AgentGovernance, RoleTrustStore, ReferenceStore


def test_agent_flow() -> None:
    """Simulate the full agent lifecycle.

    Walks one agent ("code-reviewer") through spawn -> reference extraction
    -> successful completion -> re-spawn -> failed completion, printing the
    trust/capability state after each step. Output is informational; no
    assertions are made.
    """
    print("=" * 60)
    print("Testing Agent Governance Flow")
    print("=" * 60)

    # Fresh random session id so repeated runs don't collide.
    session_id = f"test-{uuid.uuid4().hex[:8]}"
    agent_name = "code-reviewer"

    gov = AgentGovernance()
    trust_store = RoleTrustStore()
    ref_store = ReferenceStore()

    # 1. Get initial trust
    initial_trust = trust_store.get(agent_name)
    print(f"\n1. Initial trust for '{agent_name}':")
    print(f" T3 average: {initial_trust.t3_average():.3f}")
    print(f" Trust level: {initial_trust.trust_level()}")
    print(f" Action count: {initial_trust.action_count}")

    # 2. Simulate agent spawn (like pre_tool_use with Task)
    print(f"\n2. Agent spawn (session: {session_id}):")
    spawn_ctx = gov.on_agent_spawn(session_id, agent_name)
    print(f" Trust loaded: T3={spawn_ctx['trust']['t3_average']:.3f}")
    print(f" References loaded: {spawn_ctx['references_loaded']}")
    print(f" Capabilities: write={spawn_ctx['capabilities']['can_write']}, execute={spawn_ctx['capabilities']['can_execute']}")

    # 3. Add a reference (simulate agent learning something)
    print(f"\n3. Extract reference:")
    ref = ref_store.add(
        agent_name,
        "Pattern: Always validate input before processing",
        "code review of auth.py",
        "pattern",
        confidence=0.8
    )
    print(f" Added: {ref.ref_id}")
    print(f" Content: {ref.content[:50]}...")

    # 4. Simulate agent completion (success)
    print(f"\n4. Agent complete (success):")
    result = gov.on_agent_complete(session_id, agent_name, success=True)
    print(f" Trust updated: T3={result['trust_updated']['t3_average']:.3f}")
    print(f" Reliability: {result['trust_updated']['reliability']:.3f}")

    # 5. Check updated trust
    updated_trust = trust_store.get(agent_name)
    print(f"\n5. Updated trust:")
    print(f" T3 average: {updated_trust.t3_average():.3f} (was {initial_trust.t3_average():.3f})")
    print(f" Action count: {updated_trust.action_count}")
    print(f" Success rate: {updated_trust.success_count}/{updated_trust.action_count}")

    # 6. Spawn again - should have more references now
    print(f"\n6. Second spawn (same agent):")
    spawn_ctx2 = gov.on_agent_spawn(session_id + "-2", agent_name)
    print(f" References now: {spawn_ctx2['references_loaded']}")
    print(f" Context preview: {spawn_ctx2['context'][:100]}..." if spawn_ctx2['context'] else " (no context)")

    # 7. Simulate failure
    print(f"\n7. Agent complete (failure):")
    result_fail = gov.on_agent_complete(session_id + "-2", agent_name, success=False)
    print(f" Trust after failure: T3={result_fail['trust_updated']['t3_average']:.3f}")

    # 8. Check capabilities changed
    final_caps = trust_store.derive_capabilities(agent_name)
    print(f"\n8. Final capabilities:")
    print(f" Trust level: {final_caps['trust_level']}")
    print(f" Can write: {final_caps['can_write']}")
    print(f" Can delegate: {final_caps['can_delegate']}")
    print(f" Max ATP/action: {final_caps['max_atp_per_action']}")

    # 9. List all roles
    print(f"\n9. All known roles:")
    all_roles = gov.get_all_roles()
    # Only show the first five roles to keep output short.
    for role in all_roles[:5]:
        print(f" {role['role_id']}: T3={role['t3_average']:.2f} ({role['action_count']} actions)")

    print("\n" + "=" * 60)
    print("Agent governance flow test complete!")
    print("=" * 60)


if __name__ == "__main__":
    test_agent_flow()
Witnessing relationships:") + fs_trust = store.get("mcp:filesystem") + print(f" mcp:filesystem witnessed by: {fs_trust.witnessed_by[:3]}") + + session_trust = store.get(session_id) + print(f" {session_id} has witnessed: {session_trust.has_witnessed[:3]}") + + # 4. Get witnessing chain + print("\n4. Witnessing chain:") + chain = store.get_witnessing_chain("mcp:filesystem", depth=2) + print(f" mcp:filesystem chain:") + print(f" T3: {chain['t3_average']:.3f}") + print(f" Witnessed by: {len(chain['witnessed_by'])} entities") + print(f" Has witnessed: {len(chain['has_witnessed'])} entities") + + # 5. List entities by type + print("\n5. Entity listing:") + mcp_entities = store.list_entities("mcp") + print(f" MCP servers: {mcp_entities}") + + session_entities = store.list_entities("session") + print(f" Sessions: {len(session_entities)} total") + + # 6. Convenience function + print("\n6. update_mcp_trust convenience:") + updated = update_mcp_trust("web4", success=True, witness_id=session_id) + print(f" mcp:web4 after success: T3={updated.t3_average():.3f}") + + print("\n" + "=" * 60) + print("Entity trust test complete!") + print("=" * 60) + + +def test_reference_self_curation(): + """Test reference trust and self-curation.""" + print("\n" + "=" * 60) + print("Testing Reference Self-Curation") + print("=" * 60) + + refs = ReferenceStore() + role_id = f"test-curator-{uuid.uuid4().hex[:6]}" + + # 1. Add references with varying initial confidence + print("\n1. Adding references:") + patterns = [ + ("Always validate input", 0.8), + ("Check null before access", 0.6), + ("Use async for IO", 0.4), + ("This might not work", 0.2), + ] + + ref_ids = [] + for content, confidence in patterns: + ref = refs.add(role_id, content, "test", "pattern", confidence) + ref_ids.append(ref.ref_id) + print(f" {content[:25]}... conf={confidence} trust={ref.trust_score:.2f}") + + # 2. Get context (uses refs, tracks them) + print("\n2. 
Getting context (marks refs as used):") + context, used_ids = refs.get_context_for_role(role_id, min_trust=0.15) + print(f" Refs included: {len(used_ids)}") + + # Check which refs were excluded due to low trust + all_refs = refs.get_for_role(role_id) + excluded = [r for r in all_refs if r.ref_id not in used_ids] + print(f" Refs excluded (low trust): {len(excluded)}") + + # 3. Simulate successful task - witness refs + print("\n3. Task succeeds - witnessing refs:") + updated = refs.witness_references(role_id, used_ids, success=True, magnitude=0.2) + for r in updated: + print(f" {r.content[:25]}... trust={r.trust_score:.3f} ({r.trust_level()})") + + # 4. Simulate failed task + print("\n4. Task fails - witnessing refs:") + # Re-get context to mark as used + _, used_again = refs.get_context_for_role(role_id, min_trust=0.15) + updated = refs.witness_references(role_id, used_again, success=False, magnitude=0.2) + for r in updated: + print(f" {r.content[:25]}... trust={r.trust_score:.3f} ({r.trust_level()})") + + # 5. Check self-curation effect + print("\n5. Self-curation effect after mixed outcomes:") + all_refs = refs.get_for_role(role_id) + for r in all_refs: + print(f" {r.content[:25]}... trust={r.trust_score:.3f} success={r.success_count} fail={r.failure_count}") + + print("\n" + "=" * 60) + print("Reference self-curation test complete!") + print("=" * 60) + + +def test_agent_reference_witnessing(): + """Test end-to-end agent + reference witnessing.""" + print("\n" + "=" * 60) + print("Testing Agent + Reference Integration") + print("=" * 60) + + gov = AgentGovernance() + session_id = f"integration-{uuid.uuid4().hex[:8]}" + role_id = "test-integrator" + + # 1. Add some references + print("\n1. Adding references for role:") + gov.extract_reference(session_id, role_id, "Pattern: test first", "tests.py", "pattern") + gov.extract_reference(session_id, role_id, "Fact: API uses REST", "docs.md", "fact") + print(" Added 2 references") + + # 2. 
Spawn agent (loads refs, tracks for witnessing) + print("\n2. Agent spawn:") + spawn_ctx = gov.on_agent_spawn(session_id, role_id) + print(f" Trust: T3={spawn_ctx['trust']['t3_average']:.3f}") + print(f" Refs used: {spawn_ctx.get('references_used', 0)}") + + # 3. Complete successfully + print("\n3. Agent complete (success):") + result = gov.on_agent_complete(session_id, role_id, success=True) + print(f" Trust updated: T3={result['trust_updated']['t3_average']:.3f}") + print(f" Refs witnessed: {result.get('references_witnessed', 0)}") + + # 4. Spawn again - refs should have higher trust + print("\n4. Second spawn (refs have evolved):") + spawn_ctx2 = gov.on_agent_spawn(session_id + "-2", role_id) + print(f" Refs loaded: {spawn_ctx2['references_loaded']}") + + # Check ref trust levels + refs = gov.references.get_for_role(role_id) + for r in refs: + print(f" {r.ref_type}: {r.content[:30]}... trust={r.effective_trust():.3f}") + + print("\n" + "=" * 60) + print("Agent + Reference integration test complete!") + print("=" * 60) + + +if __name__ == "__main__": + test_entity_trust() + test_reference_self_curation() + test_agent_reference_witnessing() + + print("\n" + "=" * 60) + print("ALL ENTITY TRUST TESTS PASSED!") + print("=" * 60) diff --git a/plugins/web4-governance/test_governance_session.py b/plugins/web4-governance/test_governance_session.py new file mode 100644 index 0000000000..fb1c947c5a --- /dev/null +++ b/plugins/web4-governance/test_governance_session.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Web4 Governance Plugin - Integration Test +# https://github.com/dp-web4/web4 +""" +Full integration test simulating a Claude Code session. + +Tests: +1. Session initialization (via session_start hook pattern) +2. R6 request creation and audit +3. MCP tool witnessing +4. Agent delegation and inter-agent witnessing +5. Trust accumulation and decay +6. 
import json
import os
import sys
import uuid
import hashlib
from pathlib import Path
from datetime import datetime, timezone, timedelta

# Add plugin to path so the governance package resolves when this script
# is run from its own directory.
sys.path.insert(0, str(Path(__file__).parent))
sys.path.insert(0, str(Path(__file__).parent / "hooks"))

from governance import AgentGovernance, EntityTrustStore, RoleTrustStore
from governance.entity_trust import EntityTrust
from governance.role_trust import RoleTrust

# Web4 directories
WEB4_DIR = Path.home() / ".web4"
SESSION_DIR = WEB4_DIR / "sessions"


def print_section(title):
    """Print section header."""
    print(f"\n{'='*60}")
    print(f" {title}")
    print(f"{'='*60}")


def print_result(name, success, details=""):
    """Print test result.

    Args:
        name: Human-readable check name.
        success: Truthy when the check passed.
        details: Extra context, printed only on failure.
    """
    status = "✓ PASS" if success else "✗ FAIL"
    print(f" {status}: {name}")
    if details and not success:
        print(f" {details}")


def create_test_session(session_id: str) -> dict:
    """Create a test session (mimics session_start hook).

    Writes the session record to ~/.web4/sessions/<session_id>.json and
    returns it.

    Args:
        session_id: Identifier for the simulated session.

    Returns:
        The session state dict that was persisted.
    """
    SESSION_DIR.mkdir(parents=True, exist_ok=True)

    # Token id is the first 12 hex chars of a SHA-256 over a per-call seed.
    seed = f"test:{session_id}:{datetime.now(timezone.utc).isoformat()}"
    token_hash = hashlib.sha256(seed.encode()).hexdigest()[:12]

    session = {
        "session_id": session_id,
        "token": {
            "token_id": f"web4:session:{token_hash}",
            "binding": "software",
            # FIX: aware isoformat() already ends in "+00:00"; the previous
            # code appended "Z" on top, yielding the invalid "...+00:00Z".
            "created_at": datetime.now(timezone.utc).isoformat(),
        },
        "preferences": {
            "audit_level": "standard",
            "show_r6_status": True,
            "action_budget": None,
        },
        # Same fix as created_at above: no extra "Z" suffix.
        "started_at": datetime.now(timezone.utc).isoformat(),
        "action_count": 0,
        "r6_requests": [],
        "audit_chain": [],
        "active_agent": None,
        "agents_used": [],
    }

    session_file = SESSION_DIR / f"{session_id}.json"
    with open(session_file, "w") as f:
        json.dump(session, f, indent=2)

    return session


def test_session_init():
    """Test session initialization.

    Returns:
        The generated session_id (so later tests could reuse it).
    """
    print_section("Session Initialization")

    session_id = f"test-session-{uuid.uuid4().hex[:8]}"
    session = create_test_session(session_id)

    print_result("Session created", session is not None)
    print_result("Token assigned", "token" in session)
    print_result("Preferences set", "preferences" in session)

    # Verify session file persisted
    session_file = SESSION_DIR / f"{session_id}.json"
    print_result("Session persisted", session_file.exists())

    return session_id


def test_mcp_witnessing():
    """Test MCP tool trust and witnessing.

    Exercises the success/failure trust dynamics of EntityTrustStore for
    an MCP server entity witnessed by a session entity.
    """
    print_section("MCP Witnessing")

    store = EntityTrustStore()

    # Simulate MCP server call
    mcp_entity = "mcp:filesystem"
    session_entity = "session:test-001"

    # Initial trust
    initial = store.get(mcp_entity)
    initial_t3 = initial.t3_average()
    print(f" Initial MCP trust: {initial_t3:.3f} ({initial.trust_level()})")

    # Successful call - session witnesses MCP
    witness, target = store.witness(session_entity, mcp_entity, success=True, magnitude=0.1)

    after_success = store.get(mcp_entity)
    success_t3 = after_success.t3_average()
    print(f" After success: {success_t3:.3f} ({after_success.trust_level()})")

    print_result("Trust increased on success", success_t3 > initial_t3)
    print_result("Witness recorded", session_entity in after_success.witnessed_by)

    # Failed call
    store.witness(session_entity, mcp_entity, success=False, magnitude=0.1)
    after_fail = store.get(mcp_entity)
    fail_t3 = after_fail.t3_average()
    print(f" After failure: {fail_t3:.3f} ({after_fail.trust_level()})")

    print_result("Trust decreased on failure", fail_t3 < success_t3)

    # Multiple successes
    for _ in range(5):
        store.witness(session_entity, mcp_entity, success=True, magnitude=0.1)

    final = store.get(mcp_entity)
    final_t3 = final.t3_average()
    print(f" After 5 more successes: {final_t3:.3f} ({final.trust_level()})")

    print_result("Trust accumulated over time", final_t3 > fail_t3)
    print_result("Action count tracked", final.action_count >= 6)

    return True


def test_agent_delegation():
    """Test agent spawning and completion with witnessing.

    Spawns two agents in one session and checks that the second agent's
    successful completion witnesses the first.
    """
    print_section("Agent Delegation")

    gov = AgentGovernance()
    session_id = f"test-session-agent-{uuid.uuid4().hex[:6]}"

    # Create session (mimics session_start hook)
    create_test_session(session_id)

    # Spawn first agent
    agent1 = "code-reviewer"
    context1 = gov.on_agent_spawn(session_id, agent1)

    print(f" Agent '{agent1}' spawned")
    print(f" Trust level: {context1.get('trust', {}).get('trust_level', 'unknown')}")
    print(f" T3 average: {context1.get('trust', {}).get('t3_average', 0.5):.3f}")

    print_result("Agent context returned", "trust" in context1)
    print_result("Capabilities derived", "capabilities" in context1)

    # Complete agent successfully
    result1 = gov.on_agent_complete(session_id, agent1, success=True)
    print(f" Agent '{agent1}' completed successfully")

    print_result("Trust updated on completion", result1.get("trust_updated"))

    # Spawn second agent
    agent2 = "test-generator"
    context2 = gov.on_agent_spawn(session_id, agent2)
    print(f" Agent '{agent2}' spawned")

    # Complete successfully - should witness agent1
    result2 = gov.on_agent_complete(session_id, agent2, success=True)

    witnessed = result2.get("witnessed_agents", [])
    print(f" Witnessed previous agents: {witnessed}")

    print_result("Inter-agent witnessing occurred", len(witnessed) > 0 or agent1 in str(witnessed))

    return True
def test_trust_decay():
    """Test trust decay over time.

    Seeds a role (and an entity) with high trust and a last_action 30/60
    days in the past, then applies decay and checks the values drop while
    respecting the floor.
    """
    print_section("Trust Decay")

    store = RoleTrustStore()
    role_id = "decay-test-role"

    # Create role with high trust
    trust = store.get(role_id)
    trust.reliability = 0.9
    trust.consistency = 0.85
    trust.competence = 0.8
    trust.temporal = 0.9
    # Backdate last_action by 30 days so decay has something to act on.
    trust.last_action = (datetime.now(timezone.utc) - timedelta(days=30)).isoformat()
    store.save(trust)

    initial_t3 = trust.t3_average()
    print(f" Initial T3: {initial_t3:.3f}")

    # Apply decay (30 days inactive)
    trust = store.get(role_id)
    days_inactive = trust.days_since_last_action()
    print(f" Days inactive: {days_inactive:.1f}")

    decayed = trust.apply_decay(days_inactive, decay_rate=0.01)
    if decayed:
        store.save(trust)

    final_t3 = trust.t3_average()
    print(f" After decay T3: {final_t3:.3f}")

    print_result("Decay applied", decayed)
    print_result("T3 decreased", final_t3 < initial_t3)
    print_result("Floor maintained (>0.3)", trust.reliability >= 0.3)

    # Test entity trust decay too
    entity_store = EntityTrustStore()
    entity_id = "mcp:decay-test"

    entity = entity_store.get(entity_id)
    entity.reliability = 0.85
    entity.consistency = 0.8
    entity.temporal = 0.85
    entity.last_action = (datetime.now(timezone.utc) - timedelta(days=60)).isoformat()
    entity_store.save(entity)

    entity = entity_store.get_with_decay(entity_id, decay_rate=0.01)
    print(f" Entity decay after 60 days: {entity.t3_average():.3f}")

    print_result("Entity decay works", entity.t3_average() < 0.7)

    return True


def test_reference_extraction():
    """Test auto-extraction of references from task output.

    Feeds a synthetic task transcript containing Pattern:/Fact:/Preference:
    markers and checks each category is extracted with a confidence value.
    """
    print_section("Reference Extraction")

    gov = AgentGovernance()
    session_id = f"test-ref-extract-{uuid.uuid4().hex[:6]}"
    create_test_session(session_id)

    # Simulate task output with extractable patterns
    task_output = """
    Analysis complete. Here are the key findings:

    Pattern: Always use snake_case for Python function names
    Pattern: Database connections should use connection pooling

    Fact: The API endpoint is at /api/v2/users
    Fact: Maximum request size is 10MB

    Preference: User prefers dark mode themes

    Summary: The codebase follows a clean architecture pattern with
    dependency injection throughout. Main entry point is src/main.py.
    """

    # Extract references (role_id is required)
    role_id = "test-analyzer"
    refs = gov.auto_extract_references(session_id, role_id, task_output)

    print(f" Extracted {len(refs)} references:")
    for ref in refs[:5]:
        print(f" - [{ref['ref_type']}] {ref['content'][:50]}...")

    print_result("Patterns extracted", any(r['ref_type'] == 'pattern' for r in refs))
    print_result("Facts extracted", any(r['ref_type'] == 'fact' for r in refs))
    print_result("Preferences extracted", any(r['ref_type'] == 'preference' for r in refs))
    print_result("Confidence assigned", all('confidence' in r for r in refs))

    return True


def test_witnessing_chain():
    """Test witnessing chain tracking.

    Builds a three-hop witnessing chain A -> B -> C -> D and verifies that
    the middle entity reports both directions of the relationship.
    """
    print_section("Witnessing Chains")

    store = EntityTrustStore()

    # Create a witnessing chain: A -> B -> C
    store.witness("session:A", "mcp:B", True)
    store.witness("mcp:B", "role:C", True)
    store.witness("role:C", "ref:D", True)

    # Get chain for B
    chain = store.get_witnessing_chain("mcp:B")

    print(f" Entity: {chain['entity_id']}")
    print(f" T3 average: {chain['t3_average']:.3f}")
    print(f" Witnessed by: {[w['entity_id'] for w in chain['witnessed_by']]}")
    print(f" Has witnessed: {[w['entity_id'] for w in chain['has_witnessed']]}")

    print_result("Witnessed_by tracked", len(chain['witnessed_by']) > 0)
    print_result("Has_witnessed tracked", len(chain['has_witnessed']) > 0)
    print_result("Chain depth works", chain['t3_average'] > 0)

    return True


def test_r6_workflow():
    """Test R6 request creation (simulated).

    Only checks that a fresh session carries the fields the R6 workflow
    reads; no actual R6 request is issued here.
    """
    print_section("R6 Workflow")

    session_id = f"test-r6-{uuid.uuid4().hex[:6]}"
    session = create_test_session(session_id)

    # Verify session has required fields for R6 workflow
    print_result("Session has token", "token" in session)
    print_result("Session has preferences", "preferences" in session)
    print_result("Session tracks r6_requests", "r6_requests" in session)
    print_result("Session tracks audit_chain", "audit_chain" in session)

    # Verify session directory exists
    session_file = SESSION_DIR / f"{session_id}.json"
    print_result("Session persisted to disk", session_file.exists())

    return True


def main():
    """Run all integration tests.

    Returns:
        True when every test passed, False otherwise (used as exit status).
    """
    print("\n" + "="*60)
    print(" Web4 Governance - Integration Test Suite")
    print("="*60)

    tests = [
        ("Session Initialization", test_session_init),
        ("MCP Witnessing", test_mcp_witnessing),
        ("Agent Delegation", test_agent_delegation),
        ("Trust Decay", test_trust_decay),
        ("Reference Extraction", test_reference_extraction),
        ("Witnessing Chains", test_witnessing_chain),
        ("R6 Workflow", test_r6_workflow),
    ]

    results = []
    for name, test_fn in tests:
        # A test that raises is recorded as a failure, not a crash of the suite.
        try:
            result = test_fn()
            results.append((name, True, None))
        except Exception as e:
            results.append((name, False, str(e)))
            print(f"\n ERROR: {e}")

    # Summary
    print_section("Test Summary")
    passed = sum(1 for _, success, _ in results if success)
    total = len(results)

    print(f"\n Results: {passed}/{total} tests passed\n")
    for name, success, error in results:
        status = "✓" if success else "✗"
        print(f" {status} {name}")
        if error:
            print(f" Error: {error}")

    print("\n" + "="*60)

    # Show storage stats
    print("\n Storage Stats:")
    web4_dir = Path.home() / ".web4"
    if web4_dir.exists():
        sessions = list((web4_dir / "sessions").glob("*.json"))
        entities = list((web4_dir / "governance" / "entities").glob("*.json"))
        roles = list((web4_dir / "governance" / "roles").glob("*.json"))
        print(f" Sessions: {len(sessions)}")
        print(f" Entities: {len(entities)}")
        print(f" Roles: {len(roles)}")

    return passed == total


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
+"""Test heartbeat integration for claude-code plugin.""" + +import sys +import json +from pathlib import Path + +# Add hooks to path +sys.path.insert(0, str(Path(__file__).parent / "hooks")) + +from heartbeat import get_session_heartbeat + +def test_heartbeat(): + """Test basic heartbeat functionality.""" + import uuid + + # Create test session + session_id = f"test_{uuid.uuid4().hex[:8]}" + print(f"Testing session: {session_id}") + + heartbeat = get_session_heartbeat(session_id) + + # Record some heartbeats + entries = [] + for i, tool in enumerate(["session_start", "Read", "Edit", "Bash", "Read"]): + entry = heartbeat.record(tool, i) + entries.append(entry) + print(f" [{i}] {tool}: status={entry['status']}, delta={entry['delta_seconds']}s") + + # Check coherence + coherence = heartbeat.timing_coherence() + print(f"\nTiming coherence: {coherence}") + + # Verify chain + valid, error = heartbeat.verify_chain() + print(f"Chain valid: {valid}") + if error: + print(f" Error: {error}") + + # Summary + summary = heartbeat.summary() + print(f"\nSummary:") + print(f" Total heartbeats: {summary['total_heartbeats']}") + print(f" Status distribution: {summary['status_distribution']}") + + # Cleanup + ledger_file = Path.home() / ".web4" / "heartbeat" / f"{session_id}.jsonl" + if ledger_file.exists(): + ledger_file.unlink() + print(f"\nCleaned up test ledger: {ledger_file}") + + print("\n✓ Heartbeat integration test passed") + return True + +if __name__ == "__main__": + success = test_heartbeat() + sys.exit(0 if success else 1) diff --git a/plugins/web4-governance/test_policy_entity.py b/plugins/web4-governance/test_policy_entity.py new file mode 100644 index 0000000000..fe689ce8d8 --- /dev/null +++ b/plugins/web4-governance/test_policy_entity.py @@ -0,0 +1,406 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +# +# Test suite for Policy Entity integration + +""" +Tests for PolicyEntity - policy as a first-class participant in 
import json
import pytest
import tempfile
import shutil
from pathlib import Path

from governance.policy_entity import PolicyEntity, PolicyRegistry, PolicyEvaluation
from governance.presets import (
    PolicyConfig,
    PolicyRule,
    PolicyMatch,
    RateLimitSpec,
    resolve_preset,
    get_preset,
    SAFETY_RULES,
)
from governance.rate_limiter import RateLimiter


@pytest.fixture
def temp_storage():
    """Create temporary storage directory."""
    # Yield-style fixture: the directory is removed after each test.
    tmp = tempfile.mkdtemp()
    yield Path(tmp)
    shutil.rmtree(tmp)


class TestPolicyEntity:
    """Tests for PolicyEntity class."""

    def test_entity_creation(self, temp_storage):
        """Test creating a policy entity."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("test", preset="safety")

        assert entity.name == "test"
        assert entity.source == "preset"
        assert entity.entity_id.startswith("policy:test:")
        assert len(entity.content_hash) == 16  # SHA256 first 16 chars

    def test_entity_hash_uniqueness(self, temp_storage):
        """Test that different configs produce different hashes."""
        registry = PolicyRegistry(temp_storage)

        entity1 = registry.register_policy("safety", preset="safety")
        entity2 = registry.register_policy("permissive", preset="permissive")

        assert entity1.content_hash != entity2.content_hash
        assert entity1.entity_id != entity2.entity_id

    def test_entity_hash_consistency(self, temp_storage):
        """Test that same config produces same hash."""
        registry = PolicyRegistry(temp_storage)

        entity1 = registry.register_policy("safety", preset="safety", version="v1")
        entity2 = registry.register_policy("safety", preset="safety", version="v1")

        # Same version and preset should return cached entity
        assert entity1.entity_id == entity2.entity_id
        assert entity1.content_hash == entity2.content_hash

    def test_entity_persistence(self, temp_storage):
        """Test that policy entity is persisted to disk."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("test", preset="strict")

        # Check file exists - policies are stored by content hash.
        policy_file = temp_storage / "policies" / f"{entity.content_hash}.json"
        assert policy_file.exists()

        # Verify content
        data = json.loads(policy_file.read_text())
        assert data["name"] == "test"
        assert data["entity_id"] == entity.entity_id

    def test_entity_reload(self, temp_storage):
        """Test loading policy entity from disk."""
        registry1 = PolicyRegistry(temp_storage)
        entity1 = registry1.register_policy("test", preset="safety")

        # Create new registry (simulates restart)
        registry2 = PolicyRegistry(temp_storage)
        entity2 = registry2.get_policy(entity1.entity_id)

        assert entity2 is not None
        assert entity2.entity_id == entity1.entity_id
        assert entity2.content_hash == entity1.content_hash
        assert entity2.config.default_policy == entity1.config.default_policy

    def test_entity_to_dict(self, temp_storage):
        """Test serialization to dict."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("test", preset="safety")

        data = entity.to_dict()

        assert data["entity_id"] == entity.entity_id
        assert data["name"] == "test"
        assert data["content_hash"] == entity.content_hash
        assert "config" in data
        assert data["config"]["default_policy"] == "allow"


class TestPolicyEvaluation:
    """Tests for policy evaluation logic."""

    def test_evaluate_allow_default(self, temp_storage):
        """Test default allow policy."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("permissive", preset="permissive")

        result = entity.evaluate("Read", "file_read", "/tmp/test.txt")

        # No rule fires, so the preset's default decision applies.
        assert result.decision == "allow"
        assert result.rule_id is None
        assert "default" in result.reason.lower()

    def test_evaluate_deny_destructive(self, temp_storage):
        """Test safety preset blocks destructive commands."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("safety", preset="safety")

        # Test with realistic folder path, not root
        result = entity.evaluate("Bash", "command", "rm -rf ./temp_build")

        assert result.decision == "deny"
        assert result.rule_id == "deny-destructive-commands"
        assert result.enforced is True

    def test_evaluate_deny_secrets(self, temp_storage):
        """Test safety preset blocks secret file reads."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("safety", preset="safety")

        result = entity.evaluate("Read", "file_read", "/app/.env")

        assert result.decision == "deny"
        assert result.rule_id == "deny-secret-files"

    def test_evaluate_warn_network(self, temp_storage):
        """Test safety preset warns on network."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("safety", preset="safety")

        result = entity.evaluate("WebFetch", "network", "https://example.com")

        assert result.decision == "warn"
        assert result.rule_id == "warn-network"

    def test_evaluate_strict_deny_default(self, temp_storage):
        """Test strict preset denies by default."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("strict", preset="strict")

        result = entity.evaluate("Bash", "command", "ls")

        assert result.decision == "deny"
        assert result.rule_id is None  # Default policy, not a rule
        assert "default" in result.reason.lower()

    def test_evaluate_strict_allow_read(self, temp_storage):
        """Test strict preset allows read tools."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("strict", preset="strict")

        for tool in ["Read", "Glob", "Grep", "TodoWrite"]:
            result = entity.evaluate(tool, "file_read", "/tmp/test.txt")
            assert result.decision == "allow", f"Expected allow for {tool}"
            assert result.rule_id == "allow-read-tools"

    def test_evaluate_constraints(self, temp_storage):
        """Test evaluation returns constraints."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("safety", preset="safety")

        # Test with realistic folder path, not root
        result = entity.evaluate("Bash", "command", "rm -rf ./node_modules")

        # Constraint strings encode the policy id, decision, and rule fired.
        assert f"policy:{entity.entity_id}" in result.constraints
        assert "decision:deny" in result.constraints
        assert "rule:deny-destructive-commands" in result.constraints


class TestPolicyWithRateLimiter:
    """Tests for rate limit evaluation."""

    def test_rate_limit_under_threshold(self, temp_storage):
        """Test rate limit allows under threshold."""
        config = PolicyConfig(
            default_policy="deny",
            enforce=True,
            rules=[
                PolicyRule(
                    id="rate-bash",
                    name="Rate limit Bash",
                    priority=1,
                    decision="deny",
                    match=PolicyMatch(
                        tools=["Bash"],
                        rate_limit=RateLimitSpec(max_count=5, window_ms=60000),
                    ),
                ),
            ],
        )

        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("rate-test", config=config)
        limiter = RateLimiter()

        # Under limit - rule doesn't fire, falls through to default
        result = entity.evaluate("Bash", "command", "ls", rate_limiter=limiter)
        # Since rate limit check passes (0 < 5), the rule doesn't match
        # Falls through to default policy which is "deny"
        assert result.decision == "deny"
        assert result.rule_id is None  # Default, not the rate rule

    def test_rate_limit_over_threshold(self, temp_storage):
        """Test rate limit triggers over threshold."""
        config = PolicyConfig(
            default_policy="allow",
            enforce=True,
            rules=[
                PolicyRule(
                    id="rate-bash",
                    name="Rate limit Bash",
                    priority=1,
                    decision="deny",
                    match=PolicyMatch(
                        tools=["Bash"],
                        rate_limit=RateLimitSpec(max_count=2, window_ms=60000),
                    ),
                ),
            ],
        )

        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("rate-test", config=config)
        limiter = RateLimiter()

        # Record actions to exceed limit
        # NOTE(review): key format mirrors the limiter's internal naming -
        # "ratelimit:<rule-id>:tool:<tool>"; confirm against RateLimiter.
        key = "ratelimit:rate-bash:tool:Bash"
        limiter.record(key)
        limiter.record(key)

        # Now at limit - rule fires
        result = entity.evaluate("Bash", "command", "ls", rate_limiter=limiter)
        assert result.decision == "deny"
        assert result.rule_id == "rate-bash"


class TestPolicyWitnessing:
    """Tests for policy witnessing."""

    def test_witness_session(self, temp_storage):
        """Test session witnessing policy."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("safety", preset="safety")

        # Witness session
        registry.witness_session(entity.entity_id, "session-123")

        # Check entity trust was affected (witnessing records in witnessed_by)
        trust = registry.get_policy_trust(entity.entity_id)
        assert "session:session-123" in trust.witnessed_by

    def test_witness_decision(self, temp_storage):
        """Test policy witnessing decision."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("safety", preset="safety")

        # Initial trust
        trust_before = registry.get_policy_trust(entity.entity_id)
        initial_witness_count = trust_before.witness_count

        # Witness successful decision
        registry.witness_decision(entity.entity_id, "session-123", "Read", "allow", success=True)

        # Check trust updated (policy witnesses the session, so has_witnessed grows)
        trust_after = registry.get_policy_trust(entity.entity_id)
        assert "session:session-123" in trust_after.has_witnessed

    def test_witness_deny_failure(self, temp_storage):
        """Test witnessing a denied action (counts as failure)."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("safety", preset="safety")

        # Witness denied action
        registry.witness_decision(entity.entity_id, "session-123", "Bash", "deny", success=False)

        # Entity should record the witnessing
        trust = registry.get_policy_trust(entity.entity_id)
        assert "session:session-123" in trust.has_witnessed


class TestPolicyRegistry:
    """Tests for PolicyRegistry."""

    def test_list_policies(self, temp_storage):
        """Test listing all policies."""
        registry = PolicyRegistry(temp_storage)
        registry.register_policy("safety", preset="safety")
        registry.register_policy("strict", preset="strict")

        policies = registry.list_policies()

        assert len(policies) == 2
        names = [p.name for p in policies]
        assert "safety" in names
        assert "strict" in names

    def test_get_policy_by_hash(self, temp_storage):
        """Test getting policy by content hash."""
        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("test", preset="safety")

        found = registry.get_policy_by_hash(entity.content_hash)

        assert found is not None
        assert found.entity_id == entity.entity_id

    def test_register_custom_config(self, temp_storage):
        """Test registering custom policy config."""
        config = PolicyConfig(
            default_policy="warn",
            enforce=False,
            rules=[
                PolicyRule(
                    id="custom-rule",
                    name="Custom Rule",
                    priority=1,
                    decision="deny",
                    match=PolicyMatch(categories=["command"]),
                ),
            ],
        )

        registry = PolicyRegistry(temp_storage)
        entity = registry.register_policy("custom", config=config)

        # Custom configs are tagged "custom" rather than "preset".
        assert entity.source == "custom"
        assert entity.config.default_policy == "warn"
        assert len(entity.config.rules) == 1

    def test_register_requires_config_or_preset(self, temp_storage):
        """Test that registration requires config or preset."""
        registry = PolicyRegistry(temp_storage)

        with pytest.raises(ValueError):
            registry.register_policy("invalid")

    def test_register_rejects_both_config_and_preset(self, temp_storage):
        """Test that registration rejects both config and preset."""
        registry = PolicyRegistry(temp_storage)
        config = PolicyConfig(default_policy="allow", enforce=False, rules=[])

        with pytest.raises(ValueError):
            registry.register_policy("invalid", config=config, preset="safety")
TestPolicyEntityId: + """Tests for policy entity ID format.""" + + def test_entity_id_format(self, temp_storage): + """Test entity ID follows policy:<name>:<version>:<hash> format.""" + registry = PolicyRegistry(temp_storage) + entity = registry.register_policy("test", preset="safety", version="v1") + + parts = entity.entity_id.split(":") + assert len(parts) == 4 + assert parts[0] == "policy" + assert parts[1] == "test" + assert parts[2] == "v1" + assert parts[3] == entity.content_hash + + def test_auto_version(self, temp_storage): + """Test auto-generated version is timestamp.""" + registry = PolicyRegistry(temp_storage) + entity = registry.register_policy("test", preset="safety") + + parts = entity.entity_id.split(":") + version = parts[2] + + # Should be YYYYMMDDHHmmss format + assert len(version) == 14 + assert version.isdigit() + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/plugins/web4-governance/test_presence.py b/plugins/web4-governance/test_presence.py new file mode 100644 index 0000000000..9e4b64214d --- /dev/null +++ b/plugins/web4-governance/test_presence.py @@ -0,0 +1,188 @@ +"""Tests for presence tracking (silence as signal).""" + +import tempfile +from pathlib import Path +from datetime import datetime, timedelta, timezone +from governance.presence import ( + PresenceTracker, + PresenceStatus, + PresenceRecord, +) + + +def test_presence_registration(): + """Test entity registration.""" + with tempfile.TemporaryDirectory() as tmpdir: + tracker = PresenceTracker(Path(tmpdir) / "presence.json") + + # Register entity + record = tracker.register("track:web4", interval_hours=6) + assert record.entity_id == "track:web4" + assert record.interval_seconds == 6 * 3600 + + # Check initial status (unknown - no heartbeat yet) + status = tracker.check("track:web4") + assert status == PresenceStatus.UNKNOWN + + return True + + +def test_heartbeat_and_status(): + """Test heartbeat updates status correctly.""" + with tempfile.TemporaryDirectory() as tmpdir: 
tracker = PresenceTracker(Path(tmpdir) / "presence.json") + + tracker.register("agent:claude", interval_hours=6) + + # First heartbeat + prev = tracker.heartbeat("agent:claude") + assert prev == PresenceStatus.UNKNOWN + + # Should now be active + status = tracker.check("agent:claude") + assert status == PresenceStatus.ACTIVE + + return True + + +def test_status_transitions(): + """Test status transitions based on time.""" + record = PresenceRecord( + entity_id="test:entity", + interval_seconds=3600, # 1 hour + grace_factor=1.5, # 1.5 hours grace + missing_factor=3.0 # 3 hours missing + ) + + # No heartbeat yet + assert record.get_status() == PresenceStatus.UNKNOWN + + # Just seen + record.heartbeat() + assert record.get_status() == PresenceStatus.ACTIVE + + # Simulate time passing by backdating last_seen + def backdate(hours): + past = datetime.now(timezone.utc) - timedelta(hours=hours) + record.last_seen = past.isoformat() + + # 30 minutes ago - still active + backdate(0.5) + assert record.get_status() == PresenceStatus.ACTIVE + + # 1.2 hours ago - expected (in grace period) + backdate(1.2) + assert record.get_status() == PresenceStatus.EXPECTED + + # 2 hours ago - overdue (past grace, before missing) + backdate(2) + assert record.get_status() == PresenceStatus.OVERDUE + + # 4 hours ago - missing + backdate(4) + assert record.get_status() == PresenceStatus.MISSING + + return True + + +def test_overdue_detection(): + """Test detecting overdue entities.""" + with tempfile.TemporaryDirectory() as tmpdir: + tracker = PresenceTracker(Path(tmpdir) / "presence.json") + + # Register multiple entities + tracker.register("track:web4", interval_hours=6) + tracker.register("track:4life", interval_hours=6) + tracker.register("agent:test", interval_hours=1) + + # Heartbeat for web4 only + tracker.heartbeat("track:web4") + + # Check overdue list (4life and test never heartbeated, but status is UNKNOWN not OVERDUE) + overdue = tracker.get_overdue() + assert len(overdue) == 0 # 
Unknown entities aren't "overdue" + + # Now heartbeat 4life, then backdate it + tracker.heartbeat("track:4life") + record = tracker.get_record("track:4life") + past = datetime.now(timezone.utc) - timedelta(hours=12) # Way past 6hr interval + record.last_seen = past.isoformat() + + # Now should show as overdue/missing + overdue = tracker.get_overdue() + assert "track:4life" in overdue + + return True + + +def test_summary(): + """Test summary generation.""" + with tempfile.TemporaryDirectory() as tmpdir: + tracker = PresenceTracker(Path(tmpdir) / "presence.json") + + tracker.register("entity:a", interval_hours=1) + tracker.register("entity:b", interval_hours=1) + tracker.heartbeat("entity:a") + + summary = tracker.summary() + assert summary["total_tracked"] == 2 + assert summary["active"] == 1 + assert "entity:a" in summary["entities"] + assert "entity:b" in summary["entities"] + + return True + + +def test_auto_register(): + """Test auto-registration on heartbeat.""" + with tempfile.TemporaryDirectory() as tmpdir: + tracker = PresenceTracker(Path(tmpdir) / "presence.json") + + # Heartbeat unknown entity with auto_register=True (default) + tracker.heartbeat("new:entity") + assert "new:entity" in tracker.records + + # Heartbeat unknown entity with auto_register=False + status = tracker.heartbeat("another:entity", auto_register=False) + assert status == PresenceStatus.UNKNOWN + assert "another:entity" not in tracker.records + + return True + + +def test_persistence(): + """Test that records persist across tracker instances.""" + with tempfile.TemporaryDirectory() as tmpdir: + storage = Path(tmpdir) / "presence.json" + + # Create and populate + tracker1 = PresenceTracker(storage) + tracker1.register("persist:test", interval_hours=2) + tracker1.heartbeat("persist:test") + + # New instance should load existing records + tracker2 = PresenceTracker(storage) + assert "persist:test" in tracker2.records + record = tracker2.get_record("persist:test") + assert 
record.check_count == 1 + + return True + + +if __name__ == "__main__": + tests = [ + test_presence_registration, + test_heartbeat_and_status, + test_status_transitions, + test_overdue_detection, + test_summary, + test_auto_register, + test_persistence, + ] + + for test in tests: + try: + result = test() + print(f"✓ {test.__name__}") + except Exception as e: + print(f"✗ {test.__name__}: {e}") diff --git a/plugins/web4-governance/test_tier1_5.py b/plugins/web4-governance/test_tier1_5.py new file mode 100644 index 0000000000..cd312a6e35 --- /dev/null +++ b/plugins/web4-governance/test_tier1_5.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +# Copyright (c) 2025 Web4 Contributors +""" +Tests for Tier 1.5 features: Presets, Rate Limiting, Audit Query, Reporter. +""" + +import pytest +import time +import tempfile +from pathlib import Path + +from governance.presets import ( + get_preset, + list_presets, + is_preset_name, + resolve_preset, + policy_config_to_dict, + PolicyRule, + PolicyMatch, +) +from governance.rate_limiter import RateLimiter +from governance.ledger import Ledger +from governance.reporter import AuditReporter + + +class TestPresets: + """Tests for policy presets.""" + + def test_list_presets_returns_four(self): + """Should return all four presets.""" + presets = list_presets() + assert len(presets) == 4 + names = {p.name for p in presets} + assert names == {"permissive", "safety", "strict", "audit-only"} + + def test_presets_have_descriptions(self): + """All presets should have non-empty descriptions.""" + for preset in list_presets(): + assert preset.description + assert len(preset.description) > 0 + + def test_get_preset_by_name(self): + """Should return preset by name.""" + safety = get_preset("safety") + assert safety is not None + assert safety.name == "safety" + + def test_get_preset_unknown(self): + """Should return None for unknown preset.""" + assert get_preset("nonexistent") is None + + def 
test_is_preset_name(self): + """Should validate preset names.""" + assert is_preset_name("safety") is True + assert is_preset_name("strict") is True + assert is_preset_name("bogus") is False + assert is_preset_name("") is False + + def test_permissive_preset_structure(self): + """Permissive should have no rules and enforce=false.""" + preset = get_preset("permissive") + assert preset.config.rules == [] + assert preset.config.enforce is False + assert preset.config.default_policy == "allow" + + def test_safety_preset_has_rules(self): + """Safety should have rules and enforce=true.""" + preset = get_preset("safety") + assert len(preset.config.rules) > 0 + assert preset.config.enforce is True + assert preset.config.default_policy == "allow" + + def test_strict_preset_denies_by_default(self): + """Strict should default deny with allow rules.""" + preset = get_preset("strict") + assert preset.config.default_policy == "deny" + assert preset.config.enforce is True + assert len(preset.config.rules) > 0 + # All strict rules should be allow + for rule in preset.config.rules: + assert rule.decision == "allow" + + def test_audit_only_matches_safety_rules(self): + """Audit-only should have same rules as safety but enforce=false.""" + safety = get_preset("safety") + audit_only = get_preset("audit-only") + assert audit_only.config.enforce is False + # Rules should match + assert len(audit_only.config.rules) == len(safety.config.rules) + + def test_resolve_preset_no_overrides(self): + """Should return preset config with no overrides.""" + config = resolve_preset("safety") + preset = get_preset("safety") + assert config.default_policy == preset.config.default_policy + assert config.enforce == preset.config.enforce + assert config.preset == "safety" + + def test_resolve_preset_override_enforce(self): + """Should override enforce flag.""" + config = resolve_preset("safety", enforce=False) + assert config.enforce is False + + def test_resolve_preset_override_default_policy(self): + 
"""Should override default policy.""" + config = resolve_preset("safety", default_policy="deny") + assert config.default_policy == "deny" + + def test_resolve_preset_append_rules(self): + """Should append additional rules.""" + extra = PolicyRule( + id="extra-rule", + name="Extra", + priority=100, + decision="deny", + match=PolicyMatch(tools=["Write"]), + ) + config = resolve_preset("safety", additional_rules=[extra]) + preset = get_preset("safety") + assert len(config.rules) == len(preset.config.rules) + 1 + assert config.rules[-1].id == "extra-rule" + + def test_resolve_preset_unknown_raises(self): + """Should raise for unknown preset.""" + with pytest.raises(ValueError, match="Unknown policy preset"): + resolve_preset("bogus") + + def test_policy_config_to_dict(self): + """Should convert config to dict.""" + config = resolve_preset("safety") + d = policy_config_to_dict(config) + assert d["default_policy"] == "allow" + assert d["enforce"] is True + assert d["preset"] == "safety" + assert isinstance(d["rules"], list) + + +class TestRateLimiter: + """Tests for sliding window rate limiter.""" + + def test_check_allows_when_empty(self): + """Should allow when no actions recorded.""" + limiter = RateLimiter() + result = limiter.check("key1", max_count=5, window_ms=60000) + assert result.allowed is True + assert result.current == 0 + assert result.limit == 5 + + def test_check_allows_under_limit(self): + """Should allow when under limit.""" + limiter = RateLimiter() + limiter.record("key1") + limiter.record("key1") + result = limiter.check("key1", max_count=5, window_ms=60000) + assert result.allowed is True + assert result.current == 2 + + def test_check_denies_at_limit(self): + """Should deny when at limit.""" + limiter = RateLimiter() + for _ in range(5): + limiter.record("key1") + result = limiter.check("key1", max_count=5, window_ms=60000) + assert result.allowed is False + assert result.current == 5 + + def test_check_denies_over_limit(self): + """Should deny 
when over limit.""" + limiter = RateLimiter() + for _ in range(10): + limiter.record("key1") + result = limiter.check("key1", max_count=5, window_ms=60000) + assert result.allowed is False + assert result.current == 10 + + def test_independent_keys(self): + """Keys should be tracked independently.""" + limiter = RateLimiter() + for _ in range(5): + limiter.record("bash") + limiter.record("read") + assert limiter.check("bash", 5, 60000).allowed is False + assert limiter.check("read", 5, 60000).allowed is True + + def test_key_count(self): + """Should report correct key count.""" + limiter = RateLimiter() + assert limiter.key_count == 0 + limiter.record("a") + limiter.record("b") + limiter.record("c") + assert limiter.key_count == 3 + + def test_count_method(self): + """Should report count per key.""" + limiter = RateLimiter() + assert limiter.count("new") == 0 + limiter.record("new") + limiter.record("new") + assert limiter.count("new") == 2 + + def test_make_key(self): + """Should build namespaced key.""" + assert RateLimiter.make_key("rule1", "Bash") == "ratelimit:rule1:Bash" + + +class TestAuditQuery: + """Tests for audit query and filtering.""" + + @pytest.fixture + def ledger(self, tmp_path): + """Create a ledger with test data.""" + db_path = tmp_path / "test.db" + ledger = Ledger(db_path) + + # Create test session + ledger.register_identity("lct:test", "machine", "user") + ledger.start_session("sess1", "lct:test", "project") + + # Record various audit entries + ledger.record_audit("sess1", "tool_use", "Read", "/foo/bar.ts", status="success", + r6_data={"request": {"category": "file_read"}}) + ledger.record_audit("sess1", "tool_use", "Bash", "ls -la", status="success", + r6_data={"request": {"category": "command"}}) + ledger.record_audit("sess1", "tool_use", "Bash", "rm -rf ./cache", status="error", + r6_data={"request": {"category": "command"}, "result": {"error_message": "blocked"}}) + ledger.record_audit("sess1", "tool_use", "WebFetch", 
"https://example.com", status="blocked", + r6_data={"request": {"category": "network"}}) + ledger.record_audit("sess1", "tool_use", "Read", "/src/main.py", status="success", + r6_data={"request": {"category": "file_read"}}) + + return ledger + + def test_query_all(self, ledger): + """Should return all records when no filter.""" + results = ledger.query_audit(session_id="sess1") + assert len(results) == 5 + + def test_query_by_tool(self, ledger): + """Should filter by tool name.""" + results = ledger.query_audit(session_id="sess1", tool="Bash") + assert len(results) == 2 + assert all(r["tool_name"] == "Bash" for r in results) + + def test_query_by_status(self, ledger): + """Should filter by status.""" + results = ledger.query_audit(session_id="sess1", status="error") + assert len(results) == 1 + assert results[0]["status"] == "error" + + def test_query_by_status_blocked(self, ledger): + """Should filter by blocked status.""" + results = ledger.query_audit(session_id="sess1", status="blocked") + assert len(results) == 1 + assert results[0]["tool_name"] == "WebFetch" + + def test_query_by_category(self, ledger): + """Should filter by category.""" + results = ledger.query_audit(session_id="sess1", category="file_read") + assert len(results) == 2 + + def test_query_by_target_pattern(self, ledger): + """Should filter by target glob pattern.""" + results = ledger.query_audit(session_id="sess1", target_pattern="*.py") + assert len(results) == 1 + assert "main.py" in results[0]["target"] + + def test_query_combined_filters(self, ledger): + """Should combine multiple filters.""" + results = ledger.query_audit(session_id="sess1", tool="Bash", status="success") + assert len(results) == 1 + + def test_query_limit(self, ledger): + """Should respect limit.""" + results = ledger.query_audit(session_id="sess1", limit=2) + assert len(results) == 2 + + def test_query_empty_results(self, ledger): + """Should return empty for no matches.""" + results = 
ledger.query_audit(session_id="sess1", tool="NonexistentTool") + assert len(results) == 0 + + def test_get_audit_stats(self, ledger): + """Should return aggregated stats.""" + stats = ledger.get_audit_stats(session_id="sess1") + assert stats["total"] == 5 + assert stats["tool_counts"]["Read"] == 2 + assert stats["tool_counts"]["Bash"] == 2 + assert stats["status_counts"]["success"] == 3 + assert stats["category_counts"]["file_read"] == 2 + + +class TestAuditReporter: + """Tests for audit reporter.""" + + def make_record(self, **kwargs): + """Create a test audit record.""" + defaults = { + "audit_id": "audit:test", + "session_id": "sess1", + "sequence": 1, + "action_type": "tool_use", + "tool_name": "Read", + "target": "/foo/bar.ts", + "status": "success", + "timestamp": "2026-01-27T10:00:00Z", + "r6_data": '{"request": {"category": "file_read"}, "result": {"duration_ms": 10}}', + } + defaults.update(kwargs) + return defaults + + def make_records(self): + """Create test records.""" + return [ + self.make_record(tool_name="Read", status="success", timestamp="2026-01-27T10:00:00Z"), + self.make_record(tool_name="Read", status="success", timestamp="2026-01-27T10:00:30Z"), + self.make_record(tool_name="Bash", status="success", timestamp="2026-01-27T10:01:00Z", + r6_data='{"request": {"category": "command"}, "result": {"duration_ms": 50}}'), + self.make_record(tool_name="Bash", status="error", timestamp="2026-01-27T10:01:30Z", + r6_data='{"request": {"category": "command"}, "result": {"error_message": "exit 1"}}'), + self.make_record(tool_name="WebFetch", status="blocked", timestamp="2026-01-27T10:02:00Z", + r6_data='{"request": {"category": "network"}}'), + ] + + def test_empty_records(self): + """Should handle empty records.""" + reporter = AuditReporter([]) + report = reporter.generate() + assert report.total_records == 0 + assert report.time_range is None + assert report.tool_stats == [] + assert report.category_breakdown == [] + + def test_total_records(self): + 
"""Should compute correct total.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + assert report.total_records == 5 + + def test_time_range(self): + """Should compute time range.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + assert report.time_range is not None + assert "10:00:00" in report.time_range["from"] + assert "10:02:00" in report.time_range["to"] + + def test_tool_stats(self): + """Should aggregate per tool.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + bash = next(ts for ts in report.tool_stats if ts.tool == "Bash") + assert bash.invocations == 2 + assert bash.success_count == 1 + assert bash.error_count == 1 + + def test_tool_stats_success_rate(self): + """Should calculate success rate.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + read = next(ts for ts in report.tool_stats if ts.tool == "Read") + assert read.success_rate == 1.0 + + def test_tool_stats_sorted_by_invocations(self): + """Should sort by invocation count.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + # Read and Bash both have 2, but order may vary + assert report.tool_stats[0].invocations >= report.tool_stats[-1].invocations + + def test_category_breakdown(self): + """Should compute category breakdown.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + categories = {cb.category: cb.count for cb in report.category_breakdown} + assert categories["file_read"] == 2 + assert categories["command"] == 2 + assert categories["network"] == 1 + + def test_policy_stats(self): + """Should compute policy stats.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + assert report.policy_stats.total_evaluated == 5 + assert report.policy_stats.deny_count == 1 # blocked + assert report.policy_stats.allow_count == 4 + + def test_errors(self): + """Should aggregate 
errors.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + assert len(report.errors) == 1 + assert report.errors[0].tool == "Bash" + assert report.errors[0].count == 1 + + def test_timeline(self): + """Should bucket by minute.""" + reporter = AuditReporter(self.make_records()) + report = reporter.generate() + assert len(report.timeline) > 0 + # First minute should have 2 records + first_bucket = report.timeline[0] + assert first_bucket.count == 2 + + def test_format_text(self): + """Should produce text output.""" + reporter = AuditReporter(self.make_records()) + text = reporter.format_text() + assert "Audit Report" in text + assert "Tool Stats" in text + assert "Categories" in text + assert "Policy" in text + + def test_to_dict(self): + """Should convert to dict.""" + reporter = AuditReporter(self.make_records()) + d = reporter.to_dict() + assert d["total_records"] == 5 + assert isinstance(d["tool_stats"], list) + assert isinstance(d["category_breakdown"], list) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/web4-governance-issue.md b/web4-governance-issue.md new file mode 100644 index 0000000000..a99665590b --- /dev/null +++ b/web4-governance-issue.md @@ -0,0 +1,217 @@ +## Preflight Checklist + +- [x] I have searched existing requests and this feature hasn't been requested yet +- [x] This is a single feature request (not multiple features) + +## Problem Statement + +Claude Code agents currently operate with **zero governance or accountability infrastructure**. 
Every tool call (file edits, bash commands, network requests) executes immediately without: + +- **Audit trail** - No record of what the agent actually did, when, or why +- **Policy framework** - No way to gate dangerous operations before execution +- **Trust metrics** - No mechanism to track agent reliability over time +- **Provenance chain** - No cryptographic proof of action history + +This creates serious problems: + +**For enterprises**: Cannot deploy Claude Code in regulated environments (healthcare, finance, government) where audit trails are mandatory. No compliance story. + +**For teams**: Cannot answer "what did the agent change last week?" or "why did this file get modified?" No accountability when things go wrong. + +**For safety**: Destructive bash commands (`rm -rf`, `DROP TABLE`) execute without gates. No pre-action policy enforcement to prevent accidents. + +**For multi-agent systems**: When multiple agents or MCP servers interact, there's no trust framework. Every interaction starts from zero trust. + +The current model is "execute everything, ask questions never" - great for demos, problematic for production. 
+ +## Proposed Solution + +Add an **optional governance plugin** that provides: + +### R6 Audit Trail +Every tool call gets a structured record: +- **R**ules: Constraints that applied +- **R**ole: Session identity and context +- **R**equest: Tool name, category, target +- **R**eference: Chain position (hash-linked provenance) +- **R**esource: (Optional) cost/token estimates +- **R**esult: Status, output hash, duration + +Records form a hash-linked chain stored in `~/.web4/audit/{session}.jsonl`, enabling: +- Post-session review ("show me all file writes from yesterday") +- Tamper detection (break one link, verify entire chain fails) +- Compliance reporting (export for auditors) + +### Policy Engine (Pre-Action Gating) +Rule-based gates that evaluate **before** tool execution: +```python +policy = { + "rules": [ + {"match": {"tools": ["Bash"]}, "decision": "warn"}, + {"match": {"targetPatterns": ["rm -rf"]}, "decision": "deny"} + ], + "defaultPolicy": "allow", + "enforce": True # or False for dry-run mode +} +``` + +Decisions: `allow`, `warn`, `deny` +Built-in presets: permissive, safety, strict, audit-only + +### Trust Accumulation (T3 Tensors) +Track agent/MCP reliability across dimensions: +- Completeness, Consistency, Timeliness, Reliability, Relevance, Transparency + +Integrates with policy: "Allow network calls only if agent reliability > 0.7" + +### Implementation +- **Hook-based**: Uses existing `session_start`, `pre_tool_use`, `post_tool_use` hooks +- **Zero core changes**: Plugin lives in `plugins/web4-governance/` +- **Opt-in**: Disabled by default, enable via `.claude/settings.json` +- **SQLite storage**: `~/.web4/ledger.db` with WAL mode for concurrent sessions +- **Observational by default**: Records everything, blocks nothing (unless policy enforces) + +## Alternative Solutions + +**Current workarounds:** +- Manual git commits after every session (tedious, incomplete) +- Wrapper scripts that log commands (misses file operations, breaks interactivity) 
+- "Trust but verify" - inspect git diffs after the fact (too late if damage done) +- Don't use Claude Code in production (limits adoption) + +**Why those don't work:** +- No structured format for replay/analysis +- No pre-action policy enforcement +- No cryptographic proof of integrity +- Can't answer "what would have happened?" for policy dry-runs + +**Other tools:** +- Traditional audit logs (syslog, etc.) - not agent-aware, miss intent +- Git history - only captures committed changes, not attempted actions +- IDE activity logs - don't understand agent semantics + +None capture **agent intent** (the "why") alongside action and result. + +## Priority + +**High - Significant impact on productivity** + +Blocks enterprise adoption. Teams want Claude Code but can't deploy without governance story. + +## Feature Category + +**Configuration and settings** + +Also touches: CLI commands, Hooks, Developer tools/SDK + +## Use Case Example + +### Scenario 1: Enterprise Compliance (Healthcare Startup) + +**Context**: Medical software company wants to use Claude Code for refactoring. HIPAA requires audit trails of all data access. + +**Without governance:** +- Cannot deploy - no audit trail +- Manual logging insufficient for compliance +- Forced to use other tools + +**With governance:** +1. Enable web4-governance plugin with `audit-only` preset +2. Developers use Claude Code normally +3. Audit queries: `ledger.query_audit(tool="Read", targetPattern="*/patient/*")` +4. Export audit trail for compliance review +5. Demonstrate to auditors: cryptographically verified action history + +**Impact**: Unlocks Claude Code for regulated industries. + +### Scenario 2: Team Debugging (Open Source Project) + +**Context**: Junior dev's Claude Code session went wrong - production configs overwritten. + +**Without governance:** +- "I don't know what happened" +- Git shows final state, not agent's intent +- Team loses trust in AI tools + +**With governance:** +1. 
Review audit trail: `ledger.query_audit(session_id="abc123", tool="Write")` +2. See exact sequence: agent misunderstood context, wrote staging config to prod +3. Reconstruct decision tree from R6 records +4. Update policy: `deny writes to */config/prod/* without confirmation` +5. Share learnings, improve prompts + +**Impact**: Transforms failures into learning opportunities. + +### Scenario 3: Multi-Agent Federation (Advanced Usage) + +**Context**: Multiple Claude Code sessions + MCP servers collaborating on codebase. + +**Without governance:** +- No trust metrics between agents +- Every agent starts from zero trust +- Race conditions from concurrent edits +- Can't answer "which agent changed this?" + +**With governance:** +1. Each agent/MCP builds trust history (T3 tensors) +2. High-trust agents get broader permissions +3. Witnessing: agents observe each other's actions +4. Audit trails show "Agent A called MCP B, which modified file C" +5. Policy: "Only agents with consistency > 0.8 can edit core files" + +**Impact**: Enables safe multi-agent systems. + +## Additional Context + +### Working Implementation + +I've built a complete working version as a PR: **#20448** +- 75+ passing tests +- SQLite ledger with WAL mode +- Hook integration (session_start, pre_tool_use, post_tool_use) +- Policy presets, rate limiting, audit query/reporting +- PolicyEntity (policies as first-class trust participants) + +Code is ready to review/merge if this aligns with Claude Code's vision. + +### Why This Matters Now + +AI agents are moving from demos to production. Claude Code is ahead of the curve in functionality, but **governance is the missing piece** for serious adoption. + +Every other agent framework will add governance eventually. Claude Code can lead by: +1. Making it optional (doesn't slow down existing users) +2. Making it extensible (community can build policy presets) +3. 
Making it open (not black-box enterprise-only feature) + +### Trust-Native Computing + +This implements concepts from "Web4" - trust-native internet infrastructure: +- LCTs (Linked Context Tokens): Session identity +- R6 framework: Structured intent capture +- T3 tensors: Multi-dimensional trust +- Witnessing chains: Cryptographic provenance + +Not asking you to adopt the "Web4" branding - just showing this is part of a larger architectural vision for AI-native trust infrastructure. + +### Questions for Maintainers + +1. **Interest**: Does governance/audit functionality align with Claude Code's roadmap? +2. **Scope**: Should this be core feature, official plugin, or community plugin? +3. **Approach**: Any concerns with the hook-based implementation? +4. **Naming**: "web4-governance" or something more generic like "audit-policy"? + +Happy to: +- Adapt the implementation to your preferences +- Break into smaller incremental PRs +- Write documentation/examples +- Maintain as official or community plugin + +### Links + +- PR #20448: https://github.com/anthropics/claude-code/pull/20448 +- Web4 framework: https://github.com/dp-web4/web4 +- Working demo/tests in PR + +--- + +*Note: "web4" used generically to describe trust-native, cryptographically-accountable infrastructure for the AI agent era. Not a trademark claim.*