diff --git a/features/qvl/README.md b/features/qvl/README.md
new file mode 100644
index 0000000..bc1c699
--- /dev/null
+++ b/features/qvl/README.md
@@ -0,0 +1,92 @@
# QVL BDD Test Suite

## Overview
This directory contains Gherkin feature specifications for the Quasar Vector Lattice (QVL), the L1 trust graph engine.

**Status:** Sprint 0 — Specification Complete
**Next:** Implement step definitions in Zig

---

## Feature Files

| Feature | Scenarios | Purpose |
|---------|-----------|---------|
| `trust_graph.feature` | 7 | Core graph operations (add/remove/query edges) |
| `betrayal_detection.feature` | 7 | Bellman-Ford negative cycle detection |
| `pathfinding.feature` | 11 | A* reputation-guided pathfinding |
| `gossip_protocol.feature` | 11 | Aleph-style probabilistic flooding |
| `belief_propagation.feature` | 9 | Bayesian inference over trust DAG |
| `pop_reputation.feature` | 16 | PoP verification + reputation scoring |

**Total:** 61 scenarios covering all QVL functionality

---

## Key Testing Principles

### Kenya Rule Compliance
Every feature includes performance scenarios:
- Memory usage < 10MB
- Execution time benchmarks for O(|V|×|E|) algorithms
- Bandwidth limits for gossip

### Security Coverage
- Betrayal detection (negative cycles)
- Eclipse attack resilience
- Replay protection (entropy stamps)
- Signature verification

### Integration Points
- PoP (Proof-of-Path) verification
- Reputation decay over time
- RiskGraph → CompactTrustGraph mapping

---

## Running Tests

### Future: Zig Implementation
```bash
# Run all QVL tests
zig build test-qvl

# Run specific feature
zig build test -- --feature betrayal_detection

# Run with coverage
zig build test-qvl-coverage
```

### Current: Documentation Phase
These features serve as:
1. **Specification** — What QVL should do
2. **Acceptance Criteria** — When we're done
3. **Documentation** — How it works
4. **Test Template** — For Zig implementation (see the sketch below)
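As a rough illustration of item 4, a Zig step definition for the first `trust_graph.feature` scenario might look like the sketch below. The `RiskGraph`/`RiskEdge` usage is taken from `storage.zig` in this same changeset; the import path and the direct `addEdge` call are assumptions standing in for a real step-definition harness.

```zig
const std = @import("std");
// Assumed import path; in-tree this is l1-identity/qvl/types.zig.
const types = @import("qvl/types.zig");

test "trust_graph: add trust edge between two nodes" {
    const allocator = std.testing.allocator;

    // Given a new QVL database is initialized (in-memory RiskGraph here)
    var graph = types.RiskGraph.init(allocator);
    defer graph.deinit();

    // When "alice" (node 0) grants trust level 3 to "bob" (node 1).
    // The field layout mirrors storage.zig; a higher-level grantTrust()
    // wrapper is hypothetical and not shown in this changeset.
    try graph.addEdge(.{
        .from = 0,
        .to = 1,
        .level = 3,
        .risk = -0.3, // level 3 maps to risk -0.3 per trust_graph.feature
        .timestamp = 1234567890,
        .nonce = 0,
        .expires_at = 1234567890 + 86400,
    });

    // Then the graph should contain an edge from "alice" to "bob".
    // RiskGraph's query API is not shown in this diff, so the assertion
    // is left as a placeholder:
    // try std.testing.expect(graph.getEdge(0, 1) != null);
}
```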
---

## GQL Integration (Future)

When the GQL parser is implemented:
```gherkin
Scenario: GQL query for trust path
  When I execute GQL "MATCH (a:Identity)-[t:TRUST*1..3]->(b:Identity) WHERE a.did = 'did:alice' RETURN b"
  Then I should receive reachable nodes within 3 hops
```

---

## Related Documentation

- `../l1-identity/qvl/` — Implementation (Zig)
- `../../docs/L4-hybrid-schema.md` — L4 Feed schema
- RFC-0120 — QVL Specification

---

**Maintainer:** Frankie (Silicon Architect)
**Last Updated:** 2026-02-03

⚡️
diff --git a/features/qvl/belief_propagation.feature b/features/qvl/belief_propagation.feature
new file mode 100644
index 0000000..b102c9f
--- /dev/null
+++ b/features/qvl/belief_propagation.feature
@@ -0,0 +1,78 @@
Feature: Loopy Belief Propagation
  As a Libertaria node under eclipse attack
  I need Bayesian inference over the trust DAG
  So that I can estimate trust under uncertainty and detect anomalies

  Background:
    Given a trust graph with partial visibility:
      | from  | to      | observed | prior_trust |
      | alice | bob     | true     | 0.6         |
      | bob   | charlie | false    | unknown     |
      | alice | dave    | true     | 0.8         |

  # Belief Propagation Core
  Scenario: Propagate beliefs through observed edges
    When I run Belief Propagation from "alice"
    Then the belief for "bob" should converge to ~0.6
    And the belief for "alice" should be 1.0 (self-trust)

  Scenario: Infer unobserved edge from network structure
    Given "alice" trusts "bob" (0.6)
    And "bob" is likely to trust "charlie" (transitivity)
    When I run BP with max_iterations 100
    Then the belief for "charlie" should be > 0.5
    And the belief for "charlie" should be < 0.6 (less certain than direct observation)

  Scenario: Convergence detection
    When I run BP with epsilon 1e-6
    Then the algorithm should stop when max belief delta < epsilon
    And the converged flag should be true
    And iterations should be < max_iterations

  Scenario: Non-convergence handling
    Given a graph with oscillating beliefs (bipartite structure)
    When I run BP with damping 0.5
    Then the algorithm should force convergence via damping
    But it should report non-convergence if max_iterations is reached

  # Anomaly Scoring
  Scenario: Anomaly from BP divergence
    Given a node with belief 0.9 from one path
    And belief 0.1 from another path (conflict)
    When BP converges
    Then the anomaly score should be high (> 0.7)
    And the reason should be "bp_divergence"

  Scenario: Eclipse attack detection
    Given an adversary controls 90% of observed edges to "victim"
    And the adversary reports uniformly positive trust
    When BP runs with honest nodes as priors
    Then the victim's belief should remain moderate (not extreme)
    And the coverage metric should indicate "potential_eclipse"

  # Damping and Stability
  Scenario Outline: Damping factor effects
    Given a graph prone to oscillation
    When I run BP with damping <damping>
    Then convergence should occur in <iterations> iterations

    Examples:
      | damping | iterations |
      | 0.0     | > 100      |
      | 0.5     | ~50        |
      | 0.9     | ~20        |

  # Integration with Bellman-Ford
  Scenario: BP complements negative cycle detection
    Given a graph with a near-negative-cycle (ambiguous betrayal)
    When Bellman-Ford is inconclusive
    And BP reports high anomaly for involved nodes
    Then the combined evidence suggests investigation

  # Performance Constraints
  Scenario: BP complexity
    Given a graph with 1000 nodes and 5000 edges
    When I run BP with epsilon 1e-6
    Then convergence should occur within 50 iterations
    And total time should be < 100ms
    And memory should be O(|V| + |E|)
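A minimal sketch of the damped update and epsilon-based convergence check these scenarios describe. The flat `beliefs`/`incoming` slices are an assumption for illustration; a real implementation would compute messages over the trust DAG.

```zig
const std = @import("std");

/// One damped belief-propagation sweep: keep `damping` of the old belief,
/// blend in (1 - damping) of the incoming message. Returns the largest
/// belief change, which drives the epsilon convergence test.
fn bpSweep(beliefs: []f64, incoming: []const f64, damping: f64) f64 {
    var max_delta: f64 = 0.0;
    for (beliefs, incoming) |*b, msg| {
        const updated = damping * b.* + (1.0 - damping) * msg;
        max_delta = @max(max_delta, @abs(updated - b.*));
        b.* = updated;
    }
    return max_delta;
}

fn runBp(
    beliefs: []f64,
    incoming: []const f64,
    damping: f64,
    epsilon: f64,
    max_iterations: u32,
) struct { converged: bool, iterations: u32 } {
    var i: u32 = 0;
    while (i < max_iterations) : (i += 1) {
        // Converged once the largest belief change falls below epsilon.
        if (bpSweep(beliefs, incoming, damping) < epsilon)
            return .{ .converged = true, .iterations = i + 1 };
    }
    // Non-convergence is reported, not hidden (see scenario above).
    return .{ .converged = false, .iterations = max_iterations };
}
```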
diff --git a/features/qvl/betrayal_detection.feature b/features/qvl/betrayal_detection.feature
new file mode 100644
index 0000000..7f94b99
--- /dev/null
+++ b/features/qvl/betrayal_detection.feature
@@ -0,0 +1,82 @@
Feature: Bellman-Ford Betrayal Detection
  As a Libertaria security node
  I need to detect negative cycles in the trust graph
  So that I can identify collusion rings and betrayal patterns

  Background:
    Given a QVL database with the following trust edges:
      | from    | to      | level | risk |
      | alice   | bob     | 3     | -0.3 |
      | bob     | charlie | 3     | -0.3 |
      | charlie | alice   | -7    | 1.0  |

  # Negative Cycle Detection
  # Note: these scenarios assume Bellman-Ford runs on edge weight = -risk,
  # so cycles dominated by betrayal edges (positive risk) sum negative.
  Scenario: Detect simple negative cycle (betrayal ring)
    When I run Bellman-Ford from "alice"
    Then a negative cycle should be detected
    And the cycle should contain nodes: "alice", "bob", "charlie"
    And the anomaly score should be 1.0 (critical)

  Scenario: No cycle in legitimate trust chain
    Given a QVL database with the following trust edges:
      | from    | to      | level | risk |
      | alice   | bob     | 3     | -0.3 |
      | bob     | charlie | 3     | -0.3 |
      | charlie | dave    | 3     | -0.3 |
    When I run Bellman-Ford from "alice"
    Then no negative cycle should be detected
    And the anomaly score should be 0.0

  Scenario: Multiple betrayal cycles
    Given a QVL database with the following trust edges:
      | from    | to      | level | risk |
      | alice   | bob     | -5    | 0.5  |
      | bob     | alice   | -5    | 0.5  |
      | charlie | dave    | -5    | 0.5  |
      | dave    | charlie | -5    | 0.5  |
    When I run Bellman-Ford from "alice"
    Then 2 negative cycles should be detected
    And cycle 1 should contain: "alice", "bob"
    And cycle 2 should contain: "charlie", "dave"

  # Evidence Generation
  Scenario: Generate cryptographic evidence of betrayal
    Given a negative cycle has been detected:
      | node    | risk |
      | alice   | -0.3 |
      | bob     | -0.3 |
      | charlie | 1.0  |
    When I generate evidence for the cycle
    Then the evidence should be a byte array
    And the evidence version should be 0x01
    And the evidence should contain all 3 node IDs
    And the evidence should contain all risk scores
    And the evidence hash should be deterministic

  Scenario: Evidence serialization format
    When I generate evidence for a cycle with nodes "alice", "bob"
    Then the evidence format should be:
      """
      version(1 byte) + cycle_len(4 bytes) +
      [node_id(4 bytes) + risk(8 bytes)]...
      """

  # Performance Constraints (Kenya Rule)
  Scenario Outline: Bellman-Ford complexity with graph size
    Given a graph with <nodes> nodes and <edges> edges
    When I run Bellman-Ford
    Then the execution time should be less than <time_ms> milliseconds
    And the memory usage should be less than 10MB

    Examples:
      | nodes | edges | time_ms |
      | 100   | 500   | 50      |
      | 1000  | 5000  | 500     |
      | 10000 | 50000 | 5000    |

  # Early Exit Optimization
  Scenario: Early exit when no improvements possible
    Given a graph where no edges can be relaxed after pass 3
    When I run Bellman-Ford
    Then the algorithm should exit after pass 3
    And not run all |V|-1 passes
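A sketch of the negative-cycle scan with the early-exit optimization from the last scenario. The weight = -risk convention follows the note added above and is an assumption; the all-zero distance initialization is the standard super-source trick so every negative cycle stays reachable.

```zig
const std = @import("std");

const Edge = struct { from: u32, to: u32, risk: f64 };

/// Bellman-Ford negative-cycle detection. Stops as soon as a full pass
/// relaxes nothing, instead of always running |V|-1 passes.
fn hasNegativeCycle(allocator: std.mem.Allocator, node_count: u32, edges: []const Edge) !bool {
    const dist = try allocator.alloc(f64, node_count);
    defer allocator.free(dist);
    @memset(dist, 0.0); // implicit super-source: all nodes start reachable

    var pass: u32 = 0;
    while (pass + 1 < node_count) : (pass += 1) {
        var improved = false;
        for (edges) |e| {
            const w = -e.risk; // betrayal (positive risk) becomes negative weight
            if (dist[e.from] + w < dist[e.to]) {
                dist[e.to] = dist[e.from] + w;
                improved = true;
            }
        }
        if (!improved) return false; // early exit: nothing left to relax
    }
    // One extra pass: any further relaxation proves a negative cycle.
    for (edges) |e| {
        if (dist[e.from] + -e.risk < dist[e.to]) return true;
    }
    return false;
}
```

Extracting the actual cycle members (for evidence generation) would additionally track predecessor links; that bookkeeping is omitted here.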
+ """ + + # Performance Constraints (Kenya Rule) + Scenario Outline: Bellman-Ford complexity with graph size + Given a graph with nodes and edges + When I run Bellman-Ford + Then the execution time should be less than milliseconds + And the memory usage should be less than 10MB + + Examples: + | nodes | edges | time_ms | + | 100 | 500 | 50 | + | 1000 | 5000 | 500 | + | 10000 | 50000 | 5000 | + + # Early Exit Optimization + Scenario: Early exit when no improvements possible + Given a graph where no edges can be relaxed after pass 3 + When I run Bellman-Ford + Then the algorithm should exit after pass 3 + And not run all |V|-1 passes diff --git a/features/qvl/gossip_protocol.feature b/features/qvl/gossip_protocol.feature new file mode 100644 index 0000000..e8b9cb5 --- /dev/null +++ b/features/qvl/gossip_protocol.feature @@ -0,0 +1,93 @@ +Feature: Aleph-Style Gossip Protocol + As a Libertaria node in a partitioned network + I need probabilistic message flooding with DAG references + So that trust signals propagate despite intermittent connectivity + + Background: + Given a network of 5 nodes: alpha, beta, gamma, delta, epsilon + And each node has initialized gossip state + And the erasure tolerance parameter k = 3 + + # Gossip Message Structure + Scenario: Create gossip message with DAG references + Given node "alpha" has received messages with IDs [100, 101, 102] + When "alpha" creates a gossip message of type "trust_vouch" + Then the message should reference k=3 prior messages + And the message ID should be computed from (sender + entropy + payload) + And the message should have an entropy stamp + + Scenario: Gossip message types + When I create a gossip message of type "" + Then the message type code should be + + Examples: + | type | code | + | trust_vouch | 0 | + | trust_revoke | 1 | + | reputation_update | 2 | + | heartbeat | 3 | + + # Probabilistic Flooding + Scenario: Message propagation probability + Given node "alpha" broadcasts a gossip message + When the message reaches "beta" + Then "beta" should forward with probability p = 0.7 + And the expected coverage after 3 hops should be > 80% + + Scenario: Duplicate detection via message ID + Given node "beta" has seen message ID 12345 + When "beta" receives message ID 12345 again + Then "beta" should not forward the duplicate + And "beta" should update the seen timestamp + + # DAG Structure and Partition Detection + Scenario: Build gossip DAG + Given the following gossip sequence: + | sender | refs | + | alpha | [] | + | beta | [alpha:1] | + | gamma | [alpha:1, beta:1] | + Then the DAG should have 3 nodes + And "gamma" should have 2 incoming edges + And the DAG depth should be 2 + + Scenario: Detect network partition via coverage + Given the network has partitioned into [alpha, beta] and [gamma, delta] + When "alpha" tracks gossip coverage + And messages from "alpha" fail to reach "gamma" for 60 seconds + Then "alpha" should report "low_coverage" anomaly + And the anomaly score should be > 0.7 + + Scenario: Heal partition upon reconnection + Given a partition exists between [alpha, beta] and [gamma] + When the partition heals and "beta" reconnects to "gamma" + Then "beta" should sync missing gossip messages + And "gamma" should acknowledge receipt + And the coverage anomaly should resolve + + # Entropy and Replay Protection + Scenario: Entropy stamp ordering + Given message A with entropy 1000 + And message B with entropy 2000 + Then message B is newer than message A + And a node should reject messages with entropy < last_seen - window 
diff --git a/features/qvl/pathfinding.feature b/features/qvl/pathfinding.feature
new file mode 100644
index 0000000..c62bbf1
--- /dev/null
+++ b/features/qvl/pathfinding.feature
@@ -0,0 +1,83 @@
Feature: A* Trust Pathfinding
  As a Libertaria agent
  I need to find reputation-guided paths through the trust graph
  So that I can verify trust relationships efficiently

  Background:
    Given a QVL database with the following trust topology:
      | from  | to      | level | risk | reputation |
      | alice | bob     | 3     | -0.3 | 0.8        |
      | bob   | charlie | 3     | -0.3 | 0.7        |
      | alice | dave    | 3     | -0.3 | 0.9        |
      | dave  | charlie | 3     | -0.3 | 0.6        |
      | bob   | eve     | 3     | -0.3 | 0.2        |

  # Basic Pathfinding
  Scenario: Find shortest trust path
    When I search for a path from "alice" to "charlie"
    Then the path should be: "alice" → "bob" → "charlie"
    And the total cost should be approximately 0.6

  Scenario: No path exists
    When I search for a path from "alice" to "frank"
    Then the path should be null
    And the result should indicate "no path found"

  Scenario: Direct path preferred over indirect
    Given "alice" has direct trust level 7 to "charlie"
    When I search for a path from "alice" to "charlie"
    Then the path should be: "alice" → "charlie"
    And the path length should be 1

  # Reputation-Guided Pathfinding
  Scenario: Reputation heuristic penalizes low-reputation edges
    When I search for a path from "alice" to "eve"
    Then the path should be: "alice" → "bob" → "eve"
    And the algorithm should penalize the "bob" → "eve" edge for its low reputation (0.2)

  Scenario: Zero heuristic reduces A* to Dijkstra
    When I search with a zero heuristic from "alice" to "charlie"
    Then the result should be optimal (guaranteed shortest path)
    But the search should expand more nodes than with the reputation heuristic

  # Path Verification
  Scenario: Verify constructed path
    Given a path: "alice" → "bob" → "charlie"
    When I verify the path against the graph
    Then each edge in the path should exist
    And no edge should be expired
    And the path verification should succeed

  Scenario: Verify path with expired edge
    Given a path: "alice" → "bob" → "charlie"
    And the edge "bob" → "charlie" has expired
    When I verify the path
    Then the verification should fail
    And the error should indicate "expired edge at hop 2"

  # Proof-of-Path
  Scenario: Generate Proof-of-Path bundle
    Given a valid path: "alice" → "bob" → "charlie"
    When I generate a Proof-of-Path
    Then the PoP should contain all edge signatures
    And the PoP should be verifiable by any node
    And the PoP should have a timestamp and entropy stamp

  Scenario: Verify Proof-of-Path
    Given a Proof-of-Path from "alice" to "charlie"
    When any node verifies the PoP
    Then the verification should succeed if all signatures are valid
    And the verification should fail if any signature is invalid

  # Path Constraints
  Scenario: Maximum path depth
    Given the shortest path from "alice" to "charlie" requires 3 hops
    When I search for a path with max_depth 2 from "alice" to "charlie"
    Then the search should return null
    And indicate "max depth exceeded"

  Scenario: Minimum trust threshold
    Given all edges have level 3
    When I search for a path with minimum_trust_level 5
    Then no path should be found
    And the result should indicate "trust threshold not met"
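A sketch of the cost model these scenarios imply. Both formulas are assumptions, not the implementation: edge cost grows as reputation falls (so the search penalizes low-reputation edges), and the heuristic is kept small and optimistic so it never overestimates the remaining cost, which is the admissibility guarantee the `pop_reputation.feature` scenarios require.

```zig
const std = @import("std");

/// Edge cost for A*: |risk| as the base, scaled up when the edge's
/// reputation is low. With reputation 1.0 the cost is just |risk|;
/// at reputation 0.2 it is 1.8x that (hypothetical scaling).
fn edgeCost(risk: f64, reputation: f64) f64 {
    return @abs(risk) * (2.0 - reputation);
}

/// Reputation-guided heuristic. Returning 0 for unknown nodes keeps the
/// estimate admissible and degenerates A* to Dijkstra, as the "zero
/// heuristic" scenario specifies.
fn reputationHeuristic(known_reputation: ?f64) f64 {
    const rep = known_reputation orelse return 0.0;
    // Deliberately small so it stays below any real remaining path cost.
    return 0.1 * (1.0 - rep);
}
```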
diff --git a/features/qvl/pop_reputation.feature b/features/qvl/pop_reputation.feature
new file mode 100644
index 0000000..a569a95
--- /dev/null
+++ b/features/qvl/pop_reputation.feature
@@ -0,0 +1,117 @@
Feature: Proof-of-Path Integration with Reputation
  As a Libertaria security validator
  I need to verify trust paths cryptographically
  And maintain reputation scores based on verification history
  So that trust decay reflects actual behavior

  Background:
    Given a QVL database with established trust edges
    And a reputation map for all nodes

  # Reputation Scoring
  Scenario: Initial neutral reputation
    Given a new node "frank" joins the network
    Then "frank"'s reputation score should be 0.5 (neutral)
    And total_checks should be 0

  Scenario: Reputation increases with successful verification
    When node "alice" sends a PoP that verifies successfully
    Then "alice"'s reputation should increase
    And the increase should be damped (not an immediate jump to 1.0)
    And successful_checks should increment

  Scenario: Reputation decreases with failed verification
    When node "bob" sends a PoP that fails verification
    Then "bob"'s reputation should decrease
    And the decrease should be faster than increases (asymmetry)
    And total_checks should increment

  Scenario: Bayesian reputation update formula
    Given "charlie" has reputation 0.6 after 10 checks
    When a new verification succeeds
    Then the update should be: score = 0.7*0.6 + 0.3*(10/11)
    And the new score should be approximately 0.693

  # Reputation Decay
  Scenario: Time-based reputation decay
    Given "alice" has reputation 0.8 from verification at time T
    When half_life time passes without new verification
    Then "alice"'s reputation should decay to ~0.4
    When another half_life passes
    Then reputation should decay to ~0.2

  Scenario: Decay stops at minimum threshold
    Given "bob" has reputation 0.1 (low but not zero)
    When significant time passes
    Then "bob"'s reputation should not go below 0.05 (floor)

  # PoP Verification Flow
  Scenario: Successful PoP verification
    Given a valid Proof-of-Path from "alice" to "charlie"
    When I verify against the expected receiver and sender
    Then the verdict should be "valid"
    And "alice"'s reputation should increase
    And the verification should be logged with an entropy stamp

  Scenario: Broken link in PoP
    Given a PoP with an edge that no longer exists
    When I verify the PoP
    Then the verdict should be "broken_link"
    And the specific broken edge should be identified
    And "alice"'s reputation should decrease

  Scenario: Expired edge in PoP
    Given a PoP containing an expired trust edge
    When I verify the PoP
    Then the verdict should be "expired"
    And the expiration timestamp should be reported

  Scenario: Invalid signature in PoP
    Given a PoP with a tampered signature
    When I verify the PoP
    Then the verdict should be "invalid_signature"
    And "alice"'s reputation should decrease significantly

  # A* Heuristic Integration
  Scenario: Reputation-guided pathfinding
    Given "alice" has reputation 0.9
    And "bob" has reputation 0.3
    When searching for a path through either node
    Then the algorithm should prefer "alice" (higher reputation)
    And the path cost through "alice" should be lower

  Scenario: Admissible heuristic guarantee
    Given any reputation configuration
    When using reputationHeuristic for A*
    Then the heuristic should never overestimate the true cost
    And A* optimality should be preserved

  # Low Reputation Handling
  Scenario: Identify low-reputation nodes
    Given nodes with reputations:
      | node    | reputation |
      | alice   | 0.9        |
      | bob     | 0.2        |
      | charlie | 0.1        |
    When I query for nodes below threshold 0.3
    Then I should receive ["bob", "charlie"]

  Scenario: Quarantine trigger
    Given "mallory" has reputation < 0.2 after 10+ checks
    And the low-reputation threshold is 0.2
    Then "mallory" should be flagged for quarantine review
    And future PoPs from "mallory" should face extra scrutiny

  # Bulk Operations
  Scenario: Decay all reputations periodically
    Given 1000 nodes with various last_verified times
    When the daily decay job runs
    Then all reputations should be updated based on time since last verification
    And the operation should complete in < 100ms

  Scenario: Populate RiskGraph from reputation
    Given a CompactTrustGraph with raw trust levels
    And a ReputationMap with scores
    When I populate the RiskGraph
    Then each edge risk should be calculated as (1 - reputation)
    And the RiskGraph should be ready for Bellman-Ford
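The reputation arithmetic above is concrete enough to sketch directly. The 0.7/0.3 blend, the half-life halving, and the 0.05 floor come straight from the scenarios; the struct shape is an assumption, and the asymmetric (faster) penalty for failures is omitted for brevity.

```zig
const std = @import("std");

const Reputation = struct {
    score: f64 = 0.5, // new nodes start neutral
    successful_checks: u32 = 0,
    total_checks: u32 = 0,

    /// Damped blend of old score and observed success ratio,
    /// e.g. 0.7*0.6 + 0.3*(10/11) ≈ 0.693 per the formula scenario.
    fn recordCheck(self: *Reputation, success: bool) void {
        self.total_checks += 1;
        if (success) self.successful_checks += 1;
        const ratio = @as(f64, @floatFromInt(self.successful_checks)) /
            @as(f64, @floatFromInt(self.total_checks));
        self.score = 0.7 * self.score + 0.3 * ratio;
    }

    /// Exponential decay: the score halves every half_life without a new
    /// verification, clamped at the 0.05 floor.
    fn decayed(self: Reputation, elapsed: f64, half_life: f64) f64 {
        const factor = std.math.pow(f64, 0.5, elapsed / half_life);
        return @max(0.05, self.score * factor);
    }
};
```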
Given "alice" has reputation 0.9 + And "bob" has reputation 0.3 + When searching for a path through either node + Then the algorithm should prefer "alice" (higher reputation) + And the path cost through "alice" should be lower + + Scenario: Admissible heuristic guarantee + Given any reputation configuration + When using reputationHeuristic for A* + Then the heuristic should never overestimate true cost + And A* optimality should be preserved + + # Low Reputation Handling + Scenario: Identify low-reputation nodes + Given nodes with reputations: + | node | reputation | + | alice | 0.9 | + | bob | 0.2 | + | charlie | 0.1 | + When I query for nodes below threshold 0.3 + Then I should receive ["bob", "charlie"] + + Scenario: Quarantine trigger + Given "mallory" has reputation < 0.2 after 10+ checks + When the low-reputation threshold is 0.2 + Then "mallory" should be flagged for quarantine review + And future PoPs from "mallory" should be extra scrutinized + + # Bulk Operations + Scenario: Decay all reputations periodically + Given 1000 nodes with various last_verified times + When the daily decay job runs + Then all reputations should be updated based on time since last verification + And the operation should complete in < 100ms + + Scenario: Populate RiskGraph from reputation + Given a CompactTrustGraph with raw trust levels + And a ReputationMap with scores + When I populate the RiskGraph + Then each edge risk should be calculated as (1 - reputation) + And the RiskGraph should be ready for Bellman-Ford diff --git a/features/qvl/trust_graph.feature b/features/qvl/trust_graph.feature new file mode 100644 index 0000000..5c9b082 --- /dev/null +++ b/features/qvl/trust_graph.feature @@ -0,0 +1,63 @@ +Feature: QVL Trust Graph Core + As a Libertaria node operator + I need to manage trust relationships in a graph + So that I can establish verifiable trust paths between agents + + Background: + Given a new QVL database is initialized + And the following DIDs are registered: + | did | alias | + | did:alice:123 | alice | + | did:bob:456 | bob | + | did:charlie:789 | charlie | + + # RiskGraph Basic Operations + Scenario: Add trust edge between two nodes + When "alice" grants trust level 3 to "bob" + Then the graph should contain an edge from "alice" to "bob" + And the edge should have trust level 3 + And "bob" should be in "alice"'s outgoing neighbors + + Scenario: Remove trust edge + Given "alice" has granted trust to "bob" + When "alice" revokes trust from "bob" + Then the edge from "alice" to "bob" should not exist + And "bob" should not be in "alice"'s outgoing neighbors + + Scenario: Query incoming trust edges + Given "alice" has granted trust to "charlie" + And "bob" has granted trust to "charlie" + When I query incoming edges for "charlie" + Then I should receive 2 edges + And the edges should be from "alice" and "bob" + + Scenario: Trust edge with TTL expiration + When "alice" grants trust level 5 to "bob" with TTL 86400 seconds + Then the edge should have an expiration timestamp + And the edge should be valid immediately + When 86401 seconds pass + Then the edge should be expired + And querying the edge should return null + + # RiskEdge Properties + Scenario Outline: Risk score calculation from trust level + When "alice" grants trust level to "bob" + Then the risk score should be + + Examples: + | level | risk | + | 7 | -1.0 | + | 3 | -0.3 | + | 0 | 0.0 | + | -3 | 0.3 | + | -7 | 1.0 | + + Scenario: Edge metadata includes entropy stamp + When "alice" grants trust to "bob" at entropy 1234567890 + 
diff --git a/l1-identity/qvl.zig b/l1-identity/qvl.zig
index 0d2b48a..b3bb5dd 100644
--- a/l1-identity/qvl.zig
+++ b/l1-identity/qvl.zig
@@ -14,10 +14,12 @@
 pub const pathfinding = @import("qvl/pathfinding.zig");
 pub const gossip = @import("qvl/gossip.zig");
 pub const inference = @import("qvl/inference.zig");
 pub const pop = @import("qvl/pop_integration.zig");
+pub const storage = @import("qvl/storage.zig");
 
 pub const RiskEdge = types.RiskEdge;
 pub const NodeId = types.NodeId;
 pub const AnomalyScore = types.AnomalyScore;
+pub const PersistentGraph = storage.PersistentGraph;
 
 test {
     @import("std").testing.refAllDecls(@This());
diff --git a/l1-identity/qvl/storage.zig b/l1-identity/qvl/storage.zig
new file mode 100644
index 0000000..ca712bd
--- /dev/null
+++ b/l1-identity/qvl/storage.zig
@@ -0,0 +1,380 @@
//! QVL Persistent Storage Layer
//!
//! libmdbx backend for RiskGraph with Kenya Rule compliance:
//! - Single-file embedded database
//! - Memory-mapped I/O (kernel-optimized)
//! - ACID transactions
//! - <10MB RAM footprint

const std = @import("std");
const types = @import("types.zig");

// NOTE: assumed binding. The code below uses the classic MDB_* API surface
// and presumes a Zig wrapper module exposing libmdbx (or LMDB) with
// error-union returns for the calls used with `try`.
const lmdb = @import("lmdb");

const NodeId = types.NodeId;
const RiskEdge = types.RiskEdge;
const RiskGraph = types.RiskGraph;

/// Database environment configuration
pub const DBConfig = struct {
    /// Max readers (concurrent)
    max_readers: u32 = 64,
    /// Max databases (tables)
    max_dbs: u32 = 8,
    /// Map size (file size limit)
    map_size: usize = 10 * 1024 * 1024, // 10MB Kenya Rule
    /// Page size (4KB optimal for SSD)
    page_size: u32 = 4096,
};

/// Persistent graph storage using libmdbx
pub const PersistentGraph = struct {
    env: *lmdb.MDB_env,
    dbi_nodes: lmdb.MDB_dbi,
    dbi_edges: lmdb.MDB_dbi,
    dbi_out: lmdb.MDB_dbi, // forward adjacency: from -> to
    dbi_in: lmdb.MDB_dbi, // reverse adjacency: to -> from
    dbi_metadata: lmdb.MDB_dbi,
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Open or create persistent graph database
    pub fn open(path: [:0]const u8, config: DBConfig, allocator: std.mem.Allocator) !Self {
        var env: *lmdb.MDB_env = undefined;

        // Initialize environment
        try lmdb.mdb_env_create(&env);
        errdefer lmdb.mdb_env_close(env);

        // Set limits
        try lmdb.mdb_env_set_maxreaders(env, config.max_readers);
        try lmdb.mdb_env_set_maxdbs(env, config.max_dbs);
        try lmdb.mdb_env_set_mapsize(env, config.map_size);

        // Open environment. NOSUBDIR keeps it a single file (per the Kenya
        // Rule note above); NOSYNC/NOMETASYNC trade crash durability for speed.
        const flags = lmdb.MDB_NOSUBDIR | lmdb.MDB_NOSYNC | lmdb.MDB_NOMETASYNC;
        try lmdb.mdb_env_open(env, path.ptr, flags, 0o644);

        // Open databases (tables)
        var txn: *lmdb.MDB_txn = undefined;
        try lmdb.mdb_txn_begin(env, null, 0, &txn);
        errdefer lmdb.mdb_txn_abort(txn);

        const dbi_nodes = try lmdb.mdb_dbi_open(txn, "nodes", lmdb.MDB_CREATE | lmdb.MDB_INTEGERKEY);
        const dbi_edges = try lmdb.mdb_dbi_open(txn, "edges", lmdb.MDB_CREATE);
        // Forward and reverse adjacency live in separate DUPSORT tables so
        // that outgoing and incoming neighbor sets never collide on a key.
        const dbi_out = try lmdb.mdb_dbi_open(txn, "adjacency_out", lmdb.MDB_CREATE | lmdb.MDB_DUPSORT);
        const dbi_in = try lmdb.mdb_dbi_open(txn, "adjacency_in", lmdb.MDB_CREATE | lmdb.MDB_DUPSORT);
        const dbi_metadata = try lmdb.mdb_dbi_open(txn, "metadata", lmdb.MDB_CREATE);

        try lmdb.mdb_txn_commit(txn);

        return Self{
            .env = env,
            .dbi_nodes = dbi_nodes,
            .dbi_edges = dbi_edges,
            .dbi_out = dbi_out,
            .dbi_in = dbi_in,
            .dbi_metadata = dbi_metadata,
            .allocator = allocator,
        };
    }

    /// Close database
    pub fn close(self: *Self) void {
        lmdb.mdb_env_close(self.env);
    }

    /// Add node to persistent storage
    pub fn addNode(self: *Self, node: NodeId) !void {
        var txn: *lmdb.MDB_txn = undefined;
        try lmdb.mdb_txn_begin(self.env, null, 0, &txn);
        errdefer lmdb.mdb_txn_abort(txn);

        const key: []const u8 = std.mem.asBytes(&node);
        const val: []const u8 = &[_]u8{1}; // Presence marker

        var mdb_key = lmdb.MDB_val{ .mv_size = key.len, .mv_data = key.ptr };
        var mdb_val = lmdb.MDB_val{ .mv_size = val.len, .mv_data = val.ptr };

        try lmdb.mdb_put(txn, self.dbi_nodes, &mdb_key, &mdb_val, 0);
        try lmdb.mdb_txn_commit(txn);
    }

    /// Add edge to persistent storage
    pub fn addEdge(self: *Self, edge: RiskEdge) !void {
        var txn: *lmdb.MDB_txn = undefined;
        try lmdb.mdb_txn_begin(self.env, null, 0, &txn);
        errdefer lmdb.mdb_txn_abort(txn);

        // Store edge data. The encode helpers write into caller-owned stack
        // buffers, so no heap allocation (or dangling pointer) is involved.
        var key_buf: [8]u8 = undefined;
        var val_buf: [64]u8 = undefined;
        const edge_key = encodeEdgeKey(&key_buf, edge.from, edge.to);
        const edge_val = encodeEdgeValue(&val_buf, edge);

        var mdb_key = lmdb.MDB_val{ .mv_size = edge_key.len, .mv_data = edge_key.ptr };
        var mdb_val = lmdb.MDB_val{ .mv_size = edge_val.len, .mv_data = edge_val.ptr };

        try lmdb.mdb_put(txn, self.dbi_edges, &mdb_key, &mdb_val, 0);

        // Update forward adjacency index (from -> to)
        const adj_key: []const u8 = std.mem.asBytes(&edge.from);
        const adj_val: []const u8 = std.mem.asBytes(&edge.to);

        var mdb_adj_key = lmdb.MDB_val{ .mv_size = adj_key.len, .mv_data = adj_key.ptr };
        var mdb_adj_val = lmdb.MDB_val{ .mv_size = adj_val.len, .mv_data = adj_val.ptr };

        try lmdb.mdb_put(txn, self.dbi_out, &mdb_adj_key, &mdb_adj_val, 0);

        // Update reverse adjacency (to -> from) for incoming queries
        var mdb_rev_key = lmdb.MDB_val{ .mv_size = adj_val.len, .mv_data = adj_val.ptr };
        var mdb_rev_val = lmdb.MDB_val{ .mv_size = adj_key.len, .mv_data = adj_key.ptr };

        try lmdb.mdb_put(txn, self.dbi_in, &mdb_rev_key, &mdb_rev_val, 0);

        try lmdb.mdb_txn_commit(txn);
    }

    /// Get outgoing neighbors (from -> *)
    pub fn getOutgoing(self: *Self, from: NodeId, allocator: std.mem.Allocator) ![]NodeId {
        return self.scanNeighbors(self.dbi_out, from, allocator);
    }

    /// Get incoming neighbors (* -> to); mirrors getOutgoing over the
    /// reverse adjacency table.
    pub fn getIncoming(self: *Self, to: NodeId, allocator: std.mem.Allocator) ![]NodeId {
        return self.scanNeighbors(self.dbi_in, to, allocator);
    }

    // Internal: collect all DUPSORT values stored under `node` in `dbi`.
    fn scanNeighbors(self: *Self, dbi: lmdb.MDB_dbi, node: NodeId, allocator: std.mem.Allocator) ![]NodeId {
        var txn: *lmdb.MDB_txn = undefined;
        try lmdb.mdb_txn_begin(self.env, null, lmdb.MDB_RDONLY, &txn);
        defer lmdb.mdb_txn_abort(txn); // Read-only, abort is fine

        const key: []const u8 = std.mem.asBytes(&node);
        var mdb_key = lmdb.MDB_val{ .mv_size = key.len, .mv_data = key.ptr };
        var mdb_val: lmdb.MDB_val = undefined;

        var cursor: *lmdb.MDB_cursor = undefined;
        try lmdb.mdb_cursor_open(txn, dbi, &cursor);
        defer lmdb.mdb_cursor_close(cursor);

        var result = std.ArrayList(NodeId).init(allocator);
        errdefer result.deinit();

        // Position cursor at key
        const rc = lmdb.mdb_cursor_get(cursor, &mdb_key, &mdb_val, lmdb.MDB_SET_KEY);
        if (rc == lmdb.MDB_NOTFOUND) {
            return result.toOwnedSlice();
        }
        if (rc != 0) return error.MDBError;

        // Iterate over all duplicate values for this key
        while (true) {
            const neighbor = std.mem.bytesToValue(NodeId, @as([*]const u8, @ptrCast(mdb_val.mv_data))[0..@sizeOf(NodeId)]);
            try result.append(neighbor);

            const next_rc = lmdb.mdb_cursor_get(cursor, &mdb_key, &mdb_val, lmdb.MDB_NEXT_DUP);
            if (next_rc == lmdb.MDB_NOTFOUND) break;
            if (next_rc != 0) return error.MDBError;
        }

        return result.toOwnedSlice();
    }
implement getIncoming"); + } + + /// Get specific edge + pub fn getEdge(self: *Self, from: NodeId, to: NodeId) !?RiskEdge { + var txn: *lmdb.MDB_txn = undefined; + try lmdb.mdb_txn_begin(self.env, null, lmdb.MDB_RDONLY, &txn); + defer lmdb.mdb_txn_abort(txn); + + const key = try self.encodeEdgeKey(from, to); + var mdb_key = lmdb.MDB_val{ .mv_size = key.len, .mv_data = key.ptr }; + var mdb_val: lmdb.MDB_val = undefined; + + const rc = lmdb.mdb_get(txn, self.dbi_edges, &mdb_key, &mdb_val); + if (rc == lmdb.MDB_NOTFOUND) return null; + if (rc != 0) return error.MDBError; + + return try self.decodeEdgeValue(mdb_val); + } + + /// Load in-memory RiskGraph from persistent storage + pub fn toRiskGraph(self: *Self, allocator: std.mem.Allocator) !RiskGraph { + var graph = RiskGraph.init(allocator); + errdefer graph.deinit(); + + var txn: *lmdb.MDB_txn = undefined; + try lmdb.mdb_txn_begin(self.env, null, lmdb.MDB_RDONLY, &txn); + defer lmdb.mdb_txn_abort(txn); + + // Iterate all edges + var cursor: *lmdb.MDB_cursor = undefined; + try lmdb.mdb_cursor_open(txn, self.dbi_edges, &cursor); + defer lmdb.mdb_cursor_close(cursor); + + var mdb_key: lmdb.MDB_val = undefined; + var mdb_val: lmdb.MDB_val = undefined; + + while (lmdb.mdb_cursor_get(cursor, &mdb_key, &mdb_val, lmdb.MDB_NEXT) == 0) { + const edge = try self.decodeEdgeValue(mdb_val); + try graph.addEdge(edge); + } + + return graph; + } + + // Internal: Encode edge key (from, to) -> bytes + fn encodeEdgeKey(self: *Self, from: NodeId, to: NodeId) ![]u8 { + _ = self; + var buf: [8]u8 = undefined; + std.mem.writeInt(u32, buf[0..4], from, .little); + std.mem.writeInt(u32, buf[4..8], to, .little); + return &buf; + } + + // Internal: Encode RiskEdge -> bytes + fn encodeEdgeValue(self: *Self, edge: RiskEdge) ![]u8 { + _ = self; + // Compact binary encoding + var buf: [64]u8 = undefined; + var offset: usize = 0; + + std.mem.writeInt(u32, buf[offset..][0..4], edge.from, .little); + offset += 4; + + std.mem.writeInt(u32, buf[offset..][0..4], edge.to, .little); + offset += 4; + + std.mem.writeInt(u64, buf[offset..][0..8], @bitCast(edge.risk), .little); + offset += 8; + + std.mem.writeInt(u64, buf[offset..][0..8], edge.timestamp, .little); + offset += 8; + + std.mem.writeInt(u64, buf[offset..][0..8], edge.nonce, .little); + offset += 8; + + std.mem.writeInt(u8, buf[offset..][0..1], edge.level); + offset += 1; + + std.mem.writeInt(u64, buf[offset..][0..8], edge.expires_at, .little); + offset += 8; + + return buf[0..offset]; + } + + // Internal: Decode bytes -> RiskEdge + fn decodeEdgeValue(self: *Self, val: lmdb.MDB_val) !RiskEdge { + _ = self; + const data = @as([*]const u8, @ptrCast(val.mv_data))[0..val.mv_size]; + + var offset: usize = 0; + + const from = std.mem.readInt(u32, data[offset..][0..4], .little); + offset += 4; + + const to = std.mem.readInt(u32, data[offset..][0..4], .little); + offset += 4; + + const risk_bits = std.mem.readInt(u64, data[offset..][0..8], .little); + const risk = @as(f64, @bitCast(risk_bits)); + offset += 8; + + const timestamp = std.mem.readInt(u64, data[offset..][0..8], .little); + offset += 8; + + const nonce = std.mem.readInt(u64, data[offset..][0..8], .little); + offset += 8; + + const level = std.mem.readInt(u8, data[offset..][0..1], .little); + offset += 1; + + const expires_at = std.mem.readInt(u64, data[offset..][0..8], .little); + + return RiskEdge{ + .from = from, + .to = to, + .risk = risk, + .timestamp = timestamp, + .nonce = nonce, + .level = level, + .expires_at = expires_at, + }; + } +}; + +// 
test "PersistentGraph: basic operations" {
    const allocator = std.testing.allocator;

    // Create temporary database (NOSUBDIR mode also creates a "-lock" file)
    const path = "/tmp/test_qvl_db";
    defer std.fs.deleteFileAbsolute(path) catch {};
    defer std.fs.deleteFileAbsolute(path ++ "-lock") catch {};

    var graph = try PersistentGraph.open(path, .{}, allocator);
    defer graph.close();

    // Add nodes
    try graph.addNode(0);
    try graph.addNode(1);
    try graph.addNode(2);

    // Add edges
    const ts: u64 = 1234567890;
    try graph.addEdge(.{
        .from = 0,
        .to = 1,
        .risk = -0.3,
        .timestamp = ts,
        .nonce = 0,
        .level = 3,
        .expires_at = ts + 86400,
    });

    try graph.addEdge(.{
        .from = 1,
        .to = 2,
        .risk = -0.3,
        .timestamp = ts,
        .nonce = 1,
        .level = 3,
        .expires_at = ts + 86400,
    });

    // Query outgoing (expected value first, per std.testing convention)
    const neighbors = try graph.getOutgoing(0, allocator);
    defer allocator.free(neighbors);

    try std.testing.expectEqual(@as(usize, 1), neighbors.len);
    try std.testing.expectEqual(@as(NodeId, 1), neighbors[0]);

    // Retrieve edge
    const edge = try graph.getEdge(0, 1);
    try std.testing.expect(edge != null);
    try std.testing.expectEqual(@as(NodeId, 0), edge.?.from);
    try std.testing.expectEqual(@as(NodeId, 1), edge.?.to);
    try std.testing.expectApproxEqAbs(@as(f64, -0.3), edge.?.risk, 0.001);
}

test "PersistentGraph: Kenya Rule compliance" {
    const allocator = std.testing.allocator;

    const path = "/tmp/test_kenya_db";
    defer std.fs.deleteFileAbsolute(path) catch {};
    defer std.fs.deleteFileAbsolute(path ++ "-lock") catch {};

    // 10MB limit
    var graph = try PersistentGraph.open(path, .{
        .map_size = 10 * 1024 * 1024,
    }, allocator);
    defer graph.close();

    // Add 1000 nodes
    var i: u32 = 0;
    while (i < 1000) : (i += 1) {
        try graph.addNode(i);
    }

    // Verify database size stays under the 10MB cap
    const stat = try std.fs.cwd().statFile(path);
    try std.testing.expect(stat.size < 10 * 1024 * 1024);
}
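One property the tests above do not cover is that data actually survives a close/reopen cycle. A sketch under the same assumptions as the module (hypothetical `lmdb` wrapper, single-file NOSUBDIR layout); note that NOSYNC defers fsync, so this relies on a clean close flushing OS buffers:

```zig
test "PersistentGraph: edges survive close and reopen" {
    const allocator = std.testing.allocator;

    const path = "/tmp/test_qvl_reopen_db";
    defer std.fs.deleteFileAbsolute(path) catch {};
    defer std.fs.deleteFileAbsolute(path ++ "-lock") catch {};

    // Write one edge, then close the environment.
    {
        var graph = try PersistentGraph.open(path, .{}, allocator);
        defer graph.close();
        try graph.addEdge(.{
            .from = 7,
            .to = 9,
            .risk = -0.3,
            .timestamp = 1234567890,
            .nonce = 42,
            .level = 3,
            .expires_at = 1234567890 + 86400,
        });
    }

    // Reopen and read the edge back from disk.
    var graph = try PersistentGraph.open(path, .{}, allocator);
    defer graph.close();

    const edge = try graph.getEdge(7, 9);
    try std.testing.expect(edge != null);
    try std.testing.expectEqual(@as(u64, 42), edge.?.nonce);
}
```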