# SPDX-License-Identifier: LSL-1.0
# Copyright (c) 2026 Markus Maiwald
# Stewardship: Self Sovereign Society Foundation
#
# This file is part of the Nexus Sovereign Core.
# See legal/LICENSE_SOVEREIGN.md for license terms.

## Merkle Tree Implementation for Nippels
##
## This module implements a high-performance merkle tree for cryptographic
## verification and efficient state comparison of Nippel content.
##
## Hash Algorithm: xxHash xxh3_128 (performance layer - non-cryptographic)
## Use Case: Internal state verification, not adversary-facing

import std/[tables, sequtils, algorithm, options, strutils, times]
{.warning[Deprecated]:off.}
import std/threadpool
{.warning[Deprecated]:on.}
import xxhash # High-performance hashing
import nimcrypto/blake2 # Legacy fallback
import utils/resultutils # Use shared Result type

# Base error type for merkle tree operations
type
  MerkleErrorCode* = enum
    UnknownError
    CorruptedObject
    ObjectNotFound

  NimPakError* = object of CatchableError
    code*: MerkleErrorCode

type
  MerkleNode* = ref object
    hash*: string               ## xxHash xxh3_128 hash of node content
    path*: string               ## File path (for leaf nodes)
    isLeaf*: bool               ## True if this is a leaf node
    children*: seq[MerkleNode]  ## Child nodes (for internal nodes)
    size*: int64                ## File size (for leaf nodes)
    # Task 12.3: Cache computed hash to avoid recomputation
    cachedHash*: Option[string] ## Cached hash value

  MerkleTree* = object
    root*: MerkleNode           ## Root node of the tree
    hashAlgorithm*: string      ## Hash algorithm used ("xxh3" or "blake2b")
    nodeCount*: int             ## Total number of nodes
    leafCount*: int             ## Number of leaf nodes
    # Task 12.3: Cache for intermediate node hashes
    nodeCache*: Table[string, string] ## Path -> cached hash

  FileEntry* = object
    path*: string               ## Relative file path
    hash*: string               ## File content hash
    size*: int64                ## File size in bytes

  FileChange* = object
    path*: string               ## File path
    changeType*: ChangeType     ## Type of change
    newHash*: Option[string]    ## New hash (for add/modify)
    newSize*: Option[int64]     ## New size (for add/modify)

  ChangeType* = enum
    Added
    Modified
    Deleted

  FileDiff* = object
    path*: string               ## File path
    diffType*: DiffType         ## Type of difference
    oldHash*: Option[string]    ## Old hash
    newHash*: Option[string]    ## New hash

  DiffType* = enum
    OnlyInFirst
    OnlyInSecond
    Different
    Identical

  MerkleError* = object of NimPakError
    treePath*: string

proc makeError(code: MerkleErrorCode, msg: string): MerkleError =
  ## Build a `MerkleError` value carrying `code` and `msg`.
  MerkleError(code: code, msg: msg)

# Hash calculation functions

proc calculateXxh3*(data: string): string =
  ## Calculate xxHash xxh3_128 hash (performance layer).
  ## The result is the stringified hash prefixed with "xxh3-".
  let hash = XXH3_128bits(data)
  result = "xxh3-" & $hash

proc calculateXxh3*(data: seq[byte]): string =
  ## Calculate xxHash xxh3_128 hash from a byte sequence.
  ## The bytes are copied into a string and hashed by the string overload.
  var str = newString(data.len)
  if data.len > 0:
    # Guarded: indexing element 0 of an empty seq/string would be invalid.
    copyMem(addr str[0], unsafeAddr data[0], data.len)
  result = calculateXxh3(str)

proc calculateBlake2b*(data: seq[byte]): string =
  ## Calculate BLAKE2b-512 hash (legacy fallback).
  ## The result is the stringified digest prefixed with "blake2b-".
  let digest = blake2_512.digest(data)
  result = "blake2b-" & $digest

proc computeNodeHash(node: MerkleNode, algorithm: string): string =
  ## Compute the hash of `node` from its current contents, ignoring
  ## `cachedHash`.
  ##
  ## Leaf nodes return their stored content hash. Internal nodes hash the
  ## concatenation of their children's `hash` fields with xxh3 when
  ## `algorithm == "xxh3"`, and with BLAKE2b for any other value.
  if node.isLeaf:
    return node.hash
  var combined = ""
  for child in node.children:
    combined.add(child.hash)
  result =
    if algorithm == "xxh3":
      calculateXxh3(combined)
    else:
      calculateBlake2b(combined.toOpenArrayByte(0, combined.len - 1).toSeq())

proc calculateNodeHash*(node: MerkleNode, algorithm: string = "xxh3"): string =
  ## Calculate the hash for a merkle node (with caching).
  ##
  ## Returns `node.cachedHash` when it is set; otherwise computes the hash,
  ## stores it in `node.cachedHash` and returns it. Because of the cache this
  ## proc must not be used to *verify* a node - see `verifyNode`.
  # Task 12.3: Check cache first
  if node.cachedHash.isSome:
    result = node.cachedHash.get()
  else:
    result = computeNodeHash(node, algorithm)
    # Task 12.3: Cache the computed hash
    node.cachedHash = some(result)

proc newLeafNode*(path: string, hash: string, size: int64): MerkleNode =
  ## Create a new leaf node for the file at `path` with content hash `hash`
  ## and byte size `size`.
  result = MerkleNode(
    hash: hash,
    path: path,
    isLeaf: true,
    children: @[],
    size: size,
    # Task 12.3: Leaf nodes have their hash immediately
    cachedHash: some(hash)
  )

proc newInternalNode*(children: seq[MerkleNode],
                      algorithm: string = "xxh3"): MerkleNode =
  ## Create a new internal node whose hash is derived from `children`.
  result = MerkleNode(
    hash: "",
    path: "",
    isLeaf: false,
    children: children,
    size: 0,
    # Task 12.3: Initialize cache as empty
    cachedHash: none(string)
  )
  # Calculate hash from children (will be cached)
  result.hash = calculateNodeHash(result, algorithm)

proc buildTreeFromFiles*(files: seq[FileEntry],
                         algorithm: string = "xxh3"): Result[MerkleTree, MerkleError] =
  ## Build a merkle tree from a list of files.
  ##
  ## Files are sorted by path so the tree shape is deterministic. Nodes are
  ## paired bottom-up; an odd node at the end of a level is promoted unchanged
  ## to the next level. An empty `files` list produces a tree whose root is a
  ## single placeholder leaf (empty path, size 0, hash of empty input).
  try:
    if files.len == 0:
      # Empty tree - create a single node with empty hash
      let emptyHash =
        if algorithm == "xxh3": calculateXxh3("")
        else: calculateBlake2b(@[])
      return okResult[MerkleTree, MerkleError](MerkleTree(
        root: newLeafNode("", emptyHash, 0),
        hashAlgorithm: algorithm,
        nodeCount: 1,
        leafCount: 1,
        # Task 12.3: Initialize node cache
        nodeCache: initTable[string, string]()
      ))

    # Sort files by path for deterministic tree structure
    var sortedFiles = files
    sortedFiles.sort(proc(a, b: FileEntry): int = cmp(a.path, b.path))

    # Create leaf nodes
    var leaves: seq[MerkleNode] = @[]
    for file in sortedFiles:
      leaves.add(newLeafNode(file.path, file.hash, file.size))

    # Build tree bottom-up
    var currentLevel = leaves
    var nodeCount = leaves.len
    let leafCount = leaves.len

    while currentLevel.len > 1:
      var nextLevel: seq[MerkleNode] = @[]
      # Group nodes in pairs and create parent nodes
      var i = 0
      while i < currentLevel.len:
        if i + 1 < currentLevel.len:
          # Pair of nodes
          let parent = newInternalNode(
            @[currentLevel[i], currentLevel[i + 1]], algorithm)
          nextLevel.add(parent)
          nodeCount.inc
          i += 2
        else:
          # Odd node out - promote to next level
          nextLevel.add(currentLevel[i])
          i += 1
      currentLevel = nextLevel

    # Root is the last remaining node
    return okResult[MerkleTree, MerkleError](MerkleTree(
      root: currentLevel[0],
      hashAlgorithm: algorithm,
      nodeCount: nodeCount,
      leafCount: leafCount,
      # Task 12.3: Initialize node cache
      nodeCache: initTable[string, string]()
    ))
  except CatchableError as e:
    return errResult[MerkleTree, MerkleError](makeError(
      UnknownError, "Failed to build merkle tree: " & e.msg))

proc getRootHash*(tree: MerkleTree): string =
  ## Get the root hash of the tree.
  return tree.root.hash

proc getLeafNodes*(node: MerkleNode): seq[MerkleNode] =
  ## Get all leaf nodes under this node, in child order.
  if node.isLeaf:
    return @[node]
  else:
    result = @[]
    for child in node.children:
      result.add(getLeafNodes(child))

proc getAllLeaves*(tree: MerkleTree): seq[MerkleNode] =
  ## Get all leaf nodes in the tree. For an empty tree this includes the
  ## placeholder leaf created by `buildTreeFromFiles`.
  return getLeafNodes(tree.root)

proc leafTable(tree: MerkleTree): Table[string, MerkleNode] =
  ## Map every real file path in `tree` to its leaf node.
  ##
  ## The placeholder root of an empty tree (a leaf with empty path and
  ## size 0) is not a file and is therefore left out of the table.
  result = initTable[string, MerkleNode]()
  let root = tree.root
  if root.isLeaf and root.path.len == 0 and root.size == 0:
    return
  for leaf in getLeafNodes(root):
    result[leaf.path] = leaf

proc findLeaf*(node: MerkleNode, path: string): Option[MerkleNode] =
  ## Find a leaf node by path with a depth-first search below `node`.
  if node.isLeaf:
    if node.path == path:
      return some(node)
    else:
      return none(MerkleNode)
  else:
    for child in node.children:
      let found = findLeaf(child, path)
      if found.isSome:
        return found
    return none(MerkleNode)

proc findLeafInTree*(tree: MerkleTree, path: string): Option[MerkleNode] =
  ## Find a leaf node in the tree by path.
  return findLeaf(tree.root, path)

proc treeToString*(node: MerkleNode, indent: int = 0): string =
  ## Convert tree to string representation for debugging.
  ## Each nesting level adds one repetition of the indent string.
  let prefix = repeat(" ", indent)
  if node.isLeaf:
    result = prefix & "Leaf: " & node.path & " (" & node.hash & ", " &
      $node.size & " bytes)\n"
  else:
    result = prefix & "Internal: " & node.hash & "\n"
    for child in node.children:
      result.add(treeToString(child, indent + 1))

proc printTree*(tree: MerkleTree): string =
  ## Render the entire tree structure (header, root hash, then all nodes).
  result = "Merkle Tree (algorithm: " & tree.hashAlgorithm & ", nodes: " &
    $tree.nodeCount & ", leaves: " & $tree.leafCount & ")\n"
  result.add("Root hash: " & tree.root.hash & "\n")
  result.add(treeToString(tree.root))

# Tree Verification Functions

proc verifyNode*(node: MerkleNode,
                 algorithm: string = "xxh3"): Result[bool, MerkleError] =
  ## Verify a single node's hash is correct.
  ##
  ## Leaf nodes always verify here. Internal nodes are re-hashed from their
  ## children's current `hash` values and compared with the stored `hash`;
  ## a mismatch yields a `CorruptedObject` error.
  try:
    if node.isLeaf:
      # Leaf nodes: hash is already the file content hash, nothing to verify
      # here (file content verification happens at CAS level)
      return okResult[bool, MerkleError](true)

    # Recompute WITHOUT the cache: the cached value was produced when the node
    # was built, so comparing against it could never reveal a changed child.
    let computedHash = computeNodeHash(node, algorithm)
    if computedHash == node.hash:
      return okResult[bool, MerkleError](true)
    else:
      return errResult[bool, MerkleError](makeError(
        CorruptedObject,
        "Hash mismatch for internal node. Expected: " & node.hash &
          ", Got: " & computedHash))
  except CatchableError as e:
    return errResult[bool, MerkleError](makeError(
      UnknownError, "Failed to verify node: " & e.msg))

proc verifyTreeRecursive*(node: MerkleNode,
                          algorithm: string = "xxh3"): Result[bool, MerkleError] =
  ## Recursively verify all nodes in the tree rooted at `node`.
  ## Returns the first error encountered (node before its children).
  # Verify current node
  let nodeResult = verifyNode(node, algorithm)
  if nodeResult.isErr:
    return nodeResult

  # Verify children recursively
  if not node.isLeaf:
    for child in node.children:
      let childResult = verifyTreeRecursive(child, algorithm)
      if childResult.isErr:
        return childResult

  return okResult[bool, MerkleError](true)

proc verifyTree*(tree: MerkleTree): Result[bool, MerkleError] =
  ## Verify the entire merkle tree.
  ## This checks that all internal node hashes are correctly computed from
  ## their children.
  return verifyTreeRecursive(tree.root, tree.hashAlgorithm)

# Parallel verification support (for large trees)

proc verifySubtree(node: MerkleNode, algorithm: string): bool {.thread.} =
  ## Subtree verification entry point used with `spawn`; collapses the
  ## result of `verifyTreeRecursive` into a plain bool.
  let verifyResult = verifyTreeRecursive(node, algorithm)
  return verifyResult.isOk and verifyResult.get()

proc verifyTreeParallel*(tree: MerkleTree): Result[bool, MerkleError] =
  ## Verify tree using parallel verification across branches.
  ##
  ## One task is spawned per direct child of the root; the root node itself is
  ## verified afterwards on the calling thread. A failing subtree is reported
  ## as a generic `CorruptedObject` error without the node details.
  try:
    if tree.root.isLeaf:
      # Single leaf, no parallelization needed
      return verifyTree(tree)

    # Spawn verification tasks for each top-level subtree
    var futures: seq[FlowVar[bool]] = @[]
    for child in tree.root.children:
      futures.add(spawn verifySubtree(child, tree.hashAlgorithm))

    # Wait for all verifications to complete
    for future in futures:
      let futureResult = ^future
      if not futureResult:
        return errResult[bool, MerkleError](makeError(
          CorruptedObject,
          "Parallel verification failed for one or more subtrees"))

    # Verify root node itself
    let rootResult = verifyNode(tree.root, tree.hashAlgorithm)
    if rootResult.isErr:
      return rootResult

    return okResult[bool, MerkleError](true)
  except CatchableError as e:
    return errResult[bool, MerkleError](makeError(
      UnknownError, "Failed during parallel verification: " & e.msg))

proc verifyTreeIncremental*(tree: MerkleTree,
                            paths: seq[string]): Result[bool, MerkleError] =
  ## Verify only specific paths in the tree (incremental verification).
  ##
  ## Currently this only checks that every path in `paths` exists as a leaf;
  ## an unknown path yields `ObjectNotFound`. No hashes are re-checked.
  try:
    for path in paths:
      let leafOpt = findLeafInTree(tree, path)
      if leafOpt.isNone:
        return errResult[bool, MerkleError](makeError(
          ObjectNotFound, "Path not found in tree: " & path))
      # For incremental verification, we'd need to verify the path from leaf
      # to root. For now, we just verify the leaf exists.
      # Full path verification would require parent pointers in nodes.

    return okResult[bool, MerkleError](true)
  except CatchableError as e:
    return errResult[bool, MerkleError](makeError(
      UnknownError, "Failed during incremental verification: " & e.msg))

# Verification statistics
type
  VerificationStats* = object
    totalNodes*: int
    verifiedNodes*: int
    failedNodes*: int
    verificationTime*: float # in milliseconds

proc tallyVerification(node: MerkleNode, algorithm: string,
                       stats: var VerificationStats) =
  ## Verify `node` and everything below it, counting each visited node
  ## exactly once as either verified or failed.
  let nodeResult = verifyNode(node, algorithm)
  if nodeResult.isOk and nodeResult.get():
    stats.verifiedNodes.inc
  else:
    stats.failedNodes.inc

  # Keep descending even below a failed node so the counts cover the whole
  # tree instead of stopping at the first mismatch.
  if not node.isLeaf:
    for child in node.children:
      tallyVerification(child, algorithm, stats)

proc verifyTreeWithStats*(tree: MerkleTree): Result[VerificationStats, MerkleError] =
  ## Verify tree and return detailed statistics.
  ##
  ## `totalNodes` is taken from `tree.nodeCount`; `verifiedNodes` and
  ## `failedNodes` count the nodes actually visited, one count per node.
  ## `verificationTime` is CPU time in milliseconds.
  try:
    let startTime = cpuTime()
    var stats = VerificationStats(
      totalNodes: tree.nodeCount,
      verifiedNodes: 0,
      failedNodes: 0,
      verificationTime: 0.0
    )

    tallyVerification(tree.root, tree.hashAlgorithm, stats)

    let endTime = cpuTime()
    stats.verificationTime = (endTime - startTime) * 1000.0 # to milliseconds
    return okResult[VerificationStats, MerkleError](stats)
  except CatchableError as e:
    return errResult[VerificationStats, MerkleError](makeError(
      UnknownError, "Failed to collect verification statistics: " & e.msg))

# Incremental Update Functions

proc invalidateCache*(node: MerkleNode) =
  ## Invalidate the cached hash of `node`.
  ## Task 12.3: Clear cached hash when node is modified.
  node.cachedHash = none(string)
  # Note: In a full implementation, we'd need parent pointers to invalidate
  # ancestors. For now, we rebuild the tree which automatically clears all
  # caches.

proc applyChanges*(tree: var MerkleTree,
                   changes: seq[FileChange]): Result[string, MerkleError] =
  ## Apply file changes to the tree and return the new root hash.
  ##
  ## The current implementation collects the existing leaves, applies the
  ## changes to that set and rebuilds the whole tree. `Added`/`Modified`
  ## changes must carry both `newHash` and `newSize`; otherwise an error is
  ## returned and `tree` keeps its previous root. Deleting an unknown path is
  ## a no-op. An empty `changes` list returns the current root hash.
  try:
    if changes.len == 0:
      return okResult[string, MerkleError](tree.root.hash)

    # Task 12.3: Clear node cache since we're rebuilding
    tree.nodeCache.clear()

    # Current files by path; the empty-tree placeholder is not a file and
    # must not be carried into the rebuilt tree.
    var leafMap = leafTable(tree)

    # Apply changes to leaf map
    for change in changes:
      case change.changeType
      of Added, Modified:
        if change.newHash.isNone or change.newSize.isNone:
          # `$change.changeType` renders as "Added" / "Modified".
          return errResult[string, MerkleError](makeError(
            UnknownError,
            $change.changeType & " file must have hash and size: " &
              change.path))
        leafMap[change.path] = newLeafNode(
          change.path, change.newHash.get(), change.newSize.get())
      of Deleted:
        leafMap.del(change.path)

    # Rebuild tree from updated leaves
    var files: seq[FileEntry] = @[]
    for path, leaf in leafMap:
      files.add(FileEntry(path: path, hash: leaf.hash, size: leaf.size))

    let newTreeResult = buildTreeFromFiles(files, tree.hashAlgorithm)
    if newTreeResult.isErr:
      return errResult[string, MerkleError](newTreeResult.error)

    tree = newTreeResult.get()
    return okResult[string, MerkleError](tree.root.hash)
  except CatchableError as e:
    return errResult[string, MerkleError](makeError(
      UnknownError, "Failed to apply changes: " & e.msg))

proc updateFile*(tree: var MerkleTree, path: string, newHash: string,
                 newSize: int64): Result[string, MerkleError] =
  ## Update a single file in the tree (convenience function).
  let change = FileChange(
    path: path,
    changeType: Modified,
    newHash: some(newHash),
    newSize: some(newSize)
  )
  return applyChanges(tree, @[change])

proc addFile*(tree: var MerkleTree, path: string, hash: string,
              size: int64): Result[string, MerkleError] =
  ## Add a single file to the tree (convenience function).
  let change = FileChange(
    path: path,
    changeType: Added,
    newHash: some(hash),
    newSize: some(size)
  )
  return applyChanges(tree, @[change])

proc removeFile*(tree: var MerkleTree, path: string): Result[string, MerkleError] =
  ## Remove a single file from the tree (convenience function).
  let change = FileChange(
    path: path,
    changeType: Deleted,
    newHash: none(string),
    newSize: none(int64)
  )
  return applyChanges(tree, @[change])

# Optimized incremental update (future enhancement)
# This would track parent pointers and only recompute affected branches
# For now, we rebuild the tree which is still fast for reasonable sizes

proc getAffectedPaths*(changes: seq[FileChange]): seq[string] =
  ## Get list of paths affected by changes, in input order.
  result = changes.mapIt(it.path)

proc estimateUpdateCost*(tree: MerkleTree, changes: seq[FileChange]): int =
  ## Estimate the cost of applying changes (number of nodes to recompute).
  ## For the current implementation this is the entire tree; `changes` is not
  ## inspected.
  ## Future optimization: track only affected branches.
  return tree.nodeCount

# Update statistics
type
  UpdateStats* = object
    changesApplied*: int
    nodesRecomputed*: int
    oldRootHash*: string
    newRootHash*: string
    updateTime*: float # in milliseconds

proc applyChangesWithStats*(tree: var MerkleTree,
                            changes: seq[FileChange]): Result[UpdateStats, MerkleError] =
  ## Apply changes and return detailed statistics.
  ## `nodesRecomputed` is the node count of the tree after the update, and
  ## `updateTime` is CPU time in milliseconds.
  try:
    let startTime = cpuTime()
    let oldRootHash = tree.root.hash

    let applyResult = applyChanges(tree, changes)
    if applyResult.isErr:
      return errResult[UpdateStats, MerkleError](applyResult.error)

    let newRootHash = applyResult.get()
    let endTime = cpuTime()

    let stats = UpdateStats(
      changesApplied: changes.len,
      nodesRecomputed: tree.nodeCount, # Current: full rebuild
      oldRootHash: oldRootHash,
      newRootHash: newRootHash,
      updateTime: (endTime - startTime) * 1000.0
    )
    return okResult[UpdateStats, MerkleError](stats)
  except CatchableError as e:
    return errResult[UpdateStats, MerkleError](makeError(
      UnknownError, "Failed to collect update statistics: " & e.msg))

# Batch update optimization

proc applyChangesBatch*(tree: var MerkleTree,
                        changeBatches: seq[seq[FileChange]]): Result[seq[string], MerkleError] =
  ## Apply multiple batches of changes and return the root hash after each
  ## batch.
  ##
  ## Batches are applied in order. If one fails, its error is returned and
  ## the batches applied before it remain in effect on `tree`.
  try:
    var rootHashes: seq[string] = @[]
    for batch in changeBatches:
      let batchResult = applyChanges(tree, batch)
      if batchResult.isErr:
        return errResult[seq[string], MerkleError](batchResult.error)
      rootHashes.add(batchResult.get())
    return okResult[seq[string], MerkleError](rootHashes)
  except CatchableError as e:
    return errResult[seq[string], MerkleError](makeError(
      UnknownError, "Failed to apply batch changes: " & e.msg))

# Tree Diffing Functions

proc compareDiffs(a, b: FileDiff): int =
  ## Order diffs by path.
  cmp(a.path, b.path)

proc diffTrees*(tree1, tree2: MerkleTree): Result[seq[FileDiff], MerkleError] =
  ## Compare two merkle trees and return the differences, sorted by path.
  ##
  ## Files present in both trees with equal hashes are not reported. The
  ## placeholder leaf of an empty tree is not treated as a file.
  try:
    # Quick check: if root hashes match, trees are identical
    if tree1.root.hash == tree2.root.hash:
      return okResult[seq[FileDiff], MerkleError](@[])

    # Build maps for efficient lookup
    let map1 = leafTable(tree1)
    let map2 = leafTable(tree2)
    var diffs: seq[FileDiff] = @[]

    # Find files only in tree1
    for path, leaf in map1:
      if not map2.hasKey(path):
        diffs.add(FileDiff(
          path: path,
          diffType: OnlyInFirst,
          oldHash: some(leaf.hash),
          newHash: none(string)
        ))

    # Find files only in tree2 or different between trees
    for path, leaf2 in map2:
      if not map1.hasKey(path):
        # File only in tree2
        diffs.add(FileDiff(
          path: path,
          diffType: OnlyInSecond,
          oldHash: none(string),
          newHash: some(leaf2.hash)
        ))
      else:
        # File in both trees - check if different
        let leaf1 = map1[path]
        if leaf1.hash != leaf2.hash:
          diffs.add(FileDiff(
            path: path,
            diffType: Different,
            oldHash: some(leaf1.hash),
            newHash: some(leaf2.hash)
          ))
        # Identical files are intentionally omitted to reduce noise;
        # use `diffTreesWithIdentical` to include them.

    # Sort diffs by path for consistent output
    diffs.sort(compareDiffs)
    return okResult[seq[FileDiff], MerkleError](diffs)
  except CatchableError as e:
    return errResult[seq[FileDiff], MerkleError](makeError(
      UnknownError, "Failed to diff trees: " & e.msg))

proc diffTreesWithIdentical*(tree1, tree2: MerkleTree): Result[seq[FileDiff], MerkleError] =
  ## Compare trees and include identical files in the diff.
  ## The result is the output of `diffTrees` plus one `Identical` entry per
  ## path present in both trees with equal hashes, sorted by path.
  try:
    let diffResult = diffTrees(tree1, tree2)
    if diffResult.isErr:
      return diffResult

    var diffs = diffResult.get()

    # Add identical files
    let map1 = leafTable(tree1)
    let map2 = leafTable(tree2)
    for path, leaf1 in map1:
      if map2.hasKey(path):
        let leaf2 = map2[path]
        if leaf1.hash == leaf2.hash:
          diffs.add(FileDiff(
            path: path,
            diffType: Identical,
            oldHash: some(leaf1.hash),
            newHash: some(leaf2.hash)
          ))

    # Sort by path
    diffs.sort(compareDiffs)
    return okResult[seq[FileDiff], MerkleError](diffs)
  except CatchableError as e:
    return errResult[seq[FileDiff], MerkleError](makeError(
      UnknownError, "Failed to diff trees with identical: " & e.msg))

# Diff statistics
type
  DiffStats* = object
    totalFiles*: int
    onlyInFirst*: int
    onlyInSecond*: int
    different*: int
    identical*: int
    diffTime*: float # in milliseconds

proc getDiffStats*(tree1, tree2: MerkleTree): Result[DiffStats, MerkleError] =
  ## Get statistics about differences between two trees.
  ## `totalFiles` is the sum of the four per-category counters and `diffTime`
  ## is CPU time in milliseconds.
  try:
    let startTime = cpuTime()

    let diffResult = diffTreesWithIdentical(tree1, tree2)
    if diffResult.isErr:
      return errResult[DiffStats, MerkleError](diffResult.error)

    let diffs = diffResult.get()
    var stats = DiffStats(
      totalFiles: 0,
      onlyInFirst: 0,
      onlyInSecond: 0,
      different: 0,
      identical: 0,
      diffTime: 0.0
    )

    for diff in diffs:
      case diff.diffType
      of OnlyInFirst: stats.onlyInFirst.inc
      of OnlyInSecond: stats.onlyInSecond.inc
      of Different: stats.different.inc
      of Identical: stats.identical.inc

    stats.totalFiles = stats.onlyInFirst + stats.onlyInSecond +
      stats.different + stats.identical

    let endTime = cpuTime()
    stats.diffTime = (endTime - startTime) * 1000.0
    return okResult[DiffStats, MerkleError](stats)
  except CatchableError as e:
    return errResult[DiffStats, MerkleError](makeError(
      UnknownError, "Failed to get diff statistics: " & e.msg))

# Diff formatting

proc formatDiff*(diff: FileDiff): string =
  ## Format a single diff for human-readable output.
  case diff.diffType
  of OnlyInFirst:
    result = "- " & diff.path & " (removed)"
  of OnlyInSecond:
    result = "+ " & diff.path & " (added)"
  of Different:
    result = "M " & diff.path & " (modified)"
  of Identical:
    result = " " & diff.path & " (unchanged)"

proc formatDiffs*(diffs: seq[FileDiff]): string =
  ## Format all diffs for human-readable output, one per line.
  result = ""
  for diff in diffs:
    result.add(formatDiff(diff) & "\n")

proc printDiff*(tree1, tree2: MerkleTree): Result[string, MerkleError] =
  ## Generate a human-readable diff between two trees.
  ## Returns "Trees are identical\n" when `diffTrees` reports no differences.
  try:
    let diffResult = diffTrees(tree1, tree2)
    if diffResult.isErr:
      return errResult[string, MerkleError](diffResult.error)

    let diffs = diffResult.get()
    if diffs.len == 0:
      return okResult[string, MerkleError]("Trees are identical\n")

    var output = "Differences between trees:\n"
    output.add("Tree 1 root: " & tree1.root.hash & "\n")
    output.add("Tree 2 root: " & tree2.root.hash & "\n")
    output.add("\n")
    output.add(formatDiffs(diffs))
    return okResult[string, MerkleError](output)
  except CatchableError as e:
    return errResult[string, MerkleError](makeError(
      UnknownError, "Failed to print diff: " & e.msg))

# Efficient change detection

proc hasChanges*(tree1, tree2: MerkleTree): bool =
  ## Quick check if two trees have any differences.
  ## Only the two root hashes are compared; no leaves are visited.
  return tree1.root.hash != tree2.root.hash

proc getChangedPaths*(tree1, tree2: MerkleTree): Result[seq[string], MerkleError] =
  ## Get list of paths that changed between two trees (added, removed or
  ## modified), in the path-sorted order produced by `diffTrees`.
  try:
    let diffResult = diffTrees(tree1, tree2)
    if diffResult.isErr:
      return errResult[seq[string], MerkleError](diffResult.error)

    let diffs = diffResult.get()
    var paths: seq[string] = @[]
    for diff in diffs:
      if diff.diffType != Identical:
        paths.add(diff.path)
    return okResult[seq[string], MerkleError](paths)
  except CatchableError as e:
    return errResult[seq[string], MerkleError](makeError(
      UnknownError, "Failed to get changed paths: " & e.msg))