# 866 lines · 27 KiB · Nim
# SPDX-License-Identifier: LSL-1.0
|
|
# Copyright (c) 2026 Markus Maiwald
|
|
# Stewardship: Self Sovereign Society Foundation
|
|
#
|
|
# This file is part of the Nexus Sovereign Core.
|
|
# See legal/LICENSE_SOVEREIGN.md for license terms.
|
|
|
|
## Merkle Tree Implementation for Nippels
|
|
##
|
|
## This module implements a high-performance merkle tree for cryptographic
|
|
## verification and efficient state comparison of Nippel content.
|
|
##
|
|
## Hash Algorithm: xxHash xxh3_128 (performance layer - non-cryptographic)
|
|
## Use Case: Internal state verification, not adversary-facing
|
|
|
|
import std/[tables, sequtils, algorithm, options, strutils, times]
|
|
{.warning[Deprecated]:off.}
|
|
import std/threadpool
|
|
{.warning[Deprecated]:on.}
|
|
import xxhash # High-performance hashing
|
|
import nimcrypto/blake2 # Legacy fallback
|
|
import utils/resultutils # Use shared Result type
|
|
|
|
# Base error type for merkle tree operations
|
|
type
  MerkleErrorCode* = enum ## Failure categories attached to merkle tree errors.
    UnknownError    ## Failure without a more specific category
    CorruptedObject ## A stored hash does not match the recomputed hash
    ObjectNotFound  ## A requested path is not present in the tree

  NimPakError* = object of CatchableError
    ## Base error type for merkle tree operations.
    code*: MerkleErrorCode ## Category of the failure
|
|
|
|
type
  MerkleNode* = ref object
    ## One node of the tree: a leaf describing a file, or an internal node
    ## that combines its children.
    hash*: string              ## xxHash xxh3_128 hash of node content
    path*: string              ## File path (for leaf nodes)
    isLeaf*: bool              ## True if this is a leaf node
    children*: seq[MerkleNode] ## Child nodes (for internal nodes)
    size*: int64               ## File size (for leaf nodes)
    # Task 12.3: cache the computed hash to avoid recomputation
    cachedHash*: Option[string] ## Memoized hash value, when already computed

  MerkleTree* = object
    ## A complete tree plus bookkeeping counters.
    root*: MerkleNode          ## Root node of the tree
    hashAlgorithm*: string     ## Hash algorithm used ("xxh3" or "blake2b")
    nodeCount*: int            ## Total number of nodes
    leafCount*: int            ## Number of leaf nodes
    # Task 12.3: cache for intermediate node hashes
    nodeCache*: Table[string, string] ## Path -> cached hash

  FileEntry* = object
    ## Input record used to build a tree.
    path*: string              ## Relative file path
    hash*: string              ## File content hash
    size*: int64               ## File size in bytes

  FileChange* = object
    ## A single requested modification of the tree's file set.
    path*: string              ## File path
    changeType*: ChangeType    ## Type of change
    newHash*: Option[string]   ## New hash (for add/modify)
    newSize*: Option[int64]    ## New size (for add/modify)

  ChangeType* = enum ## Kind of modification carried by a `FileChange`.
    Added
    Modified
    Deleted

  FileDiff* = object
    ## One entry of a comparison between two trees.
    path*: string              ## File path
    diffType*: DiffType        ## Type of difference
    oldHash*: Option[string]   ## Old hash
    newHash*: Option[string]   ## New hash

  DiffType* = enum ## Classification of a `FileDiff` entry.
    OnlyInFirst
    OnlyInSecond
    Different
    Identical

  MerkleError* = object of NimPakError
    ## Error raised or returned by merkle tree operations.
    treePath*: string
|
|
|
|
# Hash calculation functions
|
|
proc calculateXxh3*(data: string): string =
  ## Hash `data` with xxHash xxh3_128 (performance layer) and return the
  ## stringified digest prefixed with "xxh3-".
  "xxh3-" & $XXH3_128bits(data)
|
|
|
|
proc calculateXxh3*(data: seq[byte]): string =
  ## Hash a byte sequence with xxHash xxh3_128.
  ##
  ## The bytes are copied one-to-one into a string of the same length and
  ## handed to the string overload; an empty sequence hashes the empty string.
  var buffer = newString(data.len)
  for idx, value in data:
    buffer[idx] = char(value)
  calculateXxh3(buffer)
|
|
|
|
proc calculateBlake2b*(data: seq[byte]): string =
  ## Hash `data` with BLAKE2b-512 (legacy fallback) and return the
  ## stringified digest prefixed with "blake2b-".
  "blake2b-" & $blake2_512.digest(data)
|
|
|
|
proc calculateNodeHash*(node: MerkleNode, algorithm: string = "xxh3"): string =
  ## Return the hash of `node`, memoizing it in `node.cachedHash`.
  ##
  ## * A cached value, when present, is returned as-is.
  ## * A leaf's hash is its stored file content hash.
  ## * An internal node's hash is computed over the concatenation of its
  ##   children's `hash` fields, using xxh3 when `algorithm == "xxh3"` and
  ##   BLAKE2b for any other value.
  # Task 12.3: consult the cache first
  if node.cachedHash.isSome:
    return node.cachedHash.get()

  if node.isLeaf:
    result = node.hash
  else:
    var joined = ""
    for child in node.children:
      joined.add(child.hash)

    if algorithm == "xxh3":
      result = calculateXxh3(joined)
    else:
      result = calculateBlake2b(joined.toOpenArrayByte(0, joined.len - 1).toSeq())

  # Task 12.3: remember the value for subsequent calls
  node.cachedHash = some(result)
|
|
|
|
proc newLeafNode*(path: string, hash: string, size: int64): MerkleNode =
  ## Build a leaf node for the file at `path` with the given content `hash`
  ## and `size`. The hash cache is pre-filled because a leaf's hash is known
  ## up front.
  new(result)
  result.path = path
  result.hash = hash
  result.size = size
  result.isLeaf = true
  result.children = @[]
  # Task 12.3: leaf nodes have their hash immediately
  result.cachedHash = some(hash)
|
|
|
|
proc newInternalNode*(children: seq[MerkleNode], algorithm: string = "xxh3"): MerkleNode =
  ## Build an internal node over `children`; its hash is derived from the
  ## children's hashes with `algorithm`.
  new(result)
  result.isLeaf = false
  result.children = children
  result.path = ""
  result.size = 0
  result.hash = ""
  # Task 12.3: start with an empty cache so the hash below is really computed
  result.cachedHash = none(string)
  # The computed value is stored both in `hash` and (by the callee) in the cache.
  result.hash = calculateNodeHash(result, algorithm)
|
|
|
|
proc buildTreeFromFiles*(files: seq[FileEntry], algorithm: string = "xxh3"): Result[MerkleTree, MerkleError] =
  ## Build a merkle tree from a list of files.
  ##
  ## * With no files, the tree is a single placeholder leaf (empty path,
  ##   size 0) whose hash is the hash of empty input; counters are 1/1.
  ## * Otherwise files are ordered by path, turned into leaves, and combined
  ##   pairwise level by level; an unpaired last node is carried up unchanged.
  ##
  ## Any exception raised while building is converted into an `UnknownError`.
  try:
    if files.len == 0:
      let emptyHash =
        if algorithm == "xxh3": calculateXxh3("")
        else: calculateBlake2b(@[])
      let placeholder = MerkleNode(
        hash: emptyHash,
        path: "",
        isLeaf: true,
        children: @[],
        size: 0
      )
      return okResult[MerkleTree, MerkleError](MerkleTree(
        root: placeholder,
        hashAlgorithm: algorithm,
        nodeCount: 1,
        leafCount: 1,
        # Task 12.3: start with an empty node cache
        nodeCache: initTable[string, string]()
      ))

    # Order by path so the tree shape is deterministic
    let ordered = files.sorted(proc(a, b: FileEntry): int = cmp(a.path, b.path))

    var level = newSeqOfCap[MerkleNode](ordered.len)
    for entry in ordered:
      level.add(newLeafNode(entry.path, entry.hash, entry.size))

    let leafTotal = level.len
    var nodeTotal = leafTotal

    # Reduce bottom-up until a single root remains
    while level.len > 1:
      var parents = newSeqOfCap[MerkleNode]((level.len + 1) div 2)
      for idx in countup(0, level.high, 2):
        if idx == level.high:
          # Odd node out - promote it to the next level as-is
          parents.add(level[idx])
        else:
          parents.add(newInternalNode(@[level[idx], level[idx + 1]], algorithm))
          inc nodeTotal
      level = parents

    return okResult[MerkleTree, MerkleError](MerkleTree(
      root: level[0],
      hashAlgorithm: algorithm,
      nodeCount: nodeTotal,
      leafCount: leafTotal,
      # Task 12.3: start with an empty node cache
      nodeCache: initTable[string, string]()
    ))

  except Exception as e:
    return errResult[MerkleTree, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to build merkle tree: " & e.msg
    ))
|
|
|
|
proc getRootHash*(tree: MerkleTree): string =
  ## Return the hash stored on the tree's root node.
  tree.root.hash
|
|
|
|
proc getLeafNodes*(node: MerkleNode): seq[MerkleNode] =
  ## Collect every leaf below `node` (or `node` itself when it is a leaf),
  ## visiting children in order.
  if node.isLeaf:
    result.add(node)
    return

  for child in node.children:
    for leaf in getLeafNodes(child):
      result.add(leaf)
|
|
|
|
proc getAllLeaves*(tree: MerkleTree): seq[MerkleNode] =
  ## Return all leaf nodes reachable from the tree's root.
  getLeafNodes(tree.root)
|
|
|
|
proc findLeaf*(node: MerkleNode, path: string): Option[MerkleNode] =
  ## Depth-first search for the leaf whose `path` equals `path`.
  ## Returns the first match, or `none` when no leaf below `node` matches.
  if node.isLeaf:
    if node.path == path:
      return some(node)
    return none(MerkleNode)

  for child in node.children:
    let hit = findLeaf(child, path)
    if hit.isSome:
      return hit

  none(MerkleNode)
|
|
|
|
proc findLeafInTree*(tree: MerkleTree, path: string): Option[MerkleNode] =
  ## Look up a leaf by path, starting at the tree's root.
  findLeaf(tree.root, path)
|
|
|
|
proc treeToString*(node: MerkleNode, indent: int = 0): string =
  ## Render `node` and its descendants as indented text for debugging.
  ## Each nesting level repeats the indent unit once more.
  # NOTE(review): the indent unit is reproduced exactly as it appears in the
  # extracted source; whitespace runs in that listing look collapsed, so
  # confirm the intended width against the original file.
  result = repeat(" ", indent)
  if node.isLeaf:
    result.add("Leaf: " & node.path & " (" & node.hash & ", " & $node.size & " bytes)\n")
  else:
    result.add("Internal: " & node.hash & "\n")
    for child in node.children:
      result.add(treeToString(child, indent + 1))
|
|
|
|
proc printTree*(tree: MerkleTree): string =
  ## Return a textual dump of the tree: a header with algorithm and counters,
  ## the root hash, then the node structure.
  let header = "Merkle Tree (algorithm: " & tree.hashAlgorithm & ", nodes: " & $tree.nodeCount & ", leaves: " & $tree.leafCount & ")\n"
  let rootLine = "Root hash: " & tree.root.hash & "\n"
  header & rootLine & treeToString(tree.root)
|
|
|
|
|
|
# Tree Verification Functions
|
|
|
|
proc verifyNode*(node: MerkleNode, algorithm: string = "xxh3"): Result[bool, MerkleError] =
  ## Verify that a single node's stored hash is correct.
  ##
  ## * Leaf nodes always verify: their hash is the file content hash and is
  ##   checked at the CAS level, not here.
  ## * Internal nodes have their hash recomputed from their children and
  ##   compared with the stored `hash`; a mismatch yields `CorruptedObject`.
  ##
  ## Any exception raised while verifying is converted into `UnknownError`.
  try:
    if node.isLeaf:
      return okResult[bool, MerkleError](true)

    # Drop the memoized value first. `calculateNodeHash` returns the cache
    # when it is set, and the cache is filled with the same value as `hash`
    # at construction time - without this reset the comparison below would
    # compare the stored hash with itself and never look at the children.
    node.cachedHash = none(string)
    let computedHash = calculateNodeHash(node, algorithm)

    if computedHash == node.hash:
      return okResult[bool, MerkleError](true)

    return errResult[bool, MerkleError](MerkleError(
      code: CorruptedObject,
      msg: "Hash mismatch for internal node. Expected: " & node.hash & ", Got: " & computedHash
    ))
  except Exception as e:
    return errResult[bool, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to verify node: " & e.msg
    ))
|
|
|
|
proc verifyTreeRecursive*(node: MerkleNode, algorithm: string = "xxh3"): Result[bool, MerkleError] =
  ## Verify `node` and then every descendant, stopping at the first error.
  ## Returns that error, or `true` when the whole subtree verifies.
  let own = verifyNode(node, algorithm)
  if own.isErr:
    return own

  if node.isLeaf:
    return okResult[bool, MerkleError](true)

  for child in node.children:
    let nested = verifyTreeRecursive(child, algorithm)
    if nested.isErr:
      return nested

  okResult[bool, MerkleError](true)
|
|
|
|
proc verifyTree*(tree: MerkleTree): Result[bool, MerkleError] =
  ## Verify the entire merkle tree using the tree's own hash algorithm.
  ## Checks that all internal node hashes match what is computed from their
  ## children.
  verifyTreeRecursive(tree.root, tree.hashAlgorithm)
|
|
|
|
# Parallel verification support (for large trees)
|
|
|
|
proc verifySubtree(node: MerkleNode, algorithm: string): bool {.thread.} =
  ## Worker entry point for parallel verification: collapses the recursive
  ## verification result of `node` into a plain boolean.
  let outcome = verifyTreeRecursive(node, algorithm)
  result = outcome.isOk and outcome.get()
|
|
|
|
proc verifyTreeParallel*(tree: MerkleTree): Result[bool, MerkleError] =
  ## Verify the tree by spawning one verification task per child of the root,
  ## then verifying the root node itself.
  ##
  ## A tree whose root is a leaf is verified directly without spawning.
  ## If any subtree task reports failure, a `CorruptedObject` error is
  ## returned; exceptions are converted into `UnknownError`.
  try:
    if tree.root.isLeaf:
      # Single leaf, nothing to parallelize
      return verifyTree(tree)

    # One task per top-level subtree
    var pending = newSeqOfCap[FlowVar[bool]](tree.root.children.len)
    for subtree in tree.root.children:
      pending.add(spawn verifySubtree(subtree, tree.hashAlgorithm))

    # Block on each task in spawn order
    for handle in pending:
      let subtreeOk = ^handle
      if not subtreeOk:
        return errResult[bool, MerkleError](MerkleError(
          code: CorruptedObject,
          msg: "Parallel verification failed for one or more subtrees"
        ))

    # Finally check the root's own hash
    let rootCheck = verifyNode(tree.root, tree.hashAlgorithm)
    if rootCheck.isErr:
      return rootCheck

    return okResult[bool, MerkleError](true)

  except Exception as e:
    return errResult[bool, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed during parallel verification: " & e.msg
    ))
|
|
|
|
proc verifyTreeIncremental*(tree: MerkleTree, paths: seq[string]): Result[bool, MerkleError] =
  ## Check that each of `paths` exists as a leaf in the tree.
  ##
  ## Returns `ObjectNotFound` for the first missing path, otherwise `true`.
  ## Only presence is checked; no hash on the leaf-to-root path is recomputed.
  try:
    for path in paths:
      if findLeafInTree(tree, path).isNone:
        return errResult[bool, MerkleError](MerkleError(
          code: ObjectNotFound,
          msg: "Path not found in tree: " & path
        ))

      # For incremental verification, we'd need to verify the path from leaf to root
      # For now, we just verify the leaf exists
      # Full path verification would require parent pointers in nodes

    return okResult[bool, MerkleError](true)

  except Exception as e:
    return errResult[bool, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed during incremental verification: " & e.msg
    ))
|
|
|
|
# Verification statistics
|
|
type
  VerificationStats* = object
    ## Counters and timing produced by a verification run.
    totalNodes*: int          ## Node count taken from the tree
    verifiedNodes*: int       ## Nodes whose verification succeeded
    failedNodes*: int         ## Nodes whose verification failed
    verificationTime*: float  ## Elapsed time in milliseconds
|
|
|
|
proc verifyTreeWithStats*(tree: MerkleTree): Result[VerificationStats, MerkleError] =
  ## Verify the tree and return counters plus elapsed CPU time.
  ##
  ## * `totalNodes` is copied from `tree.nodeCount`.
  ## * `verifiedNodes` counts nodes whose own check succeeded.
  ## * `failedNodes` counts nodes whose own check failed.
  ## * `verificationTime` is the `cpuTime()` delta in milliseconds.
  ##
  ## The walk stops at the first failing node, so nodes after it are counted
  ## in neither bucket. Exceptions are converted into `UnknownError`.
  try:
    let startTime = cpuTime()
    var stats = VerificationStats(
      totalNodes: tree.nodeCount,
      verifiedNodes: 0,
      failedNodes: 0,
      verificationTime: 0.0
    )

    proc verifyAndCount(node: MerkleNode, algorithm: string): bool =
      ## Returns false as soon as a node in this subtree fails.
      let nodeResult = verifyNode(node, algorithm)
      if not (nodeResult.isOk and nodeResult.get()):
        stats.failedNodes.inc
        return false

      stats.verifiedNodes.inc

      if not node.isLeaf:
        for child in node.children:
          if not verifyAndCount(child, algorithm):
            # Only propagate the failure. The failing node already counted
            # itself, and this node passed its own check above - counting it
            # as failed too would inflate `failedNodes` by one per ancestor.
            return false

      return true

    discard verifyAndCount(tree.root, tree.hashAlgorithm)

    let endTime = cpuTime()
    stats.verificationTime = (endTime - startTime) * 1000.0 # seconds -> milliseconds

    return okResult[VerificationStats, MerkleError](stats)

  except Exception as e:
    return errResult[VerificationStats, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to collect verification statistics: " & e.msg
    ))
|
|
|
|
|
|
# Incremental Update Functions
|
|
|
|
proc invalidateCache*(node: MerkleNode) =
  ## Drop the memoized hash of `node` so the next hash request recomputes it.
  ## Task 12.3: clear the cached hash when a node is modified.
  ##
  ## Only the given node is touched; ancestors are not invalidated here.
  # Note: invalidating ancestors would need parent pointers on nodes.
  # For now the tree is rebuilt on change, which starts from fresh caches.
  node.cachedHash = none(string)
|
|
|
|
proc applyChanges*(tree: var MerkleTree, changes: seq[FileChange]): Result[string, MerkleError] =
  ## Apply file changes to the tree and return the new root hash.
  ##
  ## The current leaves are collected into a path-keyed map, the changes are
  ## applied to that map in order, and the tree is rebuilt from the result:
  ##
  ## * `Added` / `Modified` set the entry for the path; both require
  ##   `newHash` and `newSize`, otherwise an `UnknownError` is returned.
  ## * `Deleted` removes the entry for the path (no error if absent).
  ##
  ## With no changes the current root hash is returned and the tree is left
  ## untouched. `tree` is replaced only after a successful rebuild.
  ## Exceptions are converted into `UnknownError`.
  try:
    if changes.len == 0:
      return okResult[string, MerkleError](tree.root.hash)

    # Task 12.3: the node cache is stale once we rebuild
    tree.nodeCache.clear()

    var leafMap = initTable[string, MerkleNode]()

    # An empty tree is a single placeholder leaf with an empty path. It is
    # not a file, so it must not be carried into the rebuilt tree - otherwise
    # adding a file to an empty tree would produce a phantom "" entry and a
    # root hash that differs from building the same file set from scratch.
    let isEmptyTree = tree.root.isLeaf and tree.root.path.len == 0
    if not isEmptyTree:
      for leaf in getAllLeaves(tree):
        leafMap[leaf.path] = leaf

    for change in changes:
      case change.changeType
      of Added, Modified:
        if change.newHash.isNone or change.newSize.isNone:
          let label = if change.changeType == Added: "Added" else: "Modified"
          return errResult[string, MerkleError](MerkleError(
            code: UnknownError,
            msg: label & " file must have hash and size: " & change.path
          ))
        leafMap[change.path] = newLeafNode(change.path, change.newHash.get(), change.newSize.get())
      of Deleted:
        leafMap.del(change.path)

    # Rebuild the tree from the updated leaf set
    var files = newSeqOfCap[FileEntry](leafMap.len)
    for path, leaf in leafMap:
      files.add(FileEntry(
        path: path,
        hash: leaf.hash,
        size: leaf.size
      ))

    let newTreeResult = buildTreeFromFiles(files, tree.hashAlgorithm)
    if newTreeResult.isErr:
      return errResult[string, MerkleError](newTreeResult.error)

    tree = newTreeResult.get()

    return okResult[string, MerkleError](tree.root.hash)

  except Exception as e:
    return errResult[string, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to apply changes: " & e.msg
    ))
|
|
|
|
proc updateFile*(tree: var MerkleTree, path: string, newHash: string, newSize: int64): Result[string, MerkleError] =
  ## Convenience wrapper: apply a single `Modified` change for `path` and
  ## return the resulting root hash.
  applyChanges(tree, @[FileChange(
    path: path,
    changeType: Modified,
    newHash: some(newHash),
    newSize: some(newSize)
  )])
|
|
|
|
proc addFile*(tree: var MerkleTree, path: string, hash: string, size: int64): Result[string, MerkleError] =
  ## Convenience wrapper: apply a single `Added` change for `path` and
  ## return the resulting root hash.
  applyChanges(tree, @[FileChange(
    path: path,
    changeType: Added,
    newHash: some(hash),
    newSize: some(size)
  )])
|
|
|
|
proc removeFile*(tree: var MerkleTree, path: string): Result[string, MerkleError] =
  ## Convenience wrapper: apply a single `Deleted` change for `path` and
  ## return the resulting root hash.
  applyChanges(tree, @[FileChange(
    path: path,
    changeType: Deleted,
    newHash: none(string),
    newSize: none(int64)
  )])
|
|
|
|
# Optimized incremental update (future enhancement)
|
|
# This would track parent pointers and only recompute affected branches
|
|
# For now, we rebuild the tree which is still fast for reasonable sizes
|
|
|
|
proc getAffectedPaths*(changes: seq[FileChange]): seq[string] =
  ## Return the path of every change, in input order (duplicates kept).
  changes.mapIt(it.path)
|
|
|
|
proc estimateUpdateCost*(tree: MerkleTree, changes: seq[FileChange]): int =
  ## Estimate how many nodes must be recomputed to apply `changes`.
  ##
  ## The current implementation rebuilds the whole tree, so the estimate is
  ## the tree's node count and `changes` is not consulted.
  ## Future optimization: track only affected branches.
  tree.nodeCount
|
|
|
|
# Update statistics
|
|
type
  UpdateStats* = object
    ## Counters and timing produced by applying a set of changes.
    changesApplied*: int   ## Number of changes in the request
    nodesRecomputed*: int  ## Number of nodes recomputed by the update
    oldRootHash*: string   ## Root hash before the update
    newRootHash*: string   ## Root hash after the update
    updateTime*: float     ## Elapsed time in milliseconds
|
|
|
|
proc applyChangesWithStats*(tree: var MerkleTree, changes: seq[FileChange]): Result[UpdateStats, MerkleError] =
  ## Apply `changes` and report what happened: number of changes, node count
  ## of the rebuilt tree, root hash before and after, and the `cpuTime()`
  ## delta in milliseconds.
  ##
  ## An error from applying the changes is passed through; exceptions are
  ## converted into `UnknownError`.
  try:
    let startedAt = cpuTime()
    let rootBefore = tree.root.hash

    let outcome = applyChanges(tree, changes)
    if outcome.isErr:
      return errResult[UpdateStats, MerkleError](outcome.error)

    let rootAfter = outcome.get()
    let finishedAt = cpuTime()

    return okResult[UpdateStats, MerkleError](UpdateStats(
      changesApplied: changes.len,
      nodesRecomputed: tree.nodeCount, # Current: full rebuild
      oldRootHash: rootBefore,
      newRootHash: rootAfter,
      updateTime: (finishedAt - startedAt) * 1000.0
    ))

  except Exception as e:
    return errResult[UpdateStats, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to collect update statistics: " & e.msg
    ))
|
|
|
|
# Batch update optimization
|
|
proc applyChangesBatch*(tree: var MerkleTree, changeBatches: seq[seq[FileChange]]): Result[seq[string], MerkleError] =
  ## Apply several batches of changes in order and return the root hash
  ## observed after each batch.
  ##
  ## Processing stops at the first failing batch and its error is returned;
  ## batches applied before that point remain applied to `tree`.
  ## Exceptions are converted into `UnknownError`.
  try:
    var roots = newSeqOfCap[string](changeBatches.len)

    for batch in changeBatches:
      let outcome = applyChanges(tree, batch)
      if outcome.isErr:
        return errResult[seq[string], MerkleError](outcome.error)
      roots.add(outcome.get())

    return okResult[seq[string], MerkleError](roots)

  except Exception as e:
    return errResult[seq[string], MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to apply batch changes: " & e.msg
    ))
|
|
|
|
|
|
# Tree Diffing Functions
|
|
|
|
proc compareDiffs(a, b: FileDiff): int =
  ## Ordering used when sorting diffs: compares entries by path only.
  result = cmp(a.path, b.path)
|
|
|
|
proc diffTrees*(tree1, tree2: MerkleTree): Result[seq[FileDiff], MerkleError] =
  ## Compare two merkle trees and return their differences, sorted by path.
  ##
  ## * Equal root hashes short-circuit to an empty result.
  ## * A path present only in `tree1` yields `OnlyInFirst` (old hash set).
  ## * A path present only in `tree2` yields `OnlyInSecond` (new hash set).
  ## * A path in both with different hashes yields `Different`.
  ## * Paths with equal hashes are omitted.
  ##
  ## Exceptions are converted into `UnknownError`.
  try:
    # Quick check: identical roots mean identical trees
    if tree1.root.hash == tree2.root.hash:
      return okResult[seq[FileDiff], MerkleError](@[])

    proc leavesByPath(tree: MerkleTree): Table[string, MerkleNode] =
      ## Path -> leaf lookup for `tree`.
      result = initTable[string, MerkleNode]()
      # An empty tree is a single placeholder leaf with an empty path; it is
      # not a file and must not show up as a diff entry for path "".
      if tree.root.isLeaf and tree.root.path.len == 0:
        return
      for leaf in getAllLeaves(tree):
        result[leaf.path] = leaf

    let map1 = leavesByPath(tree1)
    let map2 = leavesByPath(tree2)

    var diffs: seq[FileDiff] = @[]

    # Files only in tree1
    for path, leaf in map1:
      if not map2.hasKey(path):
        diffs.add(FileDiff(
          path: path,
          diffType: OnlyInFirst,
          oldHash: some(leaf.hash),
          newHash: none(string)
        ))

    # Files only in tree2, or present in both with different content
    for path, leaf2 in map2:
      if not map1.hasKey(path):
        diffs.add(FileDiff(
          path: path,
          diffType: OnlyInSecond,
          oldHash: none(string),
          newHash: some(leaf2.hash)
        ))
      else:
        let leaf1 = map1[path]
        if leaf1.hash != leaf2.hash:
          diffs.add(FileDiff(
            path: path,
            diffType: Different,
            oldHash: some(leaf1.hash),
            newHash: some(leaf2.hash)
          ))
        # Identical files are intentionally left out to reduce noise.

    # Sort by path for consistent output
    diffs.sort(compareDiffs)

    return okResult[seq[FileDiff], MerkleError](diffs)

  except Exception as e:
    return errResult[seq[FileDiff], MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to diff trees: " & e.msg
    ))
|
|
|
|
proc diffTreesWithIdentical*(tree1, tree2: MerkleTree): Result[seq[FileDiff], MerkleError] =
  ## Compare two trees like `diffTrees`, and additionally report files that
  ## are present in both trees with the same hash as `Identical`.
  ## The combined result is sorted by path.
  ##
  ## An error from the underlying diff is passed through; exceptions are
  ## converted into `UnknownError`.
  try:
    let diffResult = diffTrees(tree1, tree2)
    if diffResult.isErr:
      return diffResult

    var diffs = diffResult.get()

    proc isEmptyPlaceholder(tree: MerkleTree): bool =
      ## True when the tree is the single empty-path leaf that stands for
      ## "no files".
      tree.root.isLeaf and tree.root.path.len == 0

    # An empty tree has no files, so nothing can be identical. Without this
    # guard two empty trees would report a phantom `Identical` entry for the
    # placeholder's empty path.
    if not (isEmptyPlaceholder(tree1) or isEmptyPlaceholder(tree2)):
      var map1 = initTable[string, MerkleNode]()
      var map2 = initTable[string, MerkleNode]()

      for leaf in getAllLeaves(tree1):
        map1[leaf.path] = leaf
      for leaf in getAllLeaves(tree2):
        map2[leaf.path] = leaf

      for path, leaf1 in map1:
        if map2.hasKey(path):
          let leaf2 = map2[path]
          if leaf1.hash == leaf2.hash:
            diffs.add(FileDiff(
              path: path,
              diffType: Identical,
              oldHash: some(leaf1.hash),
              newHash: some(leaf2.hash)
            ))

    # Sort by path
    diffs.sort(compareDiffs)

    return okResult[seq[FileDiff], MerkleError](diffs)

  except Exception as e:
    return errResult[seq[FileDiff], MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to diff trees with identical: " & e.msg
    ))
|
|
|
|
# Diff statistics
|
|
type
  DiffStats* = object
    ## Summary counters for a comparison of two trees.
    totalFiles*: int    ## Sum of the four category counters
    onlyInFirst*: int   ## Entries classified `OnlyInFirst`
    onlyInSecond*: int  ## Entries classified `OnlyInSecond`
    different*: int     ## Entries classified `Different`
    identical*: int     ## Entries classified `Identical`
    diffTime*: float    ## Elapsed time in milliseconds
|
|
|
|
proc getDiffStats*(tree1, tree2: MerkleTree): Result[DiffStats, MerkleError] =
  ## Compare two trees (including identical files) and summarize the result
  ## as per-category counts, their total, and the `cpuTime()` delta in
  ## milliseconds.
  ##
  ## An error from the diff is passed through; exceptions are converted into
  ## `UnknownError`.
  try:
    let startedAt = cpuTime()

    let outcome = diffTreesWithIdentical(tree1, tree2)
    if outcome.isErr:
      return errResult[DiffStats, MerkleError](outcome.error)

    var tally = DiffStats(
      totalFiles: 0,
      onlyInFirst: 0,
      onlyInSecond: 0,
      different: 0,
      identical: 0,
      diffTime: 0.0
    )

    for entry in outcome.get():
      case entry.diffType
      of OnlyInFirst: inc tally.onlyInFirst
      of OnlyInSecond: inc tally.onlyInSecond
      of Different: inc tally.different
      of Identical: inc tally.identical

    tally.totalFiles = tally.onlyInFirst + tally.onlyInSecond + tally.different + tally.identical
    tally.diffTime = (cpuTime() - startedAt) * 1000.0

    return okResult[DiffStats, MerkleError](tally)

  except Exception as e:
    return errResult[DiffStats, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to get diff statistics: " & e.msg
    ))
|
|
|
|
# Diff formatting
|
|
proc formatDiff*(diff: FileDiff): string =
  ## Render one diff entry as a single human-readable line (no newline):
  ## a marker, the path, and a parenthesized description.
  # NOTE(review): the `Identical` marker is reproduced exactly as it appears
  # in the extracted source (one character, while the other markers are two);
  # whitespace runs in that listing look collapsed - confirm against the
  # original file.
  let (marker, note) =
    case diff.diffType
    of OnlyInFirst: ("- ", " (removed)")
    of OnlyInSecond: ("+ ", " (added)")
    of Different: ("M ", " (modified)")
    of Identical: (" ", " (unchanged)")
  marker & diff.path & note
|
|
|
|
proc formatDiffs*(diffs: seq[FileDiff]): string =
  ## Render all diff entries, one per line, each terminated by a newline.
  ## An empty input yields an empty string.
  result = ""
  for entry in diffs:
    result.add(formatDiff(entry))
    result.add("\n")
|
|
|
|
proc printDiff*(tree1, tree2: MerkleTree): Result[string, MerkleError] =
  ## Produce a human-readable report of the differences between two trees:
  ## a heading, both root hashes, a blank line, then one line per diff.
  ## When there are no differences the report is "Trees are identical\n".
  ##
  ## An error from the diff is passed through; exceptions are converted into
  ## `UnknownError`.
  try:
    let outcome = diffTrees(tree1, tree2)
    if outcome.isErr:
      return errResult[string, MerkleError](outcome.error)

    let entries = outcome.get()
    if entries.len == 0:
      return okResult[string, MerkleError]("Trees are identical\n")

    let report = "Differences between trees:\n" &
      "Tree 1 root: " & tree1.root.hash & "\n" &
      "Tree 2 root: " & tree2.root.hash & "\n" &
      "\n" &
      formatDiffs(entries)

    return okResult[string, MerkleError](report)

  except Exception as e:
    return errResult[string, MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to print diff: " & e.msg
    ))
|
|
|
|
# Efficient change detection
|
|
proc hasChanges*(tree1, tree2: MerkleTree): bool =
  ## Quick change check: true when the two root hashes differ.
  ## No leaves are visited; only the two root hash strings are compared.
  tree1.root.hash != tree2.root.hash
|
|
|
|
proc getChangedPaths*(tree1, tree2: MerkleTree): Result[seq[string], MerkleError] =
  ## Return the paths that differ between two trees, in the order produced
  ## by the diff; entries classified `Identical` are skipped.
  ##
  ## An error from the diff is passed through; exceptions are converted into
  ## `UnknownError`.
  try:
    let outcome = diffTrees(tree1, tree2)
    if outcome.isErr:
      return errResult[seq[string], MerkleError](outcome.error)

    let changed = outcome.get()
      .filterIt(it.diffType != Identical)
      .mapIt(it.path)

    return okResult[seq[string], MerkleError](changed)

  except Exception as e:
    return errResult[seq[string], MerkleError](MerkleError(
      code: UnknownError,
      msg: "Failed to get changed paths: " & e.msg
    ))
|