# Source file: nip/src/nimpak/format_cas.nim
# (678 lines, 23 KiB, Nim)

# SPDX-License-Identifier: LSL-1.0
# Copyright (c) 2026 Markus Maiwald
# Stewardship: Self Sovereign Society Foundation
#
# This file is part of the Nexus Sovereign Core.
# See legal/LICENSE_SOVEREIGN.md for license terms.
## Package Format CAS Integration
##
## This module integrates all package formats with the Content-Addressable Storage
## system, providing unified storage, retrieval, deduplication, and garbage collection
## across all five package formats. It also implements format conversion pipelines
## between compatible formats.
import std/[os, json, times, strutils, sequtils, tables, sets, options, strformat, algorithm]
import ./types_fixed
import ./formats
import ./cas
import ./packages
import ./recipes
import ./chunks
import ./snapshots
import ./overlays
type
  FormatCasError* = object of NimPakError
    ## Error reported by the format-level CAS procs in this module.
    ## Extends `NimPakError` with the format and object involved.
    formatType*: PackageFormat ## Format that was being processed
    objectHash*: string ## Hash of the affected object; the procs in this
                        ## module use "unknown" when no hash is available

  StorageResult* = object
    ## Outcome of storing one package-format object in CAS.
    format*: PackageFormat ## Format of the stored object
    hash*: string          ## Hash reported by the CAS for the stored object
    size*: int64           ## Size reported by the CAS
    compressed*: bool      ## Compression flag reported by the CAS
    chunks*: seq[ChunkRef] ## Chunk references reported by the CAS

  RetrievalResult* = object
    ## Outcome of retrieving an object from CAS together with its
    ## detected format.
    format*: PackageFormat ## Detected format of the object
    hash*: string          ## Hash the object was requested by
    data*: seq[byte]       ## Raw object bytes as returned by the CAS
    metadata*: JsonNode    ## Detection metadata (e.g. a "type" entry)

  FormatCasManager* = object
    ## Bundles a CAS manager with the format-specific bookkeeping used by
    ## the storage, retrieval and conversion procs of this module.
    cas*: CasManager
    formatRegistry*: Table[string, PackageFormat] ## File extension -> format
    conversionCache*: Table[string, string] ## Source hash -> converted hash
# =============================================================================
# Format CAS Manager Initialization
# =============================================================================
proc initFormatCasManager*(casManager: CasManager): FormatCasManager =
  ## Build a `FormatCasManager` around an existing CAS manager.
  ##
  ## The returned manager starts with an empty conversion cache and a
  ## registry mapping the known file extensions to their package format.
  result = FormatCasManager(
    cas: casManager,
    formatRegistry: initTable[string, PackageFormat](),
    conversionCache: initTable[string, string]()
  )

  # Single-extension formats.
  result.formatRegistry[".npr"] = NprRecipe
  result.formatRegistry[".nca"] = NcaChunk
  result.formatRegistry[".nof"] = NofOverlay

  # Formats registered under both a `.zst` and a `.tar` extension.
  result.formatRegistry[".npk.zst"] = NpkBinary
  result.formatRegistry[".npk.tar"] = NpkBinary
  result.formatRegistry[".nss.zst"] = NssSnapshot
  result.formatRegistry[".nss.tar"] = NssSnapshot
# =============================================================================
# Universal Format Storage
# =============================================================================
proc storeNprRecipe*(manager: var FormatCasManager,
                     recipe: NprRecipe): Result[StorageResult, FormatCasError] =
  ## Serialize an NPR recipe to KDL and store the resulting bytes in CAS.
  ##
  ## On success the returned `StorageResult` carries the hash, size,
  ## compression flag and chunk list reported by the CAS. A failure
  ## reported by the CAS yields an error with code `CasError`; any other
  ## catchable error raised while serializing or storing yields an error
  ## with code `UnknownError`.
  try:
    let kdlContent = serializeNprToKdl(recipe)
    let data = kdlContent.toOpenArrayByte(0, kdlContent.len - 1).toSeq()
    let storeResult = manager.cas.storeObject(data)
    if storeResult.isErr:
      return err[StorageResult, FormatCasError](FormatCasError(
        code: CasError,
        msg: "Failed to store NPR recipe: " & storeResult.getError().msg,
        formatType: NprRecipe,
        objectHash: "unknown"
      ))
    let casObject = storeResult.get()
    # Deliberately not named `result`: that would shadow the proc's
    # implicit result variable.
    let stored = StorageResult(
      format: NprRecipe,
      hash: casObject.hash,
      size: casObject.size,
      compressed: casObject.compressed,
      chunks: casObject.chunks
    )
    return ok[StorageResult, FormatCasError](stored)
  except CatchableError as e:
    # Only recoverable errors are converted into a Result; Defects signal
    # programmer bugs and are left to propagate.
    return err[StorageResult, FormatCasError](FormatCasError(
      code: UnknownError,
      msg: "Failed to store NPR recipe: " & e.msg,
      formatType: NprRecipe,
      objectHash: "unknown"
    ))
proc storeNpkPackage*(manager: var FormatCasManager,
                      package: NpkPackage): Result[StorageResult, FormatCasError] =
  ## Serialize an NPK package to KDL and store that metadata in CAS.
  ##
  ## Only the serialized metadata is stored here. The package's files are
  ## not touched by this proc; they are expected to have been placed in
  ## CAS already when the package was created (TODO confirm against the
  ## package creation code).
  ##
  ## A failure reported by the CAS yields an error with code `CasError`;
  ## any other catchable error yields an error with code `UnknownError`.
  try:
    let kdlContent = serializeToKdl(package)
    let metadataData = kdlContent.toOpenArrayByte(0, kdlContent.len - 1).toSeq()
    let metadataResult = manager.cas.storeObject(metadataData)
    if metadataResult.isErr:
      return err[StorageResult, FormatCasError](FormatCasError(
        code: CasError,
        msg: "Failed to store NPK metadata: " & metadataResult.getError().msg,
        formatType: NpkBinary,
        objectHash: "unknown"
      ))
    # The storage result describes the metadata object only.
    let casObject = metadataResult.get()
    # Deliberately not named `result`: that would shadow the proc's
    # implicit result variable.
    let stored = StorageResult(
      format: NpkBinary,
      hash: casObject.hash,
      size: casObject.size,
      compressed: casObject.compressed,
      chunks: casObject.chunks
    )
    return ok[StorageResult, FormatCasError](stored)
  except CatchableError as e:
    # Only recoverable errors are converted into a Result; Defects signal
    # programmer bugs and are left to propagate.
    return err[StorageResult, FormatCasError](FormatCasError(
      code: UnknownError,
      msg: "Failed to store NPK package: " & e.msg,
      formatType: NpkBinary,
      objectHash: "unknown"
    ))
proc storeNcaChunk*(manager: var FormatCasManager,
                    chunk: NcaChunk): Result[StorageResult, FormatCasError] =
  ## Serialize an NCA chunk to its binary form and store it in CAS.
  ##
  ## On success the returned `StorageResult` carries the hash, size,
  ## compression flag and chunk list reported by the CAS. Errors carry
  ## `chunk.hash` as the object hash: code `CasError` when the CAS reports
  ## a failure, `UnknownError` for any other catchable error.
  try:
    let binaryData = serializeNcaChunk(chunk)
    let storeResult = manager.cas.storeObject(binaryData)
    if storeResult.isErr:
      return err[StorageResult, FormatCasError](FormatCasError(
        code: CasError,
        msg: "Failed to store NCA chunk: " & storeResult.getError().msg,
        formatType: NcaChunk,
        objectHash: chunk.hash
      ))
    let casObject = storeResult.get()
    # Deliberately not named `result`: that would shadow the proc's
    # implicit result variable.
    let stored = StorageResult(
      format: NcaChunk,
      hash: casObject.hash,
      size: casObject.size,
      compressed: casObject.compressed,
      chunks: casObject.chunks
    )
    return ok[StorageResult, FormatCasError](stored)
  except CatchableError as e:
    # Only recoverable errors are converted into a Result; Defects signal
    # programmer bugs and are left to propagate.
    return err[StorageResult, FormatCasError](FormatCasError(
      code: UnknownError,
      msg: "Failed to store NCA chunk: " & e.msg,
      formatType: NcaChunk,
      objectHash: chunk.hash
    ))
proc storeNssSnapshot*(manager: var FormatCasManager,
                       snapshot: NssSnapshot): Result[StorageResult, FormatCasError] =
  ## Serialize an NSS snapshot to KDL, store that metadata in CAS, and
  ## then store each package of the snapshot via `storeNpkPackage`.
  ##
  ## The returned `StorageResult` describes the snapshot metadata object.
  ## Storing the individual packages is best effort: a package that fails
  ## to store does not fail the snapshot.
  ##
  ## A failure reported by the CAS for the metadata yields an error with
  ## code `CasError`; any other catchable error yields `UnknownError`.
  try:
    let kdlContent = serializeNssToKdl(snapshot)
    let metadataData = kdlContent.toOpenArrayByte(0, kdlContent.len - 1).toSeq()
    let metadataResult = manager.cas.storeObject(metadataData)
    if metadataResult.isErr:
      return err[StorageResult, FormatCasError](FormatCasError(
        code: CasError,
        msg: "Failed to store NSS metadata: " & metadataResult.getError().msg,
        formatType: NssSnapshot,
        objectHash: "unknown"
      ))
    for package in snapshot.packages:
      # Best effort by design: the outcome is ignored because the package
      # may already be present in CAS.
      # TODO: log a warning when storing a package fails.
      discard manager.storeNpkPackage(package)
    let casObject = metadataResult.get()
    # Deliberately not named `result`: that would shadow the proc's
    # implicit result variable.
    let stored = StorageResult(
      format: NssSnapshot,
      hash: casObject.hash,
      size: casObject.size,
      compressed: casObject.compressed,
      chunks: casObject.chunks
    )
    return ok[StorageResult, FormatCasError](stored)
  except CatchableError as e:
    # Only recoverable errors are converted into a Result; Defects signal
    # programmer bugs and are left to propagate.
    return err[StorageResult, FormatCasError](FormatCasError(
      code: UnknownError,
      msg: "Failed to store NSS snapshot: " & e.msg,
      formatType: NssSnapshot,
      objectHash: "unknown"
    ))
proc storeNofOverlay*(manager: var FormatCasManager,
                      overlay: NofOverlay): Result[StorageResult, FormatCasError] =
  ## Serialize an NOF overlay to KDL and store the resulting bytes in CAS.
  ##
  ## On success the returned `StorageResult` carries the hash, size,
  ## compression flag and chunk list reported by the CAS. A failure
  ## reported by the CAS yields an error with code `CasError`; any other
  ## catchable error raised while serializing or storing yields an error
  ## with code `UnknownError`.
  try:
    let kdlContent = serializeNofToKdl(overlay)
    let data = kdlContent.toOpenArrayByte(0, kdlContent.len - 1).toSeq()
    let storeResult = manager.cas.storeObject(data)
    if storeResult.isErr:
      return err[StorageResult, FormatCasError](FormatCasError(
        code: CasError,
        msg: "Failed to store NOF overlay: " & storeResult.getError().msg,
        formatType: NofOverlay,
        objectHash: "unknown"
      ))
    let casObject = storeResult.get()
    # Deliberately not named `result`: that would shadow the proc's
    # implicit result variable.
    let stored = StorageResult(
      format: NofOverlay,
      hash: casObject.hash,
      size: casObject.size,
      compressed: casObject.compressed,
      chunks: casObject.chunks
    )
    return ok[StorageResult, FormatCasError](stored)
  except CatchableError as e:
    # Only recoverable errors are converted into a Result; Defects signal
    # programmer bugs and are left to propagate.
    return err[StorageResult, FormatCasError](FormatCasError(
      code: UnknownError,
      msg: "Failed to store NOF overlay: " & e.msg,
      formatType: NofOverlay,
      objectHash: "unknown"
    ))
# =============================================================================
# Universal Format Retrieval
# =============================================================================
proc retrieveNprRecipe*(manager: FormatCasManager,
                        hash: string): Result[NprRecipe, FormatCasError] =
  ## Fetch the object stored under `hash` and parse it as an NPR recipe.
  ##
  ## Returns an error with code `ObjectNotFound` when the CAS cannot
  ## return the object, and `InvalidMetadata` when the retrieved content
  ## does not deserialize as a recipe.
  let dataResult = manager.cas.retrieveObject(hash)
  if dataResult.isErr:
    return err[NprRecipe, FormatCasError](FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve NPR recipe: " & dataResult.getError().msg,
      formatType: NprRecipe,
      objectHash: hash
    ))
  let data = dataResult.get()
  # Copy the bytes into a real string. Reinterpreting the byte buffer with
  # `cast[string]` relies on both types sharing a memory layout, which the
  # language does not guarantee.
  var kdlContent = newString(data.len)
  for i, b in data:
    kdlContent[i] = char(b)
  let deserializeResult = deserializeNprFromKdl(kdlContent)
  if deserializeResult.isErr:
    return err[NprRecipe, FormatCasError](FormatCasError(
      code: InvalidMetadata,
      msg: "Failed to deserialize NPR recipe: " & deserializeResult.getError().msg,
      formatType: NprRecipe,
      objectHash: hash
    ))
  return ok[NprRecipe, FormatCasError](deserializeResult.get())
proc retrieveNpkPackage*(manager: FormatCasManager,
                         hash: string): Result[NpkPackage, FormatCasError] =
  ## Fetch the object stored under `hash` and parse it as an NPK package.
  ##
  ## Returns an error with code `ObjectNotFound` when the CAS cannot
  ## return the object, and `InvalidMetadata` when the retrieved content
  ## does not deserialize as a package.
  let dataResult = manager.cas.retrieveObject(hash)
  if dataResult.isErr:
    return err[NpkPackage, FormatCasError](FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve NPK package: " & dataResult.getError().msg,
      formatType: NpkBinary,
      objectHash: hash
    ))
  let data = dataResult.get()
  # Copy the bytes into a real string. Reinterpreting the byte buffer with
  # `cast[string]` relies on both types sharing a memory layout, which the
  # language does not guarantee.
  var kdlContent = newString(data.len)
  for i, b in data:
    kdlContent[i] = char(b)
  let deserializeResult = deserializeFromKdl(kdlContent)
  if deserializeResult.isErr:
    return err[NpkPackage, FormatCasError](FormatCasError(
      code: InvalidMetadata,
      msg: "Failed to deserialize NPK package: " & deserializeResult.getError().msg,
      formatType: NpkBinary,
      objectHash: hash
    ))
  return ok[NpkPackage, FormatCasError](deserializeResult.get())
proc retrieveNcaChunk*(manager: FormatCasManager,
                       hash: string): Result[NcaChunk, FormatCasError] =
  ## Fetch the object stored under `hash` and decode it as an NCA chunk.
  ##
  ## Returns an error with code `ObjectNotFound` when the CAS cannot
  ## return the object, and `InvalidMetadata` when the retrieved bytes do
  ## not deserialize as a chunk.
  let fetched = manager.cas.retrieveObject(hash)
  if fetched.isErr:
    let notFound = FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve NCA chunk: " & fetched.getError().msg,
      formatType: NcaChunk,
      objectHash: hash
    )
    return err[NcaChunk, FormatCasError](notFound)

  # The chunk format is binary, so the raw bytes go straight to the decoder.
  let decoded = deserializeNcaChunk(fetched.get())
  if decoded.isErr:
    let invalid = FormatCasError(
      code: InvalidMetadata,
      msg: "Failed to deserialize NCA chunk: " & decoded.getError().msg,
      formatType: NcaChunk,
      objectHash: hash
    )
    return err[NcaChunk, FormatCasError](invalid)
  else:
    return ok[NcaChunk, FormatCasError](decoded.get())
proc retrieveNssSnapshot*(manager: FormatCasManager,
                          hash: string): Result[NssSnapshot, FormatCasError] =
  ## Fetch the object stored under `hash` and parse it as an NSS snapshot.
  ##
  ## Returns an error with code `ObjectNotFound` when the CAS cannot
  ## return the object, and `InvalidMetadata` when the retrieved content
  ## does not deserialize as a snapshot.
  let dataResult = manager.cas.retrieveObject(hash)
  if dataResult.isErr:
    return err[NssSnapshot, FormatCasError](FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve NSS snapshot: " & dataResult.getError().msg,
      formatType: NssSnapshot,
      objectHash: hash
    ))
  let data = dataResult.get()
  # Copy the bytes into a real string. Reinterpreting the byte buffer with
  # `cast[string]` relies on both types sharing a memory layout, which the
  # language does not guarantee.
  var kdlContent = newString(data.len)
  for i, b in data:
    kdlContent[i] = char(b)
  let deserializeResult = deserializeNssFromKdl(kdlContent)
  if deserializeResult.isErr:
    return err[NssSnapshot, FormatCasError](FormatCasError(
      code: InvalidMetadata,
      msg: "Failed to deserialize NSS snapshot: " & deserializeResult.getError().msg,
      formatType: NssSnapshot,
      objectHash: hash
    ))
  return ok[NssSnapshot, FormatCasError](deserializeResult.get())
proc retrieveNofOverlay*(manager: FormatCasManager,
                         hash: string): Result[NofOverlay, FormatCasError] =
  ## Fetch the object stored under `hash` and parse it as an NOF overlay.
  ##
  ## Returns an error with code `ObjectNotFound` when the CAS cannot
  ## return the object, and `InvalidMetadata` when the retrieved content
  ## does not deserialize as an overlay.
  let dataResult = manager.cas.retrieveObject(hash)
  if dataResult.isErr:
    return err[NofOverlay, FormatCasError](FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve NOF overlay: " & dataResult.getError().msg,
      formatType: NofOverlay,
      objectHash: hash
    ))
  let data = dataResult.get()
  # Copy the bytes into a real string. Reinterpreting the byte buffer with
  # `cast[string]` relies on both types sharing a memory layout, which the
  # language does not guarantee.
  var kdlContent = newString(data.len)
  for i, b in data:
    kdlContent[i] = char(b)
  let deserializeResult = deserializeNofFromKdl(kdlContent)
  if deserializeResult.isErr:
    return err[NofOverlay, FormatCasError](FormatCasError(
      code: InvalidMetadata,
      msg: "Failed to deserialize NOF overlay: " & deserializeResult.getError().msg,
      formatType: NofOverlay,
      objectHash: hash
    ))
  return ok[NofOverlay, FormatCasError](deserializeResult.get())
# =============================================================================
# Format Detection and Universal Retrieval
# =============================================================================
proc detectAndRetrieve*(manager: FormatCasManager,
                        hash: string): Result[RetrievalResult, FormatCasError] =
  ## Fetch the object stored under `hash` and guess its package format
  ## from its content.
  ##
  ## Detection rules, in order:
  ## 1. content starting with the bytes `NCA1` is an NCA chunk;
  ## 2. otherwise the first KDL marker found among `recipe "`,
  ##    `overlay "`, `snapshot "` and `package "` (tested in that order,
  ##    anywhere in the content) selects the format;
  ## 3. otherwise the format defaults to `NpkBinary` and `metadata` stays
  ##    an empty JSON object.
  ##
  ## Returns an error with code `ObjectNotFound` when the CAS cannot
  ## return the object.
  let dataResult = manager.cas.retrieveObject(hash)
  if dataResult.isErr:
    return err[RetrievalResult, FormatCasError](FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve object: " & dataResult.getError().msg,
      formatType: NpkBinary, # Default
      objectHash: hash
    ))
  let data = dataResult.get()
  var detectedFormat = NpkBinary # Default
  var metadata = newJObject()

  # Test the binary magic first: it is the most specific signal, and a
  # binary chunk may contain any of the textual markers in its payload.
  const ncaMagic = "NCA1"
  var hasNcaMagic = data.len >= ncaMagic.len
  if hasNcaMagic:
    for i, c in ncaMagic:
      if char(data[i]) != c:
        hasNcaMagic = false
        break

  if hasNcaMagic:
    detectedFormat = NcaChunk
    metadata["type"] = newJString("chunk")
  else:
    # Copy the bytes into a real string for the substring scans rather
    # than reinterpreting the buffer with `cast[string]`.
    var content = newString(data.len)
    for i, b in data:
      content[i] = char(b)
    if content.contains("recipe \""):
      detectedFormat = NprRecipe
      metadata["type"] = newJString("recipe")
    elif content.contains("overlay \""):
      detectedFormat = NofOverlay
      metadata["type"] = newJString("overlay")
    elif content.contains("snapshot \""):
      detectedFormat = NssSnapshot
      metadata["type"] = newJString("snapshot")
    elif content.contains("package \""):
      detectedFormat = NpkBinary
      metadata["type"] = newJString("package")

  # Deliberately not named `result`: that would shadow the proc's
  # implicit result variable.
  let retrieved = RetrievalResult(
    format: detectedFormat,
    hash: hash,
    data: data,
    metadata: metadata
  )
  return ok[RetrievalResult, FormatCasError](retrieved)
# =============================================================================
# Cross-Format Deduplication
# =============================================================================
proc deduplicateAcrossFormats*(manager: var FormatCasManager): Result[int, FormatCasError] =
  ## Count the stored objects whose content duplicates another object.
  ##
  ## Every object listed by the CAS is retrieved and grouped by the
  ## BLAKE3 hash of its content; for each group, all objects beyond the
  ## first count as duplicates. Objects that cannot be retrieved are
  ## skipped.
  ##
  ## NOTE: nothing is removed yet. The returned number is how many objects
  ## *would* be removed once object removal exists in the CAS.
  ##
  ## Any catchable error raised along the way yields an error with code
  ## `UnknownError`.
  try:
    let allObjects = manager.cas.listObjects()
    var contentHashes = initTable[string, seq[string]]()
    # Group objects by content hash (not storage hash).
    for objectHash in allObjects:
      let retrieveResult = manager.cas.retrieveObject(objectHash)
      if retrieveResult.isOk:
        let contentHash = calculateBlake3(retrieveResult.get())
        contentHashes.mgetOrPut(contentHash, newSeq[string]()).add(objectHash)
    var removedCount = 0
    for objects in contentHashes.values:
      if objects.len > 1:
        # The first object of each group is the one to keep.
        # TODO: Implement object removal in CAS and delete objects[1..^1];
        # for now, just count what would be removed.
        removedCount += objects.len - 1
    return ok[int, FormatCasError](removedCount)
  except CatchableError as e:
    # Only recoverable errors are converted into a Result; Defects signal
    # programmer bugs and are left to propagate.
    return err[int, FormatCasError](FormatCasError(
      code: UnknownError,
      msg: "Failed to deduplicate across formats: " & e.msg,
      formatType: NpkBinary,
      objectHash: "unknown"
    ))
# =============================================================================
# Format Conversion Pipelines
# =============================================================================
proc convertNprToNpk*(manager: var FormatCasManager, recipeHash: string,
                      sourceDir: string): Result[StorageResult, FormatCasError] =
  ## Turn the NPR recipe stored under `recipeHash` into a stored NPK
  ## package built from `sourceDir`.
  ##
  ## On success the mapping recipe hash -> package hash is recorded in the
  ## manager's conversion cache. Errors: `ObjectNotFound` when the recipe
  ## cannot be retrieved, `BuildFailed` when package creation fails, or
  ## whatever `storeNpkPackage` reports.
  let fetched = manager.retrieveNprRecipe(recipeHash)
  if fetched.isErr:
    let notFound = FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve recipe for conversion: " & fetched.getError().msg,
      formatType: NprRecipe,
      objectHash: recipeHash
    )
    return err[StorageResult, FormatCasError](notFound)

  # TODO: Implement actual build process from recipe.
  # Until then the package is a placeholder created from the recipe's
  # metadata and the given source directory.
  let built = createNpkPackage(fetched.get().metadata, sourceDir, manager.cas)
  if built.isErr:
    let buildFailure = FormatCasError(
      code: BuildFailed,
      msg: "Failed to build NPK from recipe: " & built.getError().msg,
      formatType: NpkBinary,
      objectHash: recipeHash
    )
    return err[StorageResult, FormatCasError](buildFailure)

  let stored = manager.storeNpkPackage(built.get())
  if stored.isOk:
    # Remember which package this recipe was converted into.
    manager.conversionCache[recipeHash] = stored.get().hash
  return stored
proc convertNpkToNca*(manager: var FormatCasManager,
                      packageHash: string): Result[seq[StorageResult], FormatCasError] =
  ## Convert the NPK package stored under `packageHash` into NCA chunks,
  ## one chunk per package file, and store each chunk in CAS.
  ##
  ## Returns one `StorageResult` per file, in file order. Errors:
  ## `ObjectNotFound` when the package cannot be retrieved, or the error
  ## of the first chunk that fails to store. In the latter case the
  ## conversion stops; chunks stored before the failure are left in CAS.
  ##
  ## NOTE: chunk creation is still a placeholder - every chunk is created
  ## from the file's hash with empty data.
  let packageResult = manager.retrieveNpkPackage(packageHash)
  if packageResult.isErr:
    return err[seq[StorageResult], FormatCasError](FormatCasError(
      code: ObjectNotFound,
      msg: "Failed to retrieve package for conversion: " & packageResult.getError().msg,
      formatType: NpkBinary,
      objectHash: packageHash
    ))
  let package = packageResult.get()
  var results: seq[StorageResult] = @[]
  for file in package.files:
    # Create chunk from file data (simplified)
    let chunk = createNcaChunk(file.hash, @[], false) # Placeholder data
    let storeResult = manager.storeNcaChunk(chunk)
    if storeResult.isErr:
      # Report the failure instead of silently returning a partial list
      # that the caller cannot tell apart from a complete conversion.
      return err[seq[StorageResult], FormatCasError](storeResult.getError())
    results.add(storeResult.get())
  return ok[seq[StorageResult], FormatCasError](results)
proc convertNpkToNss*(manager: var FormatCasManager, packageHashes: seq[string],
                      snapshotName: string): Result[StorageResult, FormatCasError] =
  ## Bundle the NPK packages stored under `packageHashes` into an NSS
  ## snapshot named `snapshotName` and store that snapshot in CAS.
  ##
  ## The snapshot's lockfile is named `gen-<unix seconds>` and lists the
  ## ids of the packages in the order given.
  ##
  ## Errors: `ObjectNotFound` when any requested package cannot be
  ## retrieved (no snapshot is created in that case), or whatever
  ## `storeNssSnapshot` reports.
  var packages: seq[NpkPackage] = @[]
  var packageIds: seq[PackageId] = @[]
  for hash in packageHashes:
    let packageResult = manager.retrieveNpkPackage(hash)
    if packageResult.isErr:
      # A snapshot that silently omits a requested package would be
      # reported as a success while being incomplete, so fail instead.
      return err[StorageResult, FormatCasError](FormatCasError(
        code: ObjectNotFound,
        msg: "Failed to retrieve package for snapshot: " & packageResult.getError().msg,
        formatType: NpkBinary,
        objectHash: hash
      ))
    let package = packageResult.get()
    packages.add(package)
    packageIds.add(package.metadata.id)
  # Create lockfile
  let lockfile = createLockfile("gen-" & $epochTime().int, packageIds)
  # Create snapshot
  let snapshot = createNssSnapshot(snapshotName, lockfile, packages)
  return manager.storeNssSnapshot(snapshot)
# =============================================================================
# Garbage Collection Across Formats
# =============================================================================
proc garbageCollectFormats*(manager: var FormatCasManager,
                            reachableHashes: seq[string] = @[]): Result[int, FormatCasError] =
  ## Run CAS garbage collection, treating `reachableHashes` plus every
  ## source and target hash in the conversion cache as reachable.
  ##
  ## Returns the value reported by the CAS garbage collector. A failure
  ## reported by the CAS yields an error with code `CasError`; any other
  ## catchable error yields an error with code `UnknownError`.
  try:
    # Build the reachable set once and extend it in place.
    var allReachable = reachableHashes.toHashSet()
    # Conversion cache entries keep both ends of each conversion alive.
    for sourceHash, targetHash in manager.conversionCache:
      allReachable.incl(sourceHash)
      allReachable.incl(targetHash)
    let gcResult = manager.cas.garbageCollect(allReachable)
    if gcResult.isErr:
      return err[int, FormatCasError](FormatCasError(
        code: CasError,
        msg: "Failed to garbage collect: " & gcResult.getError().msg,
        formatType: NpkBinary,
        objectHash: "unknown"
      ))
    return ok[int, FormatCasError](gcResult.get())
  except CatchableError as e:
    # Only recoverable errors are converted into a Result; Defects signal
    # programmer bugs and are left to propagate.
    return err[int, FormatCasError](FormatCasError(
      code: UnknownError,
      msg: "Failed to garbage collect formats: " & e.msg,
      formatType: NpkBinary,
      objectHash: "unknown"
    ))
# =============================================================================
# Format Statistics and Management
# =============================================================================
proc getFormatStatistics*(manager: FormatCasManager): JsonNode =
  ## Summarize the objects in CAS as a JSON object with the keys
  ## `total_objects`, `total_size`, `formats` (per-format counts) and
  ## `conversion_cache_entries`.
  ##
  ## Every listed object is retrieved and classified with
  ## `detectAndRetrieve`. `total_objects` counts all listed objects,
  ## whereas `total_size` and the per-format counts only cover objects
  ## that could be retrieved.
  let allObjects = manager.cas.listObjects()
  var formatCounts = initTable[PackageFormat, int]()
  var totalSize: int64 = 0
  # Start every known format at zero so the lookups below cannot miss.
  for kind in [NprRecipe, NpkBinary, NcaChunk, NssSnapshot, NofOverlay]:
    formatCounts[kind] = 0
  for objectHash in allObjects:
    let detectResult = manager.detectAndRetrieve(objectHash)
    if detectResult.isOk:
      # Deliberately not named `result`: that would shadow the proc's
      # implicit result variable.
      let detected = detectResult.get()
      formatCounts[detected.format] =
        formatCounts.getOrDefault(detected.format, 0) + 1
      totalSize += detected.data.len.int64
  return %*{
    "total_objects": allObjects.len,
    "total_size": totalSize,
    "formats": {
      "npr_recipes": formatCounts[NprRecipe],
      "npk_packages": formatCounts[NpkBinary],
      "nca_chunks": formatCounts[NcaChunk],
      "nss_snapshots": formatCounts[NssSnapshot],
      "nof_overlays": formatCounts[NofOverlay]
    },
    "conversion_cache_entries": manager.conversionCache.len
  }
proc listObjectsByFormat*(manager: FormatCasManager,
                          format: PackageFormat): seq[string] =
  ## Return the hashes of all CAS objects whose detected format equals
  ## `format`, in the order the CAS lists them.
  ##
  ## Each object is retrieved and classified with `detectAndRetrieve`;
  ## objects that cannot be retrieved are left out.
  let allObjects = manager.cas.listObjects()
  # Accumulate into the implicit `result` instead of shadowing it with a
  # local variable of the same name.
  result = @[]
  for objectHash in allObjects:
    let detectResult = manager.detectAndRetrieve(objectHash)
    if detectResult.isOk and detectResult.get().format == format:
      result.add(objectHash)
proc validateAllFormats*(manager: FormatCasManager): JsonNode =
  ## Produce a JSON validation report covering every object in CAS.
  ##
  ## The report has a `summary` (`total_objects`, `valid`, `invalid`) and
  ## an `objects` map from object hash to `format`, `valid`, `errors` and
  ## `warnings`.
  ##
  ## NOTE: format-specific validation is not implemented yet. An object
  ## counts as valid as soon as `detectAndRetrieve` succeeds for it, and
  ## as invalid (format "unknown", with the retrieval error message)
  ## otherwise.
  let allObjects = manager.cas.listObjects()
  var validationResults = newJObject()
  var totalValid = 0
  var totalInvalid = 0
  for objectHash in allObjects:
    let detectResult = manager.detectAndRetrieve(objectHash)
    if detectResult.isOk:
      # Deliberately not named `result`: that would shadow the proc's
      # implicit result variable.
      let detected = detectResult.get()
      # TODO: Implement format-specific validation
      # For now, assume all objects are valid if they can be retrieved
      validationResults[objectHash] = %*{
        "format": $detected.format,
        "valid": true,
        "errors": newJArray(),
        "warnings": newJArray()
      }
      totalValid.inc
    else:
      validationResults[objectHash] = %*{
        "format": "unknown",
        "valid": false,
        "errors": [detectResult.getError().msg],
        "warnings": newJArray()
      }
      totalInvalid.inc
  return %*{
    "summary": %*{
      "total_objects": allObjects.len,
      "valid": totalValid,
      "invalid": totalInvalid
    },
    "objects": validationResults
  }