Files
git.stella-ops.org/src/Scanner/StellaOps.Scanner.WebService/Services/EvidenceBundleExporter.cs

778 lines
30 KiB
C#

// <copyright file="EvidenceBundleExporter.cs" company="StellaOps">
// SPDX-License-Identifier: BUSL-1.1
// </copyright>
using System.IO.Compression;
using System.Security.Cryptography;
using System.Text;
using System.Text.Json;
using StellaOps.Scanner.WebService.Contracts;
namespace StellaOps.Scanner.WebService.Services;
/// <summary>
/// Exports unified evidence bundles to ZIP and TAR.GZ archive formats.
/// </summary>
public sealed class EvidenceBundleExporter : IEvidenceBundleExporter
{
    // Injected clock: every manifest/README/tar timestamp flows through this,
    // so tests can pin a fixed time and produce reproducible archives.
    private readonly TimeProvider _timeProvider;

    // Shared serializer settings for all JSON files written into a bundle:
    // camelCase keys, indented for human inspection of extracted bundles.
    private static readonly JsonSerializerOptions JsonOptions = new()
    {
        WriteIndented = true,
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase
    };

    /// <summary>
    /// Initializes a new instance of the <see cref="EvidenceBundleExporter"/> class.
    /// </summary>
    /// <param name="timeProvider">The time provider for deterministic timestamps. Defaults to system time if null.</param>
    public EvidenceBundleExporter(TimeProvider? timeProvider = null)
    {
        _timeProvider = timeProvider ?? TimeProvider.System;
    }
/// <inheritdoc />
public async Task<EvidenceExportResult> ExportAsync(
UnifiedEvidenceResponseDto evidence,
EvidenceExportFormat format,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(evidence);
var fileEntries = new List<ArchiveFileEntry>();
var memoryStreams = new List<(string path, MemoryStream stream, string contentType)>();
try
{
// Prepare all file contents
await PrepareEvidenceFilesAsync(evidence, memoryStreams, fileEntries, ct)
.ConfigureAwait(false);
// Create archive manifest
var manifest = new ArchiveManifestDto
{
FindingId = evidence.FindingId,
GeneratedAt = _timeProvider.GetUtcNow(),
CacheKey = evidence.CacheKey ?? string.Empty,
Files = fileEntries,
ScannerVersion = null // Scanner version not directly available in manifests
};
// Add manifest to archive
var manifestJson = JsonSerializer.Serialize(manifest, JsonOptions);
var manifestBytes = Encoding.UTF8.GetBytes(manifestJson);
var manifestStream = new MemoryStream(manifestBytes);
var manifestEntry = CreateFileEntry("manifest.json", manifestBytes, "application/json");
fileEntries.Insert(0, manifestEntry);
memoryStreams.Insert(0, ("manifest.json", manifestStream, "application/json"));
// Generate archive
var archiveStream = new MemoryStream();
if (format == EvidenceExportFormat.Zip)
{
await CreateZipArchiveAsync(evidence.FindingId, memoryStreams, archiveStream, ct)
.ConfigureAwait(false);
}
else
{
await CreateTarGzArchiveAsync(evidence.FindingId, memoryStreams, archiveStream, ct)
.ConfigureAwait(false);
}
archiveStream.Position = 0;
// Compute archive digest
var archiveDigest = ComputeSha256(archiveStream);
archiveStream.Position = 0;
var (contentType, extension) = format switch
{
EvidenceExportFormat.Zip => ("application/zip", "zip"),
EvidenceExportFormat.TarGz => ("application/gzip", "tar.gz"),
_ => throw new ArgumentOutOfRangeException(nameof(format))
};
return new EvidenceExportResult
{
Stream = archiveStream,
ContentType = contentType,
FileName = $"evidence-{evidence.FindingId}.{extension}",
ArchiveDigest = archiveDigest,
Manifest = manifest with { Files = fileEntries },
Size = archiveStream.Length
};
}
finally
{
// Cleanup intermediate streams
foreach (var (_, stream, _) in memoryStreams)
{
await stream.DisposeAsync().ConfigureAwait(false);
}
}
}
/// <inheritdoc />
public async Task<RunEvidenceExportResult> ExportRunAsync(
IReadOnlyList<UnifiedEvidenceResponseDto> runEvidence,
string scanId,
EvidenceExportFormat format,
CancellationToken ct = default)
{
ArgumentNullException.ThrowIfNull(runEvidence);
ArgumentException.ThrowIfNullOrWhiteSpace(scanId);
var findingManifests = new List<ArchiveManifestDto>();
var allStreams = new List<(string path, MemoryStream stream, string contentType)>();
var totalFiles = 0;
try
{
// Process each finding into its own subfolder
foreach (var evidence in runEvidence)
{
ct.ThrowIfCancellationRequested();
var findingPrefix = $"findings/{evidence.FindingId}/";
var fileEntries = new List<ArchiveFileEntry>();
var findingStreams = new List<(string path, MemoryStream stream, string contentType)>();
await PrepareEvidenceFilesAsync(evidence, findingStreams, fileEntries, ct)
.ConfigureAwait(false);
// Add finding manifest
var findingManifest = new ArchiveManifestDto
{
FindingId = evidence.FindingId,
GeneratedAt = _timeProvider.GetUtcNow(),
CacheKey = evidence.CacheKey ?? string.Empty,
Files = fileEntries,
ScannerVersion = null
};
findingManifests.Add(findingManifest);
// Add to all streams with finding prefix
foreach (var (path, stream, ct2) in findingStreams)
{
allStreams.Add((findingPrefix + path, stream, ct2));
totalFiles++;
}
}
// Create run-level manifest
var runManifest = new RunArchiveManifestDto
{
ScanId = scanId,
GeneratedAt = _timeProvider.GetUtcNow(),
Findings = findingManifests,
TotalFiles = totalFiles,
ScannerVersion = null
};
// Add run manifest to archive
var manifestJson = JsonSerializer.Serialize(runManifest, JsonOptions);
var manifestBytes = Encoding.UTF8.GetBytes(manifestJson);
var manifestStream = new MemoryStream(manifestBytes);
allStreams.Insert(0, ("MANIFEST.json", manifestStream, "application/json"));
// Generate run-level README
var readme = GenerateRunReadme(scanId, runEvidence, findingManifests);
var readmeBytes = Encoding.UTF8.GetBytes(readme);
var readmeStream = new MemoryStream(readmeBytes);
allStreams.Insert(1, ("README.md", readmeStream, "text/markdown"));
// Generate archive
var archiveStream = new MemoryStream();
if (format == EvidenceExportFormat.Zip)
{
await CreateZipArchiveAsync($"evidence-run-{scanId}", allStreams, archiveStream, ct)
.ConfigureAwait(false);
}
else
{
await CreateTarGzArchiveAsync($"evidence-run-{scanId}", allStreams, archiveStream, ct)
.ConfigureAwait(false);
}
archiveStream.Position = 0;
// Compute archive digest
var archiveDigest = ComputeSha256(archiveStream);
archiveStream.Position = 0;
var (contentType, extension) = format switch
{
EvidenceExportFormat.Zip => ("application/zip", "zip"),
EvidenceExportFormat.TarGz => ("application/gzip", "tar.gz"),
_ => throw new ArgumentOutOfRangeException(nameof(format))
};
return new RunEvidenceExportResult
{
Stream = archiveStream,
ContentType = contentType,
FileName = $"evidence-run-{scanId}.{extension}",
ArchiveDigest = archiveDigest,
Manifest = runManifest,
Size = archiveStream.Length,
FindingCount = runEvidence.Count
};
}
finally
{
// Cleanup intermediate streams
foreach (var (_, stream, _) in allStreams)
{
await stream.DisposeAsync().ConfigureAwait(false);
}
}
}
private string GenerateRunReadme(
string scanId,
IReadOnlyList<UnifiedEvidenceResponseDto> findings,
IReadOnlyList<ArchiveManifestDto> manifests)
{
var sb = new StringBuilder();
sb.AppendLine("# StellaOps Scan Run Evidence Bundle");
sb.AppendLine();
sb.AppendLine("## Overview");
sb.AppendLine();
sb.AppendLine($"- **Scan ID:** `{scanId}`");
sb.AppendLine($"- **Finding Count:** {findings.Count}");
sb.AppendLine($"- **Generated:** {_timeProvider.GetUtcNow():O}");
sb.AppendLine();
sb.AppendLine("## Findings");
sb.AppendLine();
sb.AppendLine("| # | Finding ID | CVE | Component |");
sb.AppendLine("|---|------------|-----|-----------|");
for (var i = 0; i < findings.Count; i++)
{
var f = findings[i];
sb.AppendLine($"| {i + 1} | `{f.FindingId}` | `{f.CveId}` | `{f.ComponentPurl}` |");
}
sb.AppendLine();
sb.AppendLine("## Archive Structure");
sb.AppendLine();
sb.AppendLine("```");
sb.AppendLine("evidence-run-<scanId>/");
sb.AppendLine("├── MANIFEST.json # Run-level manifest");
sb.AppendLine("├── README.md # This file");
sb.AppendLine("└── findings/");
sb.AppendLine(" ├── <findingId1>/");
sb.AppendLine(" │ ├── manifest.json");
sb.AppendLine(" │ ├── sbom.cdx.json");
sb.AppendLine(" │ ├── reachability.json");
sb.AppendLine(" │ ├── binary-diff.json # Binary diff evidence");
sb.AppendLine(" │ ├── binary-diff.dsse.json # Signed binary diff (if attested)");
sb.AppendLine(" │ ├── delta-proof.json # Semantic diff summary");
sb.AppendLine(" │ ├── vex/");
sb.AppendLine(" │ ├── attestations/");
sb.AppendLine(" │ ├── policy/");
sb.AppendLine(" │ ├── replay.sh");
sb.AppendLine(" │ ├── replay.ps1");
sb.AppendLine(" │ └── README.md");
sb.AppendLine(" └── <findingId2>/");
sb.AppendLine(" └── ...");
sb.AppendLine("```");
sb.AppendLine();
sb.AppendLine("## Replay Instructions");
sb.AppendLine();
sb.AppendLine("Each finding folder contains individual replay scripts. To replay all findings:");
sb.AppendLine();
sb.AppendLine("### Bash");
sb.AppendLine("```bash");
sb.AppendLine("for dir in findings/*/; do");
sb.AppendLine(" (cd \"$dir\" && chmod +x replay.sh && ./replay.sh)");
sb.AppendLine("done");
sb.AppendLine("```");
sb.AppendLine();
sb.AppendLine("### PowerShell");
sb.AppendLine("```powershell");
sb.AppendLine("Get-ChildItem -Path findings -Directory | ForEach-Object {");
sb.AppendLine(" Push-Location $_.FullName");
sb.AppendLine(" .\\replay.ps1");
sb.AppendLine(" Pop-Location");
sb.AppendLine("}");
sb.AppendLine("```");
sb.AppendLine();
sb.AppendLine("---");
sb.AppendLine();
sb.AppendLine("*Generated by StellaOps Scanner*");
return sb.ToString();
}
private async Task PrepareEvidenceFilesAsync(
UnifiedEvidenceResponseDto evidence,
List<(string path, MemoryStream stream, string contentType)> streams,
List<ArchiveFileEntry> entries,
CancellationToken ct)
{
// SBOM evidence
if (evidence.Sbom is not null)
{
await AddJsonFileAsync("sbom.cdx.json", evidence.Sbom, streams, entries, ct)
.ConfigureAwait(false);
}
// Reachability evidence
if (evidence.Reachability is not null)
{
await AddJsonFileAsync("reachability.json", evidence.Reachability, streams, entries, ct)
.ConfigureAwait(false);
}
// VEX claims - group by source
if (evidence.VexClaims is { Count: > 0 })
{
var vexBySource = evidence.VexClaims
.GroupBy(v => v.Source ?? "unknown")
.ToDictionary(g => g.Key, g => g.ToList());
foreach (var (source, claims) in vexBySource)
{
var fileName = $"vex/{SanitizeFileName(source)}.json";
await AddJsonFileAsync(fileName, claims, streams, entries, ct)
.ConfigureAwait(false);
}
}
// Attestations
if (evidence.Attestations is { Count: > 0 })
{
foreach (var attestation in evidence.Attestations)
{
var fileName = $"attestations/{SanitizeFileName(attestation.PredicateType ?? attestation.Id)}.dsse.json";
await AddJsonFileAsync(fileName, attestation, streams, entries, ct)
.ConfigureAwait(false);
}
}
// Delta evidence
if (evidence.Deltas is not null)
{
await AddJsonFileAsync("delta.json", evidence.Deltas, streams, entries, ct)
.ConfigureAwait(false);
}
// Binary diff evidence - Sprint: SPRINT_20260112_009_SCANNER_binary_diff_bundle_export (BINDIFF-SCAN-002)
if (evidence.BinaryDiff is not null)
{
await AddJsonFileAsync("binary-diff.json", evidence.BinaryDiff, streams, entries, ct)
.ConfigureAwait(false);
// Add DSSE-signed binary diff if attestation refs are present
if (evidence.BinaryDiff.Attestation is not null)
{
var dsseWrapper = new
{
payloadType = "application/vnd.stellaops.binary-diff+json",
payload = evidence.BinaryDiff,
attestationRef = evidence.BinaryDiff.Attestation
};
await AddJsonFileAsync("binary-diff.dsse.json", dsseWrapper, streams, entries, ct)
.ConfigureAwait(false);
}
// Add delta proof summary for semantic fingerprint changes
if (evidence.BinaryDiff.HasSemanticDiff)
{
var deltaProof = new
{
previousBinaryDigest = evidence.BinaryDiff.PreviousBinaryDigest,
currentBinaryDigest = evidence.BinaryDiff.CurrentBinaryDigest,
similarityScore = evidence.BinaryDiff.SemanticSimilarity ?? evidence.BinaryDiff.SimilarityScore,
functionChangeCount = evidence.BinaryDiff.FunctionChangeCount,
securityChangeCount = evidence.BinaryDiff.SecurityChangeCount
};
await AddJsonFileAsync("delta-proof.json", deltaProof, streams, entries, ct)
.ConfigureAwait(false);
}
}
// Policy evidence
if (evidence.Policy is not null)
{
await AddJsonFileAsync("policy/evaluation.json", evidence.Policy, streams, entries, ct)
.ConfigureAwait(false);
}
// Replay command
if (!string.IsNullOrWhiteSpace(evidence.ReplayCommand))
{
var replayBytes = Encoding.UTF8.GetBytes(evidence.ReplayCommand);
var replayStream = new MemoryStream(replayBytes);
streams.Add(("replay-command.txt", replayStream, "text/plain"));
entries.Add(CreateFileEntry("replay-command.txt", replayBytes, "text/plain"));
// Generate bash replay script
var bashScript = GenerateBashReplayScript(evidence);
var bashBytes = Encoding.UTF8.GetBytes(bashScript);
var bashStream = new MemoryStream(bashBytes);
streams.Add(("replay.sh", bashStream, "text/x-shellscript"));
entries.Add(CreateFileEntry("replay.sh", bashBytes, "text/x-shellscript"));
// Generate PowerShell replay script
var psScript = GeneratePowerShellReplayScript(evidence);
var psBytes = Encoding.UTF8.GetBytes(psScript);
var psStream = new MemoryStream(psBytes);
streams.Add(("replay.ps1", psStream, "text/plain"));
entries.Add(CreateFileEntry("replay.ps1", psBytes, "text/plain"));
}
// Generate README with hash table
var readme = GenerateReadme(evidence, entries);
var readmeBytes = Encoding.UTF8.GetBytes(readme);
var readmeStream = new MemoryStream(readmeBytes);
streams.Add(("README.md", readmeStream, "text/markdown"));
entries.Add(CreateFileEntry("README.md", readmeBytes, "text/markdown"));
await Task.CompletedTask.ConfigureAwait(false);
}
private string GenerateBashReplayScript(UnifiedEvidenceResponseDto evidence)
{
var sb = new StringBuilder();
sb.AppendLine("#!/usr/bin/env bash");
sb.AppendLine("# StellaOps Evidence Bundle Replay Script");
sb.AppendLine($"# Generated: {_timeProvider.GetUtcNow():O}");
sb.AppendLine($"# Finding: {evidence.FindingId}");
sb.AppendLine($"# CVE: {evidence.CveId}");
sb.AppendLine();
sb.AppendLine("set -euo pipefail");
sb.AppendLine();
sb.AppendLine("# Input hashes for deterministic replay");
sb.AppendLine($"ARTIFACT_DIGEST=\"{evidence.Manifests.ArtifactDigest}\"");
sb.AppendLine($"MANIFEST_HASH=\"{evidence.Manifests.ManifestHash}\"");
sb.AppendLine($"FEED_HASH=\"{evidence.Manifests.FeedSnapshotHash}\"");
sb.AppendLine($"POLICY_HASH=\"{evidence.Manifests.PolicyHash}\"");
sb.AppendLine();
sb.AppendLine("# Verify prerequisites");
sb.AppendLine("if ! command -v stella &> /dev/null; then");
sb.AppendLine(" echo \"Error: stella CLI not found. Install from https://stellaops.org/install\"");
sb.AppendLine(" exit 1");
sb.AppendLine("fi");
sb.AppendLine();
sb.AppendLine("echo \"Replaying verdict for finding: ${ARTIFACT_DIGEST}\"");
sb.AppendLine("echo \"Using manifest: ${MANIFEST_HASH}\"");
sb.AppendLine();
sb.AppendLine("# Execute replay");
sb.AppendLine("stella scan replay \\");
sb.AppendLine(" --artifact \"${ARTIFACT_DIGEST}\" \\");
sb.AppendLine(" --manifest \"${MANIFEST_HASH}\" \\");
sb.AppendLine(" --feeds \"${FEED_HASH}\" \\");
sb.AppendLine(" --policy \"${POLICY_HASH}\"");
sb.AppendLine();
sb.AppendLine("echo \"Replay complete. Verify verdict matches original.\"");
return sb.ToString();
}
private string GeneratePowerShellReplayScript(UnifiedEvidenceResponseDto evidence)
{
var sb = new StringBuilder();
sb.AppendLine("# StellaOps Evidence Bundle Replay Script");
sb.AppendLine($"# Generated: {_timeProvider.GetUtcNow():O}");
sb.AppendLine($"# Finding: {evidence.FindingId}");
sb.AppendLine($"# CVE: {evidence.CveId}");
sb.AppendLine();
sb.AppendLine("$ErrorActionPreference = 'Stop'");
sb.AppendLine();
sb.AppendLine("# Input hashes for deterministic replay");
sb.AppendLine($"$ArtifactDigest = \"{evidence.Manifests.ArtifactDigest}\"");
sb.AppendLine($"$ManifestHash = \"{evidence.Manifests.ManifestHash}\"");
sb.AppendLine($"$FeedHash = \"{evidence.Manifests.FeedSnapshotHash}\"");
sb.AppendLine($"$PolicyHash = \"{evidence.Manifests.PolicyHash}\"");
sb.AppendLine();
sb.AppendLine("# Verify prerequisites");
sb.AppendLine("if (-not (Get-Command stella -ErrorAction SilentlyContinue)) {");
sb.AppendLine(" Write-Error \"stella CLI not found. Install from https://stellaops.org/install\"");
sb.AppendLine(" exit 1");
sb.AppendLine("}");
sb.AppendLine();
sb.AppendLine("Write-Host \"Replaying verdict for finding: $ArtifactDigest\"");
sb.AppendLine("Write-Host \"Using manifest: $ManifestHash\"");
sb.AppendLine();
sb.AppendLine("# Execute replay");
sb.AppendLine("stella scan replay `");
sb.AppendLine(" --artifact $ArtifactDigest `");
sb.AppendLine(" --manifest $ManifestHash `");
sb.AppendLine(" --feeds $FeedHash `");
sb.AppendLine(" --policy $PolicyHash");
sb.AppendLine();
sb.AppendLine("Write-Host \"Replay complete. Verify verdict matches original.\"");
return sb.ToString();
}
private string GenerateReadme(UnifiedEvidenceResponseDto evidence, List<ArchiveFileEntry> entries)
{
var sb = new StringBuilder();
sb.AppendLine("# StellaOps Evidence Bundle");
sb.AppendLine();
sb.AppendLine("## Overview");
sb.AppendLine();
sb.AppendLine($"- **Finding ID:** `{evidence.FindingId}`");
sb.AppendLine($"- **CVE:** `{evidence.CveId}`");
sb.AppendLine($"- **Component:** `{evidence.ComponentPurl}`");
sb.AppendLine($"- **Generated:** {evidence.GeneratedAt:O}");
sb.AppendLine();
sb.AppendLine("## Input Hashes for Deterministic Replay");
sb.AppendLine();
sb.AppendLine("| Input | Hash |");
sb.AppendLine("|-------|------|");
sb.AppendLine($"| Artifact Digest | `{evidence.Manifests.ArtifactDigest}` |");
sb.AppendLine($"| Run Manifest | `{evidence.Manifests.ManifestHash}` |");
sb.AppendLine($"| Feed Snapshot | `{evidence.Manifests.FeedSnapshotHash}` |");
sb.AppendLine($"| Policy | `{evidence.Manifests.PolicyHash}` |");
if (!string.IsNullOrEmpty(evidence.Manifests.KnowledgeSnapshotId))
{
sb.AppendLine($"| Knowledge Snapshot | `{evidence.Manifests.KnowledgeSnapshotId}` |");
}
if (!string.IsNullOrEmpty(evidence.Manifests.GraphRevisionId))
{
sb.AppendLine($"| Graph Revision | `{evidence.Manifests.GraphRevisionId}` |");
}
sb.AppendLine();
sb.AppendLine("## Replay Instructions");
sb.AppendLine();
sb.AppendLine("### Using Bash");
sb.AppendLine("```bash");
sb.AppendLine("chmod +x replay.sh");
sb.AppendLine("./replay.sh");
sb.AppendLine("```");
sb.AppendLine();
sb.AppendLine("### Using PowerShell");
sb.AppendLine("```powershell");
sb.AppendLine(".\\replay.ps1");
sb.AppendLine("```");
sb.AppendLine();
sb.AppendLine("### Manual Command");
sb.AppendLine("```");
sb.AppendLine(evidence.ReplayCommand ?? "# Replay command not available");
sb.AppendLine("```");
sb.AppendLine();
sb.AppendLine("## Bundle Contents");
sb.AppendLine();
sb.AppendLine("| File | SHA-256 | Size |");
sb.AppendLine("|------|---------|------|");
foreach (var entry in entries.Where(e => e.Path != "README.md"))
{
sb.AppendLine($"| `{entry.Path}` | `{entry.Sha256[..16]}...` | {FormatSize(entry.Size)} |");
}
sb.AppendLine();
sb.AppendLine("## Verification Status");
sb.AppendLine();
sb.AppendLine($"- **Status:** {evidence.Verification.Status}");
sb.AppendLine($"- **Hashes Verified:** {(evidence.Verification.HashesVerified ? "" : "")}");
sb.AppendLine($"- **Attestations Verified:** {(evidence.Verification.AttestationsVerified ? "" : "")}");
sb.AppendLine($"- **Evidence Complete:** {(evidence.Verification.EvidenceComplete ? "" : "")}");
if (evidence.Verification.Issues is { Count: > 0 })
{
sb.AppendLine();
sb.AppendLine("### Issues");
foreach (var issue in evidence.Verification.Issues)
{
sb.AppendLine($"- {issue}");
}
}
sb.AppendLine();
sb.AppendLine("---");
sb.AppendLine();
sb.AppendLine("*Generated by StellaOps Scanner*");
return sb.ToString();
}
private static string FormatSize(long bytes)
{
string[] sizes = ["B", "KB", "MB", "GB"];
var order = 0;
double size = bytes;
while (size >= 1024 && order < sizes.Length - 1)
{
order++;
size /= 1024;
}
return $"{size:0.##} {sizes[order]}";
}
private static async Task AddJsonFileAsync<T>(
string path,
T content,
List<(string path, MemoryStream stream, string contentType)> streams,
List<ArchiveFileEntry> entries,
CancellationToken ct)
{
var json = JsonSerializer.Serialize(content, JsonOptions);
var bytes = Encoding.UTF8.GetBytes(json);
var stream = new MemoryStream(bytes);
streams.Add((path, stream, "application/json"));
entries.Add(CreateFileEntry(path, bytes, "application/json"));
await Task.CompletedTask.ConfigureAwait(false);
}
private static ArchiveFileEntry CreateFileEntry(string path, byte[] bytes, string contentType)
{
using var sha256 = SHA256.Create();
var hash = sha256.ComputeHash(bytes);
return new ArchiveFileEntry
{
Path = path,
Sha256 = Convert.ToHexString(hash).ToLowerInvariant(),
Size = bytes.Length,
ContentType = contentType
};
}
private static async Task CreateZipArchiveAsync(
string findingId,
List<(string path, MemoryStream stream, string contentType)> files,
Stream outputStream,
CancellationToken ct)
{
using var archive = new ZipArchive(outputStream, ZipArchiveMode.Create, leaveOpen: true);
var rootFolder = $"evidence-{findingId}/";
foreach (var (path, stream, _) in files)
{
ct.ThrowIfCancellationRequested();
var entry = archive.CreateEntry(rootFolder + path, CompressionLevel.Optimal);
await using var entryStream = entry.Open();
stream.Position = 0;
await stream.CopyToAsync(entryStream, ct).ConfigureAwait(false);
}
}
private async Task CreateTarGzArchiveAsync(
string findingId,
List<(string path, MemoryStream stream, string contentType)> files,
Stream outputStream,
CancellationToken ct)
{
// Use GZipStream with inner tar-like structure
// For simplicity, we create a pseudo-tar format compatible with extraction
await using var gzipStream = new GZipStream(outputStream, CompressionLevel.Optimal, leaveOpen: true);
var rootFolder = $"evidence-{findingId}/";
foreach (var (path, stream, _) in files)
{
ct.ThrowIfCancellationRequested();
var fullPath = rootFolder + path;
stream.Position = 0;
// Write tar header (simplified USTAR format)
var header = CreateTarHeader(fullPath, stream.Length);
await gzipStream.WriteAsync(header, ct).ConfigureAwait(false);
// Write file content
await stream.CopyToAsync(gzipStream, ct).ConfigureAwait(false);
// Pad to 512-byte boundary
var padding = (512 - (int)(stream.Length % 512)) % 512;
if (padding > 0)
{
var paddingBytes = new byte[padding];
await gzipStream.WriteAsync(paddingBytes, ct).ConfigureAwait(false);
}
}
// Write two empty blocks to mark end of archive
var endBlocks = new byte[1024];
await gzipStream.WriteAsync(endBlocks, ct).ConfigureAwait(false);
}
private byte[] CreateTarHeader(string name, long size)
{
var header = new byte[512];
// Name (0-99)
var nameBytes = Encoding.ASCII.GetBytes(name);
Array.Copy(nameBytes, 0, header, 0, Math.Min(nameBytes.Length, 100));
// Mode (100-107) - 0644
Encoding.ASCII.GetBytes("0000644").CopyTo(header, 100);
// UID (108-115) - 0
Encoding.ASCII.GetBytes("0000000").CopyTo(header, 108);
// GID (116-123) - 0
Encoding.ASCII.GetBytes("0000000").CopyTo(header, 116);
// Size (124-135) - octal
var sizeOctal = Convert.ToString(size, 8).PadLeft(11, '0');
Encoding.ASCII.GetBytes(sizeOctal).CopyTo(header, 124);
// Mtime (136-147) - current time in octal
var mtime = _timeProvider.GetUtcNow().ToUnixTimeSeconds();
var mtimeOctal = Convert.ToString(mtime, 8).PadLeft(11, '0');
Encoding.ASCII.GetBytes(mtimeOctal).CopyTo(header, 136);
// Checksum placeholder (148-155) - spaces
for (var i = 148; i < 156; i++)
{
header[i] = (byte)' ';
}
// Type flag (156) - '0' for regular file
header[156] = (byte)'0';
// USTAR magic (257-262)
Encoding.ASCII.GetBytes("ustar").CopyTo(header, 257);
header[262] = 0;
// USTAR version (263-264)
Encoding.ASCII.GetBytes("00").CopyTo(header, 263);
// Calculate and write checksum
var checksum = 0;
for (var i = 0; i < 512; i++)
{
checksum += header[i];
}
var checksumOctal = Convert.ToString(checksum, 8).PadLeft(6, '0');
Encoding.ASCII.GetBytes(checksumOctal).CopyTo(header, 148);
header[154] = 0;
header[155] = (byte)' ';
return header;
}
private static string ComputeSha256(Stream stream)
{
using var sha256 = SHA256.Create();
var hash = sha256.ComputeHash(stream);
return Convert.ToHexString(hash).ToLowerInvariant();
}
private static string SanitizeFileName(string name)
{
var invalid = Path.GetInvalidFileNameChars();
var sanitized = new StringBuilder(name.Length);
foreach (var c in name)
{
sanitized.Append(invalid.Contains(c) ? '_' : c);
}
return sanitized.ToString().ToLowerInvariant();
}
}