feat: Add RustFS artifact object store and migration tool

- Implemented RustFsArtifactObjectStore for managing artifacts in RustFS.
- Added unit tests for RustFsArtifactObjectStore functionality.
- Created a RustFS migrator tool to transfer objects from S3 to RustFS.
- Introduced policy preview and report models for API integration.
- Added fixtures and tests for policy preview and report functionality.
- Included necessary metadata and scripts for cache_pkg package.
This commit is contained in:
master
2025-10-23 18:53:18 +03:00
parent 5cb3144e5e
commit 70d7fb529e
117 changed files with 4849 additions and 725 deletions

View File

@@ -45,22 +45,97 @@ internal static class PythonDistributionLoader
AddFileEvidence(context, metadataPath, "METADATA", evidenceEntries);
AddFileEvidence(context, wheelPath, "WHEEL", evidenceEntries);
AddFileEvidence(context, entryPointsPath, "entry_points.txt", evidenceEntries);
AddFileEvidence(context, installerPath, "INSTALLER", evidenceEntries);
AddFileEvidence(context, recordPath, "RECORD", evidenceEntries);
AppendMetadata(metadataEntries, "distInfoPath", PythonPathHelper.NormalizeRelative(context, distInfoPath));
AppendMetadata(metadataEntries, "name", trimmedName);
AppendMetadata(metadataEntries, "version", trimmedVersion);
AppendMetadata(metadataEntries, "normalizedName", normalizedName);
AppendMetadata(metadataEntries, "summary", metadataDocument.GetFirst("Summary"));
AppendMetadata(metadataEntries, "license", metadataDocument.GetFirst("License"));
AppendMetadata(metadataEntries, "licenseExpression", metadataDocument.GetFirst("License-Expression"));
AppendMetadata(metadataEntries, "homePage", metadataDocument.GetFirst("Home-page"));
AppendMetadata(metadataEntries, "author", metadataDocument.GetFirst("Author"));
AppendMetadata(metadataEntries, "authorEmail", metadataDocument.GetFirst("Author-email"));
AppendMetadata(metadataEntries, "projectUrl", metadataDocument.GetFirst("Project-URL"));
AppendMetadata(metadataEntries, "requiresPython", metadataDocument.GetFirst("Requires-Python"));
var licenseFiles = metadataDocument.GetAll("License-File");
if (licenseFiles.Count > 0)
{
var packageRoot = ResolvePackageRoot(distInfoPath);
var licenseIndex = 0;
var seenLicensePaths = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
foreach (var licenseFile in licenseFiles)
{
if (string.IsNullOrWhiteSpace(licenseFile))
{
continue;
}
var trimmed = licenseFile.Trim();
var resolved = TryResolvePackagePath(packageRoot, trimmed);
string metadataValue;
string? evidenceLocator = null;
if (!string.IsNullOrEmpty(resolved) && File.Exists(resolved))
{
metadataValue = PythonPathHelper.NormalizeRelative(context, resolved);
evidenceLocator = metadataValue;
}
else
{
metadataValue = trimmed;
}
if (metadataValue.Length == 0 || !seenLicensePaths.Add(metadataValue))
{
continue;
}
AppendMetadata(metadataEntries, $"license.file[{licenseIndex}]", metadataValue);
licenseIndex++;
if (!string.IsNullOrEmpty(evidenceLocator))
{
evidenceEntries.Add(new LanguageComponentEvidence(
LanguageEvidenceKind.File,
"license",
evidenceLocator,
Value: null,
Sha256: null));
}
}
}
var classifiers = metadataDocument.GetAll("Classifier");
if (classifiers.Count > 0)
{
AppendMetadata(metadataEntries, "classifiers", string.Join(';', classifiers));
var orderedClassifiers = classifiers
.Where(static classifier => !string.IsNullOrWhiteSpace(classifier))
.Select(static classifier => classifier.Trim())
.OrderBy(static classifier => classifier, StringComparer.Ordinal)
.ToArray();
if (orderedClassifiers.Length > 0)
{
AppendMetadata(metadataEntries, "classifiers", string.Join(';', orderedClassifiers));
var licenseClassifierIndex = 0;
for (var index = 0; index < orderedClassifiers.Length; index++)
{
var classifier = orderedClassifiers[index];
AppendMetadata(metadataEntries, $"classifier[{index}]", classifier);
if (classifier.StartsWith("License ::", StringComparison.OrdinalIgnoreCase))
{
AppendMetadata(metadataEntries, $"license.classifier[{licenseClassifierIndex}]", classifier);
licenseClassifierIndex++;
}
}
}
}
var requiresDist = metadataDocument.GetAll("Requires-Dist");
@@ -125,6 +200,7 @@ internal static class PythonDistributionLoader
evidenceEntries.AddRange(verification.Evidence);
var usedByEntrypoint = verification.UsedByEntrypoint || EvaluateEntryPointUsage(context, distInfoPath, entryPoints);
AppendMetadata(metadataEntries, "provenance", "dist-info");
return new PythonDistribution(
trimmedName,
@@ -267,6 +343,24 @@ internal static class PythonDistributionLoader
return builder.ToString();
}
/// <summary>
/// Returns the directory that contains the given dist-info directory.
/// </summary>
/// <param name="distInfoPath">
/// Path to the dist-info directory. A trailing directory separator is tolerated.
/// </param>
/// <returns>
/// The full path of the parent directory, or <paramref name="distInfoPath"/> unchanged
/// when the path has no parent (for example a filesystem root).
/// </returns>
private static string ResolvePackageRoot(string distInfoPath)
{
    // Directory.GetParent treats a trailing separator as an empty final segment, so
    // ".../pkg.dist-info/" would otherwise resolve to the dist-info directory itself
    // instead of the directory that contains it.
    var withoutTrailingSeparator = Path.TrimEndingDirectorySeparator(distInfoPath);
    var parent = Directory.GetParent(withoutTrailingSeparator);
    return parent?.FullName ?? distInfoPath;
}
/// <summary>
/// Resolves <paramref name="relativePath"/> against <paramref name="basePath"/> and returns
/// the absolute result only when it stays inside <paramref name="basePath"/>.
/// </summary>
/// <param name="basePath">Directory the relative path is resolved against.</param>
/// <param name="relativePath">Path taken from package metadata; treated as untrusted.</param>
/// <returns>
/// The normalized absolute path, or <c>null</c> when the path cannot be resolved or when it
/// escapes <paramref name="basePath"/> (via <c>..</c> segments or by being rooted elsewhere).
/// </returns>
private static string? TryResolvePackagePath(string basePath, string relativePath)
{
    string root;
    string candidate;
    try
    {
        root = Path.GetFullPath(basePath);
        candidate = Path.GetFullPath(Path.Combine(root, relativePath));
    }
    catch
    {
        // Best effort: malformed metadata values must not abort analysis of the distribution.
        return null;
    }

    if (string.Equals(candidate, root, StringComparison.Ordinal))
    {
        return candidate;
    }

    // Compare against the root *with* a trailing separator so that a sibling directory such as
    // "site-packages-evil" is not mistaken for a child of "site-packages".
    var rootPrefix = Path.EndsInDirectorySeparator(root)
        ? root
        : root + Path.DirectorySeparatorChar;

    // Metadata is attacker-controlled; refuse anything that resolves outside the package root
    // so callers never probe or report files elsewhere on the filesystem.
    return candidate.StartsWith(rootPrefix, StringComparison.Ordinal) ? candidate : null;
}
private static async Task<string?> ReadSingleLineAsync(string path, CancellationToken cancellationToken)
{
if (!File.Exists(path))
@@ -769,6 +863,11 @@ internal static class PythonRecordVerifier
var entryPath = entry.Path.Replace('/', Path.DirectorySeparatorChar);
var fullPath = Path.GetFullPath(Path.Combine(parent, entryPath));
if (context.UsageHints.IsPathUsed(fullPath))
{
usedByEntrypoint = true;
}
if (!fullPath.StartsWith(root, StringComparison.Ordinal))
{
missing++;
@@ -793,11 +892,6 @@ internal static class PythonRecordVerifier
continue;
}
if (context.UsageHints.IsPathUsed(fullPath))
{
usedByEntrypoint = true;
}
if (string.IsNullOrWhiteSpace(entry.HashAlgorithm) || string.IsNullOrWhiteSpace(entry.HashValue))
{
continue;

View File

@@ -5,6 +5,6 @@
| 1 | SCANNER-ANALYZERS-LANG-10-303A | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-307 | STREAM-based parser for `*.dist-info` (`METADATA`, `WHEEL`, `entry_points.txt`) with normalization + evidence capture. | Parser handles CPython 3.8–3.12 metadata variations; fixtures confirm canonical ordering and UTF-8 handling. |
| 2 | SCANNER-ANALYZERS-LANG-10-303B | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-303A | RECORD hash verifier with chunked hashing, Zip64 support, and mismatch diagnostics. | Verifier processes 5GB RECORD fixture without allocations >2MB; mismatches produce deterministic evidence records. |
| 3 | SCANNER-ANALYZERS-LANG-10-303C | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-303B | Editable install + pip cache detection; integrate EntryTrace hints for runtime usage flags. | Editable installs resolved to source path; usage flags propagated; regression tests cover mixed editable + wheel installs. |
| 4 | SCANNER-ANALYZERS-LANG-10-307P | DOING (2025-10-23) | SCANNER-ANALYZERS-LANG-10-303C | Shared helper integration (license metadata, quiet provenance, component merging). | Shared helpers reused; analyzer-specific metadata minimal; deterministic merge tests pass. |
| 5 | SCANNER-ANALYZERS-LANG-10-308P | TODO | SCANNER-ANALYZERS-LANG-10-307P | Golden fixtures + determinism harness for Python analyzer; add benchmark and hash throughput reporting. | Fixtures under `Fixtures/lang/python/`; determinism CI guard; benchmark CSV added with threshold alerts. |
| 6 | SCANNER-ANALYZERS-LANG-10-309P | TODO | SCANNER-ANALYZERS-LANG-10-308P | Package plug-in (manifest, DI registration) and document Offline Kit bundling of Python stdlib metadata if needed. | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer; Offline Kit doc updated. |
| 4 | SCANNER-ANALYZERS-LANG-10-307P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-303C | Shared helper integration (license metadata, quiet provenance, component merging). | Shared helpers reused; analyzer-specific metadata minimal; deterministic merge tests pass. |
| 5 | SCANNER-ANALYZERS-LANG-10-308P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-307P | Golden fixtures + determinism harness for Python analyzer; add benchmark and hash throughput reporting. | Fixtures under `Fixtures/lang/python/`; determinism CI guard; benchmark CSV added with threshold alerts. |
| 6 | SCANNER-ANALYZERS-LANG-10-309P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-308P | Package plug-in (manifest, DI registration) and document Offline Kit bundling of Python stdlib metadata if needed. | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer; Offline Kit doc updated. |