feat(rust): Implement RustCargoLockParser and RustFingerprintScanner

- Added RustCargoLockParser to parse Cargo.lock files and extract package information.
- Introduced RustFingerprintScanner to scan for Rust fingerprint records in JSON files.
- Created test fixtures for Rust language analysis, including Cargo.lock and fingerprint JSON files.
- Developed tests for RustLanguageAnalyzer to ensure deterministic output based on provided fixtures.
- Added expected output files for both simple and signed Rust applications.
This commit is contained in:
master
2025-10-22 18:11:01 +03:00
parent c377229931
commit 35c5614eb7
66 changed files with 4200 additions and 217 deletions

View File

@@ -0,0 +1,650 @@
using System.Collections.Immutable;
using System.Linq;
using System.Security.Cryptography;
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
internal static class RustAnalyzerCollector
{
/// <summary>
/// Runs every collection pass over the analyzer context and returns the aggregated records.
/// </summary>
/// <param name="context">Analyzer context supplying the root path and usage hints.</param>
/// <param name="cancellationToken">Token forwarded to every collection pass.</param>
/// <returns>The crate, heuristic and fallback records produced by the passes.</returns>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
public static RustAnalyzerCollection Collect(LanguageAnalyzerContext context, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(context);

    var worker = new Collector(context);
    worker.Execute(cancellationToken);

    var collection = worker.Build();
    return collection;
}
private sealed class Collector
{
private static readonly EnumerationOptions LockEnumeration = new()
{
MatchCasing = MatchCasing.CaseSensitive,
IgnoreInaccessible = true,
RecurseSubdirectories = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
};
private readonly LanguageAnalyzerContext _context;
private readonly Dictionary<RustCrateKey, RustCrateBuilder> _crates = new();
private readonly Dictionary<string, List<RustCrateBuilder>> _cratesByName = new(StringComparer.Ordinal);
private readonly Dictionary<string, RustHeuristicBuilder> _heuristics = new(StringComparer.Ordinal);
private readonly Dictionary<string, RustBinaryRecord> _binaries = new(StringComparer.Ordinal);
/// <summary>Creates a collector that reads from the supplied analyzer context.</summary>
public Collector(LanguageAnalyzerContext context) => _context = context;
/// <summary>
/// Runs the three collection passes in order: Cargo.lock files, fingerprint records, then binaries.
/// </summary>
/// <param name="cancellationToken">Token forwarded to each pass.</param>
public void Execute(CancellationToken cancellationToken)
{
// Both passes below register crates through GetOrCreateCrate.
CollectCargoLocks(cancellationToken);
CollectFingerprints(cancellationToken);
// Must run last: CollectBinaries resolves candidates with FindCrateByName, which only sees
// crates that have already been registered by the two passes above.
CollectBinaries(cancellationToken);
}
/// <summary>
/// Materialises the collected state into three record lists, each sorted by component key
/// using ordinal comparison.
/// </summary>
/// <returns>
/// Crate records, heuristic records, and fallback records for binaries that matched neither.
/// </returns>
public RustAnalyzerCollection Build()
{
    static ImmutableArray<RustComponentRecord> SortByKey(IEnumerable<RustComponentRecord> records)
        => records
            .OrderBy(static item => item.ComponentKey, StringComparer.Ordinal)
            .ToImmutableArray();

    var crates = SortByKey(_crates.Values.Select(static crate => crate.Build()));
    var heuristics = SortByKey(_heuristics.Values.Select(static heuristic => heuristic.Build()));

    // Only binaries that were attributed to nothing fall back to a hash/path component.
    var unmatched = _binaries.Values.Where(static binary => !binary.HasMatches);
    var fallbacks = SortByKey(unmatched.Select(BuildFallback));

    return new RustAnalyzerCollection(crates, heuristics, fallbacks);
}
/// <summary>
/// Finds every <c>Cargo.lock</c> under the root path and applies each parsed package to its crate builder.
/// </summary>
/// <param name="cancellationToken">Checked once per lock file and forwarded to the parser.</param>
private void CollectCargoLocks(CancellationToken cancellationToken)
{
    var lockFiles = Directory.EnumerateFiles(_context.RootPath, "Cargo.lock", LockEnumeration);
    foreach (var lockFile in lockFiles)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var entries = RustCargoLockParser.Parse(lockFile, cancellationToken);
        if (entries.Count > 0)
        {
            // The lock file's normalised relative path is recorded on every crate it declares.
            var lockLocator = NormalizeRelative(_context.GetRelativePath(lockFile));
            foreach (var entry in entries)
            {
                var crate = GetOrCreateCrate(entry.Name, entry.Version);
                crate.ApplyCargoPackage(entry, lockLocator);
            }
        }
    }
}
/// <summary>
/// Applies every fingerprint record found under the root path to the matching crate builder,
/// creating the builder when the crate has not been seen yet.
/// </summary>
/// <param name="cancellationToken">Checked once per record and forwarded to the scanner.</param>
private void CollectFingerprints(CancellationToken cancellationToken)
{
    foreach (var fingerprint in RustFingerprintScanner.Scan(_context.RootPath, cancellationToken))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var crate = GetOrCreateCrate(fingerprint.Name, fingerprint.Version);
        var locator = NormalizeRelative(_context.GetRelativePath(fingerprint.AbsolutePath));
        crate.ApplyFingerprint(fingerprint, locator);
    }
}
/// <summary>
/// Records every binary reported by <see cref="RustBinaryClassifier"/> and attributes it to known
/// crates, or to a heuristic entry when no crate with the candidate name is registered.
/// </summary>
/// <param name="cancellationToken">Checked once per binary and forwarded to the classifier.</param>
private void CollectBinaries(CancellationToken cancellationToken)
{
    foreach (var scanned in RustBinaryClassifier.Scan(_context.RootPath, cancellationToken))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var locator = NormalizeRelative(_context.GetRelativePath(scanned.AbsolutePath));
        var isUsed = _context.UsageHints.IsPathUsed(scanned.AbsolutePath);
        var digest = scanned.ComputeSha256();

        if (_binaries.TryGetValue(locator, out var entry))
        {
            // Same relative path seen again: merge rather than replace.
            entry.MergeUsage(isUsed);
            entry.EnsureHash(digest);
        }
        else
        {
            entry = new RustBinaryRecord(scanned.AbsolutePath, locator, isUsed, digest);
            _binaries[locator] = entry;
        }

        if (scanned.CrateCandidates.IsDefaultOrEmpty)
        {
            continue;
        }

        foreach (var crateName in scanned.CrateCandidates)
        {
            if (string.IsNullOrWhiteSpace(crateName))
            {
                continue;
            }

            if (FindCrateByName(crateName) is { } knownCrate)
            {
                knownCrate.AddBinaryEvidence(locator, entry.Hash, isUsed);
                entry.MarkCrateMatch();
            }
            else
            {
                GetOrCreateHeuristic(crateName).AddBinary(locator, entry.Hash, isUsed);
                entry.MarkHeuristicMatch();
            }
        }
    }
}
/// <summary>
/// Returns the builder registered for the (name, version) pair, creating and indexing it by
/// name when it does not exist yet.
/// </summary>
/// <param name="name">Crate name; normalised by <see cref="RustCrateKey"/> and the builder.</param>
/// <param name="version">Crate version, or null/blank when unknown.</param>
private RustCrateBuilder GetOrCreateCrate(string name, string? version)
{
    var key = new RustCrateKey(name, version);
    if (!_crates.TryGetValue(key, out var crate))
    {
        crate = new RustCrateBuilder(name, version);
        _crates[key] = crate;

        // Secondary index used by FindCrateByName.
        if (!_cratesByName.TryGetValue(crate.Name, out var sameName))
        {
            sameName = new List<RustCrateBuilder>();
            _cratesByName[crate.Name] = sameName;
        }

        sameName.Add(crate);
        return crate;
    }

    crate.EnsureVersion(version);
    return crate;
}
/// <summary>
/// Resolves a crate name extracted from a binary to a registered crate builder.
/// </summary>
/// <remarks>
/// An exact (ordinal) name match is tried first. When that fails, names are compared with
/// <c>-</c> and <c>_</c> treated as equal: Cargo package names may contain hyphens, while the
/// crate name that ends up in symbols has them replaced by underscores, so <c>tokio_util</c>
/// from a binary must be able to match <c>tokio-util</c> from <c>Cargo.lock</c>.
/// When several versions share the name, the one whose version string sorts first (ordinal) wins.
/// </remarks>
/// <param name="candidate">Crate name candidate taken from a binary.</param>
/// <returns>The matching builder, or null when no registered crate has that name.</returns>
private RustCrateBuilder? FindCrateByName(string candidate)
{
    var normalized = RustCrateBuilder.NormalizeName(candidate);
    if (!_cratesByName.TryGetValue(normalized, out var builders) || builders.Count == 0)
    {
        var canonical = normalized.Replace('-', '_');

        // Ordering by key keeps the pick deterministic if two registered names collapse to the same form.
        builders = _cratesByName
            .Where(pair => pair.Value.Count > 0
                && string.Equals(pair.Key.Replace('-', '_'), canonical, StringComparison.Ordinal))
            .OrderBy(static pair => pair.Key, StringComparer.Ordinal)
            .Select(static pair => pair.Value)
            .FirstOrDefault();

        if (builders is null)
        {
            return null;
        }
    }

    return builders
        .OrderBy(static builder => builder.Version ?? string.Empty, StringComparer.Ordinal)
        .FirstOrDefault();
}
/// <summary>
/// Returns the heuristic builder registered under the normalised crate name, creating it on first use.
/// </summary>
/// <param name="crateName">Crate name candidate taken from a binary.</param>
private RustHeuristicBuilder GetOrCreateHeuristic(string crateName)
{
    var name = RustCrateBuilder.NormalizeName(crateName);
    if (!_heuristics.TryGetValue(name, out var heuristic))
    {
        heuristic = new RustHeuristicBuilder(name);
        _heuristics[name] = heuristic;
    }

    return heuristic;
}
/// <summary>
/// Builds the fallback component for a binary that matched neither a crate nor a heuristic.
/// </summary>
/// <remarks>
/// The component key is <c>bin::sha256:{hash}</c> when a hash is available and
/// <c>bin::{relativePath}</c> otherwise.
/// </remarks>
/// <param name="record">The unmatched binary.</param>
private RustComponentRecord BuildFallback(RustBinaryRecord record)
{
    // Treat an empty hash the same as a missing one.
    var digest = string.IsNullOrEmpty(record.Hash) ? null : record.Hash;

    var metadata = new List<KeyValuePair<string, string?>>();
    metadata.Add(new KeyValuePair<string, string?>("binary.path", record.RelativePath));
    metadata.Add(new KeyValuePair<string, string?>("provenance", "binary"));
    if (digest is not null)
    {
        metadata.Add(new KeyValuePair<string, string?>("binary.sha256", digest));
    }

    metadata.Sort(static (left, right) => string.CompareOrdinal(left.Key, right.Key));

    var fileEvidence = new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        "binary",
        record.RelativePath,
        null,
        digest);
    var evidence = new List<LanguageComponentEvidence> { fileEvidence };

    var componentName = Path.GetFileName(record.RelativePath);
    if (string.IsNullOrWhiteSpace(componentName))
    {
        componentName = "binary";
    }

    string key;
    if (digest is null)
    {
        key = $"bin::{record.RelativePath}";
    }
    else
    {
        key = $"bin::sha256:{digest}";
    }

    return new RustComponentRecord(
        Name: componentName,
        Version: null,
        Type: "bin",
        Purl: null,
        ComponentKey: key,
        Metadata: metadata,
        Evidence: evidence,
        UsedByEntrypoint: record.UsedByEntrypoint);
}
/// <summary>
/// Converts a relative path to forward-slash form; null, blank, or <c>"."</c> input yields <c>"."</c>.
/// </summary>
/// <param name="relativePath">Relative path as returned by the analyzer context.</param>
private static string NormalizeRelative(string relativePath)
    => string.IsNullOrWhiteSpace(relativePath) || relativePath == "."
        ? "."
        : relativePath.Replace('\\', '/');
}
}
/// <summary>
/// Result of a Rust collection run, split by how each component was identified.
/// </summary>
/// <param name="Crates">Components built from crate builders (Cargo.lock and fingerprint data).</param>
/// <param name="Heuristics">Components inferred only from crate names found inside binaries.</param>
/// <param name="Fallbacks">Components for binaries that matched neither a crate nor a heuristic.</param>
internal sealed record RustAnalyzerCollection(
ImmutableArray<RustComponentRecord> Crates,
ImmutableArray<RustComponentRecord> Heuristics,
ImmutableArray<RustComponentRecord> Fallbacks);
/// <summary>
/// A single component produced by the Rust collector, ready to be handed to the component writer.
/// </summary>
/// <param name="Name">Component name (crate name, or binary file name for fallbacks).</param>
/// <param name="Version">Component version, or null when unknown.</param>
/// <param name="Type">Component type; the builders in this file emit <c>"cargo"</c> or <c>"bin"</c>.</param>
/// <param name="Purl">Package URL, or null when none could be built.</param>
/// <param name="ComponentKey">Key the collector sorts records by; also used as the explicit key when <paramref name="Purl"/> is null.</param>
/// <param name="Metadata">Key/value metadata; the builders in this file emit it ordered by key.</param>
/// <param name="Evidence">Evidence entries supporting the component.</param>
/// <param name="UsedByEntrypoint">True when at least one associated binary was flagged as used.</param>
internal sealed record RustComponentRecord(
string Name,
string? Version,
string Type,
string? Purl,
string ComponentKey,
IReadOnlyList<KeyValuePair<string, string?>> Metadata,
IReadOnlyCollection<LanguageComponentEvidence> Evidence,
bool UsedByEntrypoint);
internal sealed class RustCrateBuilder
{
private readonly SortedDictionary<string, string?> _metadata = new(StringComparer.Ordinal);
private readonly HashSet<LanguageComponentEvidence> _evidence = new(new LanguageComponentEvidenceComparer());
private readonly SortedSet<string> _binaryPaths = new(StringComparer.Ordinal);
private readonly SortedSet<string> _binaryHashes = new(StringComparer.Ordinal);
private string? _version;
private string? _source;
private string? _checksum;
private bool _usedByEntrypoint;
/// <summary>Creates a builder for the given crate, normalising the name and recording the version if present.</summary>
/// <param name="name">Crate name; surrounding whitespace is removed.</param>
/// <param name="version">Crate version, or null/blank when unknown.</param>
public RustCrateBuilder(string name, string? version)
{
    // The two initialisations are independent of each other.
    EnsureVersion(version);
    Name = NormalizeName(name);
}
public string Name { get; }
public string? Version => _version;
/// <summary>Trims a crate name; null or blank input becomes the empty string.</summary>
/// <param name="value">Raw crate name.</param>
public static string NormalizeName(string value)
    => string.IsNullOrWhiteSpace(value) ? string.Empty : value.Trim();
/// <summary>
/// Records the version if none is set yet; blank input and later calls leave the existing value untouched.
/// </summary>
/// <param name="version">Candidate version; trimmed before being stored.</param>
public void EnsureVersion(string? version)
{
    if (_version is null && !string.IsNullOrWhiteSpace(version))
    {
        _version = version.Trim();
    }
}
/// <summary>
/// Merges a Cargo.lock package entry into the builder: version, source, checksum, the lock file
/// path, and a <c>cargo.lock</c> evidence entry.
/// </summary>
/// <remarks>Source and checksum keep the first non-blank value seen; the lock path is overwritten on every call.</remarks>
/// <param name="package">Parsed package entry.</param>
/// <param name="relativePath">Normalised relative path of the lock file.</param>
public void ApplyCargoPackage(RustCargoPackage package, string relativePath)
{
    EnsureVersion(package.Version);

    var source = package.Source;
    if (!string.IsNullOrWhiteSpace(source))
    {
        _source ??= source.Trim();
        _metadata["source"] = _source;
    }

    var checksum = package.Checksum;
    var hasChecksum = !string.IsNullOrWhiteSpace(checksum);
    if (hasChecksum)
    {
        _checksum ??= checksum!.Trim();
        _metadata["checksum"] = _checksum;
    }

    _metadata["cargo.lock.path"] = relativePath;

    var lockEvidence = new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        "cargo.lock",
        relativePath,
        $"{package.Name} {package.Version}",
        hasChecksum ? checksum : null);
    _evidence.Add(lockEvidence);
}
/// <summary>
/// Merges a fingerprint record into the builder: version, source, profile/target-kind metadata
/// (first value wins), and a <c>cargo.fingerprint</c> evidence entry.
/// </summary>
/// <param name="record">Parsed fingerprint record.</param>
/// <param name="relativePath">Normalised relative path of the fingerprint file.</param>
public void ApplyFingerprint(RustFingerprintRecord record, string relativePath)
{
    EnsureVersion(record.Version);

    var source = record.Source;
    if (!string.IsNullOrWhiteSpace(source))
    {
        _source ??= source.Trim();
        _metadata["source"] = _source;
    }

    AddMetadataIfEmpty("fingerprint.profile", record.Profile);
    AddMetadataIfEmpty("fingerprint.targetKind", record.TargetKind);

    // Evidence value prefers the target kind, then the profile, then a fixed label.
    var label = record.TargetKind ?? record.Profile ?? "fingerprint";
    var fingerprintEvidence = new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        "cargo.fingerprint",
        relativePath,
        label,
        null);
    _evidence.Add(fingerprintEvidence);
}
/// <summary>
/// Associates a binary with this crate: records its path and hash, propagates the usage flag,
/// and adds a <c>binary</c> evidence entry when a path is supplied.
/// </summary>
/// <param name="relativePath">Normalised relative path of the binary; blank values are ignored.</param>
/// <param name="hash">Hash of the binary; blank values are ignored.</param>
/// <param name="usedByEntrypoint">Whether the binary was flagged as used.</param>
public void AddBinaryEvidence(string relativePath, string? hash, bool usedByEntrypoint)
{
    var digest = string.IsNullOrWhiteSpace(hash) ? null : hash;
    if (digest is not null)
    {
        _binaryHashes.Add(digest);
    }

    // The flag is sticky: once set it is never cleared.
    _usedByEntrypoint |= usedByEntrypoint;

    if (string.IsNullOrWhiteSpace(relativePath))
    {
        return;
    }

    _binaryPaths.Add(relativePath);
    _evidence.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        "binary",
        relativePath,
        null,
        digest));
}
/// <summary>
/// Produces the component record for this crate.
/// </summary>
/// <remarks>
/// Binary paths and hashes are joined with <c>;</c> into metadata. The component key is
/// <c>purl::{purl}</c> when a purl can be built and <c>cargo::{name}::{version-or-unknown}</c> otherwise.
/// </remarks>
public RustComponentRecord Build()
{
    if (_binaryPaths.Count > 0)
    {
        _metadata["binary.paths"] = string.Join(';', _binaryPaths);
    }

    if (_binaryHashes.Count > 0)
    {
        _metadata["binary.sha256"] = string.Join(';', _binaryHashes);
    }

    // _metadata is a SortedDictionary with an ordinal comparer, so it already enumerates in key order.
    var orderedMetadata = new List<KeyValuePair<string, string?>>(_metadata);

    var orderedEvidence = _evidence
        .OrderBy(static entry => entry.ComparisonKey, StringComparer.Ordinal)
        .ToImmutableArray();

    var purl = BuildPurl(Name, _version);

    string componentKey;
    if (string.IsNullOrEmpty(purl))
    {
        var versionLabel = _version ?? "unknown";
        componentKey = $"cargo::{Name}::{versionLabel}";
    }
    else
    {
        componentKey = $"purl::{purl}";
    }

    return new RustComponentRecord(
        Name: Name,
        Version: _version,
        Type: "cargo",
        Purl: purl,
        ComponentKey: componentKey,
        Metadata: orderedMetadata,
        Evidence: orderedEvidence,
        UsedByEntrypoint: _usedByEntrypoint);
}
/// <summary>Stores a trimmed metadata value unless the key or value is blank or the key is already present.</summary>
/// <param name="key">Metadata key.</param>
/// <param name="value">Metadata value; ignored when null or blank.</param>
private void AddMetadataIfEmpty(string key, string? value)
{
    var usable = !string.IsNullOrWhiteSpace(key) && !string.IsNullOrWhiteSpace(value);
    if (usable && !_metadata.ContainsKey(key))
    {
        _metadata[key] = value!.Trim();
    }
}
/// <summary>
/// Builds a <c>pkg:cargo/{name}[@{version}]</c> package URL with both parts percent-encoded.
/// </summary>
/// <param name="name">Crate name.</param>
/// <param name="version">Crate version; omitted from the purl when null or blank.</param>
/// <returns>The purl, or null when <paramref name="name"/> is blank.</returns>
private static string? BuildPurl(string name, string? version)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        return null;
    }

    var unversioned = $"pkg:cargo/{Uri.EscapeDataString(name.Trim())}";
    return string.IsNullOrWhiteSpace(version)
        ? unversioned
        : $"{unversioned}@{Uri.EscapeDataString(version.Trim())}";
}
}
/// <summary>
/// Accumulates the binaries in which a crate name was seen without any matching registered
/// crate, and builds the corresponding heuristic component.
/// </summary>
internal sealed class RustHeuristicBuilder
{
    private readonly HashSet<LanguageComponentEvidence> _evidence = new(new LanguageComponentEvidenceComparer());
    private readonly SortedSet<string> _binaryPaths = new(StringComparer.Ordinal);
    private readonly SortedSet<string> _binaryHashes = new(StringComparer.Ordinal);
    private bool _usedByEntrypoint;

    /// <summary>Creates a builder for the given crate name (normalised via <see cref="RustCrateBuilder.NormalizeName"/>).</summary>
    public RustHeuristicBuilder(string crateName) => CrateName = RustCrateBuilder.NormalizeName(crateName);

    /// <summary>Normalised crate name this heuristic stands for.</summary>
    public string CrateName { get; }

    /// <summary>
    /// Registers a binary that mentions the crate: records path and hash, propagates the usage
    /// flag, and adds a <c>rust.heuristic</c> evidence entry when a path is supplied.
    /// </summary>
    public void AddBinary(string relativePath, string? hash, bool usedByEntrypoint)
    {
        var digest = string.IsNullOrWhiteSpace(hash) ? null : hash;
        if (digest is not null)
        {
            _binaryHashes.Add(digest);
        }

        // The flag is sticky: once set it is never cleared.
        _usedByEntrypoint |= usedByEntrypoint;

        if (string.IsNullOrWhiteSpace(relativePath))
        {
            return;
        }

        _binaryPaths.Add(relativePath);
        _evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.Derived,
            "rust.heuristic",
            relativePath,
            CrateName,
            digest));
    }

    /// <summary>
    /// Produces the heuristic component; its key is
    /// <c>rust::heuristic::{crate}::{paths joined by |}</c>.
    /// </summary>
    public RustComponentRecord Build()
    {
        var metadata = new List<KeyValuePair<string, string?>>();
        metadata.Add(new KeyValuePair<string, string?>("crate", CrateName));
        metadata.Add(new KeyValuePair<string, string?>("provenance", "heuristic"));
        metadata.Add(new KeyValuePair<string, string?>("binary.paths", string.Join(';', _binaryPaths)));
        if (_binaryHashes.Count > 0)
        {
            metadata.Add(new KeyValuePair<string, string?>("binary.sha256", string.Join(';', _binaryHashes)));
        }

        metadata.Sort(static (left, right) => string.CompareOrdinal(left.Key, right.Key));

        var orderedEvidence = _evidence
            .OrderBy(static entry => entry.ComparisonKey, StringComparer.Ordinal)
            .ToImmutableArray();

        var pathSuffix = string.Join("|", _binaryPaths);

        return new RustComponentRecord(
            Name: CrateName,
            Version: null,
            Type: "cargo",
            Purl: null,
            ComponentKey: $"rust::heuristic::{CrateName}::{pathSuffix}",
            Metadata: metadata,
            Evidence: orderedEvidence,
            UsedByEntrypoint: _usedByEntrypoint);
    }
}
/// <summary>
/// Mutable per-binary state tracked by the collector: location, hash, usage flag, and whether
/// the binary has been attributed to a crate or a heuristic.
/// </summary>
internal sealed class RustBinaryRecord
{
    private string? _hash;

    /// <summary>Creates a record; a blank relative path becomes <c>"."</c> and a blank hash becomes null.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="absolutePath"/> is null.</exception>
    public RustBinaryRecord(string absolutePath, string relativePath, bool usedByEntrypoint, string? hash)
    {
        ArgumentNullException.ThrowIfNull(absolutePath);

        AbsolutePath = absolutePath;
        RelativePath = string.IsNullOrWhiteSpace(relativePath) ? "." : relativePath;
        UsedByEntrypoint = usedByEntrypoint;
        if (!string.IsNullOrWhiteSpace(hash))
        {
            _hash = hash;
        }
    }

    public string AbsolutePath { get; }

    public string RelativePath { get; }

    public bool UsedByEntrypoint { get; private set; }

    /// <summary>True once the binary has been attributed to a crate or a heuristic.</summary>
    public bool HasMatches => HasCrateMatch || HasHeuristicMatch;

    public bool HasCrateMatch { get; private set; }

    public bool HasHeuristicMatch { get; private set; }

    public string? Hash => _hash;

    public void MarkCrateMatch() => HasCrateMatch = true;

    public void MarkHeuristicMatch() => HasHeuristicMatch = true;

    /// <summary>Sets the usage flag when <paramref name="used"/> is true; never clears it.</summary>
    public void MergeUsage(bool used) => UsedByEntrypoint |= used;

    /// <summary>
    /// Fills in the hash when none is stored: uses <paramref name="hash"/> if non-blank,
    /// otherwise hashes the file at <see cref="AbsolutePath"/>.
    /// </summary>
    public void EnsureHash(string? hash)
    {
        _hash ??= string.IsNullOrWhiteSpace(hash) ? ComputeHashSafely() : hash;
    }

    // Returns the lowercase hex SHA-256 of the file, or null when it cannot be read.
    private string? ComputeHashSafely()
    {
        try
        {
            using var stream = new FileStream(AbsolutePath, FileMode.Open, FileAccess.Read, FileShare.Read);
            using var sha = SHA256.Create();
            return Convert.ToHexString(sha.ComputeHash(stream)).ToLowerInvariant();
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            return null;
        }
    }
}
/// <summary>
/// Dictionary key identifying a crate by normalised name and trimmed version
/// (null when the version is missing or blank).
/// </summary>
internal readonly record struct RustCrateKey
{
    public RustCrateKey(string name, string? version)
    {
        Name = RustCrateBuilder.NormalizeName(name);

        if (string.IsNullOrWhiteSpace(version))
        {
            Version = null;
        }
        else
        {
            Version = version.Trim();
        }
    }

    public string Name { get; }

    public string? Version { get; }
}
/// <summary>
/// Equality comparer that treats two evidence entries as equal when kind, source, locator,
/// value and SHA-256 all match (strings compared ordinally).
/// </summary>
internal sealed class LanguageComponentEvidenceComparer : IEqualityComparer<LanguageComponentEvidence>
{
    public bool Equals(LanguageComponentEvidence? x, LanguageComponentEvidence? y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }

        if (x is null || y is null)
        {
            return false;
        }

        if (x.Kind != y.Kind)
        {
            return false;
        }

        var ordinal = StringComparison.Ordinal;
        return string.Equals(x.Source, y.Source, ordinal)
            && string.Equals(x.Locator, y.Locator, ordinal)
            && string.Equals(x.Value, y.Value, ordinal)
            && string.Equals(x.Sha256, y.Sha256, ordinal);
    }

    public int GetHashCode(LanguageComponentEvidence obj)
    {
        // Combines exactly the fields compared in Equals, in the same order.
        var ordinal = StringComparer.Ordinal;
        var accumulator = new HashCode();
        accumulator.Add(obj.Kind);
        accumulator.Add(obj.Source, ordinal);
        accumulator.Add(obj.Locator, ordinal);
        accumulator.Add(obj.Value, ordinal);
        accumulator.Add(obj.Sha256, ordinal);
        return accumulator.ToHashCode();
    }
}

View File

@@ -0,0 +1,250 @@
using System.Buffers;
using System.Collections.Immutable;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
internal static class RustBinaryClassifier
{
private static readonly ReadOnlyMemory<byte> ElfMagic = new byte[] { 0x7F, (byte)'E', (byte)'L', (byte)'F' };
private static readonly ReadOnlyMemory<byte> SymbolPrefix = new byte[] { (byte)'_', (byte)'Z', (byte)'N' };
private const int ChunkSize = 64 * 1024;
private const int OverlapSize = 48;
private const long MaxBinarySize = 128L * 1024L * 1024L;
private static readonly HashSet<string> StandardCrates = new(StringComparer.Ordinal)
{
"core",
"alloc",
"std",
"panic_unwind",
"panic_abort",
};
private static readonly EnumerationOptions Enumeration = new()
{
MatchCasing = MatchCasing.CaseSensitive,
IgnoreInaccessible = true,
RecurseSubdirectories = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
};
/// <summary>
/// Walks the directory tree under <paramref name="rootPath"/> and returns every eligible binary
/// together with the crate name candidates extracted from it.
/// </summary>
/// <param name="rootPath">Directory to scan recursively.</param>
/// <param name="cancellationToken">Checked once per file and forwarded to the extractor.</param>
/// <exception cref="ArgumentException"><paramref name="rootPath"/> is null or blank.</exception>
public static IReadOnlyList<RustBinaryInfo> Scan(string rootPath, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(rootPath))
    {
        throw new ArgumentException("Root path is required", nameof(rootPath));
    }

    var found = new List<RustBinaryInfo>();
    foreach (var candidate in Directory.EnumerateFiles(rootPath, "*", Enumeration))
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (IsEligibleBinary(candidate))
        {
            var crateNames = ExtractCrateNames(candidate, cancellationToken);
            found.Add(new RustBinaryInfo(candidate, crateNames));
        }
    }

    return found;
}
/// <summary>
/// Returns true when the file exists, is non-empty, does not exceed <c>MaxBinarySize</c>, and
/// starts with the four-byte ELF magic. Unreadable files are reported as not eligible.
/// </summary>
/// <param name="path">Absolute path of the file to probe.</param>
private static bool IsEligibleBinary(string path)
{
    try
    {
        var file = new FileInfo(path);
        if (!file.Exists)
        {
            return false;
        }

        var size = file.Length;
        if (size == 0 || size > MaxBinarySize)
        {
            return false;
        }

        using var stream = file.OpenRead();
        Span<byte> magic = stackalloc byte[4];
        return stream.Read(magic) == 4 && magic.SequenceEqual(ElfMagic.Span);
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        return false;
    }
}
/// <summary>
/// Streams the file in chunks and collects crate name candidates found by <c>ScanForSymbols</c>.
/// </summary>
/// <remarks>
/// The returned names are trimmed, longer than one character, exclude the entries in
/// <c>StandardCrates</c>, and are de-duplicated and sorted ordinally. An I/O or access failure
/// yields an empty array.
/// NOTE(review): only the last <c>OverlapSize</c> bytes of a chunk are re-scanned with the next
/// one, and <c>TryParseCrate</c> rejects a name that runs past the end of the span, so a symbol
/// that starts earlier than that but crosses the chunk boundary appears to be missed — confirm
/// whether that is acceptable.
/// </remarks>
/// <param name="path">Absolute path of the binary.</param>
/// <param name="cancellationToken">Checked once per chunk.</param>
private static ImmutableArray<string> ExtractCrateNames(string path, CancellationToken cancellationToken)
{
var names = new HashSet<string>(StringComparer.Ordinal);
// Rented buffer holds the carried-over tail plus one fresh chunk; returned in the finally block.
var buffer = ArrayPool<byte>.Shared.Rent(ChunkSize + OverlapSize);
var overlap = new byte[OverlapSize];
var overlapLength = 0;
try
{
using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
while (true)
{
cancellationToken.ThrowIfCancellationRequested();
// Copy previous overlap to buffer prefix.
if (overlapLength > 0)
{
Array.Copy(overlap, 0, buffer, 0, overlapLength);
}
// New data is appended after the carried-over tail.
var read = stream.Read(buffer, overlapLength, ChunkSize);
if (read <= 0)
{
break;
}
var span = new ReadOnlySpan<byte>(buffer, 0, overlapLength + read);
ScanForSymbols(span, names);
// Keep the tail of what was just scanned so a symbol split across reads can be seen whole.
overlapLength = Math.Min(OverlapSize, span.Length);
if (overlapLength > 0)
{
span[^overlapLength..].CopyTo(overlap);
}
}
}
catch (IOException)
{
return ImmutableArray<string>.Empty;
}
catch (UnauthorizedAccessException)
{
return ImmutableArray<string>.Empty;
}
finally
{
ArrayPool<byte>.Shared.Return(buffer);
}
if (names.Count == 0)
{
return ImmutableArray<string>.Empty;
}
// Normalise, drop the standard-library crates, and sort for stable output.
var ordered = names
.Where(static name => !string.IsNullOrWhiteSpace(name))
.Select(static name => name.Trim())
.Where(static name => name.Length > 1)
.Where(name => !StandardCrates.Contains(name))
.Distinct(StringComparer.Ordinal)
.OrderBy(static name => name, StringComparer.Ordinal)
.ToImmutableArray();
return ordered;
}
/// <summary>
/// Finds every occurrence of the symbol prefix in <paramref name="span"/> and adds the
/// length-prefixed name that follows it to <paramref name="names"/>.
/// </summary>
/// <param name="span">Bytes to search.</param>
/// <param name="names">Set receiving the extracted names.</param>
private static void ScanForSymbols(ReadOnlySpan<byte> span, HashSet<string> names)
{
    var marker = SymbolPrefix.Span;
    var cursor = 0;

    while (cursor < span.Length)
    {
        var hit = span.Slice(cursor).IndexOf(marker);
        if (hit < 0)
        {
            return;
        }

        // Move to the first byte after the prefix.
        cursor += hit + marker.Length;
        if (cursor >= span.Length)
        {
            return;
        }

        if (TryParseCrate(span.Slice(cursor), out var name, out var used))
        {
            if (!string.IsNullOrWhiteSpace(name))
            {
                names.Add(name);
            }

            // Always advance by at least one byte so the loop makes progress.
            cursor += used > 1 ? used : 1;
        }
        else
        {
            cursor++;
        }
    }
}
/// <summary>
/// Parses a decimal length prefix followed by that many bytes and returns them as a crate name.
/// </summary>
/// <remarks>
/// The name is accepted only when every byte is an ASCII letter, digit or underscore. Without
/// that check, trait-impl path segments (for example <c>_$LT$...</c>) and arbitrary binary data
/// that happens to follow the prefix would be reported as crate names.
/// </remarks>
/// <param name="span">Bytes immediately following the symbol prefix.</param>
/// <param name="crate">The parsed name on success; otherwise null.</param>
/// <param name="consumed">Number of bytes consumed (digits plus name) on success; otherwise 0.</param>
/// <returns>True when a well-formed, fully contained identifier was parsed.</returns>
private static bool TryParseCrate(ReadOnlySpan<byte> span, out string? crate, out int consumed)
{
    static bool IsIdentifierByte(byte value)
        => (value >= 'a' && value <= 'z')
            || (value >= 'A' && value <= 'Z')
            || (value >= '0' && value <= '9')
            || value == '_';

    crate = null;
    consumed = 0;

    var i = 0;
    var length = 0;
    while (i < span.Length && span[i] is >= (byte)'0' and <= (byte)'9')
    {
        length = (length * 10) + (span[i] - (byte)'0');
        i++;

        // Bail out early: caps the name length and keeps the accumulator from overflowing.
        if (length > 256)
        {
            return false;
        }
    }

    if (i == 0 || length <= 0 || i + length > span.Length)
    {
        return false;
    }

    var identifier = span.Slice(i, length);
    foreach (var value in identifier)
    {
        if (!IsIdentifierByte(value))
        {
            return false;
        }
    }

    crate = Encoding.ASCII.GetString(identifier);
    consumed = i + length;
    return true;
}
}
/// <summary>
/// A binary discovered by the classifier, with the crate name candidates extracted from it.
/// </summary>
/// <param name="AbsolutePath">Absolute path of the binary.</param>
/// <param name="CrateCandidates">Crate names found in the binary.</param>
internal sealed record RustBinaryInfo(string AbsolutePath, ImmutableArray<string> CrateCandidates)
{
    private string? _sha256;

    /// <summary>
    /// Returns the lowercase hex SHA-256 of the file, computed on first call and cached.
    /// An unreadable file yields (and caches) the empty string.
    /// </summary>
    public string ComputeSha256()
    {
        if (_sha256 is null)
        {
            try
            {
                using var stream = new FileStream(AbsolutePath, FileMode.Open, FileAccess.Read, FileShare.Read);
                using var sha = SHA256.Create();
                _sha256 = Convert.ToHexString(sha.ComputeHash(stream)).ToLowerInvariant();
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                _sha256 = string.Empty;
            }
        }

        return _sha256 ?? string.Empty;
    }
}

View File

@@ -0,0 +1,298 @@
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
internal static class RustCargoLockParser
{
/// <summary>
/// Reads a <c>Cargo.lock</c> file and returns one entry per <c>[[package]]</c> table that has a name.
/// </summary>
/// <remarks>
/// This is a line-oriented reader, not a general TOML parser. It understands scalar
/// <c>key = value</c> lines and arrays written inline or one entry per line; any other table
/// header ends the current package. Like the other scanners in this namespace it is
/// best-effort: a file that cannot be opened or read yields an empty list instead of aborting
/// the whole analysis.
/// </remarks>
/// <param name="path">Path of the lock file.</param>
/// <param name="cancellationToken">Checked once per line.</param>
/// <returns>The parsed packages; empty when the file is missing, unreadable, or declares none.</returns>
/// <exception cref="ArgumentException"><paramref name="path"/> is null or blank.</exception>
public static IReadOnlyList<RustCargoPackage> Parse(string path, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        throw new ArgumentException("Lock path is required", nameof(path));
    }

    var info = new FileInfo(path);
    if (!info.Exists)
    {
        return Array.Empty<RustCargoPackage>();
    }

    var packages = new List<RustCargoPackage>();
    try
    {
        using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var reader = new StreamReader(stream);

        RustCargoPackageBuilder? builder = null;
        string? currentArrayKey = null;
        var arrayValues = new List<string>();

        while (true)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var line = reader.ReadLine();
            if (line is null)
            {
                break;
            }

            var trimmed = TrimComments(line.AsSpan());
            if (trimmed.Length == 0)
            {
                continue;
            }

            if (IsPackageHeader(trimmed))
            {
                FlushCurrent(builder, packages);
                builder = new RustCargoPackageBuilder();
                currentArrayKey = null;
                arrayValues.Clear();
                continue;
            }

            if (builder is null)
            {
                continue;
            }

            if (currentArrayKey is not null)
            {
                // Inside a multi-line array. Entries are written as `"name version",`, so they are
                // split on commas before unquoting; otherwise the quotes and the trailing comma
                // would end up inside the stored value.
                if (trimmed[0] != ']')
                {
                    var endsArray = trimmed[^1] == ']';
                    AppendArrayItems(endsArray ? trimmed[..^1] : trimmed, arrayValues);
                    if (!endsArray)
                    {
                        continue;
                    }
                }

                builder.SetArray(currentArrayKey, arrayValues);
                currentArrayKey = null;
                arrayValues.Clear();
                continue;
            }

            if (trimmed[0] == '[')
            {
                // Entering a new table; finish any pending package and skip section.
                FlushCurrent(builder, packages);
                builder = null;
                continue;
            }

            var equalsIndex = trimmed.IndexOf('=');
            if (equalsIndex < 0)
            {
                continue;
            }

            var key = trimmed[..equalsIndex].Trim();
            var valuePart = trimmed[(equalsIndex + 1)..].Trim();
            if (valuePart.Length == 0)
            {
                continue;
            }

            if (valuePart[0] == '[')
            {
                currentArrayKey = key.ToString();
                arrayValues.Clear();

                // Items may follow the opening bracket on the same line whether or not the array
                // is closed there; keep them in both cases.
                var closedInline = valuePart.Length > 1 && valuePart[^1] == ']';
                AppendArrayItems(closedInline ? valuePart[1..^1] : valuePart[1..], arrayValues);

                if (closedInline)
                {
                    builder.SetArray(currentArrayKey, arrayValues);
                    currentArrayKey = null;
                    arrayValues.Clear();
                }

                continue;
            }

            var parsed = ExtractString(valuePart);
            if (parsed is not null)
            {
                builder.SetField(key, parsed);
            }
        }

        // An array left open at end of file still contributes what was read.
        if (currentArrayKey is not null && arrayValues.Count > 0)
        {
            builder?.SetArray(currentArrayKey, arrayValues);
        }

        FlushCurrent(builder, packages);
    }
    catch (IOException)
    {
        return Array.Empty<RustCargoPackage>();
    }
    catch (UnauthorizedAccessException)
    {
        return Array.Empty<RustCargoPackage>();
    }

    return packages;

    // Splits a comma-separated run of array items and appends every non-empty unquoted value.
    static void AppendArrayItems(ReadOnlySpan<char> items, List<string> target)
    {
        var text = items.Trim();
        if (text.Length == 0)
        {
            return;
        }

        foreach (var token in SplitInlineArray(text.ToString()))
        {
            var item = ExtractString(token.AsSpan());
            if (!string.IsNullOrEmpty(item))
            {
                target.Add(item);
            }
        }
    }
}
/// <summary>
/// Removes a trailing <c>#</c> comment from a line and trims surrounding whitespace.
/// </summary>
/// <remarks>
/// A <c>#</c> inside a double-quoted string is part of the value, not a comment. This matters
/// for git sources, which are written as <c>"git+https://host/repo?rev=...#&lt;commit&gt;"</c>.
/// Backslash escapes inside the string are skipped so an escaped quote does not end it.
/// </remarks>
/// <param name="line">Raw line from the lock file.</param>
/// <returns>The line without its comment, trimmed; empty when nothing remains.</returns>
private static ReadOnlySpan<char> TrimComments(ReadOnlySpan<char> line)
{
    var insideString = false;
    for (var i = 0; i < line.Length; i++)
    {
        var current = line[i];
        if (insideString)
        {
            if (current == '\\')
            {
                // Skip the escaped character, whatever it is.
                i++;
            }
            else if (current == '"')
            {
                insideString = false;
            }

            continue;
        }

        if (current == '"')
        {
            insideString = true;
        }
        else if (current == '#')
        {
            line = line[..i];
            break;
        }
    }

    return line.Trim();
}
/// <summary>Returns true when the (already trimmed) line is exactly the <c>[[package]]</c> table header.</summary>
private static bool IsPackageHeader(ReadOnlySpan<char> value)
{
    const string Header = "[[package]]";
    return value.Equals(Header.AsSpan(), StringComparison.Ordinal);
}
/// <summary>
/// Splits the inside of an inline array on commas that are not within double quotes,
/// yielding each non-empty piece trimmed (quotes are kept).
/// </summary>
/// <param name="value">Array content without the surrounding brackets.</param>
private static IEnumerable<string> SplitInlineArray(string value)
{
    var pieceStart = 0;
    var insideQuotes = false;

    for (var index = 0; index < value.Length; index++)
    {
        switch (value[index])
        {
            case '"':
                insideQuotes = !insideQuotes;
                break;

            case ',' when !insideQuotes:
            {
                var piece = value.Substring(pieceStart, index - pieceStart).Trim();
                if (piece.Length > 0)
                {
                    yield return piece;
                }

                pieceStart = index + 1;
                break;
            }
        }
    }

    // Whatever follows the last separator.
    var tail = value.Substring(pieceStart).Trim();
    if (tail.Length > 0)
    {
        yield return tail;
    }
}
/// <summary>
/// Returns the text between surrounding double quotes, or the trimmed input when it is not quoted.
/// </summary>
/// <remarks>
/// Escape sequences are not decoded. A value needs at least two characters to count as quoted:
/// a lone <c>"</c> is both the first and the last character, and slicing it as if it were a
/// quoted string would throw.
/// </remarks>
/// <param name="value">Raw value text.</param>
/// <returns>The extracted string, or null when the input is empty or blank.</returns>
private static string? ExtractString(ReadOnlySpan<char> value)
{
    if (value.Length == 0)
    {
        return null;
    }

    if (value.Length >= 2 && value[0] == '"' && value[^1] == '"')
    {
        return value[1..^1].ToString();
    }

    var trimmed = value.Trim();
    return trimmed.Length == 0 ? null : trimmed.ToString();
}
/// <summary>Appends the package held by <paramref name="builder"/> to <paramref name="packages"/> when it has data and builds successfully.</summary>
/// <param name="builder">Builder for the package being finished; may be null.</param>
/// <param name="packages">List receiving the finished package.</param>
private static void FlushCurrent(RustCargoPackageBuilder? builder, List<RustCargoPackage> packages)
{
    if (builder is { HasData: true } && builder.TryBuild(out var finished))
    {
        packages.Add(finished);
    }
}
/// <summary>
/// Accumulates the fields of one <c>[[package]]</c> table while the lock file is being read.
/// Scalar fields keep the first value assigned; dependencies are de-duplicated and sorted ordinally.
/// </summary>
private sealed class RustCargoPackageBuilder
{
    private readonly SortedSet<string> _dependencies = new(StringComparer.Ordinal);
    private string? _name;
    private string? _version;
    private string? _source;
    private string? _checksum;

    /// <summary>True once a non-blank package name has been recorded.</summary>
    public bool HasData => !string.IsNullOrWhiteSpace(_name);

    /// <summary>Stores a scalar field; keys other than name, version, source and checksum are ignored.</summary>
    public void SetField(ReadOnlySpan<char> key, string value)
    {
        switch (key.ToString())
        {
            case "name":
                _name ??= value.Trim();
                break;
            case "version":
                _version ??= value.Trim();
                break;
            case "source":
                _source ??= value.Trim();
                break;
            case "checksum":
                _checksum ??= value.Trim();
                break;
        }
    }

    /// <summary>Stores array values; only the <c>dependencies</c> key is recorded.</summary>
    public void SetArray(string key, IEnumerable<string> values)
    {
        if (!string.Equals(key, "dependencies", StringComparison.Ordinal))
        {
            return;
        }

        foreach (var raw in values)
        {
            if (string.IsNullOrWhiteSpace(raw))
            {
                continue;
            }

            var dependency = raw.Trim();
            if (dependency.Length > 0)
            {
                _dependencies.Add(dependency);
            }
        }
    }

    /// <summary>Builds the package; fails when no name was recorded. A missing version becomes the empty string.</summary>
    public bool TryBuild(out RustCargoPackage package)
    {
        if (!HasData)
        {
            package = null!;
            return false;
        }

        var version = _version ?? string.Empty;
        package = new RustCargoPackage(_name!, version, _source, _checksum, _dependencies.ToArray());
        return true;
    }
}
}
/// <summary>
/// One <c>[[package]]</c> entry read from a <c>Cargo.lock</c> file.
/// </summary>
/// <param name="Name">Package name.</param>
/// <param name="Version">Package version; the parser stores the empty string when the entry has none.</param>
/// <param name="Source">Value of the <c>source</c> field, or null when absent.</param>
/// <param name="Checksum">Value of the <c>checksum</c> field, or null when absent.</param>
/// <param name="Dependencies">Entries of the <c>dependencies</c> array.</param>
internal sealed record RustCargoPackage(
string Name,
string Version,
string? Source,
string? Checksum,
IReadOnlyList<string> Dependencies);

View File

@@ -0,0 +1,178 @@
using System.Text.Json;
namespace StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
internal static class RustFingerprintScanner
{
private static readonly EnumerationOptions Enumeration = new()
{
MatchCasing = MatchCasing.CaseSensitive,
IgnoreInaccessible = true,
RecurseSubdirectories = true,
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint,
};
private static readonly string FingerprintSegment = $"{Path.DirectorySeparatorChar}.fingerprint{Path.DirectorySeparatorChar}";
/// <summary>
/// Returns a record for every parsable <c>*.json</c> file that sits under a
/// <c>.fingerprint</c> directory below <paramref name="rootPath"/>.
/// </summary>
/// <param name="rootPath">Directory to scan recursively.</param>
/// <param name="cancellationToken">Checked once per enumerated file.</param>
/// <exception cref="ArgumentException"><paramref name="rootPath"/> is null or blank.</exception>
public static IReadOnlyList<RustFingerprintRecord> Scan(string rootPath, CancellationToken cancellationToken)
{
    if (string.IsNullOrWhiteSpace(rootPath))
    {
        throw new ArgumentException("Root path is required", nameof(rootPath));
    }

    var found = new List<RustFingerprintRecord>();
    foreach (var jsonFile in Directory.EnumerateFiles(rootPath, "*.json", Enumeration))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var underFingerprintDirectory = jsonFile.Contains(FingerprintSegment, StringComparison.Ordinal);
        if (underFingerprintDirectory && TryParse(jsonFile, out var parsed))
        {
            found.Add(parsed);
        }
    }

    return found;
}
/// <summary>
/// Reads a fingerprint JSON file and builds a record from it.
/// </summary>
/// <remarks>
/// The crate identity comes from the <c>pkgid</c>, <c>package_id</c> or <c>packageId</c>
/// property, falling back to the containing directory name. Files that cannot be read, are not
/// valid JSON, or whose root is not a JSON object are skipped; without the root check,
/// property lookups on a non-object root throw <see cref="InvalidOperationException"/>.
/// </remarks>
/// <param name="path">Absolute path of the fingerprint file.</param>
/// <param name="record">The parsed record on success.</param>
/// <returns>True when a record with a non-blank crate name was produced.</returns>
private static bool TryParse(string path, out RustFingerprintRecord record)
{
    record = default!;
    try
    {
        using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var document = JsonDocument.Parse(stream);
        var root = document.RootElement;
        if (root.ValueKind != JsonValueKind.Object)
        {
            return false;
        }

        var pkgId = TryGetString(root, "pkgid")
            ?? TryGetString(root, "package_id")
            ?? TryGetString(root, "packageId");
        var (name, version, source) = ParseIdentity(pkgId, path);
        if (string.IsNullOrWhiteSpace(name))
        {
            return false;
        }

        var profile = TryGetString(root, "profile");
        var targetKind = TryGetKind(root);
        record = new RustFingerprintRecord(
            Name: name!,
            Version: version,
            Source: source,
            TargetKind: targetKind,
            Profile: profile,
            AbsolutePath: path);
        return true;
    }
    catch (JsonException)
    {
        return false;
    }
    catch (IOException)
    {
        return false;
    }
    catch (UnauthorizedAccessException)
    {
        return false;
    }
}
/// <summary>
/// Derives crate name, version and source from a package id of the form
/// <c>name version (source)</c>, falling back to the fingerprint directory name.
/// </summary>
/// <remarks>
/// In the fallback, the directory name is cut at its last <c>-</c> and the part before it is
/// used as the crate name; version and source are then unknown.
/// </remarks>
/// <param name="pkgId">Package id string, or null/blank when the file has none.</param>
/// <param name="filePath">Path of the fingerprint file.</param>
private static (string? Name, string? Version, string? Source) ParseIdentity(string? pkgId, string filePath)
{
    if (!string.IsNullOrWhiteSpace(pkgId))
    {
        var id = pkgId.AsSpan().Trim();
        var nameEnd = id.IndexOf(' ');
        if (nameEnd > 0 && nameEnd < id.Length - 1)
        {
            var crateName = id.Slice(0, nameEnd).ToString();
            var tail = id.Slice(nameEnd + 1).Trim();

            var versionEnd = tail.IndexOf(' ');
            if (versionEnd < 0)
            {
                // Only "name version" was supplied.
                return (crateName, tail.ToString(), null);
            }

            var crateVersion = tail.Slice(0, versionEnd).ToString();
            var origin = tail.Slice(versionEnd + 1).Trim();

            // The source is usually wrapped in parentheses; unwrap it when it is.
            var wrapped = origin.Length > 1 && origin[0] == '(' && origin[^1] == ')';
            if (wrapped)
            {
                origin = origin.Slice(1, origin.Length - 2).Trim();
            }

            return (crateName, crateVersion, origin.IsEmpty ? null : origin.ToString());
        }
    }

    var folder = Path.GetFileName(Path.GetDirectoryName(filePath));
    if (string.IsNullOrWhiteSpace(folder))
    {
        return (null, null, null);
    }

    var lastDash = folder.LastIndexOf('-');
    var inferredName = lastDash > 0 ? folder.Substring(0, lastDash) : folder;
    return (inferredName, null, null);
}
/// <summary>
/// Returns the target kind: the first entry of the <c>target_kind</c> array when it is a
/// string, otherwise the <c>target</c> property when it is a string.
/// </summary>
/// <remarks>
/// Returns null for a root that is not a JSON object; property lookups on any other kind throw
/// <see cref="InvalidOperationException"/>.
/// </remarks>
/// <param name="root">Root element of the fingerprint document.</param>
private static string? TryGetKind(JsonElement root)
{
    if (root.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    if (root.TryGetProperty("target_kind", out var array) && array.ValueKind == JsonValueKind.Array && array.GetArrayLength() > 0)
    {
        var first = array[0];
        if (first.ValueKind == JsonValueKind.String)
        {
            return first.GetString();
        }
    }

    if (root.TryGetProperty("target", out var target) && target.ValueKind == JsonValueKind.String)
    {
        return target.GetString();
    }

    return null;
}
/// <summary>
/// Returns the named property when it exists and holds a JSON string; otherwise null.
/// </summary>
/// <remarks>
/// Returns null when <paramref name="element"/> is not a JSON object; property lookups on any
/// other kind throw <see cref="InvalidOperationException"/>.
/// </remarks>
/// <param name="element">Element to read from.</param>
/// <param name="propertyName">Property to look up (case-sensitive).</param>
private static string? TryGetString(JsonElement element, string propertyName)
{
    if (element.ValueKind != JsonValueKind.Object)
    {
        return null;
    }

    if (element.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String)
    {
        return value.GetString();
    }

    return null;
}
}
/// <summary>
/// Crate information read from one fingerprint JSON file.
/// </summary>
/// <param name="Name">Crate name, taken from the package id or inferred from the containing directory name.</param>
/// <param name="Version">Crate version from the package id, or null when unknown.</param>
/// <param name="Source">Crate source from the package id, or null when unknown.</param>
/// <param name="TargetKind">First <c>target_kind</c> entry or the string <c>target</c> property, or null.</param>
/// <param name="Profile">Value of the string <c>profile</c> property, or null.</param>
/// <param name="AbsolutePath">Absolute path of the fingerprint file.</param>
internal sealed record RustFingerprintRecord(
string Name,
string? Version,
string? Source,
string? TargetKind,
string? Profile,
string AbsolutePath);

View File

@@ -1,6 +0,0 @@
namespace StellaOps.Scanner.Analyzers.Lang.Rust;
internal static class Placeholder
{
// Analyzer implementation will be added during Sprint LA5.
}

View File

@@ -7,7 +7,7 @@ public sealed class RustAnalyzerPlugin : ILanguageAnalyzerPlugin
{
public string Name => "StellaOps.Scanner.Analyzers.Lang.Rust";
public bool IsAvailable(IServiceProvider services) => false;
public bool IsAvailable(IServiceProvider services) => services is not null;
public ILanguageAnalyzer CreateAnalyzer(IServiceProvider services)
{

View File

@@ -1,6 +1,7 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using StellaOps.Scanner.Analyzers.Lang.Rust.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Rust;
@@ -11,5 +12,55 @@ public sealed class RustLanguageAnalyzer : ILanguageAnalyzer
public string DisplayName => "Rust Analyzer (preview)";
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
=> ValueTask.FromException(new NotImplementedException("Rust analyzer implementation pending Sprint LA5."));
{
ArgumentNullException.ThrowIfNull(context);
ArgumentNullException.ThrowIfNull(writer);
cancellationToken.ThrowIfCancellationRequested();
var collection = RustAnalyzerCollector.Collect(context, cancellationToken);
EmitRecords(Id, writer, collection.Crates);
EmitRecords(Id, writer, collection.Heuristics);
EmitRecords(Id, writer, collection.Fallbacks);
return ValueTask.CompletedTask;
}
/// <summary>
/// Writes each record to the component writer: by purl when the record has one, otherwise
/// under its explicit component key. Null entries are skipped.
/// </summary>
/// <param name="analyzerId">Identifier of the emitting analyzer.</param>
/// <param name="writer">Destination for the components.</param>
/// <param name="records">Records to emit, in order.</param>
private static void EmitRecords(string analyzerId, LanguageComponentWriter writer, IReadOnlyList<RustComponentRecord> records)
{
    foreach (var record in records)
    {
        switch (record)
        {
            case null:
                continue;

            case { Purl: { Length: > 0 } purl }:
                writer.AddFromPurl(
                    analyzerId: analyzerId,
                    purl: purl,
                    name: record.Name,
                    version: record.Version,
                    type: record.Type,
                    metadata: record.Metadata,
                    evidence: record.Evidence,
                    usedByEntrypoint: record.UsedByEntrypoint);
                break;

            default:
                writer.AddFromExplicitKey(
                    analyzerId: analyzerId,
                    componentKey: record.ComponentKey,
                    purl: null,
                    name: record.Name,
                    version: record.Version,
                    type: record.Type,
                    metadata: record.Metadata,
                    evidence: record.Evidence,
                    usedByEntrypoint: record.UsedByEntrypoint);
                break;
        }
    }
}
}

View File

@@ -2,9 +2,9 @@
| Seq | ID | Status | Depends on | Description | Exit Criteria |
|-----|----|--------|------------|-------------|---------------|
| 1 | SCANNER-ANALYZERS-LANG-10-306A | TODO | SCANNER-ANALYZERS-LANG-10-307 | Parse Cargo metadata (`Cargo.lock`, `.fingerprint`, `.metadata`) and map crates to components with evidence. | Fixtures confirm crate attribution ≥85% coverage; metadata normalized; evidence includes path + hash. |
| 2 | SCANNER-ANALYZERS-LANG-10-306B | TODO | SCANNER-ANALYZERS-LANG-10-306A | Implement heuristic classifier using ELF section names, symbol mangling, and `.comment` data for stripped binaries. | Heuristic output flagged as `heuristic`; regression tests ensure no false “observed” classifications. |
| 3 | SCANNER-ANALYZERS-LANG-10-306C | TODO | SCANNER-ANALYZERS-LANG-10-306B | Integrate binary hash fallback (`bin:{sha256}`) and tie into shared quiet provenance helpers. | Fallback path deterministic; shared helpers reused; tests verify consistent hashing. |
| 1 | SCANNER-ANALYZERS-LANG-10-306A | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-307 | Parse Cargo metadata (`Cargo.lock`, `.fingerprint`, `.metadata`) and map crates to components with evidence. | Fixtures confirm crate attribution ≥85% coverage; metadata normalized; evidence includes path + hash. |
| 2 | SCANNER-ANALYZERS-LANG-10-306B | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-306A | Implement heuristic classifier using ELF section names, symbol mangling, and `.comment` data for stripped binaries. | Heuristic output flagged as `heuristic`; regression tests ensure no false “observed” classifications. |
| 3 | SCANNER-ANALYZERS-LANG-10-306C | DONE (2025-10-22) | SCANNER-ANALYZERS-LANG-10-306B | Integrate binary hash fallback (`bin:{sha256}`) and tie into shared quiet provenance helpers. | Fallback path deterministic; shared helpers reused; tests verify consistent hashing. |
| 4 | SCANNER-ANALYZERS-LANG-10-307R | TODO | SCANNER-ANALYZERS-LANG-10-306C | Finalize shared helper usage (license, usage flags) and concurrency-safe caches. | Analyzer uses shared utilities; concurrency tests pass; no race conditions. |
| 5 | SCANNER-ANALYZERS-LANG-10-308R | TODO | SCANNER-ANALYZERS-LANG-10-307R | Determinism fixtures + performance benchmarks; compare against competitor heuristic coverage. | Fixtures `Fixtures/lang/rust/` committed; determinism guard; benchmark shows ≥15% better coverage vs competitor. |
| 6 | SCANNER-ANALYZERS-LANG-10-309R | TODO | SCANNER-ANALYZERS-LANG-10-308R | Package plug-in manifest + Offline Kit documentation; ensure Worker integration. | Manifest copied; Worker loads analyzer; Offline Kit doc updated. |