Files
git.stella-ops.org/src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Go/GoLanguageAnalyzer.cs
StellaOps Bot 05597616d6 feat: Add Go module and workspace test fixtures
- Created expected JSON files for Go modules and workspaces.
- Added go.mod and go.sum files for example projects.
- Implemented private module structure with expected JSON output.
- Introduced vendored dependencies with corresponding expected JSON.
- Developed PostgresGraphJobStore for managing graph jobs.
- Established SQL migration scripts for graph jobs schema.
- Implemented GraphJobRepository for CRUD operations on graph jobs.
- Created IGraphJobRepository interface for repository abstraction.
- Added unit tests for GraphJobRepository to ensure functionality.
2025-12-06 20:04:03 +02:00

663 lines
23 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Security.Cryptography;
using System.Linq;
using StellaOps.Scanner.Analyzers.Lang.Go.Internal;
namespace StellaOps.Scanner.Analyzers.Lang.Go;
public sealed class GoLanguageAnalyzer : ILanguageAnalyzer
{
/// <summary>Identifier passed as the analyzer id for every component this analyzer writes.</summary>
public string Id
{
    get { return "golang"; }
}

/// <summary>Display name of this analyzer.</summary>
public string DisplayName
{
    get { return "Go Analyzer"; }
}
/// <summary>
/// Runs the Go analysis in two synchronous phases: source manifests first, then compiled binaries.
/// </summary>
/// <param name="context">Analysis context; must not be null.</param>
/// <param name="writer">Sink that receives the discovered components; must not be null.</param>
/// <param name="cancellationToken">Token forwarded to both scanning phases.</param>
/// <returns>An already-completed <see cref="ValueTask"/>; all work is done before this method returns.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="context"/> or <paramref name="writer"/> is null.
/// </exception>
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(context);
    ArgumentNullException.ThrowIfNull(writer);

    // "path@version" keys shared by both phases. Because the source phase runs first, the set
    // only ever suppresses repeated *source* modules; the binary phase records its modules in
    // the set but emits them regardless of what is already there.
    HashSet<string> seenModuleKeys = new(StringComparer.Ordinal);

    // Phase 1: go.mod / go.sum / go.work / vendor.
    ScanSourceFiles(context, writer, seenModuleKeys, cancellationToken);

    // Phase 2: compiled Go binaries.
    ScanBinaries(context, writer, seenModuleKeys, cancellationToken);

    return ValueTask.CompletedTask;
}
/// <summary>
/// Discovers Go projects under the context root and emits their main module and dependencies
/// from the source inventory.
/// </summary>
/// <param name="context">Analysis context providing the root path.</param>
/// <param name="writer">Sink that receives the emitted components.</param>
/// <param name="emittedModules">Shared set of module keys used to skip modules that were already emitted.</param>
/// <param name="cancellationToken">Checked once per project and once per dependency module.</param>
private void ScanSourceFiles(
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    HashSet<string> emittedModules,
    CancellationToken cancellationToken)
{
    var discovered = GoProjectDiscoverer.Discover(context.RootPath, cancellationToken);

    // An empty discovery result simply yields zero iterations.
    foreach (var goProject in discovered)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // A workspace may produce several inventories; a single module produces exactly one.
        IReadOnlyList<GoSourceInventory.SourceInventoryResult> candidates;
        if (goProject.IsWorkspace)
        {
            candidates = GoSourceInventory.BuildWorkspaceInventory(goProject, cancellationToken);
        }
        else
        {
            candidates = new[] { GoSourceInventory.BuildInventory(goProject) };
        }

        // Empty inventories (from either branch) are filtered here.
        foreach (var sourceInventory in candidates.Where(static candidate => !candidate.IsEmpty))
        {
            // The main module is only emitted when go.mod declared a module path.
            if (!string.IsNullOrEmpty(sourceInventory.ModulePath))
            {
                EmitMainModuleFromSource(sourceInventory, goProject, context, writer, emittedModules);
            }

            // Dependencies are emitted in ordinal path order.
            var orderedDependencies = sourceInventory.Modules.OrderBy(m => m.Path, StringComparer.Ordinal);
            foreach (var dependency in orderedDependencies)
            {
                cancellationToken.ThrowIfCancellationRequested();
                EmitSourceModule(dependency, sourceInventory, goProject, context, writer, emittedModules);
            }
        }
    }
}
/// <summary>
/// Scans candidate files under the context root for Go binaries. Binaries with readable build
/// info are emitted module by module; binaries without it are classified heuristically and
/// emitted afterwards as fallback components.
/// </summary>
/// <param name="context">Analysis context providing the root path.</param>
/// <param name="writer">Sink that receives the emitted components.</param>
/// <param name="emittedModules">Shared set of module keys, forwarded to <c>EmitComponents</c>.</param>
/// <param name="cancellationToken">Checked once per enumerated file, per candidate and per fallback binary.</param>
private void ScanBinaries(
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    HashSet<string> emittedModules,
    CancellationToken cancellationToken)
{
    // Cheap format pre-filter before any build-info parsing is attempted.
    var binaryCandidates = new List<string>();
    foreach (var file in GoBinaryScanner.EnumerateCandidateFiles(context.RootPath))
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (!GoBinaryFormatDetector.IsPotentialBinary(file))
        {
            continue;
        }

        binaryCandidates.Add(file);
    }

    // Ordinal sort so candidates are processed in a stable order.
    binaryCandidates.Sort(StringComparer.Ordinal);

    var strippedBinaries = new List<GoStrippedBinaryClassification>();
    foreach (var binaryPath in binaryCandidates)
    {
        cancellationToken.ThrowIfCancellationRequested();

        if (GoBuildInfoProvider.TryGetBuildInfo(binaryPath, out var info) && info is not null)
        {
            EmitComponents(info, context, writer, emittedModules);
            continue;
        }

        // No usable build info: keep the binary only if the stripped-binary classifier accepts it.
        if (GoBinaryScanner.TryClassifyStrippedBinary(binaryPath, out var stripped))
        {
            strippedBinaries.Add(stripped);
        }
    }

    // Fallback components are written only after every build-info binary has been handled.
    foreach (var stripped in strippedBinaries)
    {
        cancellationToken.ThrowIfCancellationRequested();
        EmitFallbackComponent(stripped, context, writer);
    }
}
/// <summary>
/// Emits the main module declared by a project's go.mod as a source-provenance component
/// with the placeholder version <c>(devel)</c> and no purl.
/// </summary>
/// <param name="inventory">Source inventory; its <c>ModulePath</c> is expected to be non-empty (the caller checks).</param>
/// <param name="project">Project the inventory was built from.</param>
/// <param name="context">Used to turn absolute paths into paths relative to the scan root.</param>
/// <param name="writer">Sink that receives the component.</param>
/// <param name="emittedModules">Shared set of module keys; nothing is emitted when the key is already present.</param>
private void EmitMainModuleFromSource(
    GoSourceInventory.SourceInventoryResult inventory,
    GoProjectDiscoverer.GoProject project,
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    HashSet<string> emittedModules)
{
    var mainPath = inventory.ModulePath!;

    // Source go.mod carries no version for the main module, so "(devel)" stands in for it.
    var alreadySeen = !emittedModules.Add($"{mainPath}@(devel)");
    if (alreadySeen)
    {
        return;
    }

    var projectRelative = context.GetRelativePath(project.RootPath);

    string? goModLocator = null;
    if (project.HasGoMod)
    {
        goModLocator = context.GetRelativePath(project.GoModPath!);
    }

    var metadata = new SortedDictionary<string, string?>(StringComparer.Ordinal);
    metadata["modulePath"] = mainPath;
    metadata["modulePath.main"] = mainPath;
    metadata["provenance"] = "source";

    if (!string.IsNullOrEmpty(inventory.GoVersion))
    {
        metadata["go.version"] = inventory.GoVersion;
    }

    if (!string.IsNullOrEmpty(projectRelative))
    {
        metadata["projectPath"] = projectRelative;
    }

    if (project.IsWorkspace)
    {
        metadata["workspace"] = "true";
    }

    // At most one evidence entry: the go.mod file, when the project has one.
    var evidence = new List<LanguageComponentEvidence>();
    if (!string.IsNullOrEmpty(goModLocator))
    {
        var goModEvidence = new LanguageComponentEvidence(
            LanguageEvidenceKind.File,
            "go.mod",
            goModLocator,
            mainPath,
            null);
        evidence.Add(goModEvidence);
    }

    evidence.Sort(static (first, second) => string.CompareOrdinal(first.ComparisonKey, second.ComparisonKey));

    writer.AddFromExplicitKey(
        analyzerId: Id,
        componentKey: $"golang::source::{mainPath}::(devel)",
        purl: null,
        name: mainPath,
        version: "(devel)",
        type: "golang",
        metadata: metadata,
        evidence: evidence);
}
/// <summary>
/// Emits one dependency module found in source manifests, unless a module with the same
/// <c>path@version</c> key was already recorded in <paramref name="emittedModules"/>.
/// </summary>
/// <param name="module">Dependency module to emit.</param>
/// <param name="inventory">Inventory the module came from (not read by this method).</param>
/// <param name="project">Owning project; supplies the go.mod location used as evidence locator.</param>
/// <param name="context">Used to turn absolute paths into paths relative to the scan root.</param>
/// <param name="writer">Sink that receives the component.</param>
/// <param name="emittedModules">Shared set of module keys used for de-duplication.</param>
private void EmitSourceModule(
    GoSourceInventory.GoSourceModule module,
    GoSourceInventory.SourceInventoryResult inventory,
    GoProjectDiscoverer.GoProject project,
    LanguageAnalyzerContext context,
    LanguageComponentWriter writer,
    HashSet<string> emittedModules)
{
    var identity = $"{module.Path}@{module.Version}";
    if (!emittedModules.Add(identity))
    {
        // Same path@version was emitted earlier in this run.
        return;
    }

    var purl = BuildPurl(module.Path, module.Version);

    string? goModLocator = null;
    if (project.HasGoMod)
    {
        goModLocator = context.GetRelativePath(project.GoModPath!);
    }

    var metadata = new SortedDictionary<string, string?>(StringComparer.Ordinal);

    // Writes "true" under the key when the flag is set.
    void Flag(string key, bool isSet)
    {
        if (isSet)
        {
            metadata[key] = "true";
        }
    }

    // Writes the value under the key when it is neither null nor empty.
    void PutIfPresent(string key, string? value)
    {
        if (!string.IsNullOrEmpty(value))
        {
            metadata[key] = value;
        }
    }

    metadata["modulePath"] = module.Path;
    metadata["moduleVersion"] = module.Version;
    metadata["provenance"] = "source";

    PutIfPresent("moduleSum", module.Checksum);
    Flag("dependency.direct", module.IsDirect);
    Flag("dependency.indirect", module.IsIndirect);
    Flag("vendored", module.IsVendored);
    Flag("private", module.IsPrivate);

    // Only categories other than "public" are recorded.
    if (!string.Equals(module.ModuleCategory, "public", StringComparison.Ordinal))
    {
        metadata["moduleCategory"] = module.ModuleCategory;
    }

    PutIfPresent("registry", module.Registry);

    if (module.IsReplaced)
    {
        metadata["replaced"] = "true";
        PutIfPresent("replacedBy.path", module.ReplacementPath);
        PutIfPresent("replacedBy.version", module.ReplacementVersion);
    }

    Flag("excluded", module.IsExcluded);

    // Evidence points at the project's go.mod when there is one.
    var evidence = new List<LanguageComponentEvidence>();
    if (!string.IsNullOrEmpty(goModLocator))
    {
        evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.Metadata,
            module.Source,
            goModLocator,
            identity,
            module.Checksum));
    }

    evidence.Sort(static (first, second) => string.CompareOrdinal(first.ComparisonKey, second.ComparisonKey));

    if (string.IsNullOrEmpty(purl))
    {
        // No purl could be built (blank path or version): fall back to an explicit key.
        writer.AddFromExplicitKey(
            analyzerId: Id,
            componentKey: $"golang::source::{module.Path}@{module.Version}",
            purl: null,
            name: module.Path,
            version: module.Version,
            type: "golang",
            metadata: metadata,
            evidence: evidence);
        return;
    }

    writer.AddFromPurl(
        analyzerId: Id,
        purl: purl,
        name: module.Path,
        version: module.Version,
        type: "golang",
        metadata: metadata,
        evidence: evidence,
        usedByEntrypoint: false);
}
/// <summary>
/// Emits one component per module recorded in a binary's build info: the main module first,
/// then the dependencies ordered by path and version (ordinal).
/// </summary>
/// <param name="buildInfo">Build info extracted from the binary.</param>
/// <param name="context">Supplies relative paths and entrypoint usage hints.</param>
/// <param name="writer">Sink that receives the components.</param>
/// <param name="emittedModules">
/// Shared set of module keys. Keys are recorded here, but the result of the insert is not
/// inspected, so a module is emitted even when its key was already present.
/// </param>
private void EmitComponents(GoBuildInfo buildInfo, LanguageAnalyzerContext context, LanguageComponentWriter writer, HashSet<string> emittedModules)
{
    var sortedDependencies = buildInfo.Dependencies
        .OrderBy(static dependency => dependency.Path, StringComparer.Ordinal)
        .ThenBy(static dependency => dependency.Version, StringComparer.Ordinal);

    var modules = new List<GoModule>();
    modules.Add(buildInfo.MainModule);
    modules.AddRange(sortedDependencies);

    var binaryRelativePath = context.GetRelativePath(buildInfo.AbsoluteBinaryPath);

    // Computed lazily by the helpers and shared across all modules of this binary.
    string? binaryHash = null;

    foreach (var current in modules)
    {
        var versionForKey = current.Version ?? "(devel)";
        emittedModules.Add($"{current.Path}@{versionForKey}");

        var metadata = BuildMetadata(buildInfo, current, binaryRelativePath);
        var evidence = BuildEvidence(buildInfo, current, binaryRelativePath, context, ref binaryHash);

        // Only the main module can be flagged as used by an entrypoint.
        var usedByEntrypoint = current.IsMain && context.UsageHints.IsPathUsed(buildInfo.AbsoluteBinaryPath);

        var purl = BuildPurl(current.Path, current.Version);
        if (string.IsNullOrEmpty(purl))
        {
            // No purl available: identify the component by module path, binary path and hash.
            var fallbackKey = BuildFallbackComponentKey(current, buildInfo, binaryRelativePath, ref binaryHash);
            writer.AddFromExplicitKey(
                analyzerId: Id,
                componentKey: fallbackKey,
                purl: null,
                name: current.Path,
                version: current.Version,
                type: "golang",
                metadata: metadata,
                evidence: evidence,
                usedByEntrypoint: usedByEntrypoint);
            continue;
        }

        writer.AddFromPurl(
            analyzerId: Id,
            purl: purl,
            name: current.Path,
            version: current.Version,
            type: "golang",
            metadata: metadata,
            evidence: evidence,
            usedByEntrypoint: usedByEntrypoint);
    }
}
/// <summary>
/// Builds the metadata entries for a module found in a binary, sorted by key (ordinal).
/// </summary>
/// <param name="buildInfo">Build info of the binary; its settings and DWARF data are used for the main module only.</param>
/// <param name="module">Module the metadata describes.</param>
/// <param name="binaryRelativePath">Binary path relative to the scan root; an empty value is reported as ".".</param>
/// <returns>Key/value pairs ordered by key.</returns>
private static IEnumerable<KeyValuePair<string, string?>> BuildMetadata(GoBuildInfo buildInfo, GoModule module, string binaryRelativePath)
{
    var pairs = new List<KeyValuePair<string, string?>>(16);

    // Unconditional append.
    void Append(string key, string? value)
    {
        pairs.Add(new KeyValuePair<string, string?>(key, value));
    }

    // Append only when the value is neither null nor empty.
    void AppendIfPresent(string key, string? value)
    {
        if (!string.IsNullOrEmpty(value))
        {
            Append(key, value);
        }
    }

    Append("modulePath", module.Path);
    Append("binaryPath", string.IsNullOrEmpty(binaryRelativePath) ? "." : binaryRelativePath);
    AppendIfPresent("moduleVersion", module.Version);
    AppendIfPresent("moduleSum", module.Sum);

    if (module.Replacement is { } replacement)
    {
        Append("replacedBy.path", replacement.Path);
        AppendIfPresent("replacedBy.version", replacement.Version);
        AppendIfPresent("replacedBy.sum", replacement.Sum);
    }

    if (module.IsMain)
    {
        Append("go.version", buildInfo.GoVersion);
        Append("modulePath.main", buildInfo.ModulePath);

        // Build settings are exposed as "build.<key>"; the first occurrence of a key wins.
        foreach (var setting in buildInfo.Settings)
        {
            var settingKey = $"build.{setting.Key}";
            var alreadyPresent = pairs.Exists(pair => string.Equals(pair.Key, settingKey, StringComparison.Ordinal));
            if (!alreadyPresent)
            {
                Append(settingKey, setting.Value);
            }
        }

        // DWARF-derived VCS data only fills keys the build settings did not already provide.
        if (buildInfo.DwarfMetadata is { } dwarf)
        {
            string? modifiedText = dwarf.Modified is { } modified
                ? (modified ? "true" : "false")
                : null;

            AddIfMissing(pairs, "build.vcs", dwarf.VcsSystem);
            AddIfMissing(pairs, "build.vcs.revision", dwarf.Revision);
            AddIfMissing(pairs, "build.vcs.modified", modifiedText);
            AddIfMissing(pairs, "build.vcs.time", dwarf.TimestampUtc);
        }
    }

    pairs.Sort(static (first, second) => string.CompareOrdinal(first.Key, second.Key));
    return pairs;
}
/// <summary>
/// Emits a purl-less component of type <c>bin</c> for a binary that was classified as Go
/// heuristically because no build info could be read from it, and records the heuristic
/// in the analyzer metrics.
/// </summary>
/// <param name="strippedBinary">Classification result for the binary.</param>
/// <param name="context">Supplies relative paths and entrypoint usage hints.</param>
/// <param name="writer">Sink that receives the component.</param>
private void EmitFallbackComponent(GoStrippedBinaryClassification strippedBinary, LanguageAnalyzerContext context, LanguageComponentWriter writer)
{
    var absolutePath = strippedBinary.AbsolutePath;

    var relative = context.GetRelativePath(absolutePath);
    var locator = string.IsNullOrEmpty(relative) ? "." : relative;
    var usedByEntrypoint = context.UsageHints.IsPathUsed(absolutePath);

    // May be null when the file cannot be read.
    var sha256 = ComputeBinaryHash(absolutePath);
    var hasHash = !string.IsNullOrEmpty(sha256);
    var hasVersionHint = !string.IsNullOrEmpty(strippedBinary.GoVersionHint);

    var metadata = new List<KeyValuePair<string, string?>>();
    metadata.Add(new KeyValuePair<string, string?>("binaryPath", locator));
    metadata.Add(new KeyValuePair<string, string?>("languageHint", "golang"));
    metadata.Add(new KeyValuePair<string, string?>("provenance", "binary"));

    if (hasHash)
    {
        metadata.Add(new KeyValuePair<string, string?>("binary.sha256", sha256));
    }

    if (hasVersionHint)
    {
        metadata.Add(new KeyValuePair<string, string?>("go.version.hint", strippedBinary.GoVersionHint));
    }

    metadata.Sort(static (first, second) => string.CompareOrdinal(first.Key, second.Key));

    var evidence = new List<LanguageComponentEvidence>();
    evidence.Add(new LanguageComponentEvidence(
        LanguageEvidenceKind.File,
        "binary",
        locator,
        null,
        hasHash ? sha256 : null));

    // Which heuristic identified the binary; unknown indicators add no evidence.
    string? detectionSource;
    switch (strippedBinary.Indicator)
    {
        case GoStrippedBinaryIndicator.BuildId:
            detectionSource = "build-id";
            break;
        case GoStrippedBinaryIndicator.GoRuntimeMarkers:
            detectionSource = "runtime-markers";
            break;
        default:
            detectionSource = null;
            break;
    }

    if (!string.IsNullOrEmpty(detectionSource))
    {
        evidence.Add(new LanguageComponentEvidence(
            LanguageEvidenceKind.Metadata,
            "go.heuristic",
            "classification",
            detectionSource,
            null));
    }

    evidence.Sort(static (first, second) => string.CompareOrdinal(first.ComparisonKey, second.ComparisonKey));

    var displayName = Path.GetFileName(absolutePath);
    if (string.IsNullOrWhiteSpace(displayName))
    {
        displayName = "golang-binary";
    }

    // Prefer the content hash as identity; fall back to the path when hashing failed.
    var componentKey = hasHash
        ? $"golang::bin::sha256:{sha256}"
        : $"golang::bin::{locator}";

    writer.AddFromExplicitKey(
        analyzerId: Id,
        componentKey: componentKey,
        purl: null,
        name: displayName,
        version: null,
        type: "bin",
        metadata: metadata,
        evidence: evidence,
        usedByEntrypoint: usedByEntrypoint);

    GoAnalyzerMetrics.RecordHeuristic(strippedBinary.Indicator, hasVersionHint);
}
/// <summary>
/// Builds the evidence entries for a module found in a binary, sorted by comparison key (ordinal).
/// </summary>
/// <remarks>
/// Every module gets a <c>go.buildinfo</c> entry. The main module additionally gets one entry
/// per build setting and one per available DWARF VCS field. Modules for which no purl can be
/// built also get a file entry carrying the binary's SHA-256, when the hash can be computed.
/// </remarks>
/// <param name="buildInfo">Build info of the binary the module was read from.</param>
/// <param name="module">Module the evidence describes.</param>
/// <param name="binaryRelativePath">Binary path relative to the scan root; an empty value is reported as ".".</param>
/// <param name="context">Analysis context (not read by this method).</param>
/// <param name="binaryHash">
/// Cached hash of the binary. Computed on first need and written back so later calls for the
/// same binary can reuse it; stays null if hashing fails.
/// </param>
/// <returns>The evidence entries in sorted order.</returns>
private static IEnumerable<LanguageComponentEvidence> BuildEvidence(GoBuildInfo buildInfo, GoModule module, string binaryRelativePath, LanguageAnalyzerContext context, ref string? binaryHash)
{
    var evidence = new List<LanguageComponentEvidence>
    {
        new(
            LanguageEvidenceKind.Metadata,
            "go.buildinfo",
            $"module:{module.Path}",
            module.Version ?? string.Empty,
            module.Sum)
    };

    if (module.IsMain)
    {
        foreach (var setting in buildInfo.Settings)
        {
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Metadata,
                "go.buildinfo.setting",
                setting.Key,
                setting.Value,
                null));
        }

        if (buildInfo.DwarfMetadata is { } dwarf)
        {
            // Appends one "go.dwarf" metadata entry.
            void AddDwarf(string locator, string? value)
            {
                evidence.Add(new LanguageComponentEvidence(
                    LanguageEvidenceKind.Metadata,
                    "go.dwarf",
                    locator,
                    value,
                    null));
            }

            if (!string.IsNullOrWhiteSpace(dwarf.VcsSystem))
            {
                AddDwarf("vcs", dwarf.VcsSystem);
            }

            if (!string.IsNullOrWhiteSpace(dwarf.Revision))
            {
                AddDwarf("vcs.revision", dwarf.Revision);
            }

            if (dwarf.Modified.HasValue)
            {
                AddDwarf("vcs.modified", dwarf.Modified.Value ? "true" : "false");
            }

            if (!string.IsNullOrWhiteSpace(dwarf.TimestampUtc))
            {
                AddDwarf("vcs.time", dwarf.TimestampUtc);
            }
        }
    }

    // Attach binary hash evidence for fallback components without purl. The check goes through
    // BuildPurl so it matches the purl/explicit-key decision made when the component is written;
    // a plain IsNullOrEmpty(version) test would miss whitespace-only versions, which also
    // produce no purl and therefore get a hash-based fallback key.
    if (BuildPurl(module.Path, module.Version) is null)
    {
        binaryHash ??= ComputeBinaryHash(buildInfo.AbsoluteBinaryPath);
        if (!string.IsNullOrEmpty(binaryHash))
        {
            evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.File,
                "binary",
                string.IsNullOrEmpty(binaryRelativePath) ? "." : binaryRelativePath,
                null,
                binaryHash));
        }
    }

    evidence.Sort(static (left, right) => string.CompareOrdinal(left.ComparisonKey, right.ComparisonKey));
    return evidence;
}
/// <summary>
/// Builds a <c>pkg:golang/&lt;path&gt;@&lt;version&gt;</c> package URL for a module.
/// </summary>
/// <param name="path">Module path; surrounding whitespace is trimmed, the value is otherwise used verbatim.</param>
/// <param name="version">Module version; trimmed and then escaped with <see cref="Uri.EscapeDataString(string)"/>.</param>
/// <returns>The purl, or null when either argument is null, empty or whitespace.</returns>
private static string? BuildPurl(string path, string? version)
{
    if (string.IsNullOrWhiteSpace(path))
    {
        return null;
    }

    if (string.IsNullOrWhiteSpace(version))
    {
        return null;
    }

    var modulePath = path.Trim();
    var escapedVersion = Uri.EscapeDataString(version.Trim());
    return $"pkg:golang/{modulePath}@{escapedVersion}";
}
/// <summary>
/// Builds the explicit component key used for a binary-derived module that has no purl.
/// </summary>
/// <param name="module">Module the key identifies.</param>
/// <param name="buildInfo">Build info of the binary; supplies the absolute path that is hashed.</param>
/// <param name="binaryRelativePath">Binary path relative to the scan root; an empty value is written as ".".</param>
/// <param name="binaryHash">
/// Cached hash of the binary. Computed here if still null and written back for reuse.
/// </param>
/// <returns>
/// <c>golang::module:&lt;path&gt;::&lt;binary&gt;::&lt;hash&gt;</c>, or the same key without the
/// hash segment when the hash is unavailable.
/// </returns>
private static string BuildFallbackComponentKey(GoModule module, GoBuildInfo buildInfo, string binaryRelativePath, ref string? binaryHash)
{
    var locator = string.IsNullOrEmpty(binaryRelativePath) ? "." : binaryRelativePath;

    if (binaryHash is null)
    {
        binaryHash = ComputeBinaryHash(buildInfo.AbsoluteBinaryPath);
    }

    return string.IsNullOrEmpty(binaryHash)
        ? $"golang::module:{module.Path}::{locator}"
        : $"golang::module:{module.Path}::{locator}::{binaryHash}";
}
/// <summary>
/// Appends a key/value pair unless the key or value is blank, or the key is already present.
/// </summary>
/// <param name="entries">List to append to; searched linearly for an existing key (ordinal match).</param>
/// <param name="key">Key to add; ignored when null, empty or whitespace.</param>
/// <param name="value">Value to add; ignored when null, empty or whitespace.</param>
private static void AddIfMissing(List<KeyValuePair<string, string?>> entries, string key, string? value)
{
    var hasContent = !string.IsNullOrWhiteSpace(key) && !string.IsNullOrWhiteSpace(value);
    if (!hasContent)
    {
        return;
    }

    foreach (var existing in entries)
    {
        if (string.Equals(existing.Key, key, StringComparison.Ordinal))
        {
            // First writer wins; the existing value is left untouched.
            return;
        }
    }

    entries.Add(new KeyValuePair<string, string?>(key, value));
}
/// <summary>
/// Computes the SHA-256 digest of a file as a lowercase hexadecimal string.
/// </summary>
/// <param name="path">Path of the file to hash.</param>
/// <returns>
/// The lowercase hex digest, or null when the file cannot be read because of an
/// <see cref="IOException"/> or <see cref="UnauthorizedAccessException"/>.
/// Other exception types are not caught.
/// </returns>
private static string? ComputeBinaryHash(string path)
{
    try
    {
        // File.OpenRead opens with FileMode.Open / FileAccess.Read / FileShare.Read.
        using var input = File.OpenRead(path);
        using var hasher = SHA256.Create();
        var digest = hasher.ComputeHash(input);
        return Convert.ToHexString(digest).ToLowerInvariant();
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Best effort: an unreadable file simply has no hash.
        return null;
    }
}
}