- Implemented `chrome-path.js` to define functions for locating Chromium binaries across different platforms and nested directories.
- Added `verify-chromium.js` to check for the presence of the Chromium binary and log the results, including the candidate paths checked.
- The scripts support Linux, Windows, and macOS environments, making Chromium binary detection more flexible.
990 lines
32 KiB
C#
990 lines
32 KiB
C#
using System.Buffers;
|
|
using System.Globalization;
|
|
using System.Security.Cryptography;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
|
|
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
|
|
|
internal static class PythonDistributionLoader
|
|
{
|
|
|
|
/// <summary>
/// Builds a <see cref="PythonDistribution"/> from the files found inside a <c>.dist-info</c> directory.
/// </summary>
/// <param name="context">Analyzer context used for relative-path normalisation and usage hints.</param>
/// <param name="distInfoPath">Path of the <c>.dist-info</c> directory to inspect.</param>
/// <param name="cancellationToken">Token checked up front and passed to every file read.</param>
/// <returns>
/// The loaded distribution, or <c>null</c> when the directory does not exist or when neither METADATA
/// nor the directory name yields both a name and a version.
/// </returns>
public static async Task<PythonDistribution?> LoadAsync(LanguageAnalyzerContext context, string distInfoPath, CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var directoryUsable = !string.IsNullOrWhiteSpace(distInfoPath) && Directory.Exists(distInfoPath);
    if (!directoryUsable)
    {
        return null;
    }

    var metadataFile = Path.Combine(distInfoPath, "METADATA");
    var wheelFile = Path.Combine(distInfoPath, "WHEEL");
    var entryPointsFile = Path.Combine(distInfoPath, "entry_points.txt");
    var recordFile = Path.Combine(distInfoPath, "RECORD");
    var installerFile = Path.Combine(distInfoPath, "INSTALLER");
    var directUrlFile = Path.Combine(distInfoPath, "direct_url.json");

    var document = await PythonMetadataDocument.LoadAsync(metadataFile, cancellationToken).ConfigureAwait(false);

    // METADATA wins; the "<name>-<version>.dist-info" directory name is the fallback.
    var rawName = document.GetFirst("Name") ?? ExtractNameFromDirectory(distInfoPath);
    var rawVersion = document.GetFirst("Version") ?? ExtractVersionFromDirectory(distInfoPath);
    if (string.IsNullOrWhiteSpace(rawName) || string.IsNullOrWhiteSpace(rawVersion))
    {
        return null;
    }

    var packageName = rawName.Trim();
    var packageVersion = rawVersion.Trim();
    var purl = $"pkg:pypi/{NormalizePackageName(packageName)}@{packageVersion}";

    var metadata = new List<KeyValuePair<string, string?>>();
    var evidence = new List<LanguageComponentEvidence>();

    AddFileEvidence(context, metadataFile, "METADATA", evidence);
    AddFileEvidence(context, wheelFile, "WHEEL", evidence);
    AddFileEvidence(context, entryPointsFile, "entry_points.txt", evidence);

    AppendMetadata(metadata, "distInfoPath", PythonPathHelper.NormalizeRelative(context, distInfoPath));
    AppendMetadata(metadata, "name", packageName);
    AppendMetadata(metadata, "version", packageVersion);

    // Headers copied verbatim (first occurrence only), in output order.
    var singleValuedHeaders = new (string Key, string Header)[]
    {
        ("summary", "Summary"),
        ("license", "License"),
        ("homePage", "Home-page"),
        ("author", "Author"),
        ("authorEmail", "Author-email"),
        ("projectUrl", "Project-URL"),
        ("requiresPython", "Requires-Python"),
    };

    foreach (var (key, header) in singleValuedHeaders)
    {
        AppendMetadata(metadata, key, document.GetFirst(header));
    }

    // Repeatable headers are flattened into one ';'-separated value.
    var repeatableHeaders = new (string Key, string Header)[]
    {
        ("classifiers", "Classifier"),
        ("requiresDist", "Requires-Dist"),
    };

    foreach (var (key, header) in repeatableHeaders)
    {
        var occurrences = document.GetAll(header);
        if (occurrences.Count > 0)
        {
            AppendMetadata(metadata, key, string.Join(';', occurrences));
        }
    }

    var entryPoints = await PythonEntryPointSet.LoadAsync(entryPointsFile, cancellationToken).ConfigureAwait(false);
    foreach (var (groupName, members) in entryPoints.Groups.OrderBy(static g => g.Key, StringComparer.OrdinalIgnoreCase))
    {
        var rendered = members.Select(static ep => $"{ep.Name}={ep.Target}");
        AppendMetadata(metadata, $"entryPoints.{groupName}", string.Join(';', rendered));
    }

    var wheel = await PythonWheelInfo.LoadAsync(wheelFile, cancellationToken).ConfigureAwait(false);
    if (wheel is { } loadedWheel)
    {
        foreach (var (key, value) in loadedWheel.ToMetadata())
        {
            AppendMetadata(metadata, key, value);
        }
    }

    var installer = await ReadSingleLineAsync(installerFile, cancellationToken).ConfigureAwait(false);
    if (!string.IsNullOrWhiteSpace(installer))
    {
        AppendMetadata(metadata, "installer", installer);
    }

    var directUrl = await PythonDirectUrlInfo.LoadAsync(directUrlFile, cancellationToken).ConfigureAwait(false);
    if (directUrl is { } loadedDirectUrl)
    {
        foreach (var (key, value) in loadedDirectUrl.ToMetadata())
        {
            AppendMetadata(metadata, key, value);
        }

        if (!string.IsNullOrWhiteSpace(loadedDirectUrl.Url))
        {
            var directUrlEvidence = new LanguageComponentEvidence(
                LanguageEvidenceKind.Metadata,
                "direct_url.json",
                PythonPathHelper.NormalizeRelative(context, directUrlFile),
                loadedDirectUrl.Url,
                Sha256: null);
            evidence.Add(directUrlEvidence);
        }
    }

    var recordEntries = await PythonRecordParser.LoadAsync(recordFile, cancellationToken).ConfigureAwait(false);
    var verification = await PythonRecordVerifier.VerifyAsync(context, distInfoPath, recordEntries, cancellationToken).ConfigureAwait(false);

    // Counters are always emitted, even when zero, so they bypass AppendMetadata.
    void AddCounter(string key, int count)
        => metadata.Add(new KeyValuePair<string, string?>(key, count.ToString(CultureInfo.InvariantCulture)));

    AddCounter("record.totalEntries", verification.TotalEntries);
    AddCounter("record.hashedEntries", verification.HashedEntries);
    AddCounter("record.missingFiles", verification.MissingFiles);
    AddCounter("record.hashMismatches", verification.HashMismatches);
    AddCounter("record.ioErrors", verification.IoErrors);

    if (verification.UnsupportedAlgorithms.Count > 0)
    {
        AppendMetadata(metadata, "record.unsupportedAlgorithms", string.Join(';', verification.UnsupportedAlgorithms));
    }

    evidence.AddRange(verification.Evidence);

    // The entry-point script probe only runs when RECORD verification found no used file.
    var usedByEntrypoint = verification.UsedByEntrypoint || EvaluateEntryPointUsage(context, distInfoPath, entryPoints);

    return new PythonDistribution(packageName, packageVersion, purl, metadata, evidence, usedByEntrypoint);
}
|
|
|
|
/// <summary>
/// Checks whether any script path derived from the distribution's entry points is reported as used
/// by the context's usage hints.
/// </summary>
/// <param name="context">Analyzer context whose <c>UsageHints</c> are queried.</param>
/// <param name="distInfoPath">The <c>.dist-info</c> directory; candidates are resolved against its parent.</param>
/// <param name="entryPoints">Entry points parsed from <c>entry_points.txt</c>.</param>
/// <returns><c>true</c> as soon as one candidate path is reported as used; otherwise <c>false</c>.</returns>
private static bool EvaluateEntryPointUsage(LanguageAnalyzerContext context, string distInfoPath, PythonEntryPointSet entryPoints)
{
    if (entryPoints.Groups.Count == 0)
    {
        return false;
    }

    var installRoot = Directory.GetParent(distInfoPath)?.FullName;
    if (string.IsNullOrWhiteSpace(installRoot))
    {
        return false;
    }

    // The pipeline is lazy, so evaluation stops at the first used candidate.
    return entryPoints.Groups.Values
        .SelectMany(static members => members)
        .SelectMany(static entryPoint => entryPoint.GetCandidateRelativeScriptPaths())
        .Select(relative => Path.GetFullPath(Path.Combine(installRoot, relative)))
        .Any(candidate => context.UsageHints.IsPathUsed(candidate));
}
|
|
|
|
/// <summary>
/// Adds a file-kind evidence record for <paramref name="path"/> when that file exists.
/// </summary>
/// <param name="context">Analyzer context used to express the locator relative to the scan root.</param>
/// <param name="path">File to record.</param>
/// <param name="source">Source label stored on the evidence (for example <c>METADATA</c>).</param>
/// <param name="evidence">Collection that receives the new record.</param>
private static void AddFileEvidence(LanguageAnalyzerContext context, string path, string source, ICollection<LanguageComponentEvidence> evidence)
{
    if (File.Exists(path))
    {
        var locator = PythonPathHelper.NormalizeRelative(context, path);
        evidence.Add(new LanguageComponentEvidence(LanguageEvidenceKind.File, source, locator, Value: null, Sha256: null));
    }
}
|
|
|
|
/// <summary>
/// Appends a key/value pair to <paramref name="metadata"/>, storing the value trimmed.
/// Nothing is added when either the key or the value is null, empty, or whitespace.
/// </summary>
/// <param name="metadata">Target collection.</param>
/// <param name="key">Metadata key (stored as given).</param>
/// <param name="value">Metadata value (stored trimmed).</param>
private static void AppendMetadata(ICollection<KeyValuePair<string, string?>> metadata, string key, string? value)
{
    var nothingToStore = string.IsNullOrWhiteSpace(key) || string.IsNullOrWhiteSpace(value);
    if (nothingToStore)
    {
        return;
    }

    metadata.Add(KeyValuePair.Create<string, string?>(key, value!.Trim()));
}
|
|
|
|
/// <summary>
/// Derives the package name from a <c>&lt;name&gt;-&lt;version&gt;.dist-info</c> directory name.
/// </summary>
/// <param name="distInfoPath">Path whose last segment is inspected.</param>
/// <returns>
/// The text before the last <c>-</c> of the stem preceding <c>.dist-info</c>; the whole stem when it
/// has no dash after its first character; <c>null</c> when the last segment is blank or the marker
/// is missing or sits at the very start.
/// </returns>
private static string? ExtractNameFromDirectory(string distInfoPath)
{
    var leaf = Path.GetFileName(distInfoPath);
    if (string.IsNullOrWhiteSpace(leaf))
    {
        return null;
    }

    var markerAt = leaf.IndexOf(".dist-info", StringComparison.OrdinalIgnoreCase);
    if (markerAt < 1)
    {
        return null;
    }

    var stem = leaf[..markerAt];
    var lastDash = stem.LastIndexOf('-');
    return lastDash > 0 ? stem[..lastDash] : stem;
}
|
|
|
|
/// <summary>
/// Derives the version from a <c>&lt;name&gt;-&lt;version&gt;.dist-info</c> directory name.
/// </summary>
/// <param name="distInfoPath">Path whose last segment is inspected.</param>
/// <returns>
/// The text after the last <c>-</c> of the stem preceding <c>.dist-info</c>, or <c>null</c> when the
/// last segment is blank, the marker is missing or at the very start, or there is no non-empty text
/// after a dash.
/// </returns>
private static string? ExtractVersionFromDirectory(string distInfoPath)
{
    var leaf = Path.GetFileName(distInfoPath);
    if (string.IsNullOrWhiteSpace(leaf))
    {
        return null;
    }

    var markerAt = leaf.IndexOf(".dist-info", StringComparison.OrdinalIgnoreCase);
    if (markerAt < 1)
    {
        return null;
    }

    var stem = leaf[..markerAt];
    var lastDash = stem.LastIndexOf('-');

    // A dash must exist and must not be the final character of the stem.
    var hasVersionPart = lastDash >= 0 && lastDash < stem.Length - 1;
    return hasVersionPart ? stem[(lastDash + 1)..] : null;
}
|
|
|
|
/// <summary>
/// Produces the canonical form of a package name for use in the purl: the name is trimmed and
/// lower-cased (invariant culture), and every run of <c>-</c>, <c>_</c>, <c>.</c> or space
/// characters becomes a single <c>-</c>.
/// </summary>
/// <remarks>
/// Collapsing runs follows the PEP 503 rule (<c>re.sub(r"[-_.]+", "-", name).lower()</c>), so
/// spellings such as <c>foo__bar</c> and <c>foo-bar</c> yield the same identifier. Treating a space
/// as a separator is an extra leniency kept from the earlier behaviour.
/// </remarks>
/// <param name="name">Raw package name.</param>
/// <returns>The normalised name, or an empty string when <paramref name="name"/> is blank.</returns>
private static string NormalizePackageName(string name)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        return string.Empty;
    }

    var lowered = name.Trim().ToLowerInvariant();
    var builder = new StringBuilder(lowered.Length);
    var previousWasSeparator = false;

    foreach (var ch in lowered)
    {
        if (ch is '-' or '_' or '.' or ' ')
        {
            // Emit one dash for the whole run of separators.
            if (!previousWasSeparator)
            {
                builder.Append('-');
            }

            previousWasSeparator = true;
            continue;
        }

        builder.Append(ch);
        previousWasSeparator = false;
    }

    return builder.ToString();
}
|
|
|
|
/// <summary>
/// Reads the first line of a text file and returns it trimmed.
/// </summary>
/// <param name="path">File to read.</param>
/// <param name="cancellationToken">Token passed to the line read.</param>
/// <returns>The trimmed first line, or <c>null</c> when the file does not exist or is empty.</returns>
private static async Task<string?> ReadSingleLineAsync(string path, CancellationToken cancellationToken)
{
    if (File.Exists(path))
    {
        await using var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var text = new StreamReader(input, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        var firstLine = await text.ReadLineAsync(cancellationToken).ConfigureAwait(false);
        return firstLine is null ? null : firstLine.Trim();
    }

    return null;
}
|
|
}
|
|
|
|
/// <summary>
/// A Python distribution discovered from a <c>.dist-info</c> directory: identity, purl, collected
/// metadata pairs, supporting evidence, and whether usage hints marked it as used.
/// </summary>
internal sealed record PythonDistribution(
    string Name,
    string Version,
    string Purl,
    IReadOnlyCollection<KeyValuePair<string, string?>> Metadata,
    IReadOnlyCollection<LanguageComponentEvidence> Evidence,
    bool UsedByEntrypoint)
{
    /// <summary>
    /// <see cref="Metadata"/> ordered by key with ordinal comparison. A new array is built on every access.
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> SortedMetadata
    {
        get
        {
            var byKey = Enumerable.OrderBy(Metadata, static pair => pair.Key, StringComparer.Ordinal);
            return byKey.ToArray();
        }
    }

    /// <summary>
    /// <see cref="Evidence"/> ordered by locator with ordinal comparison. A new array is built on every access.
    /// </summary>
    public IReadOnlyCollection<LanguageComponentEvidence> SortedEvidence
    {
        get
        {
            var byLocator = Enumerable.OrderBy(Evidence, static item => item.Locator, StringComparer.Ordinal);
            return byLocator.ToArray();
        }
    }
}
|
|
|
|
/// <summary>
/// Header fields of a <c>METADATA</c> file, keyed case-insensitively, with every occurrence of a
/// repeated header preserved in file order.
/// </summary>
internal sealed class PythonMetadataDocument
{
    private readonly Dictionary<string, List<string>> _values;

    private PythonMetadataDocument(Dictionary<string, List<string>> values)
    {
        _values = values;
    }

    /// <summary>
    /// Parses the header section of a <c>METADATA</c> file.
    /// </summary>
    /// <remarks>
    /// Lines starting with a space or tab continue the previous header and are joined with a single
    /// space. Parsing stops at the first blank line that follows at least one header: in the
    /// email-style layout used by METADATA, everything after that line is the free-form description
    /// body, and reading it as headers would record any description line containing a colon as a
    /// bogus field.
    /// </remarks>
    /// <param name="path">Path of the METADATA file.</param>
    /// <param name="cancellationToken">Token checked for every line read.</param>
    /// <returns>The parsed document; an empty document when the file does not exist.</returns>
    public static async Task<PythonMetadataDocument> LoadAsync(string path, CancellationToken cancellationToken)
    {
        var values = new Dictionary<string, List<string>>(StringComparer.OrdinalIgnoreCase);
        if (!File.Exists(path))
        {
            return new PythonMetadataDocument(values);
        }

        await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        string? currentKey = null;
        var builder = new StringBuilder();
        var sawHeader = false;

        while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (line.Length == 0)
            {
                Commit();

                if (sawHeader)
                {
                    // Blank line after the headers: the rest of the file is the description body.
                    break;
                }

                // Tolerate blank lines before the first header.
                continue;
            }

            if (line.StartsWith(' ') || line.StartsWith('\t'))
            {
                // Folded continuation of the current header; ignored when no header is open.
                if (currentKey is not null)
                {
                    if (builder.Length > 0)
                    {
                        builder.Append(' ');
                    }

                    builder.Append(line.Trim());
                }

                continue;
            }

            Commit();

            var separator = line.IndexOf(':');
            if (separator <= 0)
            {
                continue;
            }

            currentKey = line[..separator].Trim();
            sawHeader |= currentKey.Length > 0;
            builder.Clear();
            builder.Append(line[(separator + 1)..].Trim());
        }

        Commit();
        return new PythonMetadataDocument(values);

        // Stores the header being accumulated (if any) and resets the accumulator.
        void Commit()
        {
            if (string.IsNullOrWhiteSpace(currentKey))
            {
                return;
            }

            if (!values.TryGetValue(currentKey, out var list))
            {
                list = new List<string>();
                values[currentKey] = list;
            }

            var value = builder.ToString().Trim();
            if (value.Length > 0)
            {
                list.Add(value);
            }

            currentKey = null;
            builder.Clear();
        }
    }

    /// <summary>Returns the first recorded value for <paramref name="key"/>, or <c>null</c> when there is none.</summary>
    /// <param name="key">Header name (case-insensitive).</param>
    public string? GetFirst(string key)
    {
        if (key is null)
        {
            return null;
        }

        return _values.TryGetValue(key, out var list) && list.Count > 0
            ? list[0]
            : null;
    }

    /// <summary>Returns every recorded value for <paramref name="key"/> in file order; empty when there is none.</summary>
    /// <param name="key">Header name (case-insensitive).</param>
    public IReadOnlyList<string> GetAll(string key)
    {
        if (key is null)
        {
            return Array.Empty<string>();
        }

        return _values.TryGetValue(key, out var list)
            ? list.AsReadOnly()
            : Array.Empty<string>();
    }
}
|
|
|
|
/// <summary>
/// Key/value content of a <c>WHEEL</c> file, keyed case-insensitively.
/// </summary>
internal sealed class PythonWheelInfo
{
    private const string TagKey = "Tag";

    private readonly Dictionary<string, string> _values;

    private PythonWheelInfo(Dictionary<string, string> values)
    {
        _values = values;
    }

    /// <summary>
    /// Parses <c>Key: Value</c> lines from a <c>WHEEL</c> file.
    /// </summary>
    /// <remarks>
    /// A WHEEL file may list several <c>Tag</c> lines (one per compatibility tag). They are
    /// accumulated into a single <c>;</c>-separated value instead of letting the last one overwrite
    /// the others. For every other key the last occurrence wins.
    /// </remarks>
    /// <param name="path">Path of the WHEEL file.</param>
    /// <param name="cancellationToken">Token checked for every line read.</param>
    /// <returns>The parsed info, or <c>null</c> when the file does not exist.</returns>
    public static async Task<PythonWheelInfo?> LoadAsync(string path, CancellationToken cancellationToken)
    {
        if (!File.Exists(path))
        {
            return null;
        }

        var values = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var reader = new StreamReader(stream, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        while (await reader.ReadLineAsync(cancellationToken).ConfigureAwait(false) is { } line)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            var separator = line.IndexOf(':');
            if (separator <= 0)
            {
                continue;
            }

            var key = line[..separator].Trim();
            var value = line[(separator + 1)..].Trim();
            if (key.Length == 0 || value.Length == 0)
            {
                continue;
            }

            if (key.Equals(TagKey, StringComparison.OrdinalIgnoreCase) && values.TryGetValue(key, out var earlierTags))
            {
                // Keep every tag, in file order.
                values[key] = $"{earlierTags};{value}";
                continue;
            }

            values[key] = value;
        }

        return new PythonWheelInfo(values);
    }

    /// <summary>
    /// Projects the known WHEEL fields onto metadata pairs (<c>wheel.version</c>, <c>wheel.tags</c>,
    /// <c>wheel.rootIsPurelib</c>, <c>wheel.generator</c>), skipping fields that were absent.
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var mapping = new (string Header, string MetadataKey)[]
        {
            ("Wheel-Version", "wheel.version"),
            (TagKey, "wheel.tags"),
            ("Root-Is-Purelib", "wheel.rootIsPurelib"),
            ("Generator", "wheel.generator"),
        };

        var entries = new List<KeyValuePair<string, string?>>(mapping.Length);
        foreach (var (header, metadataKey) in mapping)
        {
            if (_values.TryGetValue(header, out var value))
            {
                entries.Add(new KeyValuePair<string, string?>(metadataKey, value));
            }
        }

        return entries;
    }
}
|
|
|
|
/// <summary>
/// Entry points parsed from <c>entry_points.txt</c>, grouped by section name (case-insensitive).
/// </summary>
internal sealed class PythonEntryPointSet
{
    /// <summary>Entry points per group, in the order they appear in the file.</summary>
    public IReadOnlyDictionary<string, IReadOnlyList<PythonEntryPoint>> Groups { get; }

    private PythonEntryPointSet(Dictionary<string, IReadOnlyList<PythonEntryPoint>> groups)
    {
        Groups = groups;
    }

    /// <summary>
    /// Parses an INI-style <c>entry_points.txt</c>: <c>[group]</c> headers followed by
    /// <c>name = target</c> lines. Blank lines, lines starting with <c>#</c>, lines outside any
    /// group, and lines without both a name and a target are skipped.
    /// </summary>
    /// <param name="path">Path of the entry points file.</param>
    /// <param name="cancellationToken">Token checked for every line read.</param>
    /// <returns>The parsed set; an empty set when the file does not exist.</returns>
    public static async Task<PythonEntryPointSet> LoadAsync(string path, CancellationToken cancellationToken)
    {
        var result = new Dictionary<string, IReadOnlyList<PythonEntryPoint>>(StringComparer.OrdinalIgnoreCase);
        if (!File.Exists(path))
        {
            return new PythonEntryPointSet(result);
        }

        var collected = new Dictionary<string, List<PythonEntryPoint>>(StringComparer.OrdinalIgnoreCase);
        string? activeGroup = null;

        await using var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var text = new StreamReader(input, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        string? rawLine;
        while ((rawLine = await text.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var content = rawLine.Trim();
            if (content.Length == 0 || content.StartsWith('#'))
            {
                continue;
            }

            var isSectionHeader = content.StartsWith('[') && content.EndsWith(']');
            if (isSectionHeader)
            {
                // An empty "[]" header closes the current group without opening a new one.
                var sectionName = content[1..^1].Trim();
                activeGroup = sectionName.Length == 0 ? null : sectionName;
                continue;
            }

            if (activeGroup is null)
            {
                continue;
            }

            var equalsAt = content.IndexOf('=');
            if (equalsAt <= 0)
            {
                continue;
            }

            var name = content[..equalsAt].Trim();
            var target = content[(equalsAt + 1)..].Trim();
            if (name.Length == 0 || target.Length == 0)
            {
                continue;
            }

            if (!collected.TryGetValue(activeGroup, out var members))
            {
                members = new List<PythonEntryPoint>();
                collected[activeGroup] = members;
            }

            members.Add(new PythonEntryPoint(name, target));
        }

        foreach (var (groupName, members) in collected)
        {
            result.Add(groupName, members.AsReadOnly());
        }

        return new PythonEntryPointSet(result);
    }
}
|
|
|
|
/// <summary>
/// One <c>name = target</c> line from <c>entry_points.txt</c>.
/// </summary>
internal sealed record PythonEntryPoint(string Name, string Target)
{
    /// <summary>
    /// Relative paths at which a launcher script named after this entry point is looked for:
    /// <c>bin/&lt;Name&gt;</c>, <c>Scripts/&lt;Name&gt;.exe</c> and <c>Scripts/&lt;Name&gt;</c>, in that order.
    /// </summary>
    public IReadOnlyCollection<string> GetCandidateRelativeScriptPaths() => new[]
    {
        Path.Combine("bin", Name),
        Path.Combine("Scripts", $"{Name}.exe"),
        Path.Combine("Scripts", Name),
    };
}
|
|
|
|
/// <summary>
/// One row of a <c>RECORD</c> file: the recorded path, the hash algorithm and hash value taken from
/// the <c>algorithm=value</c> field (both <c>null</c> when that field is absent or malformed), and
/// the size column (<c>null</c> when absent or not an integer).
/// </summary>
internal sealed record PythonRecordEntry(string Path, string? HashAlgorithm, string? HashValue, long? Size);
|
|
|
|
/// <summary>
/// Reads the CSV rows of a <c>RECORD</c> file into <see cref="PythonRecordEntry"/> values.
/// </summary>
internal static class PythonRecordParser
{
    /// <summary>
    /// Parses every non-empty line of the RECORD file as <c>path,algorithm=hash,size</c>.
    /// </summary>
    /// <param name="path">Path of the RECORD file.</param>
    /// <param name="cancellationToken">Token checked for every line read.</param>
    /// <returns>The parsed rows in file order; an empty list when the file does not exist.</returns>
    public static async Task<IReadOnlyList<PythonRecordEntry>> LoadAsync(string path, CancellationToken cancellationToken)
    {
        if (!File.Exists(path))
        {
            return Array.Empty<PythonRecordEntry>();
        }

        var parsed = new List<PythonRecordEntry>();

        await using var input = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        using var text = new StreamReader(input, PythonEncoding.Utf8, detectEncodingFromByteOrderMarks: true);

        string? row;
        while ((row = await text.ReadLineAsync(cancellationToken).ConfigureAwait(false)) is not null)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (row.Length == 0)
            {
                continue;
            }

            var columns = ParseCsvLine(row);
            if (columns.Count < 1)
            {
                continue;
            }

            // Second column: "<algorithm>=<hash>"; both halves must be non-empty to be accepted.
            string? algorithm = null;
            string? digest = null;
            var hashColumn = columns.Count > 1 ? columns[1] : null;
            if (!string.IsNullOrWhiteSpace(hashColumn))
            {
                var spec = hashColumn.Trim();
                var equalsAt = spec.IndexOf('=');
                if (equalsAt > 0 && equalsAt < spec.Length - 1)
                {
                    algorithm = spec[..equalsAt];
                    digest = spec[(equalsAt + 1)..];
                }
            }

            // Third column: file size, kept only when it parses as an integer.
            long? size = columns.Count > 2 && long.TryParse(columns[2], NumberStyles.Integer, CultureInfo.InvariantCulture, out var byteCount)
                ? byteCount
                : null;

            parsed.Add(new PythonRecordEntry(columns[0], algorithm, digest, size));
        }

        return parsed;
    }

    /// <summary>
    /// Splits one CSV line on commas, honouring double-quoted fields in which <c>""</c> stands for
    /// a literal quote. Always yields at least one field.
    /// </summary>
    private static List<string> ParseCsvLine(string line)
    {
        var fields = new List<string>();
        var field = new StringBuilder();
        var quoted = false;

        var index = 0;
        while (index < line.Length)
        {
            var current = line[index++];
            switch (current)
            {
                // Doubled quote inside a quoted field: emit one quote and consume the second.
                case '"' when quoted && index < line.Length && line[index] == '"':
                    field.Append('"');
                    index++;
                    break;

                // Any other quote opens or closes a quoted section.
                case '"':
                    quoted = !quoted;
                    break;

                case ',' when !quoted:
                    fields.Add(field.ToString());
                    field.Clear();
                    break;

                default:
                    field.Append(current);
                    break;
            }
        }

        fields.Add(field.ToString());
        return fields;
    }
}
|
|
|
|
/// <summary>
/// Immutable summary of a RECORD verification run: counters, the usage flag, the set of hash
/// algorithms that could not be checked, and the evidence produced along the way.
/// </summary>
internal sealed class PythonRecordVerificationResult
{
    /// <summary>Number of RECORD rows examined.</summary>
    public int TotalEntries { get; }

    /// <summary>Number of rows that carried both a hash algorithm and a hash value.</summary>
    public int HashedEntries { get; }

    /// <summary>Number of rows counted as missing.</summary>
    public int MissingFiles { get; }

    /// <summary>Number of rows whose computed hash differed from the recorded one.</summary>
    public int HashMismatches { get; }

    /// <summary>Number of rows whose file could not be read.</summary>
    public int IoErrors { get; }

    /// <summary>Whether any verified file was reported as used by the usage hints.</summary>
    public bool UsedByEntrypoint { get; }

    /// <summary>Hash algorithm names encountered that are not supported.</summary>
    public IReadOnlyCollection<string> UnsupportedAlgorithms { get; }

    /// <summary>Evidence records produced during verification.</summary>
    public IReadOnlyCollection<LanguageComponentEvidence> Evidence { get; }

    public PythonRecordVerificationResult(
        int totalEntries,
        int hashedEntries,
        int missingFiles,
        int hashMismatches,
        int ioErrors,
        bool usedByEntrypoint,
        IReadOnlyCollection<string> unsupportedAlgorithms,
        IReadOnlyCollection<LanguageComponentEvidence> evidence)
    {
        (TotalEntries, HashedEntries, MissingFiles, HashMismatches, IoErrors) =
            (totalEntries, hashedEntries, missingFiles, hashMismatches, ioErrors);
        (UsedByEntrypoint, UnsupportedAlgorithms, Evidence) =
            (usedByEntrypoint, unsupportedAlgorithms, evidence);
    }
}
|
|
|
|
/// <summary>
/// Checks the files listed in a distribution's <c>RECORD</c> against the file system.
/// </summary>
internal static class PythonRecordVerifier
{
    private static readonly HashSet<string> SupportedAlgorithms = new(StringComparer.OrdinalIgnoreCase)
    {
        "sha256"
    };

    /// <summary>
    /// Walks every RECORD entry, resolving its path against the parent of
    /// <paramref name="distInfoPath"/>, and tallies missing files, hash mismatches and read errors.
    /// </summary>
    /// <remarks>
    /// Entries that resolve outside <c>context.RootPath</c> are counted as missing and reported as
    /// <c>outside-root</c>; they are never opened. Only <c>sha256</c> hashes are verified; other
    /// algorithm names are collected as unsupported. Digests are compared in urlsafe, unpadded
    /// Base64 form (the encoding RECORD uses); a recorded value written in standard padded Base64
    /// is accepted as well.
    /// </remarks>
    /// <param name="context">Analyzer context supplying the root path, usage hints and relative paths.</param>
    /// <param name="distInfoPath">The <c>.dist-info</c> directory whose parent anchors the RECORD paths.</param>
    /// <param name="entries">Rows parsed from RECORD.</param>
    /// <param name="cancellationToken">Token checked for every entry and passed to hashing.</param>
    /// <returns>The verification summary; all-zero when <paramref name="entries"/> is empty.</returns>
    public static async Task<PythonRecordVerificationResult> VerifyAsync(
        LanguageAnalyzerContext context,
        string distInfoPath,
        IReadOnlyList<PythonRecordEntry> entries,
        CancellationToken cancellationToken)
    {
        if (entries.Count == 0)
        {
            return new PythonRecordVerificationResult(0, 0, 0, 0, 0, usedByEntrypoint: false, Array.Empty<string>(), Array.Empty<LanguageComponentEvidence>());
        }

        var evidence = new List<LanguageComponentEvidence>();
        var unsupported = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

        // A trailing separator keeps the prefix test from matching sibling directories ("/root2").
        var root = context.RootPath;
        if (!root.EndsWith(Path.DirectorySeparatorChar))
        {
            root += Path.DirectorySeparatorChar;
        }

        var parent = Directory.GetParent(distInfoPath)?.FullName ?? distInfoPath;

        var total = 0;
        var hashed = 0;
        var missing = 0;
        var mismatched = 0;
        var ioErrors = 0;
        var usedByEntrypoint = false;

        void AddDerived(string fullPath, string value, string? sha256 = null)
            => evidence.Add(new LanguageComponentEvidence(
                LanguageEvidenceKind.Derived,
                "RECORD",
                PythonPathHelper.NormalizeRelative(context, fullPath),
                value,
                Sha256: sha256));

        foreach (var entry in entries)
        {
            cancellationToken.ThrowIfCancellationRequested();
            total++;

            var entryPath = entry.Path.Replace('/', Path.DirectorySeparatorChar);
            var fullPath = Path.GetFullPath(Path.Combine(parent, entryPath));

            if (!fullPath.StartsWith(root, StringComparison.Ordinal))
            {
                missing++;
                AddDerived(fullPath, "outside-root");
                continue;
            }

            if (!File.Exists(fullPath))
            {
                missing++;
                AddDerived(fullPath, "missing");
                continue;
            }

            if (context.UsageHints.IsPathUsed(fullPath))
            {
                usedByEntrypoint = true;
            }

            if (string.IsNullOrWhiteSpace(entry.HashAlgorithm) || string.IsNullOrWhiteSpace(entry.HashValue))
            {
                continue;
            }

            hashed++;

            if (!SupportedAlgorithms.Contains(entry.HashAlgorithm))
            {
                unsupported.Add(entry.HashAlgorithm);
                continue;
            }

            string actualHash;
            try
            {
                actualHash = await ComputeSha256Base64Async(fullPath, cancellationToken).ConfigureAwait(false);
            }
            catch (IOException)
            {
                ioErrors++;
                AddDerived(fullPath, "io-error");
                continue;
            }
            catch (UnauthorizedAccessException)
            {
                ioErrors++;
                AddDerived(fullPath, "access-denied");
                continue;
            }

            if (!HashesMatch(actualHash, entry.HashValue))
            {
                mismatched++;

                // The reported digest keeps the standard Base64 form this evidence has always carried.
                AddDerived(fullPath, $"sha256 mismatch expected={entry.HashValue} actual={actualHash}", actualHash);
            }
        }

        return new PythonRecordVerificationResult(
            total,
            hashed,
            missing,
            mismatched,
            ioErrors,
            usedByEntrypoint,
            unsupported.ToArray(),
            evidence);
    }

    /// <summary>
    /// Compares a computed Base64 digest with the value recorded in RECORD, ignoring the difference
    /// between the standard and urlsafe alphabets and any <c>=</c> padding.
    /// </summary>
    private static bool HashesMatch(string computedBase64, string? recordedHash)
        => recordedHash is not null
            && string.Equals(ToUrlSafeNoPadding(computedBase64), ToUrlSafeNoPadding(recordedHash), StringComparison.Ordinal);

    /// <summary>Rewrites a Base64 string into the urlsafe alphabet and strips surrounding whitespace and trailing padding.</summary>
    private static string ToUrlSafeNoPadding(string base64)
        => base64.Trim().TrimEnd('=').Replace('+', '-').Replace('/', '_');

    /// <summary>Streams the file through SHA-256 and returns the digest as standard Base64.</summary>
    private static async Task<string> ComputeSha256Base64Async(string path, CancellationToken cancellationToken)
    {
        await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
        var digest = await SHA256.HashDataAsync(stream, cancellationToken).ConfigureAwait(false);
        return Convert.ToBase64String(digest);
    }
}
|
|
|
|
/// <summary>
/// The subset of <c>direct_url.json</c> that is reported: origin URL, editable flag, subdirectory
/// and VCS details.
/// </summary>
internal sealed class PythonDirectUrlInfo
{
    /// <summary>Value of the top-level <c>url</c> property, when it is a string.</summary>
    public string? Url { get; }

    /// <summary><c>true</c> only when <c>dir_info.editable</c> is the JSON literal <c>true</c>.</summary>
    public bool IsEditable { get; }

    /// <summary>Subdirectory of the source tree that holds the project, when recorded.</summary>
    public string? Subdirectory { get; }

    /// <summary>Value of <c>vcs_info.vcs</c>, when it is a string.</summary>
    public string? Vcs { get; }

    /// <summary>Value of <c>vcs_info.commit_id</c>, when it is a string.</summary>
    public string? Commit { get; }

    private PythonDirectUrlInfo(string? url, bool isEditable, string? subdirectory, string? vcs, string? commit)
    {
        Url = url;
        IsEditable = isEditable;
        Subdirectory = subdirectory;
        Vcs = vcs;
        Commit = commit;
    }

    /// <summary>
    /// Reads <c>direct_url.json</c> leniently.
    /// </summary>
    /// <remarks>
    /// The file is produced by whatever installed the package, so its shape is not trusted: a
    /// property of an unexpected JSON kind is treated as absent instead of throwing, and a file that
    /// is not valid JSON (or whose root is not an object) is treated as if it were missing. The
    /// <c>subdirectory</c> property is read from the top level of the document, with a fallback to
    /// <c>dir_info.subdirectory</c>, which is where earlier versions of this loader looked.
    /// </remarks>
    /// <param name="path">Path of the <c>direct_url.json</c> file.</param>
    /// <param name="cancellationToken">Token passed to the JSON parser.</param>
    /// <returns>The parsed info, or <c>null</c> when the file is absent or unusable.</returns>
    public static async Task<PythonDirectUrlInfo?> LoadAsync(string path, CancellationToken cancellationToken)
    {
        if (!File.Exists(path))
        {
            return null;
        }

        await using var stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);

        JsonDocument document;
        try
        {
            document = await JsonDocument.ParseAsync(stream, cancellationToken: cancellationToken).ConfigureAwait(false);
        }
        catch (JsonException)
        {
            // Malformed file: best-effort metadata, so report nothing rather than abort the load.
            return null;
        }

        using (document)
        {
            var root = document.RootElement;
            if (root.ValueKind != JsonValueKind.Object)
            {
                return null;
            }

            var url = ReadString(root, "url");
            var subdirectory = ReadString(root, "subdirectory");
            var isEditable = false;

            if (TryGetObject(root, "dir_info", out var dirInfo))
            {
                isEditable = dirInfo.TryGetProperty("editable", out var editable) && editable.ValueKind == JsonValueKind.True;
                subdirectory ??= ReadString(dirInfo, "subdirectory");
            }

            string? vcs = null;
            string? commit = null;
            if (TryGetObject(root, "vcs_info", out var vcsInfo))
            {
                vcs = ReadString(vcsInfo, "vcs");
                commit = ReadString(vcsInfo, "commit_id");
            }

            return new PythonDirectUrlInfo(url, isEditable, subdirectory, vcs, commit);
        }
    }

    /// <summary>
    /// Projects the populated fields onto metadata pairs (<c>editable</c>, <c>sourceUrl</c>,
    /// <c>sourceSubdirectory</c>, <c>sourceVcs</c>, <c>sourceCommit</c>).
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var entries = new List<KeyValuePair<string, string?>>();

        if (IsEditable)
        {
            entries.Add(new KeyValuePair<string, string?>("editable", "true"));
        }

        AddIfPresent(entries, "sourceUrl", Url);
        AddIfPresent(entries, "sourceSubdirectory", Subdirectory);
        AddIfPresent(entries, "sourceVcs", Vcs);
        AddIfPresent(entries, "sourceCommit", Commit);

        return entries;
    }

    /// <summary>Adds the pair only when the value is not null, empty or whitespace.</summary>
    private static void AddIfPresent(List<KeyValuePair<string, string?>> entries, string key, string? value)
    {
        if (!string.IsNullOrWhiteSpace(value))
        {
            entries.Add(new KeyValuePair<string, string?>(key, value));
        }
    }

    /// <summary>Returns the named property when it exists and is a JSON string; otherwise <c>null</c>.</summary>
    private static string? ReadString(JsonElement parent, string propertyName)
        => parent.TryGetProperty(propertyName, out var element) && element.ValueKind == JsonValueKind.String
            ? element.GetString()
            : null;

    /// <summary>Fetches the named property only when it exists and is a JSON object.</summary>
    private static bool TryGetObject(JsonElement parent, string propertyName, out JsonElement value)
        => parent.TryGetProperty(propertyName, out value) && value.ValueKind == JsonValueKind.Object;
}
|
|
|
|
/// <summary>
/// Path helpers shared by the Python analyzer internals.
/// </summary>
internal static class PythonPathHelper
{
    /// <summary>
    /// Returns <paramref name="path"/> relative to the analyzer context, using <c>"."</c> when the
    /// context reports a null, empty or <c>"."</c> relative path.
    /// </summary>
    /// <param name="context">Context whose <c>GetRelativePath</c> performs the conversion.</param>
    /// <param name="path">Path to convert.</param>
    public static string NormalizeRelative(LanguageAnalyzerContext context, string path)
        => context.GetRelativePath(path) switch
        {
            null or "" or "." => ".",
            var relative => relative,
        };
}
|
|
|
|
/// <summary>
/// Text encodings used when reading Python packaging files.
/// </summary>
internal static class PythonEncoding
{
    /// <summary>
    /// UTF-8 that emits no byte-order mark and throws on invalid byte sequences instead of
    /// substituting replacement characters.
    /// </summary>
    public static readonly UTF8Encoding Utf8 = new UTF8Encoding(
        false, // encoderShouldEmitUTF8Identifier
        true); // throwOnInvalidBytes
}
|