up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
This commit is contained in:
@@ -0,0 +1,423 @@
|
||||
using System.Collections.Frozen;
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
|
||||
|
||||
/// <summary>
|
||||
/// Detects framework usage hints from Python source code.
|
||||
/// </summary>
|
||||
internal sealed partial class PythonFrameworkDetector
|
||||
{
|
||||
// File names that strongly indicate a framework.
// Keys are compared against the bare file name with an exact, case-insensitive match;
// there is no glob support. Extension-based detection (Jupyter "*.ipynb") is done by an
// explicit EndsWith check in DetectAsync, so no wildcard key is listed here - it could
// never be hit by an exact-name lookup.
// The comparer is passed to ToFrozenDictionary explicitly: without it the frozen copy is
// rebuilt with the default (case-sensitive) comparer and the builder's comparer is lost.
private static readonly FrozenDictionary<string, (PythonFrameworkKind Kind, PythonFrameworkConfidence Confidence)> FilePatterns =
    new Dictionary<string, (PythonFrameworkKind, PythonFrameworkConfidence)>(StringComparer.OrdinalIgnoreCase)
    {
        // Django
        ["manage.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.High),
        ["settings.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),
        ["urls.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),
        ["wsgi.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),
        ["asgi.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),

        // Celery
        ["celery.py"] = (PythonFrameworkKind.Celery, PythonFrameworkConfidence.High),
        ["tasks.py"] = (PythonFrameworkKind.Celery, PythonFrameworkConfidence.Low),

        // Gunicorn
        ["gunicorn.conf.py"] = (PythonFrameworkKind.Gunicorn, PythonFrameworkConfidence.Definitive),
        ["gunicorn_config.py"] = (PythonFrameworkKind.Gunicorn, PythonFrameworkConfidence.Definitive),

        // uWSGI
        ["uwsgi.ini"] = (PythonFrameworkKind.Uwsgi, PythonFrameworkConfidence.Definitive),

        // Pytest
        ["conftest.py"] = (PythonFrameworkKind.Pytest, PythonFrameworkConfidence.High),
        ["pytest.ini"] = (PythonFrameworkKind.Pytest, PythonFrameworkConfidence.Definitive),
    }.ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
// Module names whose import indicates a framework.
// Keys are compared with ordinal (case-sensitive) equality against either the full dotted
// module path or its first segment. The comparer is passed to ToFrozenDictionary explicitly
// because the frozen copy does not inherit the comparer given to the builder dictionary.
private static readonly FrozenDictionary<string, (PythonFrameworkKind Kind, PythonFrameworkConfidence Confidence)> ImportPatterns =
    new Dictionary<string, (PythonFrameworkKind, PythonFrameworkConfidence)>(StringComparer.Ordinal)
    {
        // Django
        ["django"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.High),
        ["django.conf"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.High),
        ["django.urls"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.High),
        ["django.views"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.High),
        ["django.db"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.High),

        // Flask
        ["flask"] = (PythonFrameworkKind.Flask, PythonFrameworkConfidence.High),
        ["flask_restful"] = (PythonFrameworkKind.Flask, PythonFrameworkConfidence.High),
        ["flask_sqlalchemy"] = (PythonFrameworkKind.Flask, PythonFrameworkConfidence.High),

        // FastAPI / Starlette
        ["fastapi"] = (PythonFrameworkKind.FastAPI, PythonFrameworkConfidence.High),
        ["starlette"] = (PythonFrameworkKind.Starlette, PythonFrameworkConfidence.High),

        // Celery
        ["celery"] = (PythonFrameworkKind.Celery, PythonFrameworkConfidence.High),

        // RQ
        ["rq"] = (PythonFrameworkKind.RQ, PythonFrameworkConfidence.High),

        // Click
        ["click"] = (PythonFrameworkKind.Click, PythonFrameworkConfidence.High),

        // Typer
        ["typer"] = (PythonFrameworkKind.Typer, PythonFrameworkConfidence.High),

        // Pytest
        ["pytest"] = (PythonFrameworkKind.Pytest, PythonFrameworkConfidence.High),

        // Streamlit
        ["streamlit"] = (PythonFrameworkKind.Streamlit, PythonFrameworkConfidence.Definitive),

        // Gradio
        ["gradio"] = (PythonFrameworkKind.Gradio, PythonFrameworkConfidence.Definitive),

        // Pydantic Settings
        ["pydantic_settings"] = (PythonFrameworkKind.PydanticSettings, PythonFrameworkConfidence.Definitive),
    }.ToFrozenDictionary(StringComparer.Ordinal);
|
||||
|
||||
// Source-generated line-level patterns. RegexOptions.Compiled is intentionally not
// specified: the regex source generator ignores that flag, so it only added noise.

// Django patterns

/// <summary>Matches the start of an <c>INSTALLED_APPS = [</c> assignment.</summary>
[GeneratedRegex(@"INSTALLED_APPS\s*=\s*\[")]
private static partial Regex DjangoInstalledAppsPattern();

/// <summary>Matches the start of a <c>MIDDLEWARE = [</c> assignment.</summary>
// NOTE(review): not referenced in this part of the class - confirm it is used elsewhere.
[GeneratedRegex(@"MIDDLEWARE\s*=\s*\[")]
private static partial Regex DjangoMiddlewarePattern();

/// <summary>Matches a <c>ROOT_URLCONF =</c> assignment.</summary>
// NOTE(review): not referenced in this part of the class - confirm it is used elsewhere.
[GeneratedRegex(@"ROOT_URLCONF\s*=")]
private static partial Regex DjangoRootUrlConfPattern();

/// <summary>Matches <c>os.environ.setdefault("DJANGO_SETTINGS_MODULE"</c> with either quote style.</summary>
[GeneratedRegex(@"os\.environ\.setdefault\s*\(\s*[""']DJANGO_SETTINGS_MODULE[""']")]
private static partial Regex DjangoSettingsModulePattern();

// Flask patterns

/// <summary>Matches a <c>Flask(__name__</c> application construction.</summary>
[GeneratedRegex(@"Flask\s*\(\s*__name__")]
private static partial Regex FlaskAppPattern();

/// <summary>Matches a <c>Blueprint(</c> call.</summary>
[GeneratedRegex(@"Blueprint\s*\(")]
private static partial Regex FlaskBlueprintPattern();

// FastAPI patterns

/// <summary>Matches a <c>FastAPI(</c> call.</summary>
[GeneratedRegex(@"FastAPI\s*\(")]
private static partial Regex FastAPIAppPattern();

/// <summary>Matches an <c>APIRouter(</c> call.</summary>
[GeneratedRegex(@"APIRouter\s*\(")]
private static partial Regex FastAPIRouterPattern();

// Celery patterns

/// <summary>Matches a <c>Celery(</c> call.</summary>
[GeneratedRegex(@"Celery\s*\(")]
private static partial Regex CeleryAppPattern();

/// <summary>Matches the <c>@app.task</c>, <c>@celery.task</c> and <c>@shared_task</c> decorators.</summary>
[GeneratedRegex(@"@\s*(?:app\.task|celery\.task|shared_task)")]
private static partial Regex CeleryTaskPattern();

// AWS Lambda patterns

/// <summary>Matches <c>def lambda_handler(event, context)</c> or <c>def handler(event, context)</c>.</summary>
[GeneratedRegex(@"def\s+(lambda_handler|handler)\s*\(\s*event\s*,\s*context\s*\)")]
private static partial Regex LambdaHandlerPattern();

/// <summary>Matches any function whose signature starts with <c>(event: dict, context: LambdaContext</c>.</summary>
[GeneratedRegex(@"def\s+\w+\s*\(\s*event\s*:\s*dict\s*,\s*context\s*:\s*LambdaContext")]
private static partial Regex LambdaTypedHandlerPattern();

// Click patterns

/// <summary>Matches the <c>@click.command</c> decorator.</summary>
[GeneratedRegex(@"@\s*click\.command")]
private static partial Regex ClickCommandPattern();

/// <summary>Matches the <c>@click.group</c> decorator.</summary>
[GeneratedRegex(@"@\s*click\.group")]
private static partial Regex ClickGroupPattern();

// Typer patterns

/// <summary>Matches a <c>typer.Typer(</c> call.</summary>
[GeneratedRegex(@"typer\.Typer\s*\(")]
private static partial Regex TyperAppPattern();

/// <summary>Matches the <c>@app.command</c> decorator.</summary>
[GeneratedRegex(@"@\s*app\.command")]
private static partial Regex TyperCommandPattern();

// Logging patterns

/// <summary>Matches a reference to <c>logging.config.dictConfig</c>.</summary>
[GeneratedRegex(@"logging\.config\.dictConfig")]
private static partial Regex LoggingDictConfigPattern();

/// <summary>Matches a reference to <c>logging.config.fileConfig</c>.</summary>
[GeneratedRegex(@"logging\.config\.fileConfig")]
private static partial Regex LoggingFileConfigPattern();

/// <summary>Matches the start of a <c>LOGGING = {</c> assignment.</summary>
[GeneratedRegex(@"LOGGING\s*=\s*\{")]
private static partial Regex DjangoLoggingPattern();

// Gunicorn patterns (only applied to files whose path contains "gunicorn")

/// <summary>Matches a <c>bind = "</c> / <c>bind = '</c> assignment.</summary>
[GeneratedRegex(@"bind\s*=\s*[""']")]
private static partial Regex GunicornBindPattern();

/// <summary>Matches a <c>workers =</c> assignment.</summary>
[GeneratedRegex(@"workers\s*=")]
private static partial Regex GunicornWorkersPattern();
|
||||
|
||||
/// <summary>
/// Collects framework hints for every file exposed by <paramref name="vfs"/> and returns
/// the strongest hint for each (framework kind, source file) pair.
/// </summary>
/// <param name="vfs">Virtual file system whose files are inspected.</param>
/// <param name="cancellationToken">Checked before each file is processed.</param>
/// <returns>
/// One hint per (kind, file) pair. When several hints share a pair, the one with the highest
/// confidence is kept; ties go to the hint that was recorded first.
/// </returns>
public async Task<ImmutableArray<PythonFrameworkHint>> DetectAsync(
    PythonVirtualFileSystem vfs,
    CancellationToken cancellationToken = default)
{
    var collected = new List<PythonFrameworkHint>();

    // Pass 1: evidence that comes from the file name or extension alone.
    foreach (var entry in vfs.Files)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var name = Path.GetFileName(entry.VirtualPath);
        if (FilePatterns.TryGetValue(name, out var known))
        {
            collected.Add(new PythonFrameworkHint(
                known.Kind,
                entry.VirtualPath,
                LineNumber: null,
                $"file pattern: {name}",
                known.Confidence));
        }

        // Notebooks are recognised by extension rather than by exact file name.
        if (entry.VirtualPath.EndsWith(".ipynb", StringComparison.OrdinalIgnoreCase))
        {
            collected.Add(new PythonFrameworkHint(
                PythonFrameworkKind.Jupyter,
                entry.VirtualPath,
                LineNumber: null,
                "Jupyter notebook file",
                PythonFrameworkConfidence.Definitive));
        }
    }

    // Pass 2: evidence found inside Python sources. The candidate list is materialised
    // before any file is opened.
    var sources = vfs.Files
        .Where(candidate => candidate.VirtualPath.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
        .ToArray();

    foreach (var source in sources)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var fromContent = await DetectInFileAsync(vfs, source, cancellationToken).ConfigureAwait(false);
        collected.AddRange(fromContent);
    }

    // Keep only the highest-confidence hint per (kind, file).
    var strongest =
        from hint in collected
        group hint by (hint.Kind, hint.SourceFile) into sameTarget
        select sameTarget.OrderByDescending(h => h.Confidence).First();

    return strongest.ToImmutableArray();
}
|
||||
|
||||
/// <summary>
/// Reads one file through <paramref name="vfs"/> and applies the import and line-level
/// pattern checks to every line that is not a <c>#</c> comment.
/// </summary>
/// <param name="vfs">Virtual file system used to open the file.</param>
/// <param name="file">File to scan; its virtual path is recorded on every hint.</param>
/// <param name="cancellationToken">Checked once per line.</param>
/// <returns>
/// Hints in line order (line numbers are 1-based). Empty when the file cannot be opened;
/// if an <see cref="IOException"/> occurs, whatever was collected so far is returned.
/// </returns>
private async Task<IEnumerable<PythonFrameworkHint>> DetectInFileAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    var found = new List<PythonFrameworkHint>();

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return found;
        }

        using var reader = new StreamReader(stream);
        var text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        // Ordered rule table: a rule contributes one hint per line when ANY of its
        // patterns matches. The order here is the order hints are appended for a line.
        var rules = new (Regex[] AnyOf, PythonFrameworkKind Kind, string Evidence, PythonFrameworkConfidence Confidence)[]
        {
            (new[] { DjangoInstalledAppsPattern() }, PythonFrameworkKind.Django, "INSTALLED_APPS configuration", PythonFrameworkConfidence.Definitive),
            (new[] { DjangoSettingsModulePattern() }, PythonFrameworkKind.Django, "DJANGO_SETTINGS_MODULE", PythonFrameworkConfidence.Definitive),
            (new[] { FlaskAppPattern() }, PythonFrameworkKind.Flask, "Flask(__name__)", PythonFrameworkConfidence.Definitive),
            (new[] { FlaskBlueprintPattern() }, PythonFrameworkKind.Flask, "Blueprint()", PythonFrameworkConfidence.High),
            (new[] { FastAPIAppPattern() }, PythonFrameworkKind.FastAPI, "FastAPI()", PythonFrameworkConfidence.Definitive),
            (new[] { FastAPIRouterPattern() }, PythonFrameworkKind.FastAPI, "APIRouter()", PythonFrameworkConfidence.High),
            (new[] { CeleryAppPattern() }, PythonFrameworkKind.Celery, "Celery()", PythonFrameworkConfidence.Definitive),
            (new[] { CeleryTaskPattern() }, PythonFrameworkKind.Celery, "@app.task decorator", PythonFrameworkConfidence.High),
            (new[] { LambdaHandlerPattern(), LambdaTypedHandlerPattern() }, PythonFrameworkKind.AwsLambda, "Lambda handler function", PythonFrameworkConfidence.High),
            (new[] { ClickCommandPattern(), ClickGroupPattern() }, PythonFrameworkKind.Click, "@click.command/group", PythonFrameworkConfidence.High),
            (new[] { TyperAppPattern() }, PythonFrameworkKind.Typer, "typer.Typer()", PythonFrameworkConfidence.Definitive),
            (new[] { TyperCommandPattern() }, PythonFrameworkKind.Typer, "@app.command", PythonFrameworkConfidence.High),
            (new[] { LoggingDictConfigPattern(), LoggingFileConfigPattern() }, PythonFrameworkKind.LoggingConfig, "logging.config", PythonFrameworkConfidence.High),
            (new[] { DjangoLoggingPattern() }, PythonFrameworkKind.LoggingConfig, "Django LOGGING dict", PythonFrameworkConfidence.High),
        };

        // Gunicorn settings are only looked for in files whose path mentions gunicorn.
        var pathMentionsGunicorn = file.VirtualPath.Contains("gunicorn", StringComparison.OrdinalIgnoreCase);

        var lineNumber = 0;
        foreach (var rawLine in text.Split('\n'))
        {
            cancellationToken.ThrowIfCancellationRequested();
            lineNumber++;

            var code = rawLine.TrimStart();
            if (code.StartsWith('#'))
            {
                // Whole-line comment: nothing to detect.
                continue;
            }

            var looksLikeImport =
                code.StartsWith("import ", StringComparison.Ordinal) ||
                code.StartsWith("from ", StringComparison.Ordinal);
            if (looksLikeImport)
            {
                found.AddRange(DetectImportPatterns(code, file.VirtualPath, lineNumber));
            }

            foreach (var rule in rules)
            {
                if (MatchesAny(rule.AnyOf, rawLine))
                {
                    found.Add(CreateHint(rule.Kind, file.VirtualPath, lineNumber, rule.Evidence, rule.Confidence));
                }
            }

            if (pathMentionsGunicorn &&
                (GunicornBindPattern().IsMatch(rawLine) || GunicornWorkersPattern().IsMatch(rawLine)))
            {
                found.Add(CreateHint(PythonFrameworkKind.Gunicorn, file.VirtualPath, lineNumber,
                    "Gunicorn configuration", PythonFrameworkConfidence.Definitive));
            }
        }
    }
    catch (IOException)
    {
        // Skip unreadable files
    }

    return found;

    static bool MatchesAny(Regex[] patterns, string input)
    {
        foreach (var pattern in patterns)
        {
            if (pattern.IsMatch(input))
            {
                return true;
            }
        }

        return false;
    }
}
|
||||
|
||||
/// <summary>
/// Maps a single <c>import ...</c> or <c>from ... import ...</c> statement to framework hints
/// by looking the imported module up in <c>ImportPatterns</c>.
/// </summary>
/// <param name="line">The source line; surrounding whitespace is ignored.</param>
/// <param name="sourceFile">Path recorded on every produced hint.</param>
/// <param name="lineNumber">1-based line number recorded on every produced hint.</param>
/// <returns>
/// Zero or more hints. For a dotted module both the first segment and the full path are
/// looked up (in that order); lines that are not import statements yield nothing.
/// </returns>
private static IEnumerable<PythonFrameworkHint> DetectImportPatterns(string line, string sourceFile, int lineNumber)
{
    var statement = line.Trim();

    if (statement.StartsWith("import ", StringComparison.Ordinal))
    {
        // "import a, b.c as d" -> one candidate module per comma-separated segment.
        foreach (var segment in statement[7..].Split(','))
        {
            var moduleName = FirstToken(segment);
            if (moduleName is null)
            {
                // Empty segment (e.g. a trailing comma); previously this indexed into an
                // empty array and threw out of the whole scan.
                continue;
            }

            foreach (var hint in Lookup(moduleName, $"import {moduleName}", sourceFile, lineNumber))
            {
                yield return hint;
            }
        }
    }
    else if (statement.StartsWith("from ", StringComparison.Ordinal))
    {
        // The module is the first whitespace-delimited token after "from".
        var moduleName = FirstToken(statement[5..]);
        if (moduleName is not null)
        {
            foreach (var hint in Lookup(moduleName, $"from {moduleName}", sourceFile, lineNumber))
            {
                yield return hint;
            }
        }
    }

    // Returns the first whitespace-delimited token of text, or null when there is none.
    static string? FirstToken(string text)
    {
        var tokens = text.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries);
        return tokens.Length > 0 ? tokens[0] : null;
    }

    // Looks up the base package (text before the first dot) and then the full module path.
    // The base lookup is skipped for undotted and relative (leading-dot) modules so the
    // same table entry is never reported twice for one statement.
    static IEnumerable<PythonFrameworkHint> Lookup(string moduleName, string evidence, string sourceFile, int lineNumber)
    {
        var firstDot = moduleName.IndexOf('.');
        if (firstDot > 0 && ImportPatterns.TryGetValue(moduleName[..firstDot], out var basePackage))
        {
            yield return CreateHint(basePackage.Kind, sourceFile, lineNumber, evidence, basePackage.Confidence);
        }

        if (ImportPatterns.TryGetValue(moduleName, out var exact))
        {
            yield return CreateHint(exact.Kind, sourceFile, lineNumber, evidence, exact.Confidence);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds a hint that is tied to a specific line and carries no extra metadata.
/// </summary>
/// <param name="kind">Detected framework or configuration kind.</param>
/// <param name="sourceFile">File in which the evidence was found.</param>
/// <param name="lineNumber">Line number of the evidence.</param>
/// <param name="evidence">Short description of what matched.</param>
/// <param name="confidence">Confidence assigned to the match.</param>
private static PythonFrameworkHint CreateHint(
    PythonFrameworkKind kind,
    string sourceFile,
    int lineNumber,
    string evidence,
    PythonFrameworkConfidence confidence)
    => new(kind, sourceFile, lineNumber, evidence, confidence);
|
||||
}
|
||||
@@ -0,0 +1,151 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
|
||||
|
||||
/// <summary>
/// Represents a detected framework or configuration hint in a Python project.
/// These are hints/suggestions, not definitive detections.
/// </summary>
/// <param name="Kind">The type of framework or configuration.</param>
/// <param name="SourceFile">The file where this hint was detected.</param>
/// <param name="LineNumber">The line number (if available).</param>
/// <param name="Evidence">The code pattern that indicated this hint.</param>
/// <param name="Confidence">Confidence level for this detection.</param>
/// <param name="Metadata">Additional metadata about the detection.</param>
internal sealed record PythonFrameworkHint(
    PythonFrameworkKind Kind,
    string SourceFile,
    int? LineNumber,
    string Evidence,
    PythonFrameworkConfidence Confidence,
    ImmutableDictionary<string, string>? Metadata = null)
{
    /// <summary>
    /// Gets whether this is a web framework.
    /// </summary>
    public bool IsWebFramework => Kind is
        PythonFrameworkKind.Django or
        PythonFrameworkKind.Flask or
        PythonFrameworkKind.FastAPI or
        PythonFrameworkKind.Starlette or
        PythonFrameworkKind.Tornado or
        PythonFrameworkKind.Bottle or
        PythonFrameworkKind.Pyramid;

    /// <summary>
    /// Gets whether this is a task queue.
    /// </summary>
    public bool IsTaskQueue => Kind is
        PythonFrameworkKind.Celery or
        PythonFrameworkKind.RQ or
        PythonFrameworkKind.Huey or
        PythonFrameworkKind.Dramatiq;

    /// <summary>
    /// Gets whether this is a serverless runtime.
    /// </summary>
    public bool IsServerless => Kind is
        PythonFrameworkKind.AwsLambda or
        PythonFrameworkKind.AzureFunctions or
        PythonFrameworkKind.GoogleCloudFunctions;

    /// <summary>
    /// Gets whether this is a CLI framework.
    /// </summary>
    public bool IsCliFramework => Kind is
        PythonFrameworkKind.Click or
        PythonFrameworkKind.Typer or
        PythonFrameworkKind.Argparse;

    /// <summary>
    /// Generates metadata entries for this hint.
    /// </summary>
    /// <param name="prefix">Prefix placed before every key, followed by a dot.</param>
    /// <returns>
    /// Entries in this order: <c>kind</c>, <c>file</c>, <c>line</c> (only when a line number
    /// is present), <c>evidence</c>, <c>confidence</c>, <c>category</c> (only when the kind
    /// belongs to one of the four categories), then any <see cref="Metadata"/> entries
    /// sorted by key using ordinal comparison.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new($"{prefix}.kind", Kind.ToString());
        yield return new($"{prefix}.file", SourceFile);

        if (LineNumber is { } line)
        {
            // Invariant culture keeps the emitted value independent of the host locale.
            yield return new($"{prefix}.line", line.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        yield return new($"{prefix}.evidence", Evidence);
        yield return new($"{prefix}.confidence", Confidence.ToString());

        // First matching category wins; kinds outside these groups emit no category entry.
        var category =
            IsWebFramework ? "WebFramework" :
            IsTaskQueue ? "TaskQueue" :
            IsServerless ? "Serverless" :
            IsCliFramework ? "CLI" :
            null;

        if (category is not null)
        {
            yield return new($"{prefix}.category", category);
        }

        if (Metadata is not null)
        {
            // ImmutableDictionary enumerates in hash order, which is not stable between
            // processes; sort so the emitted sequence is reproducible.
            foreach (var (key, value) in Metadata.OrderBy(static pair => pair.Key, StringComparer.Ordinal))
            {
                yield return new($"{prefix}.{key}", value);
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Describes an AWS Lambda handler entry point.
/// </summary>
/// <param name="HandlerPath">Handler path in <c>module.function</c> form.</param>
/// <param name="ModulePath">Path of the module file.</param>
/// <param name="FunctionName">Name of the handler function.</param>
/// <param name="Runtime">Detected Python runtime, or <c>null</c> when not available.</param>
internal sealed record PythonLambdaHandler(string HandlerPath, string ModulePath, string FunctionName, string? Runtime = null);
|
||||
|
||||
/// <summary>
/// Describes a Django project's configuration.
/// </summary>
/// <param name="SettingsModule">Path of the settings module.</param>
/// <param name="InstalledApps">Installed apps.</param>
/// <param name="Middlewares">Middleware classes.</param>
/// <param name="RootUrlConf">Root URL configuration module, or <c>null</c> when not set.</param>
internal sealed record PythonDjangoConfig(string SettingsModule, ImmutableArray<string> InstalledApps, ImmutableArray<string> Middlewares, string? RootUrlConf = null);
|
||||
|
||||
/// <summary>
/// Describes a Flask application's configuration.
/// </summary>
/// <param name="AppVariable">Name of the variable holding the Flask app.</param>
/// <param name="ModulePath">Module that contains the app.</param>
/// <param name="Blueprints">Registered blueprints.</param>
internal sealed record PythonFlaskConfig(string AppVariable, string ModulePath, ImmutableArray<string> Blueprints);
|
||||
|
||||
/// <summary>
/// Describes a Celery application's configuration.
/// </summary>
/// <param name="AppVariable">Name of the variable holding the Celery app.</param>
/// <param name="ModulePath">Module that contains the app.</param>
/// <param name="BrokerUrl">Broker URL pattern, or <c>null</c> when none was detected.</param>
/// <param name="Tasks">Discovered task modules.</param>
internal sealed record PythonCeleryConfig(string AppVariable, string ModulePath, string? BrokerUrl, ImmutableArray<string> Tasks);
|
||||
@@ -0,0 +1,186 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
|
||||
|
||||
/// <summary>
/// Kinds of Python frameworks and configurations that can be reported as hints.
/// Member order determines the underlying numeric values; append rather than reorder.
/// </summary>
internal enum PythonFrameworkKind
{
    /// <summary>Framework could not be identified.</summary>
    Unknown,

    // ---- Web frameworks ----

    /// <summary>Django web framework.</summary>
    Django,

    /// <summary>Flask web framework.</summary>
    Flask,

    /// <summary>FastAPI async web framework.</summary>
    FastAPI,

    /// <summary>Starlette ASGI framework.</summary>
    Starlette,

    /// <summary>Tornado async web framework.</summary>
    Tornado,

    /// <summary>Bottle micro framework.</summary>
    Bottle,

    /// <summary>Pyramid web framework.</summary>
    Pyramid,

    // ---- Task queues ----

    /// <summary>Celery distributed task queue.</summary>
    Celery,

    /// <summary>RQ (Redis Queue) task queue.</summary>
    RQ,

    /// <summary>Huey task queue.</summary>
    Huey,

    /// <summary>Dramatiq task queue.</summary>
    Dramatiq,

    // ---- Serverless ----

    /// <summary>AWS Lambda handler.</summary>
    AwsLambda,

    /// <summary>Azure Functions.</summary>
    AzureFunctions,

    /// <summary>Google Cloud Functions.</summary>
    GoogleCloudFunctions,

    // ---- Application servers ----

    /// <summary>Gunicorn WSGI server.</summary>
    Gunicorn,

    /// <summary>uWSGI server.</summary>
    Uwsgi,

    /// <summary>Uvicorn ASGI server.</summary>
    Uvicorn,

    /// <summary>Hypercorn ASGI server.</summary>
    Hypercorn,

    // ---- CLI frameworks ----

    /// <summary>Click CLI framework.</summary>
    Click,

    /// <summary>Typer CLI framework (Click-based).</summary>
    Typer,

    /// <summary>Argparse standard library CLI.</summary>
    Argparse,

    // ---- Testing frameworks ----

    /// <summary>Pytest testing framework.</summary>
    Pytest,

    /// <summary>Unittest standard library.</summary>
    Unittest,

    // ---- Data / ML ----

    /// <summary>Jupyter notebook.</summary>
    Jupyter,

    /// <summary>Streamlit data app.</summary>
    Streamlit,

    /// <summary>Gradio ML demo.</summary>
    Gradio,

    // ---- Configuration ----

    /// <summary>Python logging configuration.</summary>
    LoggingConfig,

    /// <summary>Pydantic settings configuration.</summary>
    PydanticSettings
}
|
||||
|
||||
/// <summary>
/// Confidence level attached to a framework hint.
/// Values are ordered: a larger numeric value means stronger evidence, which allows
/// hints to be ranked by comparing this enum directly.
/// </summary>
internal enum PythonFrameworkConfidence
{
    /// <summary>Weak, heuristic evidence only.</summary>
    Low = 0,

    /// <summary>Suggestive evidence; actual framework usage is not confirmed.</summary>
    Medium = 1,

    /// <summary>A clear usage pattern was detected.</summary>
    High = 2,

    /// <summary>Explicit configuration or initialization was found.</summary>
    Definitive = 3
}
|
||||
@@ -0,0 +1,327 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.RegularExpressions;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
|
||||
|
||||
/// <summary>
|
||||
/// Parses Python project configuration files (pyproject.toml, setup.cfg, setup.py).
|
||||
/// </summary>
|
||||
internal sealed partial class PythonProjectConfigParser
|
||||
{
|
||||
// pyproject.toml patterns.
// RegexOptions.Compiled is not specified because the regex source generator ignores it.
// With RegexOptions.Multiline, '^' anchors at the start of every line.

/// <summary>Matches the <c>[project]</c> table header at the start of a line.</summary>
[GeneratedRegex(@"^\[project\]", RegexOptions.Multiline)]
private static partial Regex PyprojectProjectSection();

/// <summary>Matches the <c>[project.optional-dependencies]</c> table header.</summary>
[GeneratedRegex(@"^\[project\.optional-dependencies\]", RegexOptions.Multiline)]
private static partial Regex PyprojectOptionalDepsSection();

/// <summary>Matches the <c>[tool.poetry.extras]</c> table header.</summary>
[GeneratedRegex(@"^\[tool\.poetry\.extras\]", RegexOptions.Multiline)]
private static partial Regex PoetryExtrasSection();

/// <summary>
/// Matches a <c>[tool.poetry.group.NAME.dependencies]</c> table header and captures NAME
/// in group 1. NAME may contain letters, digits, underscores and hyphens, so group names
/// such as <c>dev-tools</c> are recognised.
/// </summary>
[GeneratedRegex(@"^\[tool\.poetry\.group\.([\w-]+)\.dependencies\]", RegexOptions.Multiline)]
private static partial Regex PoetryGroupSection();

/// <summary>
/// Matches <c>key = [ ... ]</c>, capturing the key in group 1 and the bracket contents in
/// group 2; <see cref="RegexOptions.Singleline"/> lets the contents span several lines.
/// </summary>
// NOTE(review): without RegexOptions.Multiline the leading '^' only matches at the very
// start of the input - confirm callers always pass text that begins with the key.
[GeneratedRegex(@"^(\w+)\s*=\s*\[(.*?)\]", RegexOptions.Singleline)]
private static partial Regex ArrayValuePattern();

/// <summary>Matches a quoted <c>name = "..."</c> line and captures the value in group 1.</summary>
[GeneratedRegex(@"^name\s*=\s*[""']([^""']+)[""']", RegexOptions.Multiline)]
private static partial Regex ProjectNamePattern();

/// <summary>Matches a quoted <c>version = "..."</c> line and captures the value in group 1.</summary>
[GeneratedRegex(@"^version\s*=\s*[""']([^""']+)[""']", RegexOptions.Multiline)]
private static partial Regex ProjectVersionPattern();
|
||||
|
||||
/// <summary>
/// Reads a pyproject.toml file through the virtual file system and parses it.
/// </summary>
/// <param name="vfs">Virtual file system used to open the file.</param>
/// <param name="pyprojectPath">Virtual path of the pyproject.toml file; also recorded on the result.</param>
/// <param name="cancellationToken">Passed to the open and read operations.</param>
/// <returns>The parsed configuration, or <c>null</c> when the file could not be opened.</returns>
public async Task<PythonProjectConfig?> ParsePyprojectAsync(
    PythonVirtualFileSystem vfs,
    string pyprojectPath,
    CancellationToken cancellationToken = default)
{
    using var source = await vfs.OpenReadAsync(pyprojectPath, cancellationToken).ConfigureAwait(false);
    if (source is null)
    {
        return null;
    }

    using var textReader = new StreamReader(source);
    var toml = await textReader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
    return ParsePyprojectContent(toml, pyprojectPath);
}
|
||||
|
||||
/// <summary>
/// Extracts project name, version, optional dependencies, extras and scripts from the
/// text of a pyproject.toml file.
/// </summary>
/// <param name="content">Full text of the file.</param>
/// <param name="filePath">Path recorded on the returned configuration.</param>
/// <returns>
/// A configuration in which name and version are <c>null</c> when no quoted value was
/// found. Extras are listed in discovery order without duplicates:
/// <c>[project.optional-dependencies]</c> keys, then new <c>[tool.poetry.extras]</c> keys,
/// then Poetry dependency group names.
/// </returns>
private static PythonProjectConfig ParsePyprojectContent(string content, string filePath)
{
    // Look for name/version inside the [project] table first so that a "name = ..." key
    // belonging to an earlier, unrelated table is not mistaken for the project name.
    // Fall back to the whole file (covers e.g. [tool.poetry]) when that yields nothing.
    var projectHeader = PyprojectProjectSection().Match(content);
    var projectTable = projectHeader.Success
        ? ExtractSectionContent(content, projectHeader.Index + projectHeader.Length)
        : null;

    var projectName = CaptureFirst(ProjectNamePattern(), projectTable, content);
    var projectVersion = CaptureFirst(ProjectVersionPattern(), projectTable, content);

    // [project.optional-dependencies] is the primary source of extras.
    var optionalDependencies = ParseTable(PyprojectOptionalDepsSection())
        ?? new Dictionary<string, ImmutableArray<string>>();
    var extras = new List<string>(optionalDependencies.Keys);

    // [tool.poetry.extras] only contributes keys that are not already defined.
    if (ParseTable(PoetryExtrasSection()) is { } poetryExtras)
    {
        foreach (var (key, value) in poetryExtras)
        {
            if (optionalDependencies.TryAdd(key, value))
            {
                extras.Add(key);
            }
        }
    }

    // Poetry dependency groups count as extras by name; duplicates are removed below.
    foreach (Match groupMatch in PoetryGroupSection().Matches(content))
    {
        extras.Add(groupMatch.Groups[1].Value);
    }

    var scripts = ParseScriptsSection(content);

    return new PythonProjectConfig(
        FilePath: filePath,
        ProjectName: projectName,
        ProjectVersion: projectVersion,
        OptionalDependencies: optionalDependencies.ToImmutableDictionary(),
        Extras: extras.Distinct().ToImmutableArray(),
        Scripts: scripts.ToImmutableDictionary());

    // Parses the key/array pairs of the table introduced by header, or returns null when
    // the header does not occur in the file.
    Dictionary<string, ImmutableArray<string>>? ParseTable(Regex header)
    {
        var match = header.Match(content);
        return match.Success
            ? ParseOptionalDependencies(ExtractSectionContent(content, match.Index + match.Length))
            : null;
    }

    // Returns capture group 1 of the first match in preferred (when given), otherwise of
    // the first match in fallback, otherwise null.
    static string? CaptureFirst(Regex pattern, string? preferred, string fallback)
    {
        if (preferred is not null)
        {
            var scoped = pattern.Match(preferred);
            if (scoped.Success)
            {
                return scoped.Groups[1].Value;
            }
        }

        var anywhere = pattern.Match(fallback);
        return anywhere.Success ? anywhere.Groups[1].Value : null;
    }
}
|
||||
|
||||
/// <summary>
/// Returns the text from <paramref name="startIndex"/> up to (not including) the next
/// line that begins with <c>[</c>, or to the end of <paramref name="content"/> when no
/// such line follows.
/// </summary>
/// <param name="content">Full file text.</param>
/// <param name="startIndex">Index just past the section header.</param>
/// <returns>The body of the section.</returns>
private static string ExtractSectionContent(string content, int startIndex)
{
    // A newline immediately followed by '[' marks the next section header.
    var sectionEnd = content.IndexOf("\n[", startIndex, StringComparison.Ordinal);
    return sectionEnd >= 0
        ? content[startIndex..sectionEnd]
        : content[startIndex..];
}
|
||||
|
||||
/// <summary>
/// Parses a section body of <c>key = [ ... ]</c> entries into a map from key to the
/// array items. Arrays may be written on one line or spread over several lines.
/// </summary>
/// <param name="sectionContent">Section text without its header line.</param>
/// <returns>
/// One entry per key. A key whose value is not an array is recorded with an empty array.
/// </returns>
private static Dictionary<string, ImmutableArray<string>> ParseOptionalDependencies(string sectionContent)
{
    var result = new Dictionary<string, ImmutableArray<string>>();

    string? currentKey = null;
    var currentValues = new List<string>();
    var inArray = false;

    foreach (var rawLine in sectionContent.Split('\n'))
    {
        // Comments are removed first so that a trailing "# ..." neither hides the closing
        // bracket of an array nor ends up recorded as a dependency.
        var line = StripInlineComment(rawLine).Trim();
        if (line.Length == 0)
        {
            continue;
        }

        if (inArray)
        {
            // Continuation line of a multi-line array; a trailing ']' closes it.
            currentValues.AddRange(ParseArrayValues(line));
            if (line.EndsWith(']'))
            {
                inArray = false;
            }

            continue;
        }

        var separator = line.IndexOf('=');
        if (separator < 0)
        {
            continue;
        }

        // A new key starts: flush the values collected for the previous one.
        if (currentKey is not null)
        {
            result[currentKey] = currentValues.ToImmutableArray();
            currentValues = [];
        }

        currentKey = line[..separator].Trim();
        var value = line[(separator + 1)..].Trim();

        if (value.StartsWith('['))
        {
            currentValues = ParseArrayValues(value);
            // No closing bracket on this line means the array continues below.
            inArray = !value.EndsWith(']');
        }
    }

    if (currentKey is not null)
    {
        result[currentKey] = currentValues.ToImmutableArray();
    }

    return result;

    // Returns the text before the first '#' that is not inside a quoted string.
    static string StripInlineComment(string text)
    {
        var quote = '\0';
        for (var i = 0; i < text.Length; i++)
        {
            var c = text[i];
            if (quote != '\0')
            {
                if (c == '\\' && quote == '"')
                {
                    i++; // skip the escaped character inside a double-quoted string
                }
                else if (c == quote)
                {
                    quote = '\0';
                }
            }
            else if (c is '"' or '\'')
            {
                quote = c;
            }
            else if (c == '#')
            {
                return text[..i];
            }
        }

        return text;
    }
}
|
||||
|
||||
/// <summary>
/// Extracts the items of an array fragment such as <c>["a", "b"]</c> or a single
/// continuation line like <c>"a",</c>.
/// </summary>
/// <param name="value">Array text; surrounding brackets and whitespace are optional.</param>
/// <returns>The items with surrounding quotes and whitespace removed; empty items are dropped.</returns>
private static List<string> ParseArrayValues(string value)
{
    var result = new List<string>();
    var body = value.Trim('[', ']', ' ', '\t');
    if (string.IsNullOrEmpty(body))
    {
        return result;
    }

    // Split on commas that are outside quotes only, so that a requirement such as
    // "django>=3.2,<5.0" stays a single item.
    var itemStart = 0;
    var quote = '\0';
    for (var i = 0; i < body.Length; i++)
    {
        var c = body[i];
        if (quote != '\0')
        {
            if (c == '\\' && quote == '"')
            {
                i++; // skip the escaped character inside a double-quoted string
            }
            else if (c == quote)
            {
                quote = '\0';
            }
        }
        else if (c is '"' or '\'')
        {
            quote = c;
        }
        else if (c == ',')
        {
            AddItem(body[itemStart..i]);
            itemStart = i + 1;
        }
    }

    // itemStart can be past the end when the last character was an escape; clamp it.
    AddItem(body[Math.Min(itemStart, body.Length)..]);
    return result;

    void AddItem(string raw)
    {
        var item = raw.Trim().Trim('"', '\'', ' ');
        if (!string.IsNullOrEmpty(item))
        {
            result.Add(item);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Collects script entries from the <c>[project.scripts]</c> and
/// <c>[tool.poetry.scripts]</c> sections of the given text.
/// </summary>
/// <param name="content">Full file text.</param>
/// <returns>
/// Script name to target. When both sections define the same name, the Poetry entry wins
/// because it is processed last.
/// </returns>
private static Dictionary<string, string> ParseScriptsSection(string content)
{
    var merged = new Dictionary<string, string>();

    string[] headerPatterns =
    [
        @"\[project\.scripts\]",
        @"\[tool\.poetry\.scripts\]"
    ];

    foreach (var headerPattern in headerPatterns)
    {
        var header = Regex.Match(content, headerPattern);
        if (!header.Success)
        {
            continue;
        }

        var body = ExtractSectionContent(content, header.Index + header.Length);
        foreach (var entry in ParseKeyValueSection(body))
        {
            merged[entry.Key] = entry.Value;
        }
    }

    return merged;
}
|
||||
|
||||
/// <summary>
/// Parses simple <c>key = value</c> lines from a section body.
/// </summary>
/// <param name="sectionContent">Section text without its header line.</param>
/// <returns>
/// Key to value, with surrounding quotes removed from the value. Blank lines, comment
/// lines and lines without <c>=</c> are ignored; a repeated key keeps its last value.
/// </returns>
private static Dictionary<string, string> ParseKeyValueSection(string sectionContent)
{
    var result = new Dictionary<string, string>();

    foreach (var rawLine in sectionContent.Split('\n'))
    {
        // Drop comments before splitting so that `name = "pkg:main"  # note` yields
        // `pkg:main` rather than a value polluted with the comment text.
        var line = StripInlineComment(rawLine).Trim();
        if (line.Length == 0)
        {
            continue;
        }

        var separator = line.IndexOf('=');
        if (separator < 0)
        {
            continue;
        }

        var key = line[..separator].Trim();
        var value = line[(separator + 1)..].Trim().Trim('"', '\'');
        result[key] = value;
    }

    return result;

    // Returns the text before the first '#' that is not inside a quoted string.
    static string StripInlineComment(string text)
    {
        var quote = '\0';
        for (var i = 0; i < text.Length; i++)
        {
            var c = text[i];
            if (quote != '\0')
            {
                if (c == '\\' && quote == '"')
                {
                    i++; // skip the escaped character inside a double-quoted string
                }
                else if (c == quote)
                {
                    quote = '\0';
                }
            }
            else if (c is '"' or '\'')
            {
                quote = c;
            }
            else if (c == '#')
            {
                return text[..i];
            }
        }

        return text;
    }
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Parsed Python project configuration.
/// </summary>
/// <param name="FilePath">Path to the configuration file.</param>
/// <param name="ProjectName">Project name, or <c>null</c> when not found.</param>
/// <param name="ProjectVersion">Project version, or <c>null</c> when not found.</param>
/// <param name="OptionalDependencies">Optional dependencies keyed by group name.</param>
/// <param name="Extras">Names of the available extras.</param>
/// <param name="Scripts">Entry point scripts keyed by script name.</param>
internal sealed record PythonProjectConfig(
    string FilePath,
    string? ProjectName,
    string? ProjectVersion,
    ImmutableDictionary<string, ImmutableArray<string>> OptionalDependencies,
    ImmutableArray<string> Extras,
    ImmutableDictionary<string, string> Scripts)
{
    /// <summary>
    /// Lazily produces metadata entries describing this configuration.
    /// </summary>
    /// <param name="prefix">Prefix placed before each key, followed by a dot.</param>
    /// <returns>
    /// Always the path entry; name, version, extras and scripts entries only when present.
    /// Extras and script names are comma-joined.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new KeyValuePair<string, string?>($"{prefix}.path", FilePath);

        if (ProjectName is { } name)
        {
            yield return new KeyValuePair<string, string?>($"{prefix}.name", name);
        }

        if (ProjectVersion is { } version)
        {
            yield return new KeyValuePair<string, string?>($"{prefix}.version", version);
        }

        if (Extras.Length != 0)
        {
            var joinedExtras = string.Join(",", Extras);
            yield return new KeyValuePair<string, string?>($"{prefix}.extras", joinedExtras);
        }

        if (Scripts.Count != 0)
        {
            var joinedScripts = string.Join(",", Scripts.Keys);
            yield return new KeyValuePair<string, string?>($"{prefix}.scripts", joinedScripts);
        }
    }
}
|
||||
@@ -0,0 +1,395 @@
|
||||
using System.Collections.Immutable;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
|
||||
|
||||
/// <summary>
/// Builds AOC-compliant observation documents from analysis results.
/// Results are accumulated through the fluent <c>Add*</c>/<c>Set*</c> methods and
/// snapshotted by <see cref="Build"/>.
/// </summary>
internal sealed class PythonObservationBuilder
{
    private const string SchemaVersion = "python-aoc-v1";

    // Operators that can start the version part of a requirement string.
    private static readonly string[] VersionOperators = [">=", "<=", "==", "!=", ">", "<", "~=", "^"];

    private readonly List<PythonObservationPackage> _packages = [];
    private readonly List<PythonObservationModule> _modules = [];
    private readonly List<PythonObservationEntrypoint> _entrypoints = [];
    private readonly List<PythonObservationDependencyEdge> _dependencyEdges = [];
    private readonly List<PythonObservationImportEdge> _importEdges = [];
    private readonly List<PythonObservationNativeExtension> _nativeExtensions = [];
    private readonly List<PythonObservationFrameworkHint> _frameworks = [];
    private readonly List<PythonObservationWarning> _warnings = [];
    private readonly List<string> _securitySensitiveCapabilities = [];

    private PythonObservationEnvironment? _environment;
    private PythonObservationRuntimeEvidence? _runtimeEvidence;

    private bool _usesProcessExecution;
    private bool _usesNetworkAccess;
    private bool _usesFileSystem;
    private bool _usesCodeExecution;
    private bool _usesDeserialization;
    private bool _usesNativeCode;
    private bool _usesAsyncAwait;
    private bool _usesMultiprocessing;

    /// <summary>
    /// Adds packages from package discovery results, plus one dependency edge per
    /// declared dependency of each package.
    /// </summary>
    /// <param name="packages">Discovered packages.</param>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddPackages(IEnumerable<PythonPackageInfo> packages)
    {
        foreach (var pkg in packages)
        {
            // NOTE(review): Groups and Extras are both filled from pkg.Extras — confirm intended.
            _packages.Add(new PythonObservationPackage(
                Name: pkg.Name,
                Version: pkg.Version ?? "unknown",
                Source: pkg.Kind.ToString(),
                Platform: null,
                IsDirect: pkg.IsDirectDependency,
                InstallerKind: pkg.InstallerTool,
                DistInfoPath: pkg.MetadataPath,
                Groups: pkg.Extras,
                Extras: pkg.Extras));

            foreach (var dep in pkg.Dependencies)
            {
                _dependencyEdges.Add(new PythonObservationDependencyEdge(
                    FromPackage: pkg.Name,
                    ToPackage: ExtractPackageName(dep),
                    VersionConstraint: ExtractVersionConstraint(dep),
                    Extra: null,
                    IsOptional: false));
            }
        }

        return this;
    }

    /// <summary>
    /// Adds modules from import graph analysis.
    /// </summary>
    /// <param name="moduleNodes">Module nodes to record.</param>
    /// <param name="importGraph">
    /// Optional graph used to look up the names imported by each module's file; when
    /// <c>null</c>, modules are recorded with no imports.
    /// </param>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddModules(
        IEnumerable<PythonModuleNode> moduleNodes,
        PythonImportGraph? importGraph = null)
    {
        foreach (var node in moduleNodes)
        {
            var imports = importGraph?.GetImportsForFile(node.VirtualPath ?? "")
                .SelectMany(i => i.ImportedNames)
                .ToImmutableArray() ?? ImmutableArray<string>.Empty;

            _modules.Add(new PythonObservationModule(
                Name: node.ModulePath,
                Type: node.IsPackage ? "package" : "module",
                FilePath: node.VirtualPath ?? "",
                Line: null,
                IsNamespacePackage: node.IsNamespacePackage,
                ParentPackage: ExtractParentPackage(node.ModulePath),
                Imports: imports));
        }

        return this;
    }

    /// <summary>
    /// Adds import edges from the import graph. A missing line number is recorded as 0.
    /// </summary>
    /// <param name="edges">Edges to record.</param>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddImportEdges(IEnumerable<PythonImportEdge> edges)
    {
        foreach (var edge in edges)
        {
            _importEdges.Add(new PythonObservationImportEdge(
                FromModule: edge.From,
                ToModule: edge.To,
                Kind: MapImportKind(edge.Import.Kind),
                Confidence: MapImportConfidence(edge.Import.Confidence),
                ResolvedPath: null,
                SourceFile: edge.Import.SourceFile,
                Line: edge.Import.LineNumber ?? 0,
                ResolverTrace: ImmutableArray<string>.Empty));
        }

        return this;
    }

    /// <summary>
    /// Adds entrypoints from entrypoint discovery. The path falls back to the
    /// entrypoint target when no virtual path is available.
    /// </summary>
    /// <param name="entrypoints">Entrypoints to record.</param>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddEntrypoints(IEnumerable<PythonEntrypoint> entrypoints)
    {
        foreach (var ep in entrypoints)
        {
            _entrypoints.Add(new PythonObservationEntrypoint(
                Path: ep.VirtualPath ?? ep.Target,
                Type: ep.Kind.ToString(),
                Handler: ep.Callable,
                RequiredPackages: ImmutableArray<string>.Empty,
                InvocationContext: ep.InvocationContext.InvocationType.ToString()));
        }

        return this;
    }

    /// <summary>
    /// Adds capabilities from capability detection: sets the matching summary flags and
    /// records each security-sensitive capability kind once.
    /// </summary>
    /// <param name="capabilities">Detected capabilities.</param>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddCapabilities(IEnumerable<PythonCapability> capabilities)
    {
        foreach (var cap in capabilities)
        {
            switch (cap.Kind)
            {
                case PythonCapabilityKind.ProcessExecution:
                    _usesProcessExecution = true;
                    break;
                case PythonCapabilityKind.NetworkAccess:
                    _usesNetworkAccess = true;
                    break;
                case PythonCapabilityKind.FileSystemAccess:
                    _usesFileSystem = true;
                    break;
                case PythonCapabilityKind.CodeExecution:
                    _usesCodeExecution = true;
                    break;
                case PythonCapabilityKind.Deserialization:
                    _usesDeserialization = true;
                    break;
                case PythonCapabilityKind.Ctypes or PythonCapabilityKind.Cffi or PythonCapabilityKind.NativeCodeExecution:
                    _usesNativeCode = true;
                    break;
                case PythonCapabilityKind.AsyncAwait:
                    _usesAsyncAwait = true;
                    break;
                case PythonCapabilityKind.Multiprocessing:
                    _usesMultiprocessing = true;
                    break;
            }

            if (cap.IsSecuritySensitive)
            {
                var kindName = cap.Kind.ToString();
                if (!_securitySensitiveCapabilities.Contains(kindName))
                {
                    _securitySensitiveCapabilities.Add(kindName);
                }
            }
        }

        return this;
    }

    /// <summary>
    /// Adds native extensions from extension scanning and marks the project as using
    /// native code when at least one extension is supplied.
    /// </summary>
    /// <param name="extensions">Extensions to record.</param>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddNativeExtensions(IEnumerable<PythonNativeExtension> extensions)
    {
        foreach (var ext in extensions)
        {
            _nativeExtensions.Add(new PythonObservationNativeExtension(
                ModuleName: ext.ModuleName,
                Path: ext.Path,
                Kind: ext.Kind.ToString(),
                Platform: ext.Platform,
                Architecture: ext.Architecture,
                PackageName: ext.PackageName));

            _usesNativeCode = true;
        }

        return this;
    }

    /// <summary>
    /// Adds framework hints from framework detection. The category is the first match of
    /// web framework, task queue, serverless, CLI; otherwise <c>null</c>.
    /// </summary>
    /// <param name="hints">Hints to record.</param>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddFrameworkHints(IEnumerable<PythonFrameworkHint> hints)
    {
        foreach (var hint in hints)
        {
            string? category = null;
            if (hint.IsWebFramework)
            {
                category = "WebFramework";
            }
            else if (hint.IsTaskQueue)
            {
                category = "TaskQueue";
            }
            else if (hint.IsServerless)
            {
                category = "Serverless";
            }
            else if (hint.IsCliFramework)
            {
                category = "CLI";
            }

            _frameworks.Add(new PythonObservationFrameworkHint(
                Kind: hint.Kind.ToString(),
                SourceFile: hint.SourceFile,
                Line: hint.LineNumber,
                Evidence: hint.Evidence,
                Confidence: MapConfidence(hint.Confidence),
                Category: category));
        }

        return this;
    }

    /// <summary>
    /// Sets environment information, replacing any previously set environment.
    /// Omitted path collections are stored as empty arrays.
    /// </summary>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder SetEnvironment(
        string? pythonVersion,
        IEnumerable<string>? sitePackagesPaths = null,
        IEnumerable<string>? requirementsFiles = null,
        IEnumerable<string>? pyprojectFiles = null,
        string? virtualenvPath = null,
        string? condaPrefix = null,
        bool isContainer = false)
    {
        _environment = new PythonObservationEnvironment(
            PythonVersion: pythonVersion,
            SitePackagesPaths: sitePackagesPaths?.ToImmutableArray() ?? ImmutableArray<string>.Empty,
            VersionSources: ImmutableArray<PythonObservationVersionSource>.Empty,
            RequirementsFiles: requirementsFiles?.ToImmutableArray() ?? ImmutableArray<string>.Empty,
            PyprojectFiles: pyprojectFiles?.ToImmutableArray() ?? ImmutableArray<string>.Empty,
            VirtualenvPath: virtualenvPath,
            CondaPrefix: condaPrefix,
            IsContainer: isContainer);

        return this;
    }

    /// <summary>
    /// Adds a warning.
    /// </summary>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder AddWarning(
        string code,
        string message,
        string? filePath = null,
        int? line = null,
        string severity = "warning")
    {
        _warnings.Add(new PythonObservationWarning(
            Code: code,
            Message: message,
            FilePath: filePath,
            Line: line,
            Severity: severity));

        return this;
    }

    /// <summary>
    /// Sets runtime evidence from optional runtime analysis.
    /// </summary>
    /// <returns>This builder.</returns>
    public PythonObservationBuilder SetRuntimeEvidence(PythonObservationRuntimeEvidence evidence)
    {
        _runtimeEvidence = evidence;
        return this;
    }

    /// <summary>
    /// Builds the final observation document from everything added so far. When no
    /// environment was set, an empty environment is used.
    /// </summary>
    public PythonObservationDocument Build()
    {
        var detectedFrameworks = _frameworks
            .Select(f => f.Kind)
            .Distinct()
            .ToImmutableArray();

        return new PythonObservationDocument(
            Schema: SchemaVersion,
            Packages: _packages.ToImmutableArray(),
            Modules: _modules.ToImmutableArray(),
            Entrypoints: _entrypoints.ToImmutableArray(),
            DependencyEdges: _dependencyEdges.ToImmutableArray(),
            ImportEdges: _importEdges.ToImmutableArray(),
            NativeExtensions: _nativeExtensions.ToImmutableArray(),
            Frameworks: _frameworks.ToImmutableArray(),
            Warnings: _warnings.ToImmutableArray(),
            Environment: _environment ?? new PythonObservationEnvironment(
                PythonVersion: null,
                SitePackagesPaths: ImmutableArray<string>.Empty,
                VersionSources: ImmutableArray<PythonObservationVersionSource>.Empty,
                RequirementsFiles: ImmutableArray<string>.Empty,
                PyprojectFiles: ImmutableArray<string>.Empty,
                VirtualenvPath: null,
                CondaPrefix: null,
                IsContainer: false),
            Capabilities: new PythonObservationCapabilitySummary(
                UsesProcessExecution: _usesProcessExecution,
                UsesNetworkAccess: _usesNetworkAccess,
                UsesFileSystem: _usesFileSystem,
                UsesCodeExecution: _usesCodeExecution,
                UsesDeserialization: _usesDeserialization,
                UsesNativeCode: _usesNativeCode,
                UsesAsyncAwait: _usesAsyncAwait,
                UsesMultiprocessing: _usesMultiprocessing,
                DetectedFrameworks: detectedFrameworks,
                SecuritySensitiveCapabilities: _securitySensitiveCapabilities.ToImmutableArray()),
            RuntimeEvidence: _runtimeEvidence);
    }

    // Both importlib and builtin imports are reported as dynamic; unknown kinds fall back to Import.
    private static PythonObservationImportKind MapImportKind(PythonImportKind kind)
    {
        return kind switch
        {
            PythonImportKind.Import => PythonObservationImportKind.Import,
            PythonImportKind.FromImport => PythonObservationImportKind.FromImport,
            PythonImportKind.RelativeImport => PythonObservationImportKind.RelativeImport,
            PythonImportKind.ImportlibImportModule => PythonObservationImportKind.DynamicImport,
            PythonImportKind.BuiltinImport => PythonObservationImportKind.DynamicImport,
            _ => PythonObservationImportKind.Import
        };
    }

    // Unknown confidence values fall back to Medium.
    private static PythonObservationConfidence MapImportConfidence(PythonImportConfidence confidence)
    {
        return confidence switch
        {
            PythonImportConfidence.Low => PythonObservationConfidence.Low,
            PythonImportConfidence.Medium => PythonObservationConfidence.Medium,
            PythonImportConfidence.High => PythonObservationConfidence.High,
            PythonImportConfidence.Definitive => PythonObservationConfidence.Definitive,
            _ => PythonObservationConfidence.Medium
        };
    }

    // Unknown confidence values fall back to Medium.
    private static PythonObservationConfidence MapConfidence(PythonFrameworkConfidence confidence)
    {
        return confidence switch
        {
            PythonFrameworkConfidence.Low => PythonObservationConfidence.Low,
            PythonFrameworkConfidence.Medium => PythonObservationConfidence.Medium,
            PythonFrameworkConfidence.High => PythonObservationConfidence.High,
            PythonFrameworkConfidence.Definitive => PythonObservationConfidence.Definitive,
            _ => PythonObservationConfidence.Medium
        };
    }

    /// <summary>
    /// Extracts the package name from a dependency spec such as <c>requests&gt;=2.0</c>,
    /// <c>numpy[extra]</c>, <c>requests (&gt;=2.0)</c> or <c>foo; python_version &lt; '3.8'</c>.
    /// </summary>
    private static string ExtractPackageName(string dependency)
    {
        var spec = StripEnvironmentMarker(dependency);
        var end = spec.Length;

        // The name stops at the extras bracket or at a parenthesised version list.
        foreach (var delimiter in new[] { '[', '(' })
        {
            var idx = spec.IndexOf(delimiter);
            if (idx > 0 && idx < end)
            {
                end = idx;
            }
        }

        var opIdx = FindConstraintStart(spec);
        if (opIdx > 0 && opIdx < end)
        {
            end = opIdx;
        }

        return spec[..end].Trim();
    }

    /// <summary>
    /// Extracts the full version constraint (for example <c>&lt;3,&gt;=2</c>) from a
    /// dependency spec, or <c>null</c> when the spec has no version operator.
    /// </summary>
    private static string? ExtractVersionConstraint(string dependency)
    {
        var spec = StripEnvironmentMarker(dependency);

        // Start at the operator that appears first in the text so that multi-clause
        // constraints are returned whole, whatever order the operators are listed in.
        var start = FindConstraintStart(spec);
        if (start < 0)
        {
            return null;
        }

        // A trailing ')' belongs to the parenthesised form "name (>=1.0)".
        return spec[start..].Trim().TrimEnd(')').TrimEnd();
    }

    // Drops the environment marker (everything from ';') so that comparisons inside the
    // marker, e.g. `extra == "x"`, are not mistaken for version operators.
    private static string StripEnvironmentMarker(string dependency)
    {
        var markerIdx = dependency.IndexOf(';');
        return markerIdx >= 0 ? dependency[..markerIdx] : dependency;
    }

    // Returns the smallest index greater than zero at which a version operator first
    // occurs, or -1 when there is none.
    private static int FindConstraintStart(string spec)
    {
        var earliest = -1;
        foreach (var op in VersionOperators)
        {
            var idx = spec.IndexOf(op, StringComparison.Ordinal);
            if (idx > 0 && (earliest < 0 || idx < earliest))
            {
                earliest = idx;
            }
        }

        return earliest;
    }

    // Returns everything before the last dot of a dotted module name, or null for a top-level name.
    private static string? ExtractParentPackage(string moduleName)
    {
        var lastDot = moduleName.LastIndexOf('.');
        return lastDot > 0 ? moduleName[..lastDot] : null;
    }
}
|
||||
@@ -0,0 +1,231 @@
|
||||
using System.Collections.Immutable;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
|
||||
|
||||
/// <summary>
/// AOC-compliant observation document for a Python project analysis: packages, modules,
/// entrypoints, dependency and import edges, capabilities, warnings and, optionally,
/// runtime evidence.
/// </summary>
/// <param name="Schema">Schema identifier of the document.</param>
/// <param name="Packages">Observed packages.</param>
/// <param name="Modules">Observed modules.</param>
/// <param name="Entrypoints">Observed entrypoints.</param>
/// <param name="DependencyEdges">Package-to-package dependency edges.</param>
/// <param name="ImportEdges">Module-to-module import edges.</param>
/// <param name="NativeExtensions">Observed native extensions.</param>
/// <param name="Frameworks">Framework hints.</param>
/// <param name="Warnings">Analysis warnings.</param>
/// <param name="Environment">Environment profile.</param>
/// <param name="Capabilities">Capability summary.</param>
/// <param name="RuntimeEvidence">Runtime evidence, when available.</param>
internal sealed record PythonObservationDocument(
    string Schema,
    ImmutableArray<PythonObservationPackage> Packages,
    ImmutableArray<PythonObservationModule> Modules,
    ImmutableArray<PythonObservationEntrypoint> Entrypoints,
    ImmutableArray<PythonObservationDependencyEdge> DependencyEdges,
    ImmutableArray<PythonObservationImportEdge> ImportEdges,
    ImmutableArray<PythonObservationNativeExtension> NativeExtensions,
    ImmutableArray<PythonObservationFrameworkHint> Frameworks,
    ImmutableArray<PythonObservationWarning> Warnings,
    PythonObservationEnvironment Environment,
    PythonObservationCapabilitySummary Capabilities,
    PythonObservationRuntimeEvidence? RuntimeEvidence = null);
|
||||
|
||||
/// <summary>
/// A Python package detected in the project (from pip, conda, or other package managers).
/// </summary>
/// <param name="Name">Package name.</param>
/// <param name="Version">Package version.</param>
/// <param name="Source">Where the package was discovered.</param>
/// <param name="Platform">Platform tag, if any.</param>
/// <param name="IsDirect">Whether the package is a direct dependency.</param>
/// <param name="InstallerKind">Installer that placed the package, if known.</param>
/// <param name="DistInfoPath">Path to the package metadata, if known.</param>
/// <param name="Groups">Dependency groups the package belongs to.</param>
/// <param name="Extras">Extras associated with the package.</param>
internal sealed record PythonObservationPackage(
    string Name,
    string Version,
    string Source,
    string? Platform,
    bool IsDirect,
    string? InstallerKind,
    string? DistInfoPath,
    ImmutableArray<string> Groups,
    ImmutableArray<string> Extras);
|
||||
|
||||
/// <summary>
/// A Python module or package detected in the project.
/// </summary>
/// <param name="Name">Dotted module name.</param>
/// <param name="Type">Module type label.</param>
/// <param name="FilePath">Path of the module file.</param>
/// <param name="Line">Line number, if applicable.</param>
/// <param name="IsNamespacePackage">Whether this is a namespace package.</param>
/// <param name="ParentPackage">Containing package, if any.</param>
/// <param name="Imports">Names imported by the module.</param>
internal sealed record PythonObservationModule(
    string Name,
    string Type,
    string FilePath,
    int? Line,
    bool IsNamespacePackage,
    string? ParentPackage,
    ImmutableArray<string> Imports);
|
||||
|
||||
/// <summary>
/// An entrypoint detected in the Python project.
/// </summary>
/// <param name="Path">Path or target of the entrypoint.</param>
/// <param name="Type">Entrypoint type label.</param>
/// <param name="Handler">Callable handling the entrypoint, if known.</param>
/// <param name="RequiredPackages">Packages the entrypoint requires.</param>
/// <param name="InvocationContext">How the entrypoint is invoked, if known.</param>
internal sealed record PythonObservationEntrypoint(
    string Path,
    string Type,
    string? Handler,
    ImmutableArray<string> RequiredPackages,
    string? InvocationContext);
|
||||
|
||||
/// <summary>
/// A package dependency edge (declared in requirements or pyproject).
/// </summary>
/// <param name="FromPackage">Package that declares the dependency.</param>
/// <param name="ToPackage">Package depended upon.</param>
/// <param name="VersionConstraint">Version constraint, if any.</param>
/// <param name="Extra">Extra that activates the dependency, if any.</param>
/// <param name="IsOptional">Whether the dependency is optional.</param>
internal sealed record PythonObservationDependencyEdge(
    string FromPackage,
    string ToPackage,
    string? VersionConstraint,
    string? Extra,
    bool IsOptional);
|
||||
|
||||
/// <summary>
/// An import edge between modules, with import kind and confidence.
/// </summary>
/// <param name="FromModule">Importing module.</param>
/// <param name="ToModule">Imported module.</param>
/// <param name="Kind">Kind of import.</param>
/// <param name="Confidence">Confidence in the edge.</param>
/// <param name="ResolvedPath">Resolved file path, if any.</param>
/// <param name="SourceFile">File containing the import.</param>
/// <param name="Line">Line number of the import.</param>
/// <param name="ResolverTrace">Resolver trace entries.</param>
internal sealed record PythonObservationImportEdge(
    string FromModule,
    string ToModule,
    PythonObservationImportKind Kind,
    PythonObservationConfidence Confidence,
    string? ResolvedPath,
    string SourceFile,
    int Line,
    ImmutableArray<string> ResolverTrace);
|
||||
|
||||
/// <summary>
/// Kinds of import edge.
/// </summary>
internal enum PythonObservationImportKind
{
    /// <summary>Standard import statement.</summary>
    Import = 0,

    /// <summary>From X import Y statement.</summary>
    FromImport = 1,

    /// <summary>Relative import within package.</summary>
    RelativeImport = 2,

    /// <summary>Dynamic import via importlib.</summary>
    DynamicImport = 3,

    /// <summary>Namespace package implicit import.</summary>
    NamespacePackage = 4,

    /// <summary>Native extension load.</summary>
    NativeExtension = 5,

    /// <summary>Heuristic/hint-based import (not definitively resolved).</summary>
    Hint = 6
}
|
||||
|
||||
/// <summary>
/// Confidence level attached to an observation, ordered from weakest to strongest.
/// </summary>
internal enum PythonObservationConfidence
{
    /// <summary>Low confidence - heuristic match.</summary>
    Low = 0,

    /// <summary>Medium confidence - likely correct.</summary>
    Medium = 1,

    /// <summary>High confidence - clear evidence.</summary>
    High = 2,

    /// <summary>Definitive - direct evidence found.</summary>
    Definitive = 3
}
|
||||
|
||||
/// <summary>
/// A native extension detected in the project.
/// </summary>
/// <param name="ModuleName">Module name of the extension.</param>
/// <param name="Path">Path of the extension file.</param>
/// <param name="Kind">Extension kind label.</param>
/// <param name="Platform">Target platform, if known.</param>
/// <param name="Architecture">Target architecture, if known.</param>
/// <param name="PackageName">Owning package, if known.</param>
internal sealed record PythonObservationNativeExtension(
    string ModuleName,
    string Path,
    string Kind,
    string? Platform,
    string? Architecture,
    string? PackageName);
|
||||
|
||||
/// <summary>
/// A framework hint detected in the project.
/// </summary>
/// <param name="Kind">Framework kind label.</param>
/// <param name="SourceFile">File in which the hint was found.</param>
/// <param name="Line">Line number, if known.</param>
/// <param name="Evidence">Evidence supporting the hint.</param>
/// <param name="Confidence">Confidence in the hint.</param>
/// <param name="Category">Framework category, if any.</param>
internal sealed record PythonObservationFrameworkHint(
    string Kind,
    string SourceFile,
    int? Line,
    string Evidence,
    PythonObservationConfidence Confidence,
    string? Category);
|
||||
|
||||
/// <summary>
/// An analysis warning generated during scanning.
/// </summary>
/// <param name="Code">Warning code.</param>
/// <param name="Message">Warning message.</param>
/// <param name="FilePath">File the warning refers to, if any.</param>
/// <param name="Line">Line the warning refers to, if any.</param>
/// <param name="Severity">Severity label.</param>
internal sealed record PythonObservationWarning(
    string Code,
    string Message,
    string? FilePath,
    int? Line,
    string Severity);
|
||||
|
||||
/// <summary>
/// Environment profile: Python version, package manager settings, and paths.
/// </summary>
/// <param name="PythonVersion">Python version, if known.</param>
/// <param name="SitePackagesPaths">Site-packages directories.</param>
/// <param name="VersionSources">Sources the Python version was derived from.</param>
/// <param name="RequirementsFiles">Requirements files found.</param>
/// <param name="PyprojectFiles">Pyproject files found.</param>
/// <param name="VirtualenvPath">Virtualenv path, if any.</param>
/// <param name="CondaPrefix">Conda prefix, if any.</param>
/// <param name="IsContainer">Whether the environment is a container.</param>
internal sealed record PythonObservationEnvironment(
    string? PythonVersion,
    ImmutableArray<string> SitePackagesPaths,
    ImmutableArray<PythonObservationVersionSource> VersionSources,
    ImmutableArray<string> RequirementsFiles,
    ImmutableArray<string> PyprojectFiles,
    string? VirtualenvPath,
    string? CondaPrefix,
    bool IsContainer);
|
||||
|
||||
/// <summary>
/// A Python version together with its provenance.
/// </summary>
/// <param name="Version">Version value, if one was found.</param>
/// <param name="Source">Where the version came from.</param>
/// <param name="SourceType">Type label of the source.</param>
internal sealed record PythonObservationVersionSource(
    string? Version,
    string Source,
    string SourceType);
|
||||
|
||||
/// <summary>
/// Capability summary for the Python project.
/// </summary>
/// <param name="UsesProcessExecution">Process execution was detected.</param>
/// <param name="UsesNetworkAccess">Network access was detected.</param>
/// <param name="UsesFileSystem">File system access was detected.</param>
/// <param name="UsesCodeExecution">Code execution was detected.</param>
/// <param name="UsesDeserialization">Deserialization was detected.</param>
/// <param name="UsesNativeCode">Native code use was detected.</param>
/// <param name="UsesAsyncAwait">Async/await use was detected.</param>
/// <param name="UsesMultiprocessing">Multiprocessing use was detected.</param>
/// <param name="DetectedFrameworks">Distinct framework kinds detected.</param>
/// <param name="SecuritySensitiveCapabilities">Capability kinds flagged as security sensitive.</param>
internal sealed record PythonObservationCapabilitySummary(
    bool UsesProcessExecution,
    bool UsesNetworkAccess,
    bool UsesFileSystem,
    bool UsesCodeExecution,
    bool UsesDeserialization,
    bool UsesNativeCode,
    bool UsesAsyncAwait,
    bool UsesMultiprocessing,
    ImmutableArray<string> DetectedFrameworks,
    ImmutableArray<string> SecuritySensitiveCapabilities);
|
||||
|
||||
/// <summary>
/// Optional runtime evidence section for Python.
/// </summary>
/// <param name="HasEvidence">Whether any runtime evidence is present.</param>
/// <param name="RuntimePythonVersion">Python version seen at runtime, if any.</param>
/// <param name="RuntimePlatform">Platform seen at runtime, if any.</param>
/// <param name="LoadedModulesCount">Number of loaded modules.</param>
/// <param name="LoadedPackages">Packages loaded at runtime.</param>
/// <param name="LoadedModules">Modules loaded at runtime.</param>
/// <param name="PathHashes">Hashes keyed by path.</param>
/// <param name="RuntimeCapabilities">Capabilities seen at runtime.</param>
/// <param name="Errors">Errors captured at runtime.</param>
internal sealed record PythonObservationRuntimeEvidence(
    bool HasEvidence,
    string? RuntimePythonVersion,
    string? RuntimePlatform,
    int LoadedModulesCount,
    ImmutableArray<string> LoadedPackages,
    ImmutableArray<string> LoadedModules,
    ImmutableDictionary<string, string> PathHashes,
    ImmutableArray<string> RuntimeCapabilities,
    ImmutableArray<PythonObservationRuntimeError> Errors)
{
    /// <summary>
    /// Shared instance with no evidence: flag false, counts zero, collections empty.
    /// </summary>
    public static PythonObservationRuntimeEvidence Empty { get; } = new PythonObservationRuntimeEvidence(
        HasEvidence: false,
        RuntimePythonVersion: null,
        RuntimePlatform: null,
        LoadedModulesCount: 0,
        LoadedPackages: ImmutableArray<string>.Empty,
        LoadedModules: ImmutableArray<string>.Empty,
        PathHashes: ImmutableDictionary<string, string>.Empty,
        RuntimeCapabilities: ImmutableArray<string>.Empty,
        Errors: ImmutableArray<PythonObservationRuntimeError>.Empty);
}
|
||||
|
||||
/// <summary>
/// A runtime error captured during execution.
/// </summary>
/// <param name="Timestamp">Timestamp of the error, as text.</param>
/// <param name="Message">Error message.</param>
/// <param name="Path">Related path, if any.</param>
/// <param name="PathSha256">SHA-256 of the related path, if any.</param>
internal sealed record PythonObservationRuntimeError(
    string Timestamp,
    string Message,
    string? Path,
    string? PathSha256);
|
||||
@@ -0,0 +1,73 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
|
||||
|
||||
/// <summary>
/// JSON serialization and deserialization of Python observation documents.
/// Property names and enum values are written in camelCase and null values are omitted.
/// </summary>
internal static class PythonObservationSerializer
{
    private static readonly JsonSerializerOptions SerializerOptions = CreateOptions(writeIndented: true);

    private static readonly JsonSerializerOptions CompactSerializerOptions = CreateOptions(writeIndented: false);

    /// <summary>
    /// Serializes the observation document to a JSON string.
    /// </summary>
    /// <param name="document">Document to serialize.</param>
    /// <param name="compact">When <c>true</c>, output is not indented.</param>
    public static string Serialize(PythonObservationDocument document, bool compact = false)
        => JsonSerializer.Serialize(document, SelectOptions(compact));

    /// <summary>
    /// Serializes the observation document to a stream.
    /// </summary>
    /// <param name="document">Document to serialize.</param>
    /// <param name="stream">Destination stream.</param>
    /// <param name="compact">When <c>true</c>, output is not indented.</param>
    /// <param name="cancellationToken">Token used to cancel the write.</param>
    public static async Task SerializeAsync(
        PythonObservationDocument document,
        Stream stream,
        bool compact = false,
        CancellationToken cancellationToken = default)
    {
        await JsonSerializer
            .SerializeAsync(stream, document, SelectOptions(compact), cancellationToken)
            .ConfigureAwait(false);
    }

    /// <summary>
    /// Deserializes a JSON string to an observation document.
    /// </summary>
    public static PythonObservationDocument? Deserialize(string json)
        => JsonSerializer.Deserialize<PythonObservationDocument>(json, SerializerOptions);

    /// <summary>
    /// Deserializes a stream to an observation document.
    /// </summary>
    public static async Task<PythonObservationDocument?> DeserializeAsync(
        Stream stream,
        CancellationToken cancellationToken = default)
    {
        var document = await JsonSerializer
            .DeserializeAsync<PythonObservationDocument>(stream, SerializerOptions, cancellationToken)
            .ConfigureAwait(false);
        return document;
    }

    // Picks the indented or compact options instance.
    private static JsonSerializerOptions SelectOptions(bool compact)
        => compact ? CompactSerializerOptions : SerializerOptions;

    // Both option sets are identical apart from indentation.
    private static JsonSerializerOptions CreateOptions(bool writeIndented)
    {
        var options = new JsonSerializerOptions
        {
            PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
            WriteIndented = writeIndented,
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        };
        options.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.CamelCase));
        return options;
    }
}
|
||||
@@ -0,0 +1,528 @@
|
||||
using System.IO.Compression;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
/// <summary>
|
||||
/// Analyzes Python zipapp archives (.pyz, .pyzw) for runtime information,
|
||||
/// entry points, and startup behavior.
|
||||
/// </summary>
|
||||
internal static partial class PythonZipappAdapter
|
||||
{
|
||||
private static readonly string[] LayerRootCandidates = { "layers", ".layers", "layer" };
|
||||
|
||||
/// <summary>
/// Discovers zipapp files under the root path and under each container layer root,
/// including the <c>app</c>, <c>opt</c> and <c>usr/local/bin</c> directories of every
/// layer when they exist.
/// </summary>
/// <param name="rootPath">Workspace root to search.</param>
/// <returns>Discovered paths, de-duplicated case-insensitively and sorted ordinally.</returns>
public static IReadOnlyCollection<string> DiscoverZipapps(string rootPath)
{
    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    DiscoverInDirectory(rootPath, found);

    foreach (var layerRoot in EnumerateLayerRoots(rootPath))
    {
        DiscoverInDirectory(layerRoot, found);

        // Common install locations inside a layer.
        string[] wellKnownDirectories =
        [
            Path.Combine(layerRoot, "app"),
            Path.Combine(layerRoot, "opt"),
            Path.Combine(layerRoot, "usr", "local", "bin"),
        ];

        foreach (var directory in wellKnownDirectories)
        {
            if (Directory.Exists(directory))
            {
                DiscoverInDirectory(directory, found);
            }
        }
    }

    var ordered = found.ToArray();
    Array.Sort(ordered, StringComparer.Ordinal);
    return ordered;
}
|
||||
|
||||
/// <summary>
/// Analyzes a zipapp archive for runtime information: shebang, Python version hint,
/// entry module, embedded <c>requirements.txt</c> dependencies and warnings.
/// </summary>
/// <param name="zipappPath">Path to the <c>.pyz</c> / <c>.pyzw</c> file.</param>
/// <returns>
/// The analysis result, or <c>null</c> when the file does not exist, cannot be read
/// (I/O error or access denied) or is not a valid zip archive.
/// </returns>
public static PythonZipappInfo? AnalyzeZipapp(string zipappPath)
{
    const string nestedMainSuffix = "/__main__.py";

    if (!File.Exists(zipappPath))
    {
        return null;
    }

    try
    {
        var shebang = ExtractShebang(zipappPath);
        var pythonVersion = shebang != null ? ParsePythonVersionFromShebang(shebang) : null;
        var hasMain = false;
        string? entryModule = null;
        var warnings = new List<string>();
        var dependencies = new List<string>();

        // Open as zip archive to inspect contents
        using var stream = File.OpenRead(zipappPath);

        // Skip the shebang line if present; otherwise rewind to the start.
        if (stream.ReadByte() == '#')
        {
            int next;
            do
            {
                next = stream.ReadByte();
            }
            while (next != '\n' && next != -1);
        }
        else
        {
            stream.Position = 0;
        }

        using var archive = new ZipArchive(stream, ZipArchiveMode.Read);

        foreach (var entry in archive.Entries)
        {
            var name = entry.FullName.Replace('\\', '/');

            if (string.Equals(name, "__main__.py", StringComparison.OrdinalIgnoreCase))
            {
                hasMain = true;
                entryModule = TryExtractEntryModule(entry);
            }
            else if (name.EndsWith(nestedMainSuffix, StringComparison.OrdinalIgnoreCase))
            {
                // Package with its own __main__.py. The package path is derived by slicing
                // the entry name rather than via Path.GetDirectoryName, which rewrites '/'
                // to '\' on Windows and would leave the name un-dotted.
                var package = name[..^nestedMainSuffix.Length].Replace('/', '.');
                if (package.Length > 0)
                {
                    entryModule ??= package;
                }
            }
            else if (name.EndsWith("/requirements.txt", StringComparison.OrdinalIgnoreCase) ||
                     string.Equals(name, "requirements.txt", StringComparison.OrdinalIgnoreCase))
            {
                dependencies.AddRange(ExtractRequirements(entry));
            }
        }

        // Generate warnings.
        // A root-level __init__.py does not make the archive runnable, so the warning
        // depends only on the presence of the root __main__.py.
        if (!hasMain)
        {
            warnings.Add("Zipapp missing __main__.py; may not be directly executable");
        }

        if (shebang != null && shebang.Contains("/env ", StringComparison.OrdinalIgnoreCase))
        {
            warnings.Add("Zipapp uses /usr/bin/env shebang; Python version may vary by environment");
        }

        var isWindows = zipappPath.EndsWith(".pyzw", StringComparison.OrdinalIgnoreCase);
        if (isWindows)
        {
            warnings.Add("Zipapp is Windows-specific (.pyzw); uses pythonw.exe without console");
        }

        return new PythonZipappInfo(
            Path: zipappPath,
            FileName: Path.GetFileName(zipappPath),
            Shebang: shebang,
            PythonVersion: pythonVersion,
            HasMainPy: hasMain,
            EntryModule: entryModule,
            IsWindowsApp: isWindows,
            EmbeddedDependencies: dependencies,
            Warnings: warnings);
    }
    catch (Exception ex) when (ex is IOException or InvalidDataException or UnauthorizedAccessException)
    {
        // Best-effort analysis: unreadable or corrupt archives are reported as "no result",
        // matching how directory discovery treats inaccessible locations.
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Discovers and analyzes every zipapp under <paramref name="rootPath"/>.
/// </summary>
/// <param name="rootPath">The workspace root to search.</param>
/// <returns>
/// The successfully analyzed zipapps together with their combined warnings; an extra
/// warning is appended when more than one zipapp was analyzed.
/// </returns>
public static PythonZipappAnalysis AnalyzeAll(string rootPath)
{
    // Archives that could not be analyzed yield null and are dropped.
    var analyzed = DiscoverZipapps(rootPath)
        .Select(AnalyzeZipapp)
        .OfType<PythonZipappInfo>()
        .ToList();

    var combinedWarnings = analyzed
        .SelectMany(static info => info.Warnings)
        .ToList();

    if (analyzed.Count > 1)
    {
        combinedWarnings.Add($"Multiple zipapps detected ({analyzed.Count}); entry point resolution may be ambiguous");
    }

    return new PythonZipappAnalysis(analyzed, combinedWarnings);
}
|
||||
|
||||
/// <summary>
/// Reads the shebang line of a file, if it has one.
/// </summary>
/// <param name="path">The file to inspect.</param>
/// <returns>
/// The trimmed text after <c>#!</c> on the first line, or <c>null</c> when the file does not
/// begin with the two bytes <c>#!</c> or cannot be read.
/// </returns>
private static string? ExtractShebang(string path)
{
    try
    {
        using var stream = File.OpenRead(path);

        // Check the raw two-byte marker first so that archives without a shebang
        // (which start with binary zip data) are never decoded as a text line.
        if (stream.ReadByte() != '#' || stream.ReadByte() != '!')
        {
            return null;
        }

        using var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: false, leaveOpen: true);

        // ReadLine returns null when the file ends right after "#!"; treat that as an empty shebang.
        return (reader.ReadLine() ?? string.Empty).Trim();
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Derives a Python version hint from a shebang line.
/// </summary>
/// <param name="shebang">The shebang text, e.g. <c>/usr/bin/python3.11</c> or <c>/usr/bin/env python3.10</c>.</param>
/// <returns>
/// The <c>major.minor</c> version when the interpreter name carries one, <c>"3"</c> for a bare
/// <c>python3</c>, and <c>null</c> otherwise (a plain <c>python</c> could be Python 2 or 3).
/// </returns>
private static string? ParsePythonVersionFromShebang(string shebang)
{
    var versioned = PythonVersionPattern().Match(shebang);
    if (versioned.Success)
    {
        return versioned.Groups["version"].Value;
    }

    return shebang.Contains("python3", StringComparison.OrdinalIgnoreCase)
        ? "3"
        : null;
}
|
||||
|
||||
/// <summary>
/// Inspects the content of a <c>__main__.py</c> entry to guess which module it launches.
/// </summary>
/// <param name="entry">The archive entry holding <c>__main__.py</c>.</param>
/// <returns>
/// The module named in a <c>runpy.run_module('…')</c> call if present; otherwise the module of
/// the first <c>from … import</c> statement that is not a <c>__future__</c> import; otherwise
/// <c>null</c>. Also <c>null</c> when the entry cannot be read.
/// </returns>
private static string? TryExtractEntryModule(ZipArchiveEntry entry)
{
    try
    {
        using var stream = entry.Open();
        using var reader = new StreamReader(stream);
        var content = reader.ReadToEnd();

        // Look for common patterns:
        //   runpy.run_module('package')
        //   from package.module import main
        var runpyMatch = RunpyPattern().Match(content);
        if (runpyMatch.Success)
        {
            return runpyMatch.Groups["module"].Value;
        }

        foreach (Match candidate in FromImportPattern().Matches(content))
        {
            var module = candidate.Groups["module"].Value;

            // "from __future__ import …" is a compiler directive, not the application entry point.
            if (!string.Equals(module, "__future__", StringComparison.Ordinal))
            {
                return module;
            }
        }

        return null;
    }
    catch (IOException)
    {
        return null;
    }
}
|
||||
|
||||
/// <summary>
/// Collects package names from a <c>requirements.txt</c> archive entry.
/// </summary>
/// <param name="entry">The archive entry to read.</param>
/// <returns>
/// The leading name token of every line that is not blank and does not start with
/// <c>#</c> or <c>-</c>. Empty when the entry cannot be read.
/// </returns>
private static List<string> ExtractRequirements(ZipArchiveEntry entry)
{
    var packageNames = new List<string>();

    try
    {
        using var stream = entry.Open();
        using var reader = new StreamReader(stream);

        foreach (var rawLine in reader.ReadToEnd().Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            var requirement = rawLine.Trim();

            // Skip blanks, comments and pip options.
            if (requirement.Length == 0 || requirement[0] is '#' or '-')
            {
                continue;
            }

            // Keep only the package name (text before any version specifier).
            if (PackageNamePattern().Match(requirement) is { Success: true } nameMatch)
            {
                packageNames.Add(nameMatch.Groups["name"].Value);
            }
        }
    }
    catch (IOException)
    {
        // Ignore read errors
    }

    return packageNames;
}
|
||||
|
||||
/// <summary>
/// Adds the zipapps found directly in <paramref name="directory"/> and then descends into
/// each of its subdirectories (up to three further levels). I/O and access errors are ignored.
/// </summary>
/// <param name="directory">The directory to search; a missing directory is a no-op.</param>
/// <param name="discovered">The set receiving discovered file paths.</param>
private static void DiscoverInDirectory(string directory, HashSet<string> discovered)
{
    if (!Directory.Exists(directory))
    {
        return;
    }

    try
    {
        AddZipappFiles(directory, discovered);

        foreach (var child in Directory.EnumerateDirectories(directory))
        {
            DiscoverInSubdirectory(child, discovered, 1);
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Ignore
    }
}

/// <summary>
/// Recursive step of the discovery: collects zipapps in <paramref name="directory"/> and
/// recurses into its subdirectories, skipping dot-directories, <c>node_modules</c>,
/// <c>__pycache__</c> and <c>venv</c>. Stops once <paramref name="depth"/> exceeds 3.
/// </summary>
/// <param name="directory">The directory to search.</param>
/// <param name="discovered">The set receiving discovered file paths.</param>
/// <param name="depth">Current depth below the directory the search started from.</param>
private static void DiscoverInSubdirectory(string directory, HashSet<string> discovered, int depth)
{
    if (depth > 3)
    {
        return;
    }

    try
    {
        AddZipappFiles(directory, discovered);

        foreach (var child in Directory.EnumerateDirectories(directory))
        {
            if (IsSkippedDirectoryName(Path.GetFileName(child)))
            {
                continue;
            }

            DiscoverInSubdirectory(child, discovered, depth + 1);
        }
    }
    catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
    {
        // Ignore
    }
}

/// <summary>Adds every <c>*.pyz</c> and <c>*.pyzw</c> file directly inside <paramref name="directory"/>.</summary>
private static void AddZipappFiles(string directory, HashSet<string> discovered)
{
    foreach (var pattern in new[] { "*.pyz", "*.pyzw" })
    {
        foreach (var file in Directory.EnumerateFiles(directory, pattern))
        {
            discovered.Add(file);
        }
    }
}

/// <summary>Tells whether a subdirectory name is excluded from the recursive search.</summary>
private static bool IsSkippedDirectoryName(string dirName)
{
    // The leading-dot test also covers ".venv".
    return dirName.StartsWith('.')
        || string.Equals(dirName, "node_modules", StringComparison.OrdinalIgnoreCase)
        || string.Equals(dirName, "__pycache__", StringComparison.OrdinalIgnoreCase)
        || string.Equals(dirName, "venv", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Enumerates container layer roots below the workspace.
/// </summary>
/// <param name="workspaceRoot">The workspace root containing the layer folders.</param>
/// <returns>
/// For each existing folder named in <c>LayerRootCandidates</c>, one path per child directory:
/// its <c>fs</c> subdirectory when that exists, otherwise the child directory itself.
/// Layer folders that cannot be listed are skipped.
/// </returns>
private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
{
    foreach (var candidate in LayerRootCandidates)
    {
        var root = Path.Combine(workspaceRoot, candidate);
        if (!Directory.Exists(root))
        {
            continue;
        }

        // List eagerly: a lazy enumeration can still fail while being iterated, which would
        // happen outside this try block (C# does not allow 'yield' inside try/catch).
        string[] layerDirectories;
        try
        {
            layerDirectories = Directory.GetDirectories(root);
        }
        catch (IOException)
        {
            continue;
        }
        catch (UnauthorizedAccessException)
        {
            continue;
        }

        foreach (var layerDirectory in layerDirectories)
        {
            var fsDirectory = Path.Combine(layerDirectory, "fs");
            yield return Directory.Exists(fsDirectory) ? fsDirectory : layerDirectory;
        }
    }
}
|
||||
|
||||
[GeneratedRegex(@"python(?<version>\d+\.\d+)", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex PythonVersionPattern();
|
||||
|
||||
[GeneratedRegex(@"runpy\.run_module\(['""](?<module>[^'""]+)['""]", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex RunpyPattern();
|
||||
|
||||
[GeneratedRegex(@"from\s+(?<module>[\w.]+)\s+import", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex FromImportPattern();
|
||||
|
||||
[GeneratedRegex(@"^(?<name>[\w\-_.]+)", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex PackageNamePattern();
|
||||
}
|
||||
|
||||
/// <summary>
/// Describes a single analyzed Python zipapp archive.
/// </summary>
/// <param name="Path">Path of the zipapp file.</param>
/// <param name="FileName">File name portion of <paramref name="Path"/>.</param>
/// <param name="Shebang">Shebang text without the leading <c>#!</c>, if any.</param>
/// <param name="PythonVersion">Python version hint derived from the shebang, if any.</param>
/// <param name="HasMainPy">Whether the archive has a root <c>__main__.py</c>.</param>
/// <param name="EntryModule">Detected entry module, if any.</param>
/// <param name="IsWindowsApp">Whether the file uses the <c>.pyzw</c> extension.</param>
/// <param name="EmbeddedDependencies">Package names read from embedded <c>requirements.txt</c> files.</param>
/// <param name="Warnings">Warnings produced while analyzing the archive.</param>
internal sealed record PythonZipappInfo(
    string Path,
    string FileName,
    string? Shebang,
    string? PythonVersion,
    bool HasMainPy,
    string? EntryModule,
    bool IsWindowsApp,
    IReadOnlyCollection<string> EmbeddedDependencies,
    IReadOnlyCollection<string> Warnings)
{
    /// <summary>
    /// Flattens this record into <c>zipapp.*</c> metadata pairs. Path and main-presence are
    /// always emitted; the remaining keys only when they carry information.
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var metadata = new List<KeyValuePair<string, string?>>();

        void Put(string key, string? value) => metadata.Add(new KeyValuePair<string, string?>(key, value));

        Put("zipapp.path", Path);
        Put("zipapp.hasMain", HasMainPy ? "true" : "false");

        if (Shebang is not null)
        {
            Put("zipapp.shebang", Shebang);
        }

        if (PythonVersion is not null)
        {
            Put("zipapp.pythonVersion", PythonVersion);
        }

        if (EntryModule is not null)
        {
            Put("zipapp.entryModule", EntryModule);
        }

        if (IsWindowsApp)
        {
            Put("zipapp.windowsApp", "true");
        }

        var dependencyCount = EmbeddedDependencies.Count;
        if (dependencyCount > 0)
        {
            Put("zipapp.embeddedDeps.count", dependencyCount.ToString());
        }

        return metadata;
    }
}
|
||||
|
||||
/// <summary>
/// Analysis results for all zipapps in a workspace.
/// </summary>
internal sealed class PythonZipappAnalysis
{
    /// <summary>Creates the analysis result.</summary>
    /// <param name="zipapps">The analyzed zipapps.</param>
    /// <param name="warnings">The warnings collected across all zipapps.</param>
    public PythonZipappAnalysis(
        IReadOnlyCollection<PythonZipappInfo> zipapps,
        IReadOnlyCollection<string> warnings)
    {
        Zipapps = zipapps;
        Warnings = warnings;
    }

    /// <summary>The analyzed zipapps.</summary>
    public IReadOnlyCollection<PythonZipappInfo> Zipapps { get; }

    /// <summary>The warnings collected across all zipapps.</summary>
    public IReadOnlyCollection<string> Warnings { get; }

    /// <summary>Whether at least one zipapp was analyzed.</summary>
    public bool HasZipapps => Zipapps.Count > 0;

    /// <summary>Whether at least one warning was collected.</summary>
    public bool HasWarnings => Warnings.Count > 0;

    /// <summary>
    /// Flattens the analysis into <c>zipapps.*</c> metadata pairs: counts (only when non-zero)
    /// followed by one <c>zipapps.warning[i]</c> entry per warning, in order.
    /// </summary>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var entries = new List<KeyValuePair<string, string?>>();

        if (Zipapps.Count > 0)
        {
            entries.Add(new("zipapps.count", Zipapps.Count.ToString()));

            var withShebang = Zipapps.Count(z => z.Shebang != null);
            if (withShebang > 0)
            {
                entries.Add(new("zipapps.withShebang", withShebang.ToString()));
            }

            var windowsApps = Zipapps.Count(z => z.IsWindowsApp);
            if (windowsApps > 0)
            {
                entries.Add(new("zipapps.windowsApps", windowsApps.ToString()));
            }
        }

        // Single pass with a running index: ElementAt(i) on a general IReadOnlyCollection
        // would re-enumerate from the start for every warning.
        var index = 0;
        foreach (var warning in Warnings)
        {
            entries.Add(new($"zipapps.warning[{index}]", warning));
            index++;
        }

        return entries;
    }
}
|
||||
@@ -0,0 +1,194 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
|
||||
|
||||
/// <summary>
|
||||
/// Provides the Python import hook script for runtime evidence collection.
|
||||
/// </summary>
|
||||
internal static class PythonImportHookScript
|
||||
{
|
||||
/// <summary>
/// Returns the Python import hook script that captures module load events.
/// In this form the script emits NDJSON on stdout.
/// </summary>
public static string GetScript()
{
    return Script;
}
|
||||
|
||||
/// <summary>
/// Gets the Python import hook script that writes to a file.
/// </summary>
/// <param name="outputPath">The path to write output to.</param>
/// <returns>
/// The script with its <c>_stellaops_output = None</c> line replaced by an <c>open(...)</c>
/// call on <paramref name="outputPath"/>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="outputPath"/> is <c>null</c>.</exception>
public static string GetFileScript(string outputPath)
{
    ArgumentNullException.ThrowIfNull(outputPath);

    // The path is embedded in a single-quoted Python string literal. Backslashes must be
    // escaped first; raw line breaks would otherwise terminate the literal and break the script.
    var escapedPath = outputPath
        .Replace("\\", "\\\\")
        .Replace("'", "\\'")
        .Replace("\r", "\\r")
        .Replace("\n", "\\n");

    return Script.Replace(
        "_stellaops_output = None",
        $"_stellaops_output = open('{escapedPath}', 'w', buffering=1)");
}
|
||||
|
||||
// The Python script is stored as a verbatim string to avoid issues with # characters
|
||||
private const string Script = @"
|
||||
# StellaOps Python Import Hook
|
||||
# This script captures module import events for static analysis validation.
|
||||
# Output format: NDJSON (Newline-Delimited JSON)
|
||||
|
||||
import sys
|
||||
import json
|
||||
import threading
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
|
||||
_stellaops_output = None
|
||||
_stellaops_lock = threading.Lock()
|
||||
_stellaops_seen = set()
|
||||
|
||||
def _stellaops_emit(event_type, **kwargs):
|
||||
""""""Emit an event as JSON.""""""
|
||||
event = {
|
||||
'type': event_type,
|
||||
'timestamp': datetime.now(timezone.utc).isoformat(),
|
||||
'pid': os.getpid(),
|
||||
**kwargs
|
||||
}
|
||||
|
||||
with _stellaops_lock:
|
||||
line = json.dumps(event, default=str)
|
||||
if _stellaops_output:
|
||||
_stellaops_output.write(line + '\n')
|
||||
_stellaops_output.flush()
|
||||
else:
|
||||
print(line, flush=True)
|
||||
|
||||
def _stellaops_get_module_path(module):
|
||||
""""""Get the file path for a module if available.""""""
|
||||
try:
|
||||
if hasattr(module, '__file__') and module.__file__:
|
||||
return module.__file__
|
||||
if hasattr(module, '__spec__') and module.__spec__:
|
||||
if hasattr(module.__spec__, 'origin') and module.__spec__.origin:
|
||||
return module.__spec__.origin
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
class StellaOpsMetaPathFinder:
|
||||
""""""Meta path finder that logs all import attempts.""""""
|
||||
|
||||
def find_module(self, fullname, path=None):
|
||||
if fullname not in _stellaops_seen:
|
||||
_stellaops_seen.add(fullname)
|
||||
_stellaops_emit('import_attempt', module=fullname, path=str(path) if path else None)
|
||||
return None
|
||||
|
||||
def find_spec(self, fullname, path, target=None):
|
||||
return None
|
||||
|
||||
def _stellaops_wrap_import():
|
||||
""""""Wrap the built-in __import__ to capture all imports.""""""
|
||||
original_import = __builtins__.__import__ if hasattr(__builtins__, '__import__') else __builtins__['__import__']
|
||||
|
||||
def wrapped_import(name, globals=None, locals=None, fromlist=(), level=0):
|
||||
module = original_import(name, globals, locals, fromlist, level)
|
||||
|
||||
if name not in _stellaops_seen:
|
||||
_stellaops_seen.add(name)
|
||||
|
||||
path = _stellaops_get_module_path(module)
|
||||
is_native = path and (path.endswith('.so') or path.endswith('.pyd'))
|
||||
|
||||
event_type = 'native_load' if is_native else 'module_import'
|
||||
|
||||
_stellaops_emit(
|
||||
event_type,
|
||||
module=name,
|
||||
path=path,
|
||||
parent=module.__package__ if hasattr(module, '__package__') else None,
|
||||
tid=threading.get_ident()
|
||||
)
|
||||
|
||||
return module
|
||||
|
||||
if hasattr(__builtins__, '__import__'):
|
||||
__builtins__.__import__ = wrapped_import
|
||||
else:
|
||||
__builtins__['__import__'] = wrapped_import
|
||||
|
||||
def _stellaops_wrap_subprocess():
|
||||
""""""Wrap subprocess module to capture process spawns.""""""
|
||||
try:
|
||||
import subprocess
|
||||
original_popen = subprocess.Popen
|
||||
|
||||
class WrappedPopen(original_popen):
|
||||
def __init__(self, *args, **kwargs):
|
||||
_stellaops_emit('process_spawn', spawn_type='subprocess', args=str(args[0]) if args else None)
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
subprocess.Popen = WrappedPopen
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _stellaops_wrap_multiprocessing():
|
||||
""""""Wrap multiprocessing module to capture process spawns.""""""
|
||||
try:
|
||||
import multiprocessing
|
||||
original_process = multiprocessing.Process
|
||||
|
||||
class WrappedProcess(original_process):
|
||||
def __init__(self, *args, **kwargs):
|
||||
_stellaops_emit('process_spawn', spawn_type='multiprocessing', target=str(kwargs.get('target')))
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
multiprocessing.Process = WrappedProcess
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def stellaops_start_tracing():
|
||||
""""""Initialize runtime evidence collection.""""""
|
||||
# Emit interpreter start event
|
||||
_stellaops_emit(
|
||||
'interpreter_start',
|
||||
python_version=sys.version,
|
||||
platform=sys.platform,
|
||||
executable=sys.executable
|
||||
)
|
||||
|
||||
# Install import hook
|
||||
_stellaops_wrap_import()
|
||||
|
||||
# Install meta path finder
|
||||
sys.meta_path.insert(0, StellaOpsMetaPathFinder())
|
||||
|
||||
# Wrap subprocess and multiprocessing (optional)
|
||||
_stellaops_wrap_subprocess()
|
||||
_stellaops_wrap_multiprocessing()
|
||||
|
||||
# Record already-loaded modules
|
||||
for name, module in list(sys.modules.items()):
|
||||
if module is not None and name not in _stellaops_seen:
|
||||
_stellaops_seen.add(name)
|
||||
path = _stellaops_get_module_path(module)
|
||||
if path:
|
||||
is_native = path.endswith('.so') or path.endswith('.pyd')
|
||||
event_type = 'native_load' if is_native else 'module_import'
|
||||
_stellaops_emit(
|
||||
event_type,
|
||||
module=name,
|
||||
path=path,
|
||||
preloaded=True
|
||||
)
|
||||
|
||||
# Auto-start if this module is imported
|
||||
if __name__ != '__main__':
|
||||
stellaops_start_tracing()
|
||||
else:
|
||||
# If run directly, start tracing and wait
|
||||
stellaops_start_tracing()
|
||||
import sys
|
||||
print('StellaOps import hook active. Press Ctrl+C to stop.', file=sys.stderr)
|
||||
try:
|
||||
import time
|
||||
while True:
|
||||
time.sleep(1)
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
";
|
||||
}
|
||||
@@ -0,0 +1,194 @@
|
||||
using System.Security.Cryptography;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
|
||||
|
||||
/// <summary>
|
||||
/// Provides path scrubbing and hashing for privacy-preserving runtime evidence.
|
||||
/// </summary>
|
||||
internal static partial class PythonPathHasher
|
||||
{
|
||||
private static readonly string[] SensitivePatterns =
|
||||
[
|
||||
@"/home/[^/]+",
|
||||
@"/Users/[^/]+",
|
||||
@"C:\\Users\\[^\\]+",
|
||||
@"/root",
|
||||
@"/tmp/[^/]+",
|
||||
@"/var/[^/]+/[^/]+",
|
||||
];
|
||||
|
||||
/// <summary>
/// Replaces sensitive path components with placeholder tokens and normalizes separators.
/// </summary>
/// <param name="path">The path to scrub.</param>
/// <returns>
/// The scrubbed path using <c>/</c> separators, or an empty string for a null or empty input.
/// </returns>
public static string ScrubPath(string? path)
{
    if (string.IsNullOrEmpty(path))
    {
        return string.Empty;
    }

    // Applied in this order: home directories, root, temp, then the container app root.
    (Regex Pattern, string Token)[] rules =
    [
        (HomeDirectoryPattern(), "[HOME]"),
        (WindowsUserPattern(), "[HOME]"),
        (MacUserPattern(), "[HOME]"),
        (RootPattern(), "[ROOT]"),
        (TempPattern(), "[TEMP]"),
        (ContainerAppPattern(), "[APP]"),
    ];

    var scrubbed = path;
    foreach (var (pattern, token) in rules)
    {
        scrubbed = pattern.Replace(scrubbed, token);
    }

    // Separators are normalized last, so the Windows pattern sees the original backslashes.
    return scrubbed.Replace('\\', '/');
}
|
||||
|
||||
/// <summary>
/// Computes the SHA-256 hash of a path after normalizing it with <c>NormalizePath</c>.
/// </summary>
/// <param name="path">The path to hash.</param>
/// <returns>
/// The hash as a lowercase hex string, or an empty string for a null or empty input.
/// </returns>
public static string HashPath(string? path)
{
    if (string.IsNullOrEmpty(path))
    {
        return string.Empty;
    }

    var digest = SHA256.HashData(Encoding.UTF8.GetBytes(NormalizePath(path)));
    return Convert.ToHexStringLower(digest);
}
|
||||
|
||||
/// <summary>
/// Produces both the scrubbed form and the hash of a path.
/// </summary>
/// <param name="path">The original path.</param>
/// <returns>
/// A tuple of the scrubbed path and the hash; the hash is computed from the original path,
/// not from the scrubbed one.
/// </returns>
public static (string ScrubbedPath, string Hash) ScrubAndHash(string? path)
    => (ScrubPath(path), HashPath(path));
|
||||
|
||||
/// <summary>
/// Canonicalizes a path for hashing: lowercase, <c>/</c> separators, no trailing slash,
/// and runs of slashes collapsed to one.
/// </summary>
/// <param name="path">The path to normalize.</param>
/// <returns>The normalized path.</returns>
private static string NormalizePath(string path)
{
    var lowered = path.ToLowerInvariant();
    var forwardSlashed = lowered.Replace('\\', '/');
    var withoutTrailingSlash = forwardSlashed.TrimEnd('/');

    return MultiSlashPattern().Replace(withoutTrailingSlash, "/");
}
|
||||
|
||||
/// <summary>
/// Derives a Python module name from a file path.
/// </summary>
/// <param name="path">The file path.</param>
/// <returns>
/// For paths containing <c>site-packages/</c> or <c>dist-packages/</c>, the dotted module name of
/// the part after the first such marker; otherwise the file name without its extension.
/// <c>null</c> when the input is null or empty or no file name can be determined.
/// </returns>
public static string? ExtractModuleName(string? path)
{
    if (string.IsNullOrEmpty(path))
    {
        return null;
    }

    var unixPath = path.Replace('\\', '/');

    // site-packages takes precedence over dist-packages.
    foreach (var marker in new[] { "site-packages/", "dist-packages/" })
    {
        var markerIndex = unixPath.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
        if (markerIndex >= 0)
        {
            return ConvertPathToModule(unixPath[(markerIndex + marker.Length)..]);
        }
    }

    // Fallback: bare file name.
    var stem = Path.GetFileNameWithoutExtension(unixPath);
    return string.IsNullOrEmpty(stem) ? null : stem;
}
|
||||
|
||||
/// <summary>
/// Converts a package-relative file path (using <c>/</c> separators) to a dotted Python module name.
/// </summary>
/// <param name="relativePath">Path relative to a package root, e.g. <c>pkg/sub/mod.py</c>.</param>
/// <returns>
/// The module name: the <c>.py</c> extension or the full native-extension suffix is removed,
/// a trailing <c>/__init__</c> is dropped, and separators become dots.
/// </returns>
private static string ConvertPathToModule(string relativePath)
{
    if (relativePath.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
    {
        relativePath = relativePath[..^3];
    }
    else if (relativePath.EndsWith(".so", StringComparison.OrdinalIgnoreCase) ||
             relativePath.EndsWith(".pyd", StringComparison.OrdinalIgnoreCase))
    {
        // Native extensions may carry an ABI tag between the module name and the extension,
        // e.g. "mod.cpython-311-x86_64-linux-gnu.so", "mod.abi3.so", "mod.cp311-win_amd64.pyd".
        // A module name cannot contain a dot, so everything from the first dot of the file
        // name onwards is suffix. Dots in directory names are left alone.
        var fileNameStart = relativePath.LastIndexOf('/') + 1;
        var firstDot = relativePath.IndexOf('.', fileNameStart);
        if (firstDot > fileNameStart)
        {
            relativePath = relativePath[..firstDot];
        }
    }

    // A package's __init__ file stands for the package itself.
    if (relativePath.EndsWith("/__init__", StringComparison.OrdinalIgnoreCase))
    {
        relativePath = relativePath[..^"/__init__".Length];
    }

    return relativePath.Replace('/', '.');
}
|
||||
|
||||
[GeneratedRegex(@"/home/[^/]+", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex HomeDirectoryPattern();
|
||||
|
||||
[GeneratedRegex(@"C:\\Users\\[^\\]+", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex WindowsUserPattern();
|
||||
|
||||
[GeneratedRegex(@"/Users/[^/]+", RegexOptions.IgnoreCase)]
|
||||
private static partial Regex MacUserPattern();
|
||||
|
||||
[GeneratedRegex(@"/root\b")]
|
||||
private static partial Regex RootPattern();
|
||||
|
||||
[GeneratedRegex(@"/tmp/[^/]+")]
|
||||
private static partial Regex TempPattern();
|
||||
|
||||
[GeneratedRegex(@"^/app(?=/|$)")]
|
||||
private static partial Regex ContainerAppPattern();
|
||||
|
||||
[GeneratedRegex(@"//+")]
|
||||
private static partial Regex MultiSlashPattern();
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
|
||||
|
||||
/// <summary>
/// A single runtime event captured from Python execution.
/// </summary>
/// <param name="Kind">The kind of event.</param>
/// <param name="Timestamp">Timestamp of the event, as a string.</param>
/// <param name="ModuleName">Name of the module involved, if any.</param>
/// <param name="ModulePath">File path of the module involved, if any.</param>
/// <param name="ModuleSpec">Module spec associated with the event, if any.</param>
/// <param name="ParentModule">Parent module associated with the event, if any.</param>
/// <param name="ErrorMessage">Error message, if the event carries one.</param>
/// <param name="ProcessId">Identifier of the process that produced the event, if known.</param>
/// <param name="ThreadId">Identifier of the thread that produced the event, if known.</param>
internal sealed record PythonRuntimeEvent(
    PythonRuntimeEventKind Kind,
    string Timestamp,
    string? ModuleName,
    string? ModulePath,
    string? ModuleSpec,
    string? ParentModule,
    string? ErrorMessage,
    string? ProcessId,
    string? ThreadId);
|
||||
|
||||
/// <summary>
/// Classifies a captured runtime event. Numeric values follow declaration order.
/// </summary>
internal enum PythonRuntimeEventKind
{
    /// <summary>A module was imported.</summary>
    ModuleImport = 0,

    /// <summary>Loading a module failed.</summary>
    ModuleLoadError = 1,

    /// <summary>A native extension was loaded.</summary>
    NativeExtensionLoad = 2,

    /// <summary>A dynamic import was performed via importlib.</summary>
    DynamicImport = 3,

    /// <summary>A process was spawned via subprocess/multiprocessing.</summary>
    ProcessSpawn = 4,

    /// <summary>A new thread was started.</summary>
    ThreadStart = 5,

    /// <summary>An import hook was installed.</summary>
    ImportHookInstall = 6,

    /// <summary>A path was added to sys.path.</summary>
    PathModification = 7,

    /// <summary>The Python interpreter started.</summary>
    InterpreterStart = 8,

    /// <summary>The Python interpreter stopped.</summary>
    InterpreterStop = 9
}
|
||||
@@ -0,0 +1,396 @@
|
||||
using System.Collections.Immutable;
|
||||
using System.Text.Json;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
|
||||
|
||||
/// <summary>
|
||||
/// Collects and processes runtime evidence from Python execution traces.
|
||||
/// </summary>
|
||||
internal sealed class PythonRuntimeEvidenceCollector
|
||||
{
|
||||
private static readonly JsonSerializerOptions JsonOptions = new()
|
||||
{
|
||||
PropertyNameCaseInsensitive = true,
|
||||
AllowTrailingCommas = true
|
||||
};
|
||||
|
||||
private readonly List<PythonRuntimeEvent> _events = [];
|
||||
private readonly Dictionary<string, string> _pathHashes = new();
|
||||
private readonly HashSet<string> _loadedModules = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _loadedPackages = new(StringComparer.Ordinal);
|
||||
private readonly HashSet<string> _runtimeCapabilities = new(StringComparer.Ordinal);
|
||||
private readonly List<PythonObservationRuntimeError> _errors = [];
|
||||
|
||||
private string? _pythonVersion;
|
||||
private string? _platform;
|
||||
|
||||
/// <summary>
/// Parses a JSON line from the runtime evidence output and dispatches it by its
/// <c>type</c> property.
/// </summary>
/// <param name="jsonLine">A JSON line containing event data.</param>
/// <remarks>
/// Blank lines, malformed JSON, JSON values that are not objects, objects without a string
/// <c>type</c> property, and unknown event types are all ignored.
/// </remarks>
public void ParseLine(string jsonLine)
{
    if (string.IsNullOrWhiteSpace(jsonLine))
    {
        return;
    }

    try
    {
        using var doc = JsonDocument.Parse(jsonLine);
        var root = doc.RootElement;

        // A line can be valid JSON without being an event (e.g. a bare number or array).
        // GetProperty/GetString would throw on those rather than raise JsonException,
        // so the shape is checked explicitly.
        if (root.ValueKind != JsonValueKind.Object ||
            !root.TryGetProperty("type", out var typeElement) ||
            typeElement.ValueKind != JsonValueKind.String)
        {
            return;
        }

        switch (typeElement.GetString())
        {
            case "interpreter_start":
                ParseInterpreterStart(root);
                break;

            case "module_import":
                ParseModuleImport(root);
                break;

            case "module_error":
                ParseModuleError(root);
                break;

            case "native_load":
                ParseNativeLoad(root);
                break;

            case "dynamic_import":
                ParseDynamicImport(root);
                break;

            case "process_spawn":
                ParseProcessSpawn(root);
                break;

            case "path_modification":
                ParsePathModification(root);
                break;
        }
    }
    catch (JsonException)
    {
        // Skip malformed JSON lines
    }
}
|
||||
|
||||
/// <summary>
/// Parses a complete NDJSON output string, feeding each non-empty line to <c>ParseLine</c>.
/// </summary>
/// <param name="output">The complete output string (NDJSON format).</param>
public void ParseOutput(string output)
{
    if (string.IsNullOrEmpty(output))
    {
        return;
    }

    var lines = output.Split('\n', StringSplitOptions.RemoveEmptyEntries);
    for (var index = 0; index < lines.Length; index++)
    {
        // Trimming also removes a trailing '\r' left over from CRLF line endings.
        ParseLine(lines[index].Trim());
    }
}
|
||||
|
||||
/// <summary>
/// Streams an NDJSON evidence file line by line into <see cref="ParseLine"/>.
/// </summary>
/// <param name="filePath">Path to the NDJSON evidence file. A path that does not exist is ignored.</param>
/// <param name="cancellationToken">Token passed to the asynchronous line reader.</param>
public async Task ParseFileAsync(string filePath, CancellationToken cancellationToken = default)
{
    if (File.Exists(filePath))
    {
        var evidenceLines = File.ReadLinesAsync(filePath, cancellationToken).ConfigureAwait(false);

        await foreach (var evidenceLine in evidenceLines)
        {
            ParseLine(evidenceLine);
        }
    }
}
|
||||
|
||||
/// <summary>
/// Produces the runtime evidence observation from everything parsed so far.
/// </summary>
/// <returns>
/// <see cref="PythonObservationRuntimeEvidence.Empty"/> when no event and no module has been recorded;
/// otherwise an observation whose packages, modules and capabilities are sorted ordinally.
/// </returns>
public PythonObservationRuntimeEvidence Build()
{
    var nothingRecorded = _events.Count == 0 && _loadedModules.Count == 0;
    if (nothingRecorded)
    {
        return PythonObservationRuntimeEvidence.Empty;
    }

    // Ordinal ordering keeps the emitted lists independent of HashSet iteration order.
    var sortedPackages = _loadedPackages.Order(StringComparer.Ordinal);
    var sortedModules = _loadedModules.Order(StringComparer.Ordinal);
    var sortedCapabilities = _runtimeCapabilities.Order(StringComparer.Ordinal);

    return new PythonObservationRuntimeEvidence(
        HasEvidence: true,
        RuntimePythonVersion: _pythonVersion,
        RuntimePlatform: _platform,
        LoadedModulesCount: _loadedModules.Count,
        LoadedPackages: [.. sortedPackages],
        LoadedModules: [.. sortedModules],
        PathHashes: _pathHashes.ToImmutableDictionary(StringComparer.Ordinal),
        RuntimeCapabilities: [.. sortedCapabilities],
        Errors: [.. _errors]);
}
|
||||
|
||||
/// <summary>
/// Read-only view of all runtime events captured so far, in the order they were recorded.
/// </summary>
public IReadOnlyList<PythonRuntimeEvent> Events
{
    get { return _events; }
}
|
||||
|
||||
/// <summary>
/// Handles an <c>interpreter_start</c> event: stores the reported Python version and platform
/// (null when the property is absent) and records an InterpreterStart event.
/// </summary>
/// <param name="root">Root JSON object of the event line.</param>
private void ParseInterpreterStart(JsonElement root)
{
    string? reportedVersion = null;
    if (root.TryGetProperty("python_version", out var versionElement))
    {
        reportedVersion = versionElement.GetString();
    }

    _pythonVersion = reportedVersion;

    string? reportedPlatform = null;
    if (root.TryGetProperty("platform", out var platformElement))
    {
        reportedPlatform = platformElement.GetString();
    }

    _platform = reportedPlatform;

    // Fall back to the current UTC time when the event has no (or a null) timestamp.
    var eventTime = (root.TryGetProperty("timestamp", out var timestampElement) ? timestampElement.GetString() : null)
        ?? GetUtcTimestamp();

    string? processId = null;
    if (root.TryGetProperty("pid", out var pidElement))
    {
        processId = pidElement.GetInt32().ToString();
    }

    var startEvent = new PythonRuntimeEvent(
        Kind: PythonRuntimeEventKind.InterpreterStart,
        Timestamp: eventTime,
        ModuleName: null,
        ModulePath: null,
        ModuleSpec: null,
        ParentModule: null,
        ErrorMessage: null,
        ProcessId: processId,
        ThreadId: null);

    _events.Add(startEvent);
}
|
||||
|
||||
/// <summary>
/// Handles a <c>module_import</c> event: tracks the module and its top-level package,
/// stores the path hash returned by <c>PythonPathHasher</c>, and records a ModuleImport event.
/// </summary>
/// <param name="root">Root JSON object of the event line.</param>
private void ParseModuleImport(JsonElement root)
{
    string? importedModule = null;
    if (root.TryGetProperty("module", out var moduleElement))
    {
        importedModule = moduleElement.GetString();
    }

    string? rawPath = null;
    if (root.TryGetProperty("path", out var pathElement))
    {
        rawPath = pathElement.GetString();
    }

    // Fall back to the current UTC time when the event has no (or a null) timestamp.
    var eventTime = (root.TryGetProperty("timestamp", out var timestampElement) ? timestampElement.GetString() : null)
        ?? GetUtcTimestamp();

    if (importedModule is { Length: > 0 })
    {
        _loadedModules.Add(importedModule);

        // Top-level package = everything before the first dot (the whole name when there is no dot).
        var firstDot = importedModule.IndexOf('.');
        _loadedPackages.Add(firstDot < 0 ? importedModule : importedModule[..firstDot]);
    }

    if (!string.IsNullOrEmpty(rawPath))
    {
        var (scrubbedPath, pathDigest) = PythonPathHasher.ScrubAndHash(rawPath);
        if (!string.IsNullOrEmpty(pathDigest))
        {
            // TryAdd keeps the first digest recorded for a given scrubbed path.
            _pathHashes.TryAdd(scrubbedPath, pathDigest);
        }
    }

    var eventPath = PythonPathHasher.ScrubPath(rawPath);
    var moduleSpec = root.TryGetProperty("spec", out var specElement) ? specElement.GetString() : null;
    var parentModule = root.TryGetProperty("parent", out var parentElement) ? parentElement.GetString() : null;
    var processId = root.TryGetProperty("pid", out var pidElement) ? pidElement.GetInt32().ToString() : null;
    var threadId = root.TryGetProperty("tid", out var tidElement) ? tidElement.GetInt64().ToString() : null;

    _events.Add(new PythonRuntimeEvent(
        Kind: PythonRuntimeEventKind.ModuleImport,
        Timestamp: eventTime,
        ModuleName: importedModule,
        ModulePath: eventPath,
        ModuleSpec: moduleSpec,
        ParentModule: parentModule,
        ErrorMessage: null,
        ProcessId: processId,
        ThreadId: threadId));
}
|
||||
|
||||
/// <summary>
/// Handles a <c>module_error</c> event: records a ModuleLoadError event and appends a
/// matching entry to the runtime error list.
/// </summary>
/// <param name="root">Root JSON object of the event line.</param>
private void ParseModuleError(JsonElement root)
{
    string? failedModule = null;
    if (root.TryGetProperty("module", out var moduleElement))
    {
        failedModule = moduleElement.GetString();
    }

    string? reportedError = null;
    if (root.TryGetProperty("error", out var errorElement))
    {
        reportedError = errorElement.GetString();
    }

    string? rawPath = null;
    if (root.TryGetProperty("path", out var pathElement))
    {
        rawPath = pathElement.GetString();
    }

    // Fall back to the current UTC time when the event has no (or a null) timestamp.
    var eventTime = (root.TryGetProperty("timestamp", out var timestampElement) ? timestampElement.GetString() : null)
        ?? GetUtcTimestamp();

    var eventPath = PythonPathHasher.ScrubPath(rawPath);
    var processId = root.TryGetProperty("pid", out var pidElement) ? pidElement.GetInt32().ToString() : null;

    _events.Add(new PythonRuntimeEvent(
        Kind: PythonRuntimeEventKind.ModuleLoadError,
        Timestamp: eventTime,
        ModuleName: failedModule,
        ModulePath: eventPath,
        ModuleSpec: null,
        ParentModule: null,
        ErrorMessage: reportedError,
        ProcessId: processId,
        ThreadId: null));

    // Mirror the failure into the error list; an empty hash is stored as null.
    var errorPath = PythonPathHasher.ScrubPath(rawPath);
    var pathDigest = PythonPathHasher.HashPath(rawPath);

    var runtimeError = new PythonObservationRuntimeError(
        Timestamp: eventTime,
        Message: reportedError ?? $"Failed to import {failedModule}",
        Path: errorPath,
        PathSha256: string.IsNullOrEmpty(pathDigest) ? null : pathDigest);

    _errors.Add(runtimeError);
}
|
||||
|
||||
/// <summary>
/// Handles a <c>native_load</c> event: tracks the module, flags the <c>native_code</c>
/// capability, stores the path hash and records a NativeExtensionLoad event.
/// </summary>
/// <param name="root">Root JSON object of the event line.</param>
private void ParseNativeLoad(JsonElement root)
{
    string? nativeModule = null;
    if (root.TryGetProperty("module", out var moduleElement))
    {
        nativeModule = moduleElement.GetString();
    }

    string? rawPath = null;
    if (root.TryGetProperty("path", out var pathElement))
    {
        rawPath = pathElement.GetString();
    }

    // Fall back to the current UTC time when the event has no (or a null) timestamp.
    var eventTime = (root.TryGetProperty("timestamp", out var timestampElement) ? timestampElement.GetString() : null)
        ?? GetUtcTimestamp();

    if (nativeModule is { Length: > 0 })
    {
        _loadedModules.Add(nativeModule);
    }

    // The capability is flagged for every native_load event, even one without a module name.
    _runtimeCapabilities.Add("native_code");

    if (!string.IsNullOrEmpty(rawPath))
    {
        var (scrubbedPath, pathDigest) = PythonPathHasher.ScrubAndHash(rawPath);
        if (!string.IsNullOrEmpty(pathDigest))
        {
            // TryAdd keeps the first digest recorded for a given scrubbed path.
            _pathHashes.TryAdd(scrubbedPath, pathDigest);
        }
    }

    var eventPath = PythonPathHasher.ScrubPath(rawPath);
    var processId = root.TryGetProperty("pid", out var pidElement) ? pidElement.GetInt32().ToString() : null;

    _events.Add(new PythonRuntimeEvent(
        Kind: PythonRuntimeEventKind.NativeExtensionLoad,
        Timestamp: eventTime,
        ModuleName: nativeModule,
        ModulePath: eventPath,
        ModuleSpec: null,
        ParentModule: null,
        ErrorMessage: null,
        ProcessId: processId,
        ThreadId: null));
}
|
||||
|
||||
/// <summary>
/// Handles a <c>dynamic_import</c> event: tracks the module, flags the
/// <c>dynamic_import</c> capability and records a DynamicImport event.
/// </summary>
/// <param name="root">Root JSON object of the event line.</param>
private void ParseDynamicImport(JsonElement root)
{
    string? dynamicModule = null;
    if (root.TryGetProperty("module", out var moduleElement))
    {
        dynamicModule = moduleElement.GetString();
    }

    // Fall back to the current UTC time when the event has no (or a null) timestamp.
    var eventTime = (root.TryGetProperty("timestamp", out var timestampElement) ? timestampElement.GetString() : null)
        ?? GetUtcTimestamp();

    if (dynamicModule is { Length: > 0 })
    {
        _loadedModules.Add(dynamicModule);
    }

    // The capability is flagged for every dynamic_import event, even one without a module name.
    _runtimeCapabilities.Add("dynamic_import");

    var processId = root.TryGetProperty("pid", out var pidElement) ? pidElement.GetInt32().ToString() : null;

    _events.Add(new PythonRuntimeEvent(
        Kind: PythonRuntimeEventKind.DynamicImport,
        Timestamp: eventTime,
        ModuleName: dynamicModule,
        ModulePath: null,
        ModuleSpec: null,
        ParentModule: null,
        ErrorMessage: null,
        ProcessId: processId,
        ThreadId: null));
}
|
||||
|
||||
/// <summary>
/// Handles a <c>process_spawn</c> event: flags the <c>process_spawn</c> capability
/// (plus <c>multiprocessing</c> when the spawn type says so) and records a ProcessSpawn event
/// that carries the spawn type in its ModuleSpec field.
/// </summary>
/// <param name="root">Root JSON object of the event line.</param>
private void ParseProcessSpawn(JsonElement root)
{
    // Fall back to the current UTC time when the event has no (or a null) timestamp.
    var eventTime = (root.TryGetProperty("timestamp", out var timestampElement) ? timestampElement.GetString() : null)
        ?? GetUtcTimestamp();

    _runtimeCapabilities.Add("process_spawn");

    // "unknown" is used only when the property is absent; an explicit JSON null stays null.
    string? spawnKind;
    if (root.TryGetProperty("spawn_type", out var spawnElement))
    {
        spawnKind = spawnElement.GetString();
    }
    else
    {
        spawnKind = "unknown";
    }

    if (string.Equals(spawnKind, "multiprocessing", StringComparison.Ordinal))
    {
        _runtimeCapabilities.Add("multiprocessing");
    }

    var processId = root.TryGetProperty("pid", out var pidElement) ? pidElement.GetInt32().ToString() : null;

    _events.Add(new PythonRuntimeEvent(
        Kind: PythonRuntimeEventKind.ProcessSpawn,
        Timestamp: eventTime,
        ModuleName: null,
        ModulePath: null,
        ModuleSpec: spawnKind,
        ParentModule: null,
        ErrorMessage: null,
        ProcessId: processId,
        ThreadId: null));
}
|
||||
|
||||
/// <summary>
/// Handles a <c>path_modification</c> event: stores the path hash returned by
/// <c>PythonPathHasher</c> and records a PathModification event with the scrubbed path.
/// </summary>
/// <param name="root">Root JSON object of the event line.</param>
private void ParsePathModification(JsonElement root)
{
    string? rawPath = null;
    if (root.TryGetProperty("path", out var pathElement))
    {
        rawPath = pathElement.GetString();
    }

    // Fall back to the current UTC time when the event has no (or a null) timestamp.
    var eventTime = (root.TryGetProperty("timestamp", out var timestampElement) ? timestampElement.GetString() : null)
        ?? GetUtcTimestamp();

    if (!string.IsNullOrEmpty(rawPath))
    {
        var (scrubbedPath, pathDigest) = PythonPathHasher.ScrubAndHash(rawPath);
        if (!string.IsNullOrEmpty(pathDigest))
        {
            // TryAdd keeps the first digest recorded for a given scrubbed path.
            _pathHashes.TryAdd(scrubbedPath, pathDigest);
        }
    }

    var eventPath = PythonPathHasher.ScrubPath(rawPath);
    var processId = root.TryGetProperty("pid", out var pidElement) ? pidElement.GetInt32().ToString() : null;

    _events.Add(new PythonRuntimeEvent(
        Kind: PythonRuntimeEventKind.PathModification,
        Timestamp: eventTime,
        ModuleName: null,
        ModulePath: eventPath,
        ModuleSpec: null,
        ParentModule: null,
        ErrorMessage: null,
        ProcessId: processId,
        ThreadId: null));
}
|
||||
|
||||
/// <summary>
/// Returns the current UTC time formatted with the round-trip ("O") specifier.
/// Used as the fallback timestamp for events that do not carry one.
/// </summary>
private static string GetUtcTimestamp() => DateTime.UtcNow.ToString("O");
|
||||
}
|
||||
@@ -40,6 +40,9 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
// Detect startup hooks (sitecustomize.py, usercustomize.py, .pth files)
|
||||
var startupHooks = PythonStartupHookDetector.Detect(context.RootPath);
|
||||
|
||||
// Analyze zipapps in workspace and container layers
|
||||
var zipappAnalysis = PythonZipappAdapter.AnalyzeAll(context.RootPath);
|
||||
|
||||
// Collect dist-info directories from both root and container layers
|
||||
var distInfoDirectories = CollectDistInfoDirectories(context.RootPath);
|
||||
|
||||
@@ -91,6 +94,9 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
// Append startup hooks warnings
|
||||
AppendStartupHooksMetadata(metadata, startupHooks);
|
||||
|
||||
// Append zipapp analysis
|
||||
AppendZipappMetadata(metadata, zipappAnalysis);
|
||||
|
||||
// Collect evidence including startup hooks
|
||||
var evidence = distribution.SortedEvidence.ToList();
|
||||
evidence.AddRange(startupHooks.ToEvidence(context));
|
||||
@@ -242,6 +248,45 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends zipapp findings to the component metadata: a detection flag, the zipapp count,
/// the distinct Python versions (ordinally sorted, ';'-separated), analysis-level warnings
/// and per-zipapp warnings keyed by file name. Nothing is added when no zipapp was found.
/// </summary>
/// <param name="metadata">Metadata list to append to.</param>
/// <param name="zipappAnalysis">Result of the zipapp analysis.</param>
private static void AppendZipappMetadata(List<KeyValuePair<string, string?>> metadata, PythonZipappAnalysis zipappAnalysis)
{
    if (!zipappAnalysis.HasZipapps)
    {
        return;
    }

    void Put(string key, string? value) => metadata.Add(new KeyValuePair<string, string?>(key, value));

    Put("zipapps.detected", "true");
    Put("zipapps.count", zipappAnalysis.Zipapps.Count.ToString());

    // An ordinal sorted set de-duplicates and orders the versions in one pass.
    var pythonVersions = new SortedSet<string>(StringComparer.Ordinal);
    foreach (var app in zipappAnalysis.Zipapps)
    {
        if (app.PythonVersion != null)
        {
            pythonVersions.Add(app.PythonVersion);
        }
    }

    if (pythonVersions.Count > 0)
    {
        Put("zipapps.pythonVersions", string.Join(';', pythonVersions));
    }

    // Analysis-level warnings share one key.
    foreach (var analysisWarning in zipappAnalysis.Warnings)
    {
        Put("zipapps.warning", analysisWarning);
    }

    // Per-zipapp warnings are keyed by the zipapp's file name.
    foreach (var app in zipappAnalysis.Zipapps)
    {
        foreach (var appWarning in app.Warnings)
        {
            Put($"zipapps.{app.FileName}.warning", appWarning);
        }
    }
}
|
||||
|
||||
private static IReadOnlyCollection<string> CollectDistInfoDirectories(string rootPath)
|
||||
{
|
||||
var directories = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
|
||||
|
||||
Reference in New Issue
Block a user