up
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled

This commit is contained in:
StellaOps Bot
2025-11-28 09:40:40 +02:00
parent 1c6730a1d2
commit 05da719048
206 changed files with 34741 additions and 1751 deletions

View File

@@ -0,0 +1,423 @@
using System.Collections.Frozen;
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
/// <summary>
/// Detects framework usage hints from Python source code.
/// </summary>
internal sealed partial class PythonFrameworkDetector
{
// File names that strongly indicate a framework. Keys are bare file names (no directory
// part) and are looked up by exact match, ignoring case. Jupyter notebooks are recognised
// by their ".ipynb" suffix in DetectAsync, so no glob-style key is listed here: an
// exact-match lookup can never match a "*.ipynb" pattern.
private static readonly FrozenDictionary<string, (PythonFrameworkKind Kind, PythonFrameworkConfidence Confidence)> FilePatterns =
    new Dictionary<string, (PythonFrameworkKind, PythonFrameworkConfidence)>(StringComparer.OrdinalIgnoreCase)
    {
        // Django
        ["manage.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.High),
        ["settings.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),
        ["urls.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),
        ["wsgi.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),
        ["asgi.py"] = (PythonFrameworkKind.Django, PythonFrameworkConfidence.Medium),
        // Celery
        ["celery.py"] = (PythonFrameworkKind.Celery, PythonFrameworkConfidence.High),
        ["tasks.py"] = (PythonFrameworkKind.Celery, PythonFrameworkConfidence.Low),
        // Gunicorn
        ["gunicorn.conf.py"] = (PythonFrameworkKind.Gunicorn, PythonFrameworkConfidence.Definitive),
        ["gunicorn_config.py"] = (PythonFrameworkKind.Gunicorn, PythonFrameworkConfidence.Definitive),
        // uWSGI
        ["uwsgi.ini"] = (PythonFrameworkKind.Uwsgi, PythonFrameworkConfidence.Definitive),
        // Pytest
        ["conftest.py"] = (PythonFrameworkKind.Pytest, PythonFrameworkConfidence.High),
        ["pytest.ini"] = (PythonFrameworkKind.Pytest, PythonFrameworkConfidence.Definitive),
    }
    // The comparer has to be passed again: ToFrozenDictionary() does not inherit the
    // comparer of the source dictionary and would fall back to case-sensitive matching.
    .ToFrozenDictionary(StringComparer.OrdinalIgnoreCase);
// Module names whose import indicates a framework. Lookups are ordinal (case-sensitive),
// matching how Python itself treats module names.
private static readonly FrozenDictionary<string, (PythonFrameworkKind Kind, PythonFrameworkConfidence Confidence)> ImportPatterns =
    new Dictionary<string, (PythonFrameworkKind, PythonFrameworkConfidence)>(StringComparer.Ordinal)
    {
        // Django (top-level package plus a few common sub-packages)
        { "django", (PythonFrameworkKind.Django, PythonFrameworkConfidence.High) },
        { "django.conf", (PythonFrameworkKind.Django, PythonFrameworkConfidence.High) },
        { "django.urls", (PythonFrameworkKind.Django, PythonFrameworkConfidence.High) },
        { "django.views", (PythonFrameworkKind.Django, PythonFrameworkConfidence.High) },
        { "django.db", (PythonFrameworkKind.Django, PythonFrameworkConfidence.High) },

        // Flask and Flask extensions
        { "flask", (PythonFrameworkKind.Flask, PythonFrameworkConfidence.High) },
        { "flask_restful", (PythonFrameworkKind.Flask, PythonFrameworkConfidence.High) },
        { "flask_sqlalchemy", (PythonFrameworkKind.Flask, PythonFrameworkConfidence.High) },

        // FastAPI / Starlette
        { "fastapi", (PythonFrameworkKind.FastAPI, PythonFrameworkConfidence.High) },
        { "starlette", (PythonFrameworkKind.Starlette, PythonFrameworkConfidence.High) },

        // Task queues
        { "celery", (PythonFrameworkKind.Celery, PythonFrameworkConfidence.High) },
        { "rq", (PythonFrameworkKind.RQ, PythonFrameworkConfidence.High) },

        // CLI frameworks
        { "click", (PythonFrameworkKind.Click, PythonFrameworkConfidence.High) },
        { "typer", (PythonFrameworkKind.Typer, PythonFrameworkConfidence.High) },

        // Testing
        { "pytest", (PythonFrameworkKind.Pytest, PythonFrameworkConfidence.High) },

        // Data apps
        { "streamlit", (PythonFrameworkKind.Streamlit, PythonFrameworkConfidence.Definitive) },
        { "gradio", (PythonFrameworkKind.Gradio, PythonFrameworkConfidence.Definitive) },

        // Pydantic Settings
        { "pydantic_settings", (PythonFrameworkKind.PydanticSettings, PythonFrameworkConfidence.Definitive) },
    }.ToFrozenDictionary(StringComparer.Ordinal);
// Source-generated regular expressions used by the per-line scan. None of them is
// anchored, so each one matches anywhere inside the line it is tested against.
// Django patterns
/// <summary>Matches an <c>INSTALLED_APPS = [</c> list assignment.</summary>
[GeneratedRegex(@"INSTALLED_APPS\s*=\s*\[", RegexOptions.Compiled)]
private static partial Regex DjangoInstalledAppsPattern();
/// <summary>Matches a <c>MIDDLEWARE = [</c> list assignment.</summary>
/// <remarks>NOTE(review): not used by the line scan in <c>DetectInFileAsync</c> in this file.</remarks>
[GeneratedRegex(@"MIDDLEWARE\s*=\s*\[", RegexOptions.Compiled)]
private static partial Regex DjangoMiddlewarePattern();
/// <summary>Matches a <c>ROOT_URLCONF =</c> assignment.</summary>
/// <remarks>NOTE(review): not used by the line scan in <c>DetectInFileAsync</c> in this file.</remarks>
[GeneratedRegex(@"ROOT_URLCONF\s*=", RegexOptions.Compiled)]
private static partial Regex DjangoRootUrlConfPattern();
/// <summary>Matches <c>os.environ.setdefault("DJANGO_SETTINGS_MODULE"</c> with either quote style.</summary>
[GeneratedRegex(@"os\.environ\.setdefault\s*\(\s*[""']DJANGO_SETTINGS_MODULE[""']", RegexOptions.Compiled)]
private static partial Regex DjangoSettingsModulePattern();
// Flask patterns
/// <summary>Matches a <c>Flask(__name__</c> application construction.</summary>
[GeneratedRegex(@"Flask\s*\(\s*__name__", RegexOptions.Compiled)]
private static partial Regex FlaskAppPattern();
/// <summary>Matches any <c>Blueprint(</c> call.</summary>
[GeneratedRegex(@"Blueprint\s*\(", RegexOptions.Compiled)]
private static partial Regex FlaskBlueprintPattern();
// FastAPI patterns
/// <summary>Matches any <c>FastAPI(</c> call.</summary>
[GeneratedRegex(@"FastAPI\s*\(", RegexOptions.Compiled)]
private static partial Regex FastAPIAppPattern();
/// <summary>Matches any <c>APIRouter(</c> call.</summary>
[GeneratedRegex(@"APIRouter\s*\(", RegexOptions.Compiled)]
private static partial Regex FastAPIRouterPattern();
// Celery patterns
/// <summary>Matches any <c>Celery(</c> call.</summary>
[GeneratedRegex(@"Celery\s*\(", RegexOptions.Compiled)]
private static partial Regex CeleryAppPattern();
/// <summary>Matches the decorators <c>@app.task</c>, <c>@celery.task</c> and <c>@shared_task</c>.</summary>
[GeneratedRegex(@"@\s*(?:app\.task|celery\.task|shared_task)", RegexOptions.Compiled)]
private static partial Regex CeleryTaskPattern();
// AWS Lambda patterns
/// <summary>Matches <c>def lambda_handler(event, context)</c> or <c>def handler(event, context)</c>.</summary>
[GeneratedRegex(@"def\s+(lambda_handler|handler)\s*\(\s*event\s*,\s*context\s*\)", RegexOptions.Compiled)]
private static partial Regex LambdaHandlerPattern();
/// <summary>Matches a function of any name declared with <c>(event: dict, context: LambdaContext</c>.</summary>
[GeneratedRegex(@"def\s+\w+\s*\(\s*event\s*:\s*dict\s*,\s*context\s*:\s*LambdaContext", RegexOptions.Compiled)]
private static partial Regex LambdaTypedHandlerPattern();
// Click patterns
/// <summary>Matches the <c>@click.command</c> decorator.</summary>
[GeneratedRegex(@"@\s*click\.command", RegexOptions.Compiled)]
private static partial Regex ClickCommandPattern();
/// <summary>Matches the <c>@click.group</c> decorator.</summary>
[GeneratedRegex(@"@\s*click\.group", RegexOptions.Compiled)]
private static partial Regex ClickGroupPattern();
// Typer patterns
/// <summary>Matches a <c>typer.Typer(</c> application construction.</summary>
[GeneratedRegex(@"typer\.Typer\s*\(", RegexOptions.Compiled)]
private static partial Regex TyperAppPattern();
/// <summary>
/// Matches an <c>@app.command</c> decorator. The pattern only looks at the variable name
/// <c>app</c>; it does not check that the object is a Typer application.
/// </summary>
[GeneratedRegex(@"@\s*app\.command", RegexOptions.Compiled)]
private static partial Regex TyperCommandPattern();
// Logging patterns
/// <summary>Matches a reference to <c>logging.config.dictConfig</c>.</summary>
[GeneratedRegex(@"logging\.config\.dictConfig", RegexOptions.Compiled)]
private static partial Regex LoggingDictConfigPattern();
/// <summary>Matches a reference to <c>logging.config.fileConfig</c>.</summary>
[GeneratedRegex(@"logging\.config\.fileConfig", RegexOptions.Compiled)]
private static partial Regex LoggingFileConfigPattern();
/// <summary>Matches a <c>LOGGING = {</c> dictionary assignment.</summary>
[GeneratedRegex(@"LOGGING\s*=\s*\{", RegexOptions.Compiled)]
private static partial Regex DjangoLoggingPattern();
// Gunicorn patterns
/// <summary>Matches <c>bind =</c> followed by an opening quote.</summary>
[GeneratedRegex(@"bind\s*=\s*[""']", RegexOptions.Compiled)]
private static partial Regex GunicornBindPattern();
/// <summary>
/// Matches <c>workers =</c>. Unanchored, so longer names ending in <c>workers</c> match too;
/// the caller only applies it to files whose path contains "gunicorn".
/// </summary>
[GeneratedRegex(@"workers\s*=", RegexOptions.Compiled)]
private static partial Regex GunicornWorkersPattern();
/// <summary>
/// Collects framework hints for every file exposed by the virtual file system.
/// </summary>
/// <param name="vfs">The virtual file system whose files are inspected.</param>
/// <param name="cancellationToken">Checked once per file in both passes.</param>
/// <returns>
/// At most one hint per (framework kind, source file) pair: the one with the highest
/// confidence, or the earliest reported one when several share that confidence.
/// </returns>
public async Task<ImmutableArray<PythonFrameworkHint>> DetectAsync(
    PythonVirtualFileSystem vfs,
    CancellationToken cancellationToken = default)
{
    var collected = new List<PythonFrameworkHint>();

    // Pass 1: evidence from file names alone; no content is read here.
    foreach (var entry in vfs.Files)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var path = entry.VirtualPath;
        var name = Path.GetFileName(path);

        if (FilePatterns.TryGetValue(name, out var known))
        {
            collected.Add(new PythonFrameworkHint(
                known.Kind, path, null, $"file pattern: {name}", known.Confidence));
        }

        // Notebooks are identified by extension rather than by a fixed file name.
        if (path.EndsWith(".ipynb", StringComparison.OrdinalIgnoreCase))
        {
            collected.Add(new PythonFrameworkHint(
                PythonFrameworkKind.Jupyter, path, null, "Jupyter notebook file", PythonFrameworkConfidence.Definitive));
        }
    }

    // Pass 2: evidence from the content of each *.py file.
    var sources = vfs.Files
        .Where(static candidate => candidate.VirtualPath.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
        .ToArray();

    foreach (var source in sources)
    {
        cancellationToken.ThrowIfCancellationRequested();
        collected.AddRange(await DetectInFileAsync(vfs, source, cancellationToken).ConfigureAwait(false));
    }

    // Collapse to the strongest hint per (kind, file). The sort is stable, so among
    // equally confident hints the first one reported wins.
    var strongest =
        from hint in collected
        group hint by (hint.Kind, hint.SourceFile) into bucket
        select bucket.OrderByDescending(h => h.Confidence).First();

    return strongest.ToImmutableArray();
}
/// <summary>
/// Scans one Python file line by line and reports every framework indicator it contains.
/// </summary>
/// <param name="vfs">File system used to open the file.</param>
/// <param name="file">The file to scan.</param>
/// <param name="cancellationToken">Checked before every line.</param>
/// <returns>
/// The hints found, in line order. The list is empty when the file cannot be opened; if an
/// <see cref="IOException"/> occurs, whatever was collected up to that point is returned.
/// </returns>
private async Task<IEnumerable<PythonFrameworkHint>> DetectInFileAsync(
    PythonVirtualFileSystem vfs,
    PythonVirtualFile file,
    CancellationToken cancellationToken)
{
    var found = new List<PythonFrameworkHint>();

    try
    {
        using var stream = await vfs.OpenReadAsync(file.VirtualPath, cancellationToken).ConfigureAwait(false);
        if (stream is null)
        {
            return found;
        }

        using var reader = new StreamReader(stream);
        var text = await reader.ReadToEndAsync(cancellationToken).ConfigureAwait(false);

        var path = file.VirtualPath;
        // Gunicorn settings are only looked for in files whose path mentions gunicorn.
        var isGunicornFile = path.Contains("gunicorn", StringComparison.OrdinalIgnoreCase);
        var lineNumber = 0;

        // Records a hint for the line currently being examined.
        void Report(PythonFrameworkKind kind, string evidence, PythonFrameworkConfidence confidence) =>
            found.Add(CreateHint(kind, path, lineNumber, evidence, confidence));

        foreach (var line in text.Split('\n'))
        {
            cancellationToken.ThrowIfCancellationRequested();
            lineNumber++; // reported line numbers are 1-based

            var code = line.TrimStart();

            // Whole-line comments carry no evidence.
            if (code.StartsWith('#'))
            {
                continue;
            }

            var isImport = code.StartsWith("import ", StringComparison.Ordinal)
                || code.StartsWith("from ", StringComparison.Ordinal);
            if (isImport)
            {
                found.AddRange(DetectImportPatterns(code, path, lineNumber));
            }

            // Django
            if (DjangoInstalledAppsPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Django, "INSTALLED_APPS configuration", PythonFrameworkConfidence.Definitive);
            }

            if (DjangoSettingsModulePattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Django, "DJANGO_SETTINGS_MODULE", PythonFrameworkConfidence.Definitive);
            }

            // Flask
            if (FlaskAppPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Flask, "Flask(__name__)", PythonFrameworkConfidence.Definitive);
            }

            if (FlaskBlueprintPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Flask, "Blueprint()", PythonFrameworkConfidence.High);
            }

            // FastAPI
            if (FastAPIAppPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.FastAPI, "FastAPI()", PythonFrameworkConfidence.Definitive);
            }

            if (FastAPIRouterPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.FastAPI, "APIRouter()", PythonFrameworkConfidence.High);
            }

            // Celery
            if (CeleryAppPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Celery, "Celery()", PythonFrameworkConfidence.Definitive);
            }

            if (CeleryTaskPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Celery, "@app.task decorator", PythonFrameworkConfidence.High);
            }

            // AWS Lambda
            if (LambdaHandlerPattern().IsMatch(line) || LambdaTypedHandlerPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.AwsLambda, "Lambda handler function", PythonFrameworkConfidence.High);
            }

            // Click
            if (ClickCommandPattern().IsMatch(line) || ClickGroupPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Click, "@click.command/group", PythonFrameworkConfidence.High);
            }

            // Typer
            if (TyperAppPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Typer, "typer.Typer()", PythonFrameworkConfidence.Definitive);
            }

            if (TyperCommandPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.Typer, "@app.command", PythonFrameworkConfidence.High);
            }

            // Logging configuration
            if (LoggingDictConfigPattern().IsMatch(line) || LoggingFileConfigPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.LoggingConfig, "logging.config", PythonFrameworkConfidence.High);
            }

            if (DjangoLoggingPattern().IsMatch(line))
            {
                Report(PythonFrameworkKind.LoggingConfig, "Django LOGGING dict", PythonFrameworkConfidence.High);
            }

            // Gunicorn settings (config files only)
            if (isGunicornFile
                && (GunicornBindPattern().IsMatch(line) || GunicornWorkersPattern().IsMatch(line)))
            {
                Report(PythonFrameworkKind.Gunicorn, "Gunicorn configuration", PythonFrameworkConfidence.Definitive);
            }
        }
    }
    catch (IOException)
    {
        // Unreadable files are skipped on purpose; the scan continues with the next file.
    }

    return found;
}
/// <summary>
/// Extracts the imported module names from a single <c>import ...</c> or
/// <c>from ... import ...</c> line and yields a hint for each known framework module.
/// </summary>
/// <param name="line">The source line; surrounding whitespace is ignored.</param>
/// <param name="sourceFile">Path recorded on every hint.</param>
/// <param name="lineNumber">1-based line number recorded on every hint.</param>
/// <returns>
/// Zero or more hints. For a dotted module both the top-level package and the full dotted
/// name are looked up, so <c>import django.contrib.auth</c> is attributed to Django just
/// like <c>from django.contrib.auth import ...</c>.
/// </returns>
private static IEnumerable<PythonFrameworkHint> DetectImportPatterns(string line, string sourceFile, int lineNumber)
{
    var trimmed = line.Trim();

    if (trimmed.StartsWith("import ", StringComparison.Ordinal))
    {
        // "import a, b as c" -> one comma-separated segment per module.
        foreach (var part in trimmed[7..].Split(','))
        {
            var tokens = part.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
            if (tokens.Length == 0)
            {
                // Stray or trailing comma ("import os,"): nothing to inspect. Indexing the
                // empty token list here used to throw and abort the whole scan.
                continue;
            }

            // The first token is the module name; anything after it is the "as" alias.
            var moduleName = tokens[0];
            foreach (var match in Lookup(moduleName))
            {
                yield return CreateHint(match.Kind, sourceFile, lineNumber,
                    $"import {moduleName}", match.Confidence);
            }
        }
    }
    else if (trimmed.StartsWith("from ", StringComparison.Ordinal))
    {
        var parts = trimmed[5..].Split(new[] { " import " }, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length == 0)
        {
            yield break;
        }

        var moduleName = parts[0].Trim();
        foreach (var match in Lookup(moduleName))
        {
            yield return CreateHint(match.Kind, sourceFile, lineNumber,
                $"from {moduleName}", match.Confidence);
        }
    }

    // Yields the table entry for the top-level package and, when the name is dotted, the
    // entry for the full dotted name as well. A plain name is looked up only once.
    static IEnumerable<(PythonFrameworkKind Kind, PythonFrameworkConfidence Confidence)> Lookup(string moduleName)
    {
        var baseModule = moduleName.Split('.')[0];
        if (ImportPatterns.TryGetValue(baseModule, out var baseMatch))
        {
            yield return baseMatch;
        }

        if (!string.Equals(baseModule, moduleName, StringComparison.Ordinal)
            && ImportPatterns.TryGetValue(moduleName, out var fullMatch))
        {
            yield return fullMatch;
        }
    }
}
/// <summary>
/// Builds a hint that is tied to a specific line of a source file and carries no extra metadata.
/// </summary>
private static PythonFrameworkHint CreateHint(
    PythonFrameworkKind kind,
    string sourceFile,
    int lineNumber,
    string evidence,
    PythonFrameworkConfidence confidence)
    => new(kind, sourceFile, lineNumber, evidence, confidence);
}

View File

@@ -0,0 +1,151 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
/// <summary>
/// Represents a detected framework or configuration hint in a Python project.
/// These are hints/suggestions, not definitive detections.
/// </summary>
/// <param name="Kind">The type of framework or configuration.</param>
/// <param name="SourceFile">The file where this hint was detected.</param>
/// <param name="LineNumber">The line number (if available).</param>
/// <param name="Evidence">The code pattern that indicated this hint.</param>
/// <param name="Confidence">Confidence level for this detection.</param>
/// <param name="Metadata">Additional metadata about the detection.</param>
internal sealed record PythonFrameworkHint(
    PythonFrameworkKind Kind,
    string SourceFile,
    int? LineNumber,
    string Evidence,
    PythonFrameworkConfidence Confidence,
    ImmutableDictionary<string, string>? Metadata = null)
{
    /// <summary>
    /// Gets whether this is a web framework.
    /// </summary>
    public bool IsWebFramework => Kind is
        PythonFrameworkKind.Django or
        PythonFrameworkKind.Flask or
        PythonFrameworkKind.FastAPI or
        PythonFrameworkKind.Starlette or
        PythonFrameworkKind.Tornado or
        PythonFrameworkKind.Bottle or
        PythonFrameworkKind.Pyramid;

    /// <summary>
    /// Gets whether this is a task queue.
    /// </summary>
    public bool IsTaskQueue => Kind is
        PythonFrameworkKind.Celery or
        PythonFrameworkKind.RQ or
        PythonFrameworkKind.Huey or
        PythonFrameworkKind.Dramatiq;

    /// <summary>
    /// Gets whether this is a serverless runtime.
    /// </summary>
    public bool IsServerless => Kind is
        PythonFrameworkKind.AwsLambda or
        PythonFrameworkKind.AzureFunctions or
        PythonFrameworkKind.GoogleCloudFunctions;

    /// <summary>
    /// Gets whether this is a CLI framework.
    /// </summary>
    public bool IsCliFramework => Kind is
        PythonFrameworkKind.Click or
        PythonFrameworkKind.Typer or
        PythonFrameworkKind.Argparse;

    /// <summary>
    /// Generates metadata entries for this hint.
    /// </summary>
    /// <param name="prefix">Prepended, followed by a dot, to every generated key.</param>
    /// <returns>
    /// Entries in this order: <c>kind</c>, <c>file</c>, <c>line</c> (only when a line number
    /// is present), <c>evidence</c>, <c>confidence</c>, <c>category</c> (only for web,
    /// task-queue, serverless and CLI kinds) and finally one entry per item of
    /// <see cref="Metadata"/>. The sequence is produced lazily.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new($"{prefix}.kind", Kind.ToString());
        yield return new($"{prefix}.file", SourceFile);

        if (LineNumber is { } lineNumber)
        {
            // Metadata is machine-readable, so the number is formatted independently of
            // the culture of the current thread.
            yield return new($"{prefix}.line", lineNumber.ToString(System.Globalization.CultureInfo.InvariantCulture));
        }

        yield return new($"{prefix}.evidence", Evidence);
        yield return new($"{prefix}.confidence", Confidence.ToString());

        // The first matching category wins; kinds outside these four groups get no category entry.
        string? category = null;
        if (IsWebFramework)
        {
            category = "WebFramework";
        }
        else if (IsTaskQueue)
        {
            category = "TaskQueue";
        }
        else if (IsServerless)
        {
            category = "Serverless";
        }
        else if (IsCliFramework)
        {
            category = "CLI";
        }

        if (category is not null)
        {
            yield return new($"{prefix}.category", category);
        }

        if (Metadata is not null)
        {
            foreach (var (key, value) in Metadata)
            {
                yield return new($"{prefix}.{key}", value);
            }
        }
    }
}
/// <summary>
/// Represents an AWS Lambda handler configuration.
/// </summary>
/// <remarks>
/// Positional record with no behaviour of its own. Nothing in the code visible alongside
/// this declaration constructs it, so how the values are produced cannot be confirmed here.
/// </remarks>
/// <param name="HandlerPath">The handler path (module.function).</param>
/// <param name="ModulePath">The module file path.</param>
/// <param name="FunctionName">The handler function name.</param>
/// <param name="Runtime">The detected Python runtime (if available).</param>
internal sealed record PythonLambdaHandler(
string HandlerPath,
string ModulePath,
string FunctionName,
string? Runtime = null);
/// <summary>
/// Represents a Django project configuration.
/// </summary>
/// <remarks>
/// Positional record with no behaviour of its own. Nothing in the code visible alongside
/// this declaration constructs it, so how the values are produced cannot be confirmed here.
/// </remarks>
/// <param name="SettingsModule">The settings module path.</param>
/// <param name="InstalledApps">List of installed apps.</param>
/// <param name="Middlewares">List of middleware classes.</param>
/// <param name="RootUrlConf">The root URL configuration module.</param>
internal sealed record PythonDjangoConfig(
string SettingsModule,
ImmutableArray<string> InstalledApps,
ImmutableArray<string> Middlewares,
string? RootUrlConf = null);
/// <summary>
/// Represents a Flask application configuration.
/// </summary>
/// <remarks>
/// Positional record with no behaviour of its own. Nothing in the code visible alongside
/// this declaration constructs it, so how the values are produced cannot be confirmed here.
/// </remarks>
/// <param name="AppVariable">The Flask app variable name.</param>
/// <param name="ModulePath">The module containing the app.</param>
/// <param name="Blueprints">Registered blueprints.</param>
internal sealed record PythonFlaskConfig(
string AppVariable,
string ModulePath,
ImmutableArray<string> Blueprints);
/// <summary>
/// Represents a Celery configuration.
/// </summary>
/// <remarks>
/// Positional record with no behaviour of its own. Nothing in the code visible alongside
/// this declaration constructs it, so how the values are produced cannot be confirmed here.
/// </remarks>
/// <param name="AppVariable">The Celery app variable name.</param>
/// <param name="ModulePath">The module containing the app.</param>
/// <param name="BrokerUrl">The broker URL pattern (if detected).</param>
/// <param name="Tasks">Discovered task modules.</param>
internal sealed record PythonCeleryConfig(
string AppVariable,
string ModulePath,
string? BrokerUrl,
ImmutableArray<string> Tasks);

View File

@@ -0,0 +1,186 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
/// <summary>
/// Types of Python frameworks and configurations detected.
/// </summary>
/// <remarks>
/// No explicit values are assigned, so <see cref="Unknown"/> is 0 (the default value) and
/// the remaining members are numbered in declaration order; inserting or reordering
/// members therefore changes their numeric values.
/// NOTE(review): the detection rules visible in PythonFrameworkDetector never produce
/// Unknown, Tornado, Bottle, Pyramid, Huey, Dramatiq, AzureFunctions,
/// GoogleCloudFunctions, Uvicorn, Hypercorn, Argparse or Unittest — confirm whether
/// another component reports them.
/// </remarks>
internal enum PythonFrameworkKind
{
/// <summary>
/// Unknown framework.
/// </summary>
Unknown,
// Web Frameworks
/// <summary>
/// Django web framework.
/// </summary>
Django,
/// <summary>
/// Flask web framework.
/// </summary>
Flask,
/// <summary>
/// FastAPI async web framework.
/// </summary>
FastAPI,
/// <summary>
/// Starlette ASGI framework.
/// </summary>
Starlette,
/// <summary>
/// Tornado async web framework.
/// </summary>
Tornado,
/// <summary>
/// Bottle micro framework.
/// </summary>
Bottle,
/// <summary>
/// Pyramid web framework.
/// </summary>
Pyramid,
// Task Queues
/// <summary>
/// Celery distributed task queue.
/// </summary>
Celery,
/// <summary>
/// RQ (Redis Queue) task queue.
/// </summary>
RQ,
/// <summary>
/// Huey task queue.
/// </summary>
Huey,
/// <summary>
/// Dramatiq task queue.
/// </summary>
Dramatiq,
// Serverless
/// <summary>
/// AWS Lambda handler.
/// </summary>
AwsLambda,
/// <summary>
/// Azure Functions.
/// </summary>
AzureFunctions,
/// <summary>
/// Google Cloud Functions.
/// </summary>
GoogleCloudFunctions,
// Application Servers
/// <summary>
/// Gunicorn WSGI server.
/// </summary>
Gunicorn,
/// <summary>
/// uWSGI server.
/// </summary>
Uwsgi,
/// <summary>
/// Uvicorn ASGI server.
/// </summary>
Uvicorn,
/// <summary>
/// Hypercorn ASGI server.
/// </summary>
Hypercorn,
// CLI Frameworks
/// <summary>
/// Click CLI framework.
/// </summary>
Click,
/// <summary>
/// Typer CLI framework (Click-based).
/// </summary>
Typer,
/// <summary>
/// Argparse standard library CLI.
/// </summary>
Argparse,
// Testing Frameworks
/// <summary>
/// Pytest testing framework.
/// </summary>
Pytest,
/// <summary>
/// Unittest standard library.
/// </summary>
Unittest,
// Data/ML Frameworks
/// <summary>
/// Jupyter notebook.
/// </summary>
Jupyter,
/// <summary>
/// Streamlit data app.
/// </summary>
Streamlit,
/// <summary>
/// Gradio ML demo.
/// </summary>
Gradio,
// Configuration
/// <summary>
/// Python logging configuration.
/// </summary>
LoggingConfig,
/// <summary>
/// Pydantic settings configuration.
/// </summary>
PydanticSettings
}
/// <summary>
/// Confidence level for framework detection.
/// </summary>
/// <remarks>
/// The numeric values are ordered from weakest to strongest. PythonFrameworkDetector.DetectAsync
/// sorts hints by this value to keep the strongest hint per framework and file, so the
/// relative order of the members must be preserved.
/// NOTE(review): the per-member descriptions below do not match how the detector's lookup
/// tables assign levels — file-name matches range from Low to Definitive and import matches
/// are High or Definitive. Confirm which of the two is intended.
/// </remarks>
internal enum PythonFrameworkConfidence
{
/// <summary>
/// Low confidence - heuristic match based on file patterns.
/// </summary>
Low = 0,
/// <summary>
/// Medium confidence - import detected but usage unclear.
/// </summary>
Medium = 1,
/// <summary>
/// High confidence - clear usage pattern detected.
/// </summary>
High = 2,
/// <summary>
/// Definitive - explicit configuration or initialization found.
/// </summary>
Definitive = 3
}

View File

@@ -0,0 +1,327 @@
using System.Collections.Immutable;
using System.Text.RegularExpressions;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
/// <summary>
/// Parses Python project configuration files (pyproject.toml, setup.cfg, setup.py).
/// </summary>
internal sealed partial class PythonProjectConfigParser
{
// pyproject.toml patterns
// The section patterns use Multiline, so "^" matches at the start of any line; a header
// preceded by whitespace on its line is not matched.
/// <summary>Matches a <c>[project]</c> table header at the start of a line.</summary>
/// <remarks>NOTE(review): not referenced by the parsing code visible in this file.</remarks>
[GeneratedRegex(@"^\[project\]", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex PyprojectProjectSection();
/// <summary>Matches a <c>[project.optional-dependencies]</c> table header at the start of a line.</summary>
[GeneratedRegex(@"^\[project\.optional-dependencies\]", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex PyprojectOptionalDepsSection();
/// <summary>Matches a <c>[tool.poetry.extras]</c> table header at the start of a line.</summary>
[GeneratedRegex(@"^\[tool\.poetry\.extras\]", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex PoetryExtrasSection();
/// <summary>
/// Matches a <c>[tool.poetry.group.NAME.dependencies]</c> header; group 1 captures NAME.
/// NAME is limited to <c>\w</c> characters, so a group name containing a hyphen does not match.
/// </summary>
[GeneratedRegex(@"^\[tool\.poetry\.group\.(\w+)\.dependencies\]", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex PoetryGroupSection();
// Pattern to extract key = value or key = [...] lines
/// <summary>
/// Matches <c>key = [ ... ]</c>; group 1 is the key and group 2 the text between the brackets.
/// Declared with Singleline but without Multiline, so "^" anchors only at the start of the input.
/// </summary>
/// <remarks>NOTE(review): not referenced by the parsing code visible in this file.</remarks>
[GeneratedRegex(@"^(\w+)\s*=\s*\[(.*?)\]", RegexOptions.Compiled | RegexOptions.Singleline)]
private static partial Regex ArrayValuePattern();
/// <summary>
/// Matches a quoted <c>name = "..."</c> assignment at the start of a line; group 1 is the value.
/// The pattern is not tied to a particular table, so the first such line anywhere in the input matches.
/// </summary>
[GeneratedRegex(@"^name\s*=\s*[""']([^""']+)[""']", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex ProjectNamePattern();
/// <summary>
/// Matches a quoted <c>version = "..."</c> assignment at the start of a line; group 1 is the value.
/// The pattern is not tied to a particular table, so the first such line anywhere in the input matches.
/// </summary>
[GeneratedRegex(@"^version\s*=\s*[""']([^""']+)[""']", RegexOptions.Compiled | RegexOptions.Multiline)]
private static partial Regex ProjectVersionPattern();
/// <summary>
/// Reads a pyproject.toml file from the virtual file system and parses it.
/// </summary>
/// <param name="vfs">File system used to open the file.</param>
/// <param name="pyprojectPath">Virtual path of the file; also stored on the result.</param>
/// <param name="cancellationToken">Passed on to the open and read operations.</param>
/// <returns>The parsed configuration, or <c>null</c> when the file cannot be opened.</returns>
public async Task<PythonProjectConfig?> ParsePyprojectAsync(
    PythonVirtualFileSystem vfs,
    string pyprojectPath,
    CancellationToken cancellationToken = default)
{
    using var source = await vfs.OpenReadAsync(pyprojectPath, cancellationToken).ConfigureAwait(false);
    if (source is not null)
    {
        using var text = new StreamReader(source);
        var toml = await text.ReadToEndAsync(cancellationToken).ConfigureAwait(false);
        return ParsePyprojectContent(toml, pyprojectPath);
    }

    return null;
}
/// <summary>
/// Extracts name, version, optional dependency groups, extras and scripts from the text
/// of a pyproject.toml file using line-oriented pattern matching (not a full TOML parser).
/// </summary>
/// <param name="content">Complete text of the file.</param>
/// <param name="filePath">Path stored on the resulting configuration.</param>
private static PythonProjectConfig ParsePyprojectContent(string content, string filePath)
{
    // First capture group of a successful match, otherwise null.
    static string? FirstGroup(Match match) => match.Success ? match.Groups[1].Value : null;

    var name = FirstGroup(ProjectNamePattern().Match(content));
    var version = FirstGroup(ProjectVersionPattern().Match(content));

    var dependencyGroups = new Dictionary<string, ImmutableArray<string>>();
    var extraNames = new List<string>();

    // [project.optional-dependencies]: every key is an extra.
    if (PyprojectOptionalDepsSection().Match(content) is { Success: true } optionalHeader)
    {
        var body = ExtractSectionContent(content, optionalHeader.Index + optionalHeader.Length);
        dependencyGroups = ParseOptionalDependencies(body);
        extraNames.AddRange(dependencyGroups.Keys);
    }

    // [tool.poetry.extras]: only adds extras that the section above did not define.
    if (PoetryExtrasSection().Match(content) is { Success: true } poetryHeader)
    {
        var body = ExtractSectionContent(content, poetryHeader.Index + poetryHeader.Length);
        foreach (var (extra, packages) in ParseOptionalDependencies(body))
        {
            if (dependencyGroups.TryAdd(extra, packages))
            {
                extraNames.Add(extra);
            }
        }
    }

    // [tool.poetry.group.<name>.dependencies]: the group name is listed as an extra; its
    // dependencies are not collected.
    foreach (Match groupHeader in PoetryGroupSection().Matches(content))
    {
        var group = groupHeader.Groups[1].Value;
        if (!extraNames.Contains(group))
        {
            extraNames.Add(group);
        }
    }

    return new PythonProjectConfig(
        FilePath: filePath,
        ProjectName: name,
        ProjectVersion: version,
        OptionalDependencies: dependencyGroups.ToImmutableDictionary(),
        Extras: extraNames.Distinct().ToImmutableArray(),
        Scripts: ParseScriptsSection(content).ToImmutableDictionary());
}
/// <summary>
/// Returns the text from <paramref name="startIndex"/> up to, but not including, the next
/// line that begins with <c>[</c>, or to the end of the input when there is none.
/// </summary>
private static string ExtractSectionContent(string content, int startIndex)
{
    // A newline directly followed by '[' is taken to be the next table header.
    var end = content.IndexOf("\n[", startIndex, StringComparison.Ordinal);
    return end >= 0 ? content[startIndex..end] : content[startIndex..];
}
/// <summary>
/// Parses the body of a table whose entries have the form <c>key = [ ... ]</c>, where the
/// array may be written on one line or span several lines.
/// </summary>
/// <param name="sectionContent">Text of the table body, without its header.</param>
/// <returns>
/// One entry per key. A key whose value is not an array maps to an empty list; a repeated
/// key keeps the values of its last occurrence.
/// </returns>
private static Dictionary<string, ImmutableArray<string>> ParseOptionalDependencies(string sectionContent)
{
    var groups = new Dictionary<string, ImmutableArray<string>>();
    string? activeKey = null;
    var pending = new List<string>();
    var arrayOpen = false;

    foreach (var rawLine in sectionContent.Split('\n'))
    {
        var text = rawLine.Trim();
        if (text.Length == 0 || text[0] == '#')
        {
            continue; // blank line or comment
        }

        if (arrayOpen)
        {
            // Continuation of a multi-line array; a line ending in ']' closes it.
            pending.AddRange(ParseArrayValues(text));
            arrayOpen = !text.EndsWith(']');
            continue;
        }

        var equalsAt = text.IndexOf('=');
        if (equalsAt < 0)
        {
            continue; // outside an array only "key = value" lines are meaningful
        }

        // A new key starts: store what was gathered for the previous one.
        if (activeKey is not null)
        {
            groups[activeKey] = pending.ToImmutableArray();
        }

        pending = new List<string>();
        activeKey = text[..equalsAt].Trim();

        var rhs = text[(equalsAt + 1)..].Trim();
        if (rhs.StartsWith('['))
        {
            pending = ParseArrayValues(rhs);
            arrayOpen = !rhs.EndsWith(']');
        }
    }

    if (activeKey is not null)
    {
        groups[activeKey] = pending.ToImmutableArray();
    }

    return groups;
}
/// <summary>
/// Extracts the string items from a fragment of a TOML array: a complete single-line array
/// or one line of a multi-line array.
/// </summary>
/// <param name="value">The fragment, with or without the surrounding brackets.</param>
/// <returns>
/// The items with quotes and surrounding whitespace removed; empty items are dropped.
/// Commas and quote characters inside a quoted string belong to the item, so
/// <c>"django&gt;=3.2,&lt;5.0"</c> and <c>"pywin32; sys_platform == 'win32'"</c> each
/// yield a single, unmodified item.
/// </returns>
private static List<string> ParseArrayValues(string value)
{
    var result = new List<string>();

    // Remove the array delimiters and padding around the fragment.
    var cleaned = value.Trim('[', ']', ' ', '\t');
    if (string.IsNullOrEmpty(cleaned))
    {
        return result;
    }

    var token = new System.Text.StringBuilder();
    var quote = '\0'; // '\0' outside a string, otherwise the quote character that opened it

    for (var i = 0; i < cleaned.Length; i++)
    {
        var ch = cleaned[i];

        if (quote != '\0')
        {
            if (ch == '\\' && quote == '"' && i + 1 < cleaned.Length)
            {
                // Escape sequence in a basic string: keep both characters verbatim so an
                // escaped quote does not end the string.
                token.Append(ch).Append(cleaned[++i]);
            }
            else if (ch == quote)
            {
                quote = '\0';
            }
            else
            {
                token.Append(ch);
            }

            continue;
        }

        switch (ch)
        {
            case '"':
            case '\'':
                quote = ch;
                break;
            case ',':
                Flush();
                break;
            case '#':
                // Outside a string '#' starts a comment that runs to the end of the line.
                i = cleaned.Length;
                break;
            default:
                token.Append(ch);
                break;
        }
    }

    Flush();
    return result;

    // Stores the item gathered so far, if any, and starts a new one.
    void Flush()
    {
        var item = token.ToString().Trim();
        token.Clear();
        if (item.Length > 0)
        {
            result.Add(item);
        }
    }
}
/// <summary>
/// Collects the entry-point scripts declared under <c>[project.scripts]</c> and
/// <c>[tool.poetry.scripts]</c>.
/// </summary>
/// <param name="content">Complete text of the pyproject.toml file.</param>
/// <returns>
/// Script name to target. When both tables define the same name, the value from
/// <c>[tool.poetry.scripts]</c> is kept because that table is processed last.
/// </returns>
private static Dictionary<string, string> ParseScriptsSection(string content)
{
    var result = new Dictionary<string, string>();

    // A table header must be the first non-blank text on its line. Anchoring the patterns
    // keeps a mention of the header inside a comment or string value from being mistaken
    // for the table itself.
    string[] headerPatterns =
    {
        @"^[ \t]*\[project\.scripts\]",
        @"^[ \t]*\[tool\.poetry\.scripts\]",
    };

    foreach (var pattern in headerPatterns)
    {
        var header = Regex.Match(content, pattern, RegexOptions.Multiline);
        if (!header.Success)
        {
            continue;
        }

        var body = ExtractSectionContent(content, header.Index + header.Length);
        foreach (var (name, target) in ParseKeyValueSection(body))
        {
            result[name] = target;
        }
    }

    return result;
}
/// <summary>
/// Parses <c>key = value</c> lines from the body of a table.
/// </summary>
/// <param name="sectionContent">Text of the table body, without its header.</param>
/// <returns>
/// Key to value, with quote characters stripped from both ends of the value. Blank lines,
/// comment lines and lines without '=' are ignored; a repeated key keeps its last value.
/// </returns>
private static Dictionary<string, string> ParseKeyValueSection(string sectionContent)
{
    var entries = new Dictionary<string, string>();

    foreach (var rawLine in sectionContent.Split('\n'))
    {
        var text = rawLine.Trim();
        if (text.Length == 0 || text[0] == '#')
        {
            continue;
        }

        // Only the first '=' separates key and value; later ones belong to the value.
        var equalsAt = text.IndexOf('=');
        if (equalsAt < 0)
        {
            continue;
        }

        var name = text[..equalsAt].Trim();
        var target = text[(equalsAt + 1)..].Trim().Trim('"', '\'');
        entries[name] = target;
    }

    return entries;
}
}
/// <summary>
/// Parsed view of a Python project configuration file.
/// </summary>
/// <param name="FilePath">Path to the configuration file.</param>
/// <param name="ProjectName">The project name, when one was found.</param>
/// <param name="ProjectVersion">The project version, when one was found.</param>
/// <param name="OptionalDependencies">Optional dependencies keyed by group name.</param>
/// <param name="Extras">Names of the available extras.</param>
/// <param name="Scripts">Entry-point scripts keyed by script name.</param>
internal sealed record PythonProjectConfig(
    string FilePath,
    string? ProjectName,
    string? ProjectVersion,
    ImmutableDictionary<string, ImmutableArray<string>> OptionalDependencies,
    ImmutableArray<string> Extras,
    ImmutableDictionary<string, string> Scripts)
{
    /// <summary>
    /// Produces flat metadata entries describing this configuration.
    /// </summary>
    /// <param name="prefix">Prepended, followed by a dot, to every generated key.</param>
    /// <returns>
    /// <c>path</c> is always emitted; <c>name</c>, <c>version</c>, <c>extras</c> and
    /// <c>scripts</c> only when present or non-empty. Extras and script names are joined
    /// with commas. The sequence is produced lazily.
    /// </returns>
    public IEnumerable<KeyValuePair<string, string?>> ToMetadata(string prefix)
    {
        yield return new($"{prefix}.path", FilePath);

        if (ProjectName is { } name)
        {
            yield return new($"{prefix}.name", name);
        }

        if (ProjectVersion is { } version)
        {
            yield return new($"{prefix}.version", version);
        }

        if (Extras.Length != 0)
        {
            yield return new($"{prefix}.extras", string.Join(",", Extras));
        }

        if (!Scripts.IsEmpty)
        {
            yield return new($"{prefix}.scripts", string.Join(",", Scripts.Keys));
        }
    }
}

View File

@@ -0,0 +1,395 @@
using System.Collections.Immutable;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Capabilities;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Entrypoints;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Framework;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Imports;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Packaging;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.VirtualFileSystem;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
/// <summary>
/// Builds AOC-compliant observation documents from analysis results.
/// </summary>
internal sealed class PythonObservationBuilder
{
// Schema identifier for the produced documents (not read in the part of the class visible here).
private const string SchemaVersion = "python-aoc-v1";
// Accumulators filled by the Add* methods.
// _packages and _dependencyEdges are appended to by AddPackages.
private readonly List<PythonObservationPackage> _packages = [];
// Appended to by AddModules.
private readonly List<PythonObservationModule> _modules = [];
private readonly List<PythonObservationEntrypoint> _entrypoints = [];
private readonly List<PythonObservationDependencyEdge> _dependencyEdges = [];
// Appended to by AddImportEdges.
private readonly List<PythonObservationImportEdge> _importEdges = [];
private readonly List<PythonObservationNativeExtension> _nativeExtensions = [];
private readonly List<PythonObservationFrameworkHint> _frameworks = [];
private readonly List<PythonObservationWarning> _warnings = [];
private readonly List<string> _securitySensitiveCapabilities = [];
// Single-valued parts of the observation; null until assigned.
// NOTE(review): the code that assigns these and the flags below is outside the visible part of the class.
private PythonObservationEnvironment? _environment;
private PythonObservationRuntimeEvidence? _runtimeEvidence;
// Capability flags; all start as false.
private bool _usesProcessExecution;
private bool _usesNetworkAccess;
private bool _usesFileSystem;
private bool _usesCodeExecution;
private bool _usesDeserialization;
private bool _usesNativeCode;
private bool _usesAsyncAwait;
private bool _usesMultiprocessing;
/// <summary>
/// Records every discovered package and one dependency edge per declared dependency.
/// </summary>
/// <param name="packages">Package discovery results to copy into the observation.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddPackages(IEnumerable<PythonPackageInfo> packages)
{
    foreach (var package in packages)
    {
        var observedPackage = new PythonObservationPackage(
            Name: package.Name,
            // A package without a version is recorded with the literal "unknown".
            Version: package.Version ?? "unknown",
            Source: package.Kind.ToString(),
            Platform: null,
            IsDirect: package.IsDirectDependency,
            InstallerKind: package.InstallerTool,
            DistInfoPath: package.MetadataPath,
            // NOTE(review): Groups receives the same value as Extras - confirm this is intended.
            Groups: package.Extras,
            Extras: package.Extras);
        _packages.Add(observedPackage);

        // Each raw dependency string is split into a target name and a version constraint.
        foreach (var requirement in package.Dependencies)
        {
            var dependencyEdge = new PythonObservationDependencyEdge(
                FromPackage: package.Name,
                ToPackage: ExtractPackageName(requirement),
                VersionConstraint: ExtractVersionConstraint(requirement),
                Extra: null,
                IsOptional: false);
            _dependencyEdges.Add(dependencyEdge);
        }
    }

    return this;
}
/// <summary>
/// Records one observation module per module node, optionally enriched with the
/// names imported by the module's file.
/// </summary>
/// <param name="moduleNodes">Module nodes to record.</param>
/// <param name="importGraph">
/// Optional import graph; when omitted every module is recorded with an empty import list.
/// </param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddModules(
    IEnumerable<PythonModuleNode> moduleNodes,
    PythonImportGraph? importGraph = null)
{
    foreach (var moduleNode in moduleNodes)
    {
        // A node without a virtual path is looked up and recorded under the empty string.
        ImmutableArray<string> importedNames;
        if (importGraph is null)
        {
            importedNames = ImmutableArray<string>.Empty;
        }
        else
        {
            importedNames = importGraph
                .GetImportsForFile(moduleNode.VirtualPath ?? "")
                .SelectMany(i => i.ImportedNames)
                .ToImmutableArray();
        }

        var moduleType = moduleNode.IsPackage ? "package" : "module";
        _modules.Add(new PythonObservationModule(
            Name: moduleNode.ModulePath,
            Type: moduleType,
            FilePath: moduleNode.VirtualPath ?? "",
            Line: null,
            IsNamespacePackage: moduleNode.IsNamespacePackage,
            ParentPackage: ExtractParentPackage(moduleNode.ModulePath),
            Imports: importedNames));
    }

    return this;
}
/// <summary>
/// Copies import-graph edges into the observation.
/// </summary>
/// <param name="edges">Edges produced by the import graph.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddImportEdges(IEnumerable<PythonImportEdge> edges)
{
    foreach (var graphEdge in edges)
    {
        var observedEdge = new PythonObservationImportEdge(
            FromModule: graphEdge.From,
            ToModule: graphEdge.To,
            Kind: MapImportKind(graphEdge.Import.Kind),
            Confidence: MapImportConfidence(graphEdge.Import.Confidence),
            ResolvedPath: null,
            SourceFile: graphEdge.Import.SourceFile,
            // An unknown line number is recorded as 0.
            Line: graphEdge.Import.LineNumber ?? 0,
            ResolverTrace: ImmutableArray<string>.Empty);
        _importEdges.Add(observedEdge);
    }

    return this;
}
/// <summary>
/// Copies discovered entrypoints into the observation.
/// </summary>
/// <param name="entrypoints">Entrypoints produced by entrypoint discovery.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddEntrypoints(IEnumerable<PythonEntrypoint> entrypoints)
{
    foreach (var entrypoint in entrypoints)
    {
        // The target stands in for the path when the entrypoint has no virtual path.
        var observedEntrypoint = new PythonObservationEntrypoint(
            Path: entrypoint.VirtualPath ?? entrypoint.Target,
            Type: entrypoint.Kind.ToString(),
            Handler: entrypoint.Callable,
            RequiredPackages: ImmutableArray<string>.Empty,
            InvocationContext: entrypoint.InvocationContext.InvocationType.ToString());
        _entrypoints.Add(observedEntrypoint);
    }

    return this;
}
/// <summary>
/// Folds detected capabilities into the summary flags and collects the distinct
/// names of the security-sensitive capability kinds.
/// </summary>
/// <param name="capabilities">Capabilities produced by capability detection.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddCapabilities(IEnumerable<PythonCapability> capabilities)
{
    foreach (var capability in capabilities)
    {
        switch (capability.Kind)
        {
            case PythonCapabilityKind.ProcessExecution:
                _usesProcessExecution = true;
                break;
            case PythonCapabilityKind.NetworkAccess:
                _usesNetworkAccess = true;
                break;
            case PythonCapabilityKind.FileSystemAccess:
                _usesFileSystem = true;
                break;
            case PythonCapabilityKind.CodeExecution:
                _usesCodeExecution = true;
                break;
            case PythonCapabilityKind.Deserialization:
                _usesDeserialization = true;
                break;
            // All three kinds count as native code usage.
            case PythonCapabilityKind.Ctypes:
            case PythonCapabilityKind.Cffi:
            case PythonCapabilityKind.NativeCodeExecution:
                _usesNativeCode = true;
                break;
            case PythonCapabilityKind.AsyncAwait:
                _usesAsyncAwait = true;
                break;
            case PythonCapabilityKind.Multiprocessing:
                _usesMultiprocessing = true;
                break;
        }

        if (!capability.IsSecuritySensitive)
        {
            continue;
        }

        // Record each sensitive kind once, in first-seen order.
        var kindName = capability.Kind.ToString();
        if (!_securitySensitiveCapabilities.Contains(kindName))
        {
            _securitySensitiveCapabilities.Add(kindName);
        }
    }

    return this;
}
/// <summary>
/// Copies scanned native extensions into the observation. Any extension at all
/// marks the project as using native code.
/// </summary>
/// <param name="extensions">Extensions produced by extension scanning.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddNativeExtensions(IEnumerable<PythonNativeExtension> extensions)
{
    foreach (var extension in extensions)
    {
        var observedExtension = new PythonObservationNativeExtension(
            ModuleName: extension.ModuleName,
            Path: extension.Path,
            Kind: extension.Kind.ToString(),
            Platform: extension.Platform,
            Architecture: extension.Architecture,
            PackageName: extension.PackageName);
        _nativeExtensions.Add(observedExtension);
        _usesNativeCode = true;
    }

    return this;
}
/// <summary>
/// Copies framework hints into the observation and tags each with a coarse category.
/// </summary>
/// <param name="hints">Hints produced by framework detection.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddFrameworkHints(IEnumerable<PythonFrameworkHint> hints)
{
    foreach (var frameworkHint in hints)
    {
        // The first matching flag wins; a hint matching none gets no category.
        string? category =
            frameworkHint.IsWebFramework ? "WebFramework"
            : frameworkHint.IsTaskQueue ? "TaskQueue"
            : frameworkHint.IsServerless ? "Serverless"
            : frameworkHint.IsCliFramework ? "CLI"
            : null;

        var observedHint = new PythonObservationFrameworkHint(
            Kind: frameworkHint.Kind.ToString(),
            SourceFile: frameworkHint.SourceFile,
            Line: frameworkHint.LineNumber,
            Evidence: frameworkHint.Evidence,
            Confidence: MapConfidence(frameworkHint.Confidence),
            Category: category);
        _frameworks.Add(observedHint);
    }

    return this;
}
/// <summary>
/// Replaces the environment section of the observation.
/// </summary>
/// <param name="pythonVersion">Python version, if known.</param>
/// <param name="sitePackagesPaths">Site-packages directories; null is stored as empty.</param>
/// <param name="requirementsFiles">Requirements files; null is stored as empty.</param>
/// <param name="pyprojectFiles">Pyproject files; null is stored as empty.</param>
/// <param name="virtualenvPath">Virtualenv path, if any.</param>
/// <param name="condaPrefix">Conda prefix, if any.</param>
/// <param name="isContainer">Container flag stored on the environment as given.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder SetEnvironment(
    string? pythonVersion,
    IEnumerable<string>? sitePackagesPaths = null,
    IEnumerable<string>? requirementsFiles = null,
    IEnumerable<string>? pyprojectFiles = null,
    string? virtualenvPath = null,
    string? condaPrefix = null,
    bool isContainer = false)
{
    // Sequences are snapshotted in the same order as the original constructor arguments.
    var sitePackages = Snapshot(sitePackagesPaths);
    var requirements = Snapshot(requirementsFiles);
    var pyprojects = Snapshot(pyprojectFiles);

    _environment = new PythonObservationEnvironment(
        PythonVersion: pythonVersion,
        SitePackagesPaths: sitePackages,
        VersionSources: ImmutableArray<PythonObservationVersionSource>.Empty,
        RequirementsFiles: requirements,
        PyprojectFiles: pyprojects,
        VirtualenvPath: virtualenvPath,
        CondaPrefix: condaPrefix,
        IsContainer: isContainer);
    return this;

    // Materialises an optional sequence, mapping null to an empty array.
    static ImmutableArray<string> Snapshot(IEnumerable<string>? items)
    {
        return items is null ? ImmutableArray<string>.Empty : items.ToImmutableArray();
    }
}
/// <summary>
/// Appends a warning to the observation.
/// </summary>
/// <param name="code">Warning code.</param>
/// <param name="message">Warning text.</param>
/// <param name="filePath">File the warning refers to, if any.</param>
/// <param name="line">Line the warning refers to, if any.</param>
/// <param name="severity">Severity label; defaults to <c>"warning"</c>.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder AddWarning(
    string code,
    string message,
    string? filePath = null,
    int? line = null,
    string severity = "warning")
{
    var warning = new PythonObservationWarning(code, message, filePath, line, severity);
    _warnings.Add(warning);
    return this;
}
/// <summary>
/// Stores the runtime evidence section, replacing any previously set value.
/// </summary>
/// <param name="evidence">Evidence gathered by the optional runtime analysis.</param>
/// <returns>This builder, to allow call chaining.</returns>
public PythonObservationBuilder SetRuntimeEvidence(PythonObservationRuntimeEvidence evidence)
{
    var builder = this;
    builder._runtimeEvidence = evidence;
    return builder;
}
/// <summary>
/// Snapshots everything collected so far into an immutable observation document.
/// </summary>
/// <returns>
/// A document stamped with the builder's schema version. When no environment was set,
/// an environment with no version, no paths and <c>IsContainer = false</c> is used.
/// </returns>
public PythonObservationDocument Build()
{
    var environment = _environment ?? new PythonObservationEnvironment(
        PythonVersion: null,
        SitePackagesPaths: ImmutableArray<string>.Empty,
        VersionSources: ImmutableArray<PythonObservationVersionSource>.Empty,
        RequirementsFiles: ImmutableArray<string>.Empty,
        PyprojectFiles: ImmutableArray<string>.Empty,
        VirtualenvPath: null,
        CondaPrefix: null,
        IsContainer: false);

    // The summary lists each framework kind once, in first-seen order.
    var frameworkKinds = _frameworks
        .Select(hint => hint.Kind)
        .Distinct()
        .ToImmutableArray();

    var capabilitySummary = new PythonObservationCapabilitySummary(
        UsesProcessExecution: _usesProcessExecution,
        UsesNetworkAccess: _usesNetworkAccess,
        UsesFileSystem: _usesFileSystem,
        UsesCodeExecution: _usesCodeExecution,
        UsesDeserialization: _usesDeserialization,
        UsesNativeCode: _usesNativeCode,
        UsesAsyncAwait: _usesAsyncAwait,
        UsesMultiprocessing: _usesMultiprocessing,
        DetectedFrameworks: frameworkKinds,
        SecuritySensitiveCapabilities: _securitySensitiveCapabilities.ToImmutableArray());

    return new PythonObservationDocument(
        Schema: SchemaVersion,
        Packages: _packages.ToImmutableArray(),
        Modules: _modules.ToImmutableArray(),
        Entrypoints: _entrypoints.ToImmutableArray(),
        DependencyEdges: _dependencyEdges.ToImmutableArray(),
        ImportEdges: _importEdges.ToImmutableArray(),
        NativeExtensions: _nativeExtensions.ToImmutableArray(),
        Frameworks: _frameworks.ToImmutableArray(),
        Warnings: _warnings.ToImmutableArray(),
        Environment: environment,
        Capabilities: capabilitySummary,
        RuntimeEvidence: _runtimeEvidence);
}
/// <summary>
/// Translates an import-graph import kind into the observation vocabulary.
/// Both importlib-based and builtin-import kinds are reported as dynamic imports;
/// any kind not listed is reported as a plain import.
/// </summary>
private static PythonObservationImportKind MapImportKind(PythonImportKind kind)
{
    switch (kind)
    {
        case PythonImportKind.FromImport:
            return PythonObservationImportKind.FromImport;
        case PythonImportKind.RelativeImport:
            return PythonObservationImportKind.RelativeImport;
        case PythonImportKind.ImportlibImportModule:
        case PythonImportKind.BuiltinImport:
            return PythonObservationImportKind.DynamicImport;
        default:
            // Covers PythonImportKind.Import and every unlisted kind.
            return PythonObservationImportKind.Import;
    }
}
/// <summary>
/// Translates an import confidence into the observation confidence scale.
/// Unlisted values are reported as medium.
/// </summary>
private static PythonObservationConfidence MapImportConfidence(PythonImportConfidence confidence)
{
    switch (confidence)
    {
        case PythonImportConfidence.Low:
            return PythonObservationConfidence.Low;
        case PythonImportConfidence.High:
            return PythonObservationConfidence.High;
        case PythonImportConfidence.Definitive:
            return PythonObservationConfidence.Definitive;
        default:
            // Covers PythonImportConfidence.Medium and every unlisted value.
            return PythonObservationConfidence.Medium;
    }
}
/// <summary>
/// Translates a framework-detection confidence into the observation confidence scale.
/// Unlisted values are reported as medium.
/// </summary>
private static PythonObservationConfidence MapConfidence(PythonFrameworkConfidence confidence)
{
    switch (confidence)
    {
        case PythonFrameworkConfidence.Low:
            return PythonObservationConfidence.Low;
        case PythonFrameworkConfidence.High:
            return PythonObservationConfidence.High;
        case PythonFrameworkConfidence.Definitive:
            return PythonObservationConfidence.Definitive;
        default:
            // Covers PythonFrameworkConfidence.Medium and every unlisted value.
            return PythonObservationConfidence.Medium;
    }
}
/// <summary>
/// Extracts the bare distribution name from a dependency specification.
/// </summary>
/// <remarks>
/// Handles plain specifiers (<c>requests&gt;=2.0</c>), extras (<c>numpy[extra]</c>),
/// parenthesised specifiers (<c>requests (&gt;=2.0)</c>), environment markers
/// (<c>pywin32; sys_platform == 'win32'</c>) and direct references
/// (<c>pkg @ https://host/pkg.whl</c>).
/// </remarks>
/// <param name="dependency">Raw dependency string.</param>
/// <returns>The trimmed name; the trimmed input when nothing could be stripped.</returns>
private static string ExtractPackageName(string dependency)
{
    var name = dependency;

    // Strip the environment marker first: it may itself contain operators or
    // brackets that must not be mistaken for part of the requirement.
    var markerIdx = name.IndexOf(';');
    if (markerIdx > 0)
    {
        name = name[..markerIdx];
    }

    // Direct reference ("name @ url"): everything from '@' on is the URL.
    var urlIdx = name.IndexOf('@');
    if (urlIdx > 0)
    {
        name = name[..urlIdx];
    }

    // Extras ("[...]") or a parenthesised version specifier ("(...)").
    var bracketIdx = name.IndexOfAny(new[] { '[', '(' });
    if (bracketIdx > 0)
    {
        name = name[..bracketIdx];
    }

    // Each hit shortens the name, so the loop ends up cutting at the earliest operator.
    foreach (var op in new[] { ">=", "<=", "==", "!=", ">", "<", "~=", "^" })
    {
        var opIdx = name.IndexOf(op, StringComparison.Ordinal);
        if (opIdx > 0)
        {
            name = name[..opIdx];
        }
    }

    return name.Trim();
}
/// <summary>
/// Extracts the version constraint portion of a dependency specification.
/// </summary>
/// <remarks>
/// The constraint starts at the earliest version operator in the requirement, so
/// multi-clause specifiers such as <c>pkg&lt;2.0,&gt;=1.0</c> are returned whole.
/// Environment markers (after <c>;</c>) are ignored, direct references
/// (<c>name @ url</c>) have no constraint, and the closing parenthesis of the
/// <c>name (&gt;=1.0)</c> form is dropped.
/// </remarks>
/// <param name="dependency">Raw dependency string.</param>
/// <returns>The trimmed constraint, or <c>null</c> when none is present.</returns>
private static string? ExtractVersionConstraint(string dependency)
{
    // Comparisons inside an environment marker are not version constraints.
    var markerIdx = dependency.IndexOf(';');
    var spec = markerIdx >= 0 ? dependency[..markerIdx] : dependency;

    // A direct reference pins a URL, not a version; operators inside the URL are noise.
    if (spec.Contains('@'))
    {
        return null;
    }

    // Find the earliest operator by position, not by order in the operator list.
    var start = -1;
    foreach (var op in new[] { ">=", "<=", "==", "!=", ">", "<", "~=", "^" })
    {
        var opIdx = spec.IndexOf(op, StringComparison.Ordinal);
        if (opIdx > 0 && (start < 0 || opIdx < start))
        {
            start = opIdx;
        }
    }

    if (start < 0)
    {
        return null;
    }

    // Drop the closing parenthesis of the "name (>=1.0)" form.
    var constraint = spec[start..].Trim().TrimEnd(')').TrimEnd();
    return constraint.Length > 0 ? constraint : null;
}
/// <summary>
/// Returns the dotted prefix of a module name (everything before the last dot).
/// </summary>
/// <param name="moduleName">Dotted module name.</param>
/// <returns>
/// The parent package, or <c>null</c> when the name has no dot or only a leading one.
/// </returns>
private static string? ExtractParentPackage(string moduleName)
{
    var separatorIndex = moduleName.LastIndexOf('.');
    if (separatorIndex <= 0)
    {
        return null;
    }

    return moduleName.Substring(0, separatorIndex);
}
}

View File

@@ -0,0 +1,231 @@
using System.Collections.Immutable;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
/// <summary>
/// AOC-compliant observation document for Python project analysis.
/// Contains packages, modules, entrypoints, dependency edges, capabilities, and warnings.
/// </summary>
/// <remarks>
/// Instances are assembled by <c>PythonObservationBuilder.Build()</c>, which sets
/// <c>Schema</c> to <c>"python-aoc-v1"</c> and always supplies an environment and a
/// capability summary. <c>RuntimeEvidence</c> stays <c>null</c> unless it was set on the builder.
/// </remarks>
/// <param name="Schema">Schema identifier of the document.</param>
/// <param name="RuntimeEvidence">Optional runtime evidence section; defaults to <c>null</c>.</param>
internal sealed record PythonObservationDocument(
string Schema,
ImmutableArray<PythonObservationPackage> Packages,
ImmutableArray<PythonObservationModule> Modules,
ImmutableArray<PythonObservationEntrypoint> Entrypoints,
ImmutableArray<PythonObservationDependencyEdge> DependencyEdges,
ImmutableArray<PythonObservationImportEdge> ImportEdges,
ImmutableArray<PythonObservationNativeExtension> NativeExtensions,
ImmutableArray<PythonObservationFrameworkHint> Frameworks,
ImmutableArray<PythonObservationWarning> Warnings,
PythonObservationEnvironment Environment,
PythonObservationCapabilitySummary Capabilities,
PythonObservationRuntimeEvidence? RuntimeEvidence = null);
/// <summary>
/// Python package detected in the project (from pip, conda, or other package managers).
/// </summary>
/// <param name="Name">Package name as reported by package discovery.</param>
/// <param name="Version">Package version; the builder writes <c>"unknown"</c> when discovery has none.</param>
/// <param name="Source">String form of the discovered package kind.</param>
/// <param name="Platform">Target platform; the builder currently always passes <c>null</c>.</param>
/// <param name="IsDirect">Whether discovery flagged the package as a direct dependency.</param>
/// <param name="InstallerKind">Installer tool reported by discovery, if any.</param>
/// <param name="DistInfoPath">Metadata path reported by discovery, if any.</param>
/// <param name="Groups">Groups of the package; the builder currently passes the same value as <paramref name="Extras"/>.</param>
/// <param name="Extras">Extras reported by discovery.</param>
internal sealed record PythonObservationPackage(
string Name,
string Version,
string Source,
string? Platform,
bool IsDirect,
string? InstallerKind,
string? DistInfoPath,
ImmutableArray<string> Groups,
ImmutableArray<string> Extras);
/// <summary>
/// Python module or package detected in the project.
/// </summary>
/// <param name="Name">Dotted module path.</param>
/// <param name="Type">The builder writes <c>"package"</c> or <c>"module"</c>.</param>
/// <param name="FilePath">Virtual path of the module; the builder writes an empty string when none is known.</param>
/// <param name="Line">Source line; the builder currently always passes <c>null</c>.</param>
/// <param name="IsNamespacePackage">Namespace-package flag copied from the module node.</param>
/// <param name="ParentPackage">Dotted prefix of <paramref name="Name"/> before its last dot, or <c>null</c> for top-level names.</param>
/// <param name="Imports">Names imported by the module's file; empty when no import graph was supplied.</param>
internal sealed record PythonObservationModule(
string Name,
string Type,
string FilePath,
int? Line,
bool IsNamespacePackage,
string? ParentPackage,
ImmutableArray<string> Imports);
/// <summary>
/// Entrypoint detected in the Python project.
/// </summary>
/// <param name="Path">Virtual path of the entrypoint; the builder falls back to the entrypoint target when no path is known.</param>
/// <param name="Type">String form of the entrypoint kind.</param>
/// <param name="Handler">Callable of the entrypoint, if any.</param>
/// <param name="RequiredPackages">Packages required by the entrypoint; the builder currently always passes an empty array.</param>
/// <param name="InvocationContext">String form of the invocation type.</param>
internal sealed record PythonObservationEntrypoint(
string Path,
string Type,
string? Handler,
ImmutableArray<string> RequiredPackages,
string? InvocationContext);
/// <summary>
/// Package dependency edge (declared in requirements or pyproject).
/// </summary>
/// <param name="FromPackage">Name of the package declaring the dependency.</param>
/// <param name="ToPackage">Name extracted from the raw dependency string.</param>
/// <param name="VersionConstraint">Version constraint extracted from the raw dependency string, or <c>null</c> when none was found.</param>
/// <param name="Extra">Extra the edge belongs to; the builder currently always passes <c>null</c>.</param>
/// <param name="IsOptional">Optional flag; the builder currently always passes <c>false</c>.</param>
internal sealed record PythonObservationDependencyEdge(
string FromPackage,
string ToPackage,
string? VersionConstraint,
string? Extra,
bool IsOptional);
/// <summary>
/// Import edge between modules with reason codes and confidence.
/// </summary>
/// <param name="FromModule">Importing side of the edge.</param>
/// <param name="ToModule">Imported side of the edge.</param>
/// <param name="Kind">Import kind mapped from the import graph.</param>
/// <param name="Confidence">Confidence mapped from the import graph.</param>
/// <param name="ResolvedPath">Resolved file path; the builder currently always passes <c>null</c>.</param>
/// <param name="SourceFile">File containing the import.</param>
/// <param name="Line">Line of the import; the builder writes <c>0</c> when the line is unknown.</param>
/// <param name="ResolverTrace">Resolver trace; the builder currently always passes an empty array.</param>
internal sealed record PythonObservationImportEdge(
string FromModule,
string ToModule,
PythonObservationImportKind Kind,
PythonObservationConfidence Confidence,
string? ResolvedPath,
string SourceFile,
int Line,
ImmutableArray<string> ResolverTrace);
/// <summary>
/// Import edge types.
/// </summary>
/// <remarks>
/// The builder's import-kind mapping only produces <see cref="Import"/>,
/// <see cref="FromImport"/>, <see cref="RelativeImport"/> and <see cref="DynamicImport"/>;
/// no producer of the remaining members is visible next to it.
/// <c>PythonObservationSerializer</c> writes members as camelCase strings.
/// </remarks>
internal enum PythonObservationImportKind
{
/// <summary>Standard import statement.</summary>
Import,
/// <summary>From X import Y statement.</summary>
FromImport,
/// <summary>Relative import within package.</summary>
RelativeImport,
/// <summary>Dynamic import via importlib.</summary>
DynamicImport,
/// <summary>Namespace package implicit import.</summary>
NamespacePackage,
/// <summary>Native extension load.</summary>
NativeExtension,
/// <summary>Heuristic/hint-based import (not definitively resolved).</summary>
Hint
}
/// <summary>
/// Confidence level for observations.
/// </summary>
/// <remarks>
/// Numeric values are explicit and ascend with confidence, so members can be compared
/// numerically. <c>PythonObservationSerializer</c> writes members as camelCase strings.
/// </remarks>
internal enum PythonObservationConfidence
{
/// <summary>Low confidence - heuristic match.</summary>
Low = 0,
/// <summary>Medium confidence - likely correct.</summary>
Medium = 1,
/// <summary>High confidence - clear evidence.</summary>
High = 2,
/// <summary>Definitive - direct evidence found.</summary>
Definitive = 3
}
/// <summary>
/// Native extension detected in the project.
/// </summary>
/// <param name="ModuleName">Module name of the extension.</param>
/// <param name="Path">Path of the extension file.</param>
/// <param name="Kind">String form of the extension kind reported by extension scanning.</param>
/// <param name="Platform">Platform reported by extension scanning, if any.</param>
/// <param name="Architecture">Architecture reported by extension scanning, if any.</param>
/// <param name="PackageName">Owning package reported by extension scanning, if any.</param>
internal sealed record PythonObservationNativeExtension(
string ModuleName,
string Path,
string Kind,
string? Platform,
string? Architecture,
string? PackageName);
/// <summary>
/// Framework hint detected in the project.
/// </summary>
/// <param name="Kind">String form of the detected framework kind.</param>
/// <param name="SourceFile">File the hint was found in.</param>
/// <param name="Line">Line of the hint, if known.</param>
/// <param name="Evidence">Evidence text reported by framework detection.</param>
/// <param name="Confidence">Confidence mapped from framework detection.</param>
/// <param name="Category">
/// The builder writes <c>"WebFramework"</c>, <c>"TaskQueue"</c>, <c>"Serverless"</c>,
/// <c>"CLI"</c>, or <c>null</c> when the hint matches none of those.
/// </param>
internal sealed record PythonObservationFrameworkHint(
string Kind,
string SourceFile,
int? Line,
string Evidence,
PythonObservationConfidence Confidence,
string? Category);
/// <summary>
/// Analysis warning generated during scanning.
/// </summary>
/// <param name="Code">Warning code.</param>
/// <param name="Message">Warning text.</param>
/// <param name="FilePath">File the warning refers to, if any.</param>
/// <param name="Line">Line the warning refers to, if any.</param>
/// <param name="Severity">Severity label; the builder's <c>AddWarning</c> defaults it to <c>"warning"</c>.</param>
internal sealed record PythonObservationWarning(
string Code,
string Message,
string? FilePath,
int? Line,
string Severity);
/// <summary>
/// Environment profile with Python version, package manager settings, and paths.
/// </summary>
/// <param name="PythonVersion">Python version, if known.</param>
/// <param name="SitePackagesPaths">Site-packages directories; empty when none were supplied.</param>
/// <param name="VersionSources">Version provenance entries; the builder currently always passes an empty array.</param>
/// <param name="RequirementsFiles">Requirements files; empty when none were supplied.</param>
/// <param name="PyprojectFiles">Pyproject files; empty when none were supplied.</param>
/// <param name="VirtualenvPath">Virtualenv path, if any.</param>
/// <param name="CondaPrefix">Conda prefix, if any.</param>
/// <param name="IsContainer">Container flag as supplied to the builder (default <c>false</c>).</param>
internal sealed record PythonObservationEnvironment(
string? PythonVersion,
ImmutableArray<string> SitePackagesPaths,
ImmutableArray<PythonObservationVersionSource> VersionSources,
ImmutableArray<string> RequirementsFiles,
ImmutableArray<string> PyprojectFiles,
string? VirtualenvPath,
string? CondaPrefix,
bool IsContainer);
/// <summary>
/// Python version source with provenance.
/// </summary>
/// <remarks>
/// NOTE(review): no producer of this record is visible alongside it (the builder only ever
/// passes an empty list), so the exact meaning of <c>Source</c> versus <c>SourceType</c>
/// should be confirmed against the code that populates it.
/// </remarks>
/// <param name="Version">Detected version; may be <c>null</c>.</param>
internal sealed record PythonObservationVersionSource(
string? Version,
string Source,
string SourceType);
/// <summary>
/// Capability summary for the Python project.
/// </summary>
/// <remarks>
/// As filled in by <c>PythonObservationBuilder</c>: each <c>Uses*</c> flag is true when at
/// least one detected capability of the matching kind was added.
/// </remarks>
/// <param name="UsesNativeCode">
/// True when a ctypes, cffi or native-code-execution capability was added, or when any
/// native extension was added.
/// </param>
/// <param name="DetectedFrameworks">Distinct framework kind names, in first-seen order.</param>
/// <param name="SecuritySensitiveCapabilities">Distinct names of capability kinds flagged as security sensitive.</param>
internal sealed record PythonObservationCapabilitySummary(
bool UsesProcessExecution,
bool UsesNetworkAccess,
bool UsesFileSystem,
bool UsesCodeExecution,
bool UsesDeserialization,
bool UsesNativeCode,
bool UsesAsyncAwait,
bool UsesMultiprocessing,
ImmutableArray<string> DetectedFrameworks,
ImmutableArray<string> SecuritySensitiveCapabilities);
/// <summary>
/// Optional runtime evidence section for Python.
/// </summary>
/// <remarks>
/// NOTE(review): the code that populates this record is not visible alongside it; the
/// relationship between <c>LoadedModulesCount</c> and <c>LoadedModules</c>, and the key/value
/// meaning of <c>PathHashes</c>, should be confirmed against that producer.
/// </remarks>
internal sealed record PythonObservationRuntimeEvidence(
bool HasEvidence,
string? RuntimePythonVersion,
string? RuntimePlatform,
int LoadedModulesCount,
ImmutableArray<string> LoadedPackages,
ImmutableArray<string> LoadedModules,
ImmutableDictionary<string, string> PathHashes,
ImmutableArray<string> RuntimeCapabilities,
ImmutableArray<PythonObservationRuntimeError> Errors)
{
/// <summary>
/// Empty runtime evidence instance: <c>HasEvidence</c> is false, counts are zero,
/// version and platform are null, and every collection is empty.
/// The same instance is returned on every access.
/// </summary>
public static PythonObservationRuntimeEvidence Empty { get; } = new(
HasEvidence: false,
RuntimePythonVersion: null,
RuntimePlatform: null,
LoadedModulesCount: 0,
LoadedPackages: ImmutableArray<string>.Empty,
LoadedModules: ImmutableArray<string>.Empty,
PathHashes: ImmutableDictionary<string, string>.Empty,
RuntimeCapabilities: ImmutableArray<string>.Empty,
Errors: ImmutableArray<PythonObservationRuntimeError>.Empty);
}
/// <summary>
/// Runtime error captured during execution.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Timestamp</c> is carried as a string; its format is not established
/// by any code visible alongside this record - confirm against the producer.
/// </remarks>
/// <param name="Message">Error text.</param>
/// <param name="Path">Path associated with the error, if any.</param>
/// <param name="PathSha256">Presumably the SHA-256 of <paramref name="Path"/> - TODO confirm.</param>
internal sealed record PythonObservationRuntimeError(
string Timestamp,
string Message,
string? Path,
string? PathSha256);

View File

@@ -0,0 +1,73 @@
using System.Text.Json;
using System.Text.Json.Serialization;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
/// <summary>
/// JSON (de)serialization helpers for Python observation documents.
/// </summary>
/// <remarks>
/// Both option sets use camelCase property names, omit null values and write enums as
/// camelCase strings; they differ only in indentation.
/// </remarks>
internal static class PythonObservationSerializer
{
    // Indented output; also used for all deserialization.
    private static readonly JsonSerializerOptions SerializerOptions = CreateOptions(writeIndented: true);

    // Single-line output.
    private static readonly JsonSerializerOptions CompactSerializerOptions = CreateOptions(writeIndented: false);

    /// <summary>
    /// Renders the document as a JSON string.
    /// </summary>
    /// <param name="document">Document to serialize.</param>
    /// <param name="compact">True for single-line output, false (default) for indented output.</param>
    /// <returns>The JSON text.</returns>
    public static string Serialize(PythonObservationDocument document, bool compact = false)
    {
        return JsonSerializer.Serialize(document, SelectOptions(compact));
    }

    /// <summary>
    /// Writes the document as JSON to a stream.
    /// </summary>
    /// <param name="document">Document to serialize.</param>
    /// <param name="stream">Destination stream.</param>
    /// <param name="compact">True for single-line output, false (default) for indented output.</param>
    /// <param name="cancellationToken">Token forwarded to the serializer.</param>
    public static async Task SerializeAsync(
        PythonObservationDocument document,
        Stream stream,
        bool compact = false,
        CancellationToken cancellationToken = default)
    {
        var selectedOptions = SelectOptions(compact);
        await JsonSerializer.SerializeAsync(stream, document, selectedOptions, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Parses a JSON string into an observation document.
    /// </summary>
    /// <param name="json">JSON text.</param>
    /// <returns>The document, or <c>null</c> when the JSON is the literal <c>null</c>.</returns>
    public static PythonObservationDocument? Deserialize(string json)
    {
        var document = JsonSerializer.Deserialize<PythonObservationDocument>(json, SerializerOptions);
        return document;
    }

    /// <summary>
    /// Parses JSON from a stream into an observation document.
    /// </summary>
    /// <param name="stream">Source stream.</param>
    /// <param name="cancellationToken">Token forwarded to the deserializer.</param>
    /// <returns>The document, or <c>null</c> when the JSON is the literal <c>null</c>.</returns>
    public static async Task<PythonObservationDocument?> DeserializeAsync(
        Stream stream,
        CancellationToken cancellationToken = default)
    {
        var document = await JsonSerializer
            .DeserializeAsync<PythonObservationDocument>(stream, SerializerOptions, cancellationToken)
            .ConfigureAwait(false);
        return document;
    }

    // Picks the option set matching the requested layout.
    private static JsonSerializerOptions SelectOptions(bool compact)
    {
        return compact ? CompactSerializerOptions : SerializerOptions;
    }

    // Builds one option set; the indentation switch is the only difference between the two.
    private static JsonSerializerOptions CreateOptions(bool writeIndented)
    {
        var options = new JsonSerializerOptions
        {
            PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
            WriteIndented = writeIndented,
            DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
        };
        options.Converters.Add(new JsonStringEnumConverter(JsonNamingPolicy.CamelCase));
        return options;
    }
}

View File

@@ -0,0 +1,528 @@
using System.IO.Compression;
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal;
/// <summary>
/// Analyzes Python zipapp archives (.pyz, .pyzw) for runtime information,
/// entry points, and startup behavior.
/// </summary>
internal static partial class PythonZipappAdapter
{
private static readonly string[] LayerRootCandidates = { "layers", ".layers", "layer" };
/// <summary>
/// Finds zipapp files (<c>*.pyz</c>, <c>*.pyzw</c>) under the workspace root and under
/// every container layer root.
/// </summary>
/// <param name="rootPath">Workspace root to search.</param>
/// <returns>The distinct (case-insensitive) paths found, sorted ordinally.</returns>
public static IReadOnlyCollection<string> DiscoverZipapps(string rootPath)
{
    var found = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

    DiscoverInDirectory(rootPath, found);

    foreach (var layerRoot in EnumerateLayerRoots(rootPath))
    {
        DiscoverInDirectory(layerRoot, found);

        // Well-known locations inside a layer are searched in their own right as well.
        var commonLocations = new[]
        {
            Path.Combine(layerRoot, "app"),
            Path.Combine(layerRoot, "opt"),
            Path.Combine(layerRoot, "usr", "local", "bin"),
        };

        foreach (var location in commonLocations)
        {
            if (Directory.Exists(location))
            {
                DiscoverInDirectory(location, found);
            }
        }
    }

    // Set members are distinct even ignoring case, so an ordinal sort has no ties.
    var ordered = found.ToArray();
    Array.Sort(ordered, StringComparer.Ordinal);
    return ordered;
}
/// <summary>
/// Analyzes a zipapp archive for runtime information: shebang, Python version,
/// entry module, embedded requirements and warnings.
/// </summary>
/// <param name="zipappPath">Path of the <c>.pyz</c> / <c>.pyzw</c> file.</param>
/// <returns>
/// The analysis result, or <c>null</c> when the file does not exist, cannot be read
/// (I/O error or access denied) or is not a valid zip archive.
/// </returns>
public static PythonZipappInfo? AnalyzeZipapp(string zipappPath)
{
    if (!File.Exists(zipappPath))
    {
        return null;
    }

    try
    {
        var shebang = ExtractShebang(zipappPath);
        var pythonVersion = shebang is not null ? ParsePythonVersionFromShebang(shebang) : null;
        var hasMain = false;
        var hasInit = false;
        string? entryModule = null;
        var warnings = new List<string>();
        var dependencies = new List<string>();

        // Open as zip archive to inspect contents.
        using var stream = File.OpenRead(zipappPath);

        // Advance past the shebang line if present; otherwise rewind to the start.
        if (stream.ReadByte() == '#')
        {
            int current;
            do
            {
                current = stream.ReadByte();
            }
            while (current != '\n' && current != -1);
        }
        else
        {
            stream.Position = 0;
        }

        using var archive = new ZipArchive(stream, ZipArchiveMode.Read);
        const string PackageMainSuffix = "/__main__.py";

        foreach (var entry in archive.Entries)
        {
            var name = entry.FullName.Replace('\\', '/');

            if (string.Equals(name, "__main__.py", StringComparison.OrdinalIgnoreCase))
            {
                hasMain = true;
                entryModule = TryExtractEntryModule(entry);
            }
            else if (string.Equals(name, "__init__.py", StringComparison.OrdinalIgnoreCase))
            {
                hasInit = true;
            }
            else if (name.EndsWith(PackageMainSuffix, StringComparison.OrdinalIgnoreCase))
            {
                // Package with __main__.py. The package path is sliced off the entry name
                // directly: Path.GetDirectoryName would rewrite separators on Windows and
                // the '/' -> '.' replacement would then leave backslashes in the module name.
                var package = name[..^PackageMainSuffix.Length].Replace('/', '.');
                if (package.Length > 0)
                {
                    entryModule ??= package;
                }
            }
            else if (name.EndsWith("/requirements.txt", StringComparison.OrdinalIgnoreCase) ||
                     string.Equals(name, "requirements.txt", StringComparison.OrdinalIgnoreCase))
            {
                dependencies.AddRange(ExtractRequirements(entry));
            }
        }

        // Generate warnings.
        // NOTE(review): a root __init__.py suppresses this warning even though __main__.py
        // is still missing - confirm that is intended.
        if (!hasMain && !hasInit)
        {
            warnings.Add("Zipapp missing __main__.py; may not be directly executable");
        }

        if (shebang is not null && shebang.Contains("/env ", StringComparison.OrdinalIgnoreCase))
        {
            warnings.Add("Zipapp uses /usr/bin/env shebang; Python version may vary by environment");
        }

        var isWindows = zipappPath.EndsWith(".pyzw", StringComparison.OrdinalIgnoreCase);
        if (isWindows)
        {
            warnings.Add("Zipapp is Windows-specific (.pyzw); uses pythonw.exe without console");
        }

        return new PythonZipappInfo(
            Path: zipappPath,
            FileName: Path.GetFileName(zipappPath),
            Shebang: shebang,
            PythonVersion: pythonVersion,
            HasMainPy: hasMain,
            EntryModule: entryModule,
            IsWindowsApp: isWindows,
            EmbeddedDependencies: dependencies,
            Warnings: warnings);
    }
    catch (IOException)
    {
        return null;
    }
    catch (UnauthorizedAccessException)
    {
        // Unreadable files are skipped, matching the best-effort discovery helpers.
        return null;
    }
    catch (InvalidDataException)
    {
        return null;
    }
}
/// <summary>
/// Discovers and analyzes every zipapp under the workspace root.
/// </summary>
/// <param name="rootPath">Workspace root to search.</param>
/// <returns>
/// The successfully analyzed zipapps together with all their warnings, plus one extra
/// warning when more than one zipapp was analyzed.
/// </returns>
public static PythonZipappAnalysis AnalyzeAll(string rootPath)
{
    // Zipapps that could not be analyzed come back as null and are dropped.
    var analyzed = DiscoverZipapps(rootPath)
        .Select(AnalyzeZipapp)
        .OfType<PythonZipappInfo>()
        .ToList();

    var collectedWarnings = analyzed
        .SelectMany(info => info.Warnings)
        .ToList();

    if (analyzed.Count > 1)
    {
        collectedWarnings.Add($"Multiple zipapps detected ({analyzed.Count}); entry point resolution may be ambiguous");
    }

    return new PythonZipappAnalysis(analyzed, collectedWarnings);
}
/// <summary>
/// Reads the first line of a file and returns its shebang command, if any.
/// </summary>
/// <param name="path">File to inspect.</param>
/// <returns>
/// The text after <c>#!</c>, trimmed; <c>null</c> when the first line is not a shebang
/// or the file cannot be read.
/// </returns>
private static string? ExtractShebang(string path)
{
    try
    {
        using var stream = File.OpenRead(path);
        using var reader = new StreamReader(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks: false, leaveOpen: true);
        var firstLine = reader.ReadLine();

        // Ordinal match: a culture-sensitive StartsWith can skip ignorable characters,
        // in which case the fixed two-character slice below would cut the wrong text.
        if (firstLine is not null && firstLine.StartsWith("#!", StringComparison.Ordinal))
        {
            return firstLine[2..].Trim();
        }

        return null;
    }
    catch (IOException)
    {
        return null;
    }
    catch (UnauthorizedAccessException)
    {
        // An unreadable file simply has no detectable shebang.
        return null;
    }
}
/// <summary>
/// Derives a Python version from a shebang command.
/// </summary>
/// <param name="shebang">Shebang text without the leading <c>#!</c>.</param>
/// <returns>
/// The <c>major.minor</c> version when the interpreter name carries one
/// (e.g. <c>/usr/bin/python3.11</c>, <c>/usr/bin/env python3.10</c>), <c>"3"</c> for a bare
/// <c>python3</c>, and <c>null</c> otherwise (a bare <c>python</c> could be Python 2 or 3).
/// </returns>
private static string? ParsePythonVersionFromShebang(string shebang)
{
    var versionMatch = PythonVersionPattern().Match(shebang);
    if (versionMatch.Success)
    {
        return versionMatch.Groups["version"].Value;
    }

    // No explicit minor version: only the major version of "python3" is certain.
    var mentionsPython3 = shebang.Contains("python3", StringComparison.OrdinalIgnoreCase);
    return mentionsPython3 ? "3" : null;
}
/// <summary>
/// Inspects the text of a <c>__main__.py</c> entry and guesses the module it delegates to.
/// </summary>
/// <remarks>
/// A <c>runpy.run_module('package')</c> call takes precedence; otherwise the first
/// <c>from package.module import ...</c> statement is used. <c>from __future__ import ...</c>
/// is skipped because it is a compiler directive, not the application's entry module.
/// </remarks>
/// <param name="entry">Archive entry to read.</param>
/// <returns>The module name, or <c>null</c> when none is found or the entry cannot be read.</returns>
private static string? TryExtractEntryModule(ZipArchiveEntry entry)
{
    try
    {
        using var stream = entry.Open();
        using var reader = new StreamReader(stream);
        var content = reader.ReadToEnd();

        var runpyMatch = RunpyPattern().Match(content);
        if (runpyMatch.Success)
        {
            return runpyMatch.Groups["module"].Value;
        }

        foreach (Match fromImportMatch in FromImportPattern().Matches(content))
        {
            var module = fromImportMatch.Groups["module"].Value;
            if (!string.Equals(module, "__future__", StringComparison.Ordinal))
            {
                return module;
            }
        }

        return null;
    }
    catch (IOException)
    {
        return null;
    }
}
/// <summary>
/// Reads a requirements file stored in the archive and returns the package names it lists.
/// </summary>
/// <param name="entry">Archive entry holding the requirements text.</param>
/// <returns>
/// One name per requirement line, in file order. Blank lines, comment lines (<c>#</c>) and
/// option lines (<c>-</c>) are skipped. Empty when the entry cannot be read.
/// </returns>
private static List<string> ExtractRequirements(ZipArchiveEntry entry)
{
    var packageNames = new List<string>();

    try
    {
        string text;
        using (var stream = entry.Open())
        using (var reader = new StreamReader(stream))
        {
            text = reader.ReadToEnd();
        }

        foreach (var rawLine in text.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            var line = rawLine.Trim();
            var isSkippable = line.Length == 0 || line[0] == '#' || line[0] == '-';
            if (isSkippable)
            {
                continue;
            }

            // Keep only the leading name, i.e. the part before any version specifier.
            var nameMatch = PackageNamePattern().Match(line);
            if (nameMatch.Success)
            {
                packageNames.Add(nameMatch.Groups["name"].Value);
            }
        }
    }
    catch (IOException)
    {
        // Best effort: an unreadable requirements file contributes nothing.
    }

    return packageNames;
}
/// <summary>
/// Adds the zipapps found directly in a directory, then descends into each of its
/// subdirectories (up to three further levels).
/// </summary>
/// <param name="directory">Directory to search; a missing directory is ignored.</param>
/// <param name="discovered">Set that receives the matching file paths.</param>
private static void DiscoverInDirectory(string directory, HashSet<string> discovered)
{
    if (!Directory.Exists(directory))
    {
        return;
    }

    try
    {
        foreach (var pattern in new[] { "*.pyz", "*.pyzw" })
        {
            foreach (var zipapp in Directory.EnumerateFiles(directory, pattern))
            {
                discovered.Add(zipapp);
            }
        }

        // NOTE(review): unlike the deeper levels, direct children are not filtered here,
        // so top-level ".git", "venv" or "node_modules" directories are still searched -
        // confirm that is intended.
        foreach (var child in Directory.EnumerateDirectories(directory))
        {
            DiscoverInSubdirectory(child, discovered, 1);
        }
    }
    catch (Exception error) when (error is IOException or UnauthorizedAccessException)
    {
        // Best effort: directories that cannot be listed are skipped.
    }
}
/// <summary>
/// Recursively adds the zipapps found in a directory tree, stopping below depth three and
/// not descending into hidden, dependency or cache directories.
/// </summary>
/// <param name="directory">Directory to search.</param>
/// <param name="discovered">Set that receives the matching file paths.</param>
/// <param name="depth">Depth of <paramref name="directory"/>; values above 3 are not searched.</param>
private static void DiscoverInSubdirectory(string directory, HashSet<string> discovered, int depth)
{
    if (depth > 3)
    {
        return;
    }

    try
    {
        foreach (var pattern in new[] { "*.pyz", "*.pyzw" })
        {
            foreach (var zipapp in Directory.EnumerateFiles(directory, pattern))
            {
                discovered.Add(zipapp);
            }
        }

        foreach (var child in Directory.EnumerateDirectories(directory))
        {
            if (!IsSkipped(Path.GetFileName(child)))
            {
                DiscoverInSubdirectory(child, discovered, depth + 1);
            }
        }
    }
    catch (Exception error) when (error is IOException or UnauthorizedAccessException)
    {
        // Best effort: directories that cannot be listed are skipped.
    }

    // Directories that are not worth descending into.
    static bool IsSkipped(string dirName)
    {
        return dirName.StartsWith('.') ||
            string.Equals(dirName, "node_modules", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(dirName, "__pycache__", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(dirName, "venv", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(dirName, ".venv", StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>
/// Yields the filesystem root of every container layer found under the workspace.
/// </summary>
/// <remarks>
/// Each existing layer-root candidate directory is listed; for every layer directory inside
/// it, the <c>fs</c> subdirectory is yielded when present, otherwise the layer directory itself.
/// Candidates that cannot be listed are skipped.
/// </remarks>
/// <param name="workspaceRoot">Workspace root to inspect.</param>
private static IEnumerable<string> EnumerateLayerRoots(string workspaceRoot)
{
    foreach (var candidateName in LayerRootCandidates)
    {
        var candidateRoot = Path.Combine(workspaceRoot, candidateName);
        if (!Directory.Exists(candidateRoot))
        {
            continue;
        }

        var layerDirectories = TryListDirectories(candidateRoot);
        if (layerDirectories is null)
        {
            continue;
        }

        foreach (var layerDirectory in layerDirectories)
        {
            var fsDirectory = Path.Combine(layerDirectory, "fs");
            if (Directory.Exists(fsDirectory))
            {
                yield return fsDirectory;
            }
            else
            {
                yield return layerDirectory;
            }
        }
    }

    // Kept outside the iterator body because 'yield' cannot sit inside a try/catch.
    static IEnumerable<string>? TryListDirectories(string path)
    {
        try
        {
            return Directory.EnumerateDirectories(path);
        }
        catch (IOException)
        {
            return null;
        }
        catch (UnauthorizedAccessException)
        {
            return null;
        }
    }
}
// Matches "python" immediately followed by a major.minor number (case-insensitive);
// the number is captured as "version".
[GeneratedRegex(@"python(?<version>\d+\.\d+)", RegexOptions.IgnoreCase)]
private static partial Regex PythonVersionPattern();
// Matches runpy.run_module( followed by a single- or double-quoted name;
// the name is captured as "module".
[GeneratedRegex(@"runpy\.run_module\(['""](?<module>[^'""]+)['""]", RegexOptions.IgnoreCase)]
private static partial Regex RunpyPattern();
// Matches "from <dotted.name> import" anywhere in the text (not anchored to a line start);
// the dotted name is captured as "module".
[GeneratedRegex(@"from\s+(?<module>[\w.]+)\s+import", RegexOptions.IgnoreCase)]
private static partial Regex FromImportPattern();
// Matches the leading run of word characters, '-', '_' and '.' at the start of the input;
// captured as "name".
[GeneratedRegex(@"^(?<name>[\w\-_.]+)", RegexOptions.IgnoreCase)]
private static partial Regex PackageNamePattern();
}
/// <summary>
/// Describes one analyzed Python zipapp archive.
/// </summary>
internal sealed record PythonZipappInfo(
    string Path,
    string FileName,
    string? Shebang,
    string? PythonVersion,
    bool HasMainPy,
    string? EntryModule,
    bool IsWindowsApp,
    IReadOnlyCollection<string> EmbeddedDependencies,
    IReadOnlyCollection<string> Warnings)
{
    /// <summary>
    /// Flattens the zipapp details into metadata key/value pairs.
    /// </summary>
    /// <returns>
    /// Always <c>zipapp.path</c> and <c>zipapp.hasMain</c>; the remaining keys appear only when
    /// the corresponding value is present, true, or non-empty.
    /// </returns>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var metadata = new List<KeyValuePair<string, string?>>();

        Put("zipapp.path", Path);
        Put("zipapp.hasMain", HasMainPy ? "true" : "false");

        if (Shebang is not null)
        {
            Put("zipapp.shebang", Shebang);
        }

        if (PythonVersion is not null)
        {
            Put("zipapp.pythonVersion", PythonVersion);
        }

        if (EntryModule is not null)
        {
            Put("zipapp.entryModule", EntryModule);
        }

        if (IsWindowsApp)
        {
            Put("zipapp.windowsApp", "true");
        }

        var dependencyCount = EmbeddedDependencies.Count;
        if (dependencyCount > 0)
        {
            Put("zipapp.embeddedDeps.count", dependencyCount.ToString());
        }

        return metadata;

        // Appends one entry, preserving insertion order.
        void Put(string key, string? value)
        {
            metadata.Add(new KeyValuePair<string, string?>(key, value));
        }
    }
}
/// <summary>
/// Analysis results for all zipapps in a workspace.
/// </summary>
internal sealed class PythonZipappAnalysis
{
    /// <summary>
    /// Creates the analysis result.
    /// </summary>
    /// <param name="zipapps">Successfully analyzed zipapps.</param>
    /// <param name="warnings">Warnings gathered across all zipapps.</param>
    public PythonZipappAnalysis(
        IReadOnlyCollection<PythonZipappInfo> zipapps,
        IReadOnlyCollection<string> warnings)
    {
        Zipapps = zipapps;
        Warnings = warnings;
    }

    /// <summary>Successfully analyzed zipapps.</summary>
    public IReadOnlyCollection<PythonZipappInfo> Zipapps { get; }

    /// <summary>Warnings gathered across all zipapps.</summary>
    public IReadOnlyCollection<string> Warnings { get; }

    /// <summary>True when at least one zipapp was analyzed.</summary>
    public bool HasZipapps => Zipapps.Count > 0;

    /// <summary>True when at least one warning was recorded.</summary>
    public bool HasWarnings => Warnings.Count > 0;

    /// <summary>
    /// Flattens the analysis into metadata key/value pairs.
    /// </summary>
    /// <returns>
    /// <c>zipapps.count</c> (plus <c>zipapps.withShebang</c> / <c>zipapps.windowsApps</c> when
    /// non-zero) if any zipapp exists, followed by one <c>zipapps.warning[i]</c> entry per warning.
    /// </returns>
    public IReadOnlyCollection<KeyValuePair<string, string?>> ToMetadata()
    {
        var entries = new List<KeyValuePair<string, string?>>();

        if (Zipapps.Count > 0)
        {
            entries.Add(new("zipapps.count", Zipapps.Count.ToString()));

            var withShebang = Zipapps.Count(z => z.Shebang != null);
            if (withShebang > 0)
            {
                entries.Add(new("zipapps.withShebang", withShebang.ToString()));
            }

            var windowsApps = Zipapps.Count(z => z.IsWindowsApp);
            if (windowsApps > 0)
            {
                entries.Add(new("zipapps.windowsApps", windowsApps.ToString()));
            }
        }

        // Single pass with a running index: ElementAt(i) inside an index loop re-enumerates
        // from the start for any collection that is not an IList.
        var warningIndex = 0;
        foreach (var warning in Warnings)
        {
            entries.Add(new($"zipapps.warning[{warningIndex}]", warning));
            warningIndex++;
        }

        return entries;
    }
}

View File

@@ -0,0 +1,194 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
/// <summary>
/// Provides the Python import hook script for runtime evidence collection.
/// </summary>
internal static class PythonImportHookScript
{
/// <summary>
/// Returns the Python import hook script that captures module load events.
/// In this form the script emits NDJSON on stdout.
/// </summary>
/// <returns>The script text, unmodified.</returns>
public static string GetScript()
{
    return Script;
}
/// <summary>
/// Returns a variant of the import hook script that writes its output to a file.
/// </summary>
/// <param name="outputPath">The path the script should write to.</param>
/// <returns>
/// The script with its <c>_stellaops_output = None</c> assignment replaced by an
/// <c>open(...)</c> call on <paramref name="outputPath"/>.
/// </returns>
public static string GetFileScript(string outputPath)
{
    // Backslashes first, then single quotes, so the path survives inside a
    // single-quoted Python string literal.
    var pythonLiteralPath = outputPath
        .Replace("\\", "\\\\")
        .Replace("'", "\\'");

    var fileAssignment = $"_stellaops_output = open('{pythonLiteralPath}', 'w', buffering=1)";
    return Script.Replace("_stellaops_output = None", fileAssignment);
}
// The Python script is stored as a verbatim string to avoid issues with # characters
private const string Script = @"
# StellaOps Python Import Hook
# This script captures module import events for static analysis validation.
# Output format: NDJSON (Newline-Delimited JSON)
import sys
import json
import threading
import os
from datetime import datetime, timezone
_stellaops_output = None
_stellaops_lock = threading.Lock()
_stellaops_seen = set()
def _stellaops_emit(event_type, **kwargs):
""""""Emit an event as JSON.""""""
event = {
'type': event_type,
'timestamp': datetime.now(timezone.utc).isoformat(),
'pid': os.getpid(),
**kwargs
}
with _stellaops_lock:
line = json.dumps(event, default=str)
if _stellaops_output:
_stellaops_output.write(line + '\n')
_stellaops_output.flush()
else:
print(line, flush=True)
def _stellaops_get_module_path(module):
""""""Get the file path for a module if available.""""""
try:
if hasattr(module, '__file__') and module.__file__:
return module.__file__
if hasattr(module, '__spec__') and module.__spec__:
if hasattr(module.__spec__, 'origin') and module.__spec__.origin:
return module.__spec__.origin
except Exception:
pass
return None
class StellaOpsMetaPathFinder:
""""""Meta path finder that logs all import attempts.""""""
def find_module(self, fullname, path=None):
if fullname not in _stellaops_seen:
_stellaops_seen.add(fullname)
_stellaops_emit('import_attempt', module=fullname, path=str(path) if path else None)
return None
def find_spec(self, fullname, path, target=None):
return None
def _stellaops_wrap_import():
""""""Wrap the built-in __import__ to capture all imports.""""""
original_import = __builtins__.__import__ if hasattr(__builtins__, '__import__') else __builtins__['__import__']
def wrapped_import(name, globals=None, locals=None, fromlist=(), level=0):
module = original_import(name, globals, locals, fromlist, level)
if name not in _stellaops_seen:
_stellaops_seen.add(name)
path = _stellaops_get_module_path(module)
is_native = path and (path.endswith('.so') or path.endswith('.pyd'))
event_type = 'native_load' if is_native else 'module_import'
_stellaops_emit(
event_type,
module=name,
path=path,
parent=module.__package__ if hasattr(module, '__package__') else None,
tid=threading.get_ident()
)
return module
if hasattr(__builtins__, '__import__'):
__builtins__.__import__ = wrapped_import
else:
__builtins__['__import__'] = wrapped_import
def _stellaops_wrap_subprocess():
""""""Wrap subprocess module to capture process spawns.""""""
try:
import subprocess
original_popen = subprocess.Popen
class WrappedPopen(original_popen):
def __init__(self, *args, **kwargs):
_stellaops_emit('process_spawn', spawn_type='subprocess', args=str(args[0]) if args else None)
super().__init__(*args, **kwargs)
subprocess.Popen = WrappedPopen
except Exception:
pass
def _stellaops_wrap_multiprocessing():
""""""Wrap multiprocessing module to capture process spawns.""""""
try:
import multiprocessing
original_process = multiprocessing.Process
class WrappedProcess(original_process):
def __init__(self, *args, **kwargs):
_stellaops_emit('process_spawn', spawn_type='multiprocessing', target=str(kwargs.get('target')))
super().__init__(*args, **kwargs)
multiprocessing.Process = WrappedProcess
except Exception:
pass
def stellaops_start_tracing():
""""""Initialize runtime evidence collection.""""""
# Emit interpreter start event
_stellaops_emit(
'interpreter_start',
python_version=sys.version,
platform=sys.platform,
executable=sys.executable
)
# Install import hook
_stellaops_wrap_import()
# Install meta path finder
sys.meta_path.insert(0, StellaOpsMetaPathFinder())
# Wrap subprocess and multiprocessing (optional)
_stellaops_wrap_subprocess()
_stellaops_wrap_multiprocessing()
# Record already-loaded modules
for name, module in list(sys.modules.items()):
if module is not None and name not in _stellaops_seen:
_stellaops_seen.add(name)
path = _stellaops_get_module_path(module)
if path:
is_native = path.endswith('.so') or path.endswith('.pyd')
event_type = 'native_load' if is_native else 'module_import'
_stellaops_emit(
event_type,
module=name,
path=path,
preloaded=True
)
# Auto-start if this module is imported
if __name__ != '__main__':
stellaops_start_tracing()
else:
# If run directly, start tracing and wait
stellaops_start_tracing()
import sys
print('StellaOps import hook active. Press Ctrl+C to stop.', file=sys.stderr)
try:
import time
while True:
time.sleep(1)
except KeyboardInterrupt:
pass
";
}

View File

@@ -0,0 +1,194 @@
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
/// <summary>
/// Provides path scrubbing and hashing for privacy-preserving runtime evidence.
/// Scrubbing replaces user-specific path prefixes with placeholders; hashing produces a
/// stable SHA-256 identifier of the normalized (unscrubbed) path.
/// </summary>
internal static partial class PythonPathHasher
{
    // Directory markers under which installed packages live; checked in this order.
    private static readonly string[] PackageRootMarkers = ["site-packages/", "dist-packages/"];

    // NOTE(review): this list is not referenced anywhere in this file; the scrubbing itself is
    // driven by the generated regexes at the bottom of the class. Kept because the class is
    // partial and another part may read it - confirm before removing.
    private static readonly string[] SensitivePatterns =
    [
        @"/home/[^/]+",
        @"/Users/[^/]+",
        @"C:\\Users\\[^\\]+",
        @"/root",
        @"/tmp/[^/]+",
        @"/var/[^/]+/[^/]+",
    ];

    /// <summary>
    /// Scrubs sensitive path components and returns a normalized path.
    /// </summary>
    /// <param name="path">The path to scrub.</param>
    /// <returns>
    /// A scrubbed path with sensitive components replaced and forward slashes as separators,
    /// or an empty string when <paramref name="path"/> is null or empty.
    /// </returns>
    public static string ScrubPath(string? path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return string.Empty;
        }

        var result = path;

        // Replace home directories. The Windows pattern runs before the macOS one so that
        // "C:/Users/name" is replaced as a whole instead of leaving a dangling drive letter.
        result = HomeDirectoryPattern().Replace(result, "[HOME]");
        result = WindowsUserPattern().Replace(result, "[HOME]");
        result = MacUserPattern().Replace(result, "[HOME]");
        result = RootPattern().Replace(result, "[ROOT]");

        // Replace temp directories
        result = TempPattern().Replace(result, "[TEMP]");

        // Replace container-specific paths
        result = ContainerAppPattern().Replace(result, "[APP]");

        // Normalize path separators
        return result.Replace('\\', '/');
    }

    /// <summary>
    /// Computes a SHA-256 hash of a path for deterministic identification.
    /// </summary>
    /// <param name="path">The path to hash.</param>
    /// <returns>
    /// The SHA-256 hash as a lowercase hex string, or an empty string when
    /// <paramref name="path"/> is null or empty.
    /// </returns>
    public static string HashPath(string? path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return string.Empty;
        }

        var normalizedPath = NormalizePath(path);
        var hash = SHA256.HashData(Encoding.UTF8.GetBytes(normalizedPath));
        return Convert.ToHexStringLower(hash);
    }

    /// <summary>
    /// Creates a scrubbed and hashed mapping for a path.
    /// </summary>
    /// <param name="path">The original path.</param>
    /// <returns>A tuple of (scrubbedPath, hash). The hash is computed from the original path, not the scrubbed one.</returns>
    public static (string ScrubbedPath, string Hash) ScrubAndHash(string? path)
        => (ScrubPath(path), HashPath(path));

    /// <summary>
    /// Normalizes a path for consistent hashing: lowercase, forward slashes,
    /// no trailing slash and no repeated slashes.
    /// </summary>
    private static string NormalizePath(string path)
    {
        // Convert to lowercase and normalize separators
        var result = path.ToLowerInvariant().Replace('\\', '/');

        // Remove trailing slashes
        result = result.TrimEnd('/');

        // Collapse multiple slashes
        return MultiSlashPattern().Replace(result, "/");
    }

    /// <summary>
    /// Extracts the module name from a file path.
    /// </summary>
    /// <param name="path">The file path.</param>
    /// <returns>
    /// The dotted module name relative to <c>site-packages</c>/<c>dist-packages</c> when the path
    /// lies under one of them, otherwise the bare file name without extension;
    /// null if not determinable.
    /// </returns>
    public static string? ExtractModuleName(string? path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return null;
        }

        // Normalize path
        var normalizedPath = path.Replace('\\', '/');

        // Paths below a package root map to a fully-qualified module name.
        foreach (var marker in PackageRootMarkers)
        {
            var markerIndex = normalizedPath.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
            if (markerIndex >= 0)
            {
                var module = ConvertPathToModule(normalizedPath[(markerIndex + marker.Length)..]);
                return module.Length == 0 ? null : module;
            }
        }

        // Fallback: extract filename. Native extensions carry an ABI tag between the module
        // name and the extension, so they go through the same stripping as packaged modules.
        var fileName = Path.GetFileName(normalizedPath);
        if (IsNativeExtension(fileName))
        {
            var nativeModule = ConvertPathToModule(fileName);
            return nativeModule.Length == 0 ? null : nativeModule;
        }

        var stem = Path.GetFileNameWithoutExtension(normalizedPath);
        return string.IsNullOrEmpty(stem) ? null : stem;
    }

    /// <summary>
    /// Converts a relative file path to a Python module name.
    /// </summary>
    private static string ConvertPathToModule(string relativePath)
    {
        // Remove file extension
        if (relativePath.EndsWith(".py", StringComparison.OrdinalIgnoreCase))
        {
            relativePath = relativePath[..^3];
        }
        else if (IsNativeExtension(relativePath))
        {
            // Native extensions look like "module.cpython-311-x86_64-linux-gnu.so",
            // "module.cp311-win_amd64.pyd" or "module.abi3.so". A module name cannot contain
            // a dot, so everything from the first dot of the file name onwards is tag + extension.
            var fileNameStart = relativePath.LastIndexOf('/') + 1;
            var firstDot = relativePath.IndexOf('.', fileNameStart);
            if (firstDot > fileNameStart)
            {
                relativePath = relativePath[..firstDot];
            }
        }

        // Handle __init__ files: the package itself is the module.
        if (relativePath.EndsWith("/__init__", StringComparison.OrdinalIgnoreCase))
        {
            relativePath = relativePath[..^9];
        }

        // Convert path separators to dots
        return relativePath.Replace('/', '.');
    }

    /// <summary>
    /// Returns true when the path ends with a compiled extension-module suffix.
    /// </summary>
    private static bool IsNativeExtension(string path)
        => path.EndsWith(".so", StringComparison.OrdinalIgnoreCase)
           || path.EndsWith(".pyd", StringComparison.OrdinalIgnoreCase);

    [GeneratedRegex(@"/home/[^/]+", RegexOptions.IgnoreCase)]
    private static partial Regex HomeDirectoryPattern();

    // Any drive letter, either separator style.
    [GeneratedRegex(@"[A-Za-z]:[\\/]Users[\\/][^\\/]+", RegexOptions.IgnoreCase)]
    private static partial Regex WindowsUserPattern();

    [GeneratedRegex(@"/Users/[^/]+", RegexOptions.IgnoreCase)]
    private static partial Regex MacUserPattern();

    // Must end at a segment boundary so "/root-tools" or "/root.bak" are left alone.
    [GeneratedRegex(@"/root(?=/|$)")]
    private static partial Regex RootPattern();

    [GeneratedRegex(@"/tmp/[^/]+")]
    private static partial Regex TempPattern();

    [GeneratedRegex(@"^/app(?=/|$)")]
    private static partial Regex ContainerAppPattern();

    [GeneratedRegex(@"//+")]
    private static partial Regex MultiSlashPattern();
}

View File

@@ -0,0 +1,51 @@
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
/// <summary>
/// Represents a runtime event captured from Python execution.
/// All values other than <paramref name="Kind"/> are carried as text; any of the
/// nullable members may be absent depending on the kind of event.
/// </summary>
/// <param name="Kind">The category of the event.</param>
/// <param name="Timestamp">When the event occurred, as a text timestamp (presumably ISO 8601 / UTC — confirm against the producer).</param>
/// <param name="ModuleName">Name of the module the event refers to, if any.</param>
/// <param name="ModulePath">File system path associated with the event, if any.</param>
/// <param name="ModuleSpec">Module spec text, if any. NOTE(review): producers may reuse this slot for other per-kind detail — verify against callers.</param>
/// <param name="ParentModule">Parent package of the module, if known.</param>
/// <param name="ErrorMessage">Error text for failure events; null otherwise.</param>
/// <param name="ProcessId">Identifier of the process that produced the event, as text.</param>
/// <param name="ThreadId">Identifier of the thread that produced the event, as text.</param>
internal sealed record PythonRuntimeEvent(
    PythonRuntimeEventKind Kind,
    string Timestamp,
    string? ModuleName,
    string? ModulePath,
    string? ModuleSpec,
    string? ParentModule,
    string? ErrorMessage,
    string? ProcessId,
    string? ThreadId);
/// <summary>
/// Kind of runtime event captured.
/// </summary>
/// <remarks>
/// Members have no explicit values, so their numeric values follow declaration order;
/// append new members at the end if the numeric value is ever persisted.
/// </remarks>
internal enum PythonRuntimeEventKind
{
    /// <summary>Module was imported.</summary>
    ModuleImport,

    /// <summary>Module load failed.</summary>
    ModuleLoadError,

    /// <summary>Native extension was loaded.</summary>
    NativeExtensionLoad,

    /// <summary>Dynamic import via importlib.</summary>
    DynamicImport,

    /// <summary>Process spawned via subprocess/multiprocessing.</summary>
    ProcessSpawn,

    /// <summary>New thread started.</summary>
    ThreadStart,

    /// <summary>Import hook was installed.</summary>
    ImportHookInstall,

    /// <summary>Path was added to sys.path.</summary>
    PathModification,

    /// <summary>Python interpreter started.</summary>
    InterpreterStart,

    /// <summary>Python interpreter stopped.</summary>
    InterpreterStop
}

View File

@@ -0,0 +1,396 @@
using System.Collections.Immutable;
using System.Text.Json;
using StellaOps.Scanner.Analyzers.Lang.Python.Internal.Observations;
namespace StellaOps.Scanner.Analyzers.Lang.Python.Internal.RuntimeEvidence;
/// <summary>
/// Collects and processes runtime evidence from Python execution traces.
/// Feed it NDJSON lines (via <see cref="ParseLine"/>, <see cref="ParseOutput"/> or
/// <see cref="ParseFileAsync"/>) and call <see cref="Build"/> to obtain the aggregated observation.
/// Instances are stateful and not synchronized.
/// </summary>
internal sealed class PythonRuntimeEvidenceCollector
{
    // Options actually handed to JsonDocument.Parse; tolerate trailing commas in trace lines.
    private static readonly JsonDocumentOptions DocumentOptions = new()
    {
        AllowTrailingCommas = true
    };

    private readonly List<PythonRuntimeEvent> _events = [];
    private readonly Dictionary<string, string> _pathHashes = new();
    private readonly HashSet<string> _loadedModules = new(StringComparer.Ordinal);
    private readonly HashSet<string> _loadedPackages = new(StringComparer.Ordinal);
    private readonly HashSet<string> _runtimeCapabilities = new(StringComparer.Ordinal);
    private readonly List<PythonObservationRuntimeError> _errors = [];
    private string? _pythonVersion;
    private string? _platform;

    /// <summary>
    /// Parses a JSON line from the runtime evidence output.
    /// Blank lines, malformed JSON, non-object values and unknown event types are ignored;
    /// fields with an unexpected JSON type are treated as absent.
    /// </summary>
    /// <param name="jsonLine">A JSON line containing event data.</param>
    public void ParseLine(string jsonLine)
    {
        if (string.IsNullOrWhiteSpace(jsonLine))
        {
            return;
        }

        try
        {
            using var doc = JsonDocument.Parse(jsonLine, DocumentOptions);
            var root = doc.RootElement;

            // Property access on a non-object element throws, so reject those up front.
            if (root.ValueKind != JsonValueKind.Object)
            {
                return;
            }

            switch (GetString(root, "type"))
            {
                case "interpreter_start":
                    ParseInterpreterStart(root);
                    break;
                case "module_import":
                    ParseModuleImport(root);
                    break;
                case "module_error":
                    ParseModuleError(root);
                    break;
                case "native_load":
                    ParseNativeLoad(root);
                    break;
                case "dynamic_import":
                    ParseDynamicImport(root);
                    break;
                case "process_spawn":
                    ParseProcessSpawn(root);
                    break;
                case "path_modification":
                    ParsePathModification(root);
                    break;
            }
        }
        catch (JsonException)
        {
            // Skip malformed JSON lines
        }
    }

    /// <summary>
    /// Parses multiple JSON lines from the runtime evidence output.
    /// </summary>
    /// <param name="output">The complete output string (NDJSON format).</param>
    public void ParseOutput(string output)
    {
        if (string.IsNullOrEmpty(output))
        {
            return;
        }

        foreach (var line in output.Split('\n', StringSplitOptions.RemoveEmptyEntries))
        {
            // Trim also removes the '\r' left behind by CRLF line endings.
            ParseLine(line.Trim());
        }
    }

    /// <summary>
    /// Parses runtime evidence from a file. A missing file is treated as "no evidence".
    /// </summary>
    /// <param name="filePath">Path to the NDJSON evidence file.</param>
    /// <param name="cancellationToken">Cancellation token.</param>
    public async Task ParseFileAsync(string filePath, CancellationToken cancellationToken = default)
    {
        if (!File.Exists(filePath))
        {
            return;
        }

        await foreach (var line in File.ReadLinesAsync(filePath, cancellationToken).ConfigureAwait(false))
        {
            ParseLine(line);
        }
    }

    /// <summary>
    /// Builds the runtime evidence observation from collected data.
    /// </summary>
    /// <returns>
    /// The runtime evidence observation, or <c>PythonObservationRuntimeEvidence.Empty</c>
    /// when nothing has been collected. Collections are sorted ordinally for stable output.
    /// </returns>
    public PythonObservationRuntimeEvidence Build()
    {
        if (_events.Count == 0 && _loadedModules.Count == 0)
        {
            return PythonObservationRuntimeEvidence.Empty;
        }

        return new PythonObservationRuntimeEvidence(
            HasEvidence: true,
            RuntimePythonVersion: _pythonVersion,
            RuntimePlatform: _platform,
            LoadedModulesCount: _loadedModules.Count,
            LoadedPackages: [.. _loadedPackages.OrderBy(p => p, StringComparer.Ordinal)],
            LoadedModules: [.. _loadedModules.OrderBy(m => m, StringComparer.Ordinal)],
            PathHashes: _pathHashes.ToImmutableDictionary(StringComparer.Ordinal),
            RuntimeCapabilities: [.. _runtimeCapabilities.OrderBy(c => c, StringComparer.Ordinal)],
            Errors: [.. _errors]);
    }

    /// <summary>
    /// Gets all captured runtime events.
    /// </summary>
    public IReadOnlyList<PythonRuntimeEvent> Events => _events;

    // Records interpreter version/platform and an InterpreterStart event.
    private void ParseInterpreterStart(JsonElement root)
    {
        _pythonVersion = GetString(root, "python_version");
        _platform = GetString(root, "platform");

        AddEvent(PythonRuntimeEventKind.InterpreterStart, root, GetTimestamp(root));
    }

    // Records a pure-Python module import, its top-level package and its path hash.
    private void ParseModuleImport(JsonElement root)
    {
        var moduleName = GetString(root, "module");
        var modulePath = GetString(root, "path");

        if (!string.IsNullOrEmpty(moduleName))
        {
            _loadedModules.Add(moduleName);

            // Extract top-level package (text before the first dot).
            var dotIndex = moduleName.IndexOf('.');
            var topLevelPackage = dotIndex < 0 ? moduleName : moduleName[..dotIndex];
            if (topLevelPackage.Length > 0)
            {
                _loadedPackages.Add(topLevelPackage);
            }
        }

        RecordPathHash(modulePath);

        AddEvent(
            PythonRuntimeEventKind.ModuleImport,
            root,
            GetTimestamp(root),
            moduleName: moduleName,
            modulePath: PythonPathHasher.ScrubPath(modulePath),
            moduleSpec: GetString(root, "spec"),
            parentModule: GetString(root, "parent"),
            threadId: GetNumberText(root, "tid"));
    }

    // Records a failed import both as an event and as an observation error.
    private void ParseModuleError(JsonElement root)
    {
        var moduleName = GetString(root, "module");
        var errorMessage = GetString(root, "error");
        var modulePath = GetString(root, "path");
        var timestamp = GetTimestamp(root);
        var scrubbed = PythonPathHasher.ScrubPath(modulePath);

        AddEvent(
            PythonRuntimeEventKind.ModuleLoadError,
            root,
            timestamp,
            moduleName: moduleName,
            modulePath: scrubbed,
            errorMessage: errorMessage);

        // Add to errors
        var hash = PythonPathHasher.HashPath(modulePath);
        _errors.Add(new PythonObservationRuntimeError(
            Timestamp: timestamp,
            Message: errorMessage ?? $"Failed to import {moduleName}",
            Path: scrubbed,
            PathSha256: string.IsNullOrEmpty(hash) ? null : hash));
    }

    // Records a native extension load and flags the "native_code" capability.
    private void ParseNativeLoad(JsonElement root)
    {
        var moduleName = GetString(root, "module");
        var modulePath = GetString(root, "path");

        if (!string.IsNullOrEmpty(moduleName))
        {
            _loadedModules.Add(moduleName);
        }

        // Track native code capability
        _runtimeCapabilities.Add("native_code");

        RecordPathHash(modulePath);

        AddEvent(
            PythonRuntimeEventKind.NativeExtensionLoad,
            root,
            GetTimestamp(root),
            moduleName: moduleName,
            modulePath: PythonPathHasher.ScrubPath(modulePath));
    }

    // Records a dynamic import and flags the "dynamic_import" capability.
    private void ParseDynamicImport(JsonElement root)
    {
        var moduleName = GetString(root, "module");

        if (!string.IsNullOrEmpty(moduleName))
        {
            _loadedModules.Add(moduleName);
        }

        // Track dynamic import capability
        _runtimeCapabilities.Add("dynamic_import");

        AddEvent(
            PythonRuntimeEventKind.DynamicImport,
            root,
            GetTimestamp(root),
            moduleName: moduleName);
    }

    // Records a process spawn; the spawn type is carried in the event's ModuleSpec slot.
    private void ParseProcessSpawn(JsonElement root)
    {
        // Track process spawn capability
        _runtimeCapabilities.Add("process_spawn");

        var spawnType = GetString(root, "spawn_type") ?? "unknown";
        if (spawnType == "multiprocessing")
        {
            _runtimeCapabilities.Add("multiprocessing");
        }

        AddEvent(
            PythonRuntimeEventKind.ProcessSpawn,
            root,
            GetTimestamp(root),
            moduleSpec: spawnType);
    }

    // Records a sys.path modification and the hash of the added path.
    private void ParsePathModification(JsonElement root)
    {
        var path = GetString(root, "path");

        RecordPathHash(path);

        AddEvent(
            PythonRuntimeEventKind.PathModification,
            root,
            GetTimestamp(root),
            modulePath: PythonPathHasher.ScrubPath(path));
    }

    // Appends an event; the process id is always read from the event's "pid" field.
    private void AddEvent(
        PythonRuntimeEventKind kind,
        JsonElement root,
        string timestamp,
        string? moduleName = null,
        string? modulePath = null,
        string? moduleSpec = null,
        string? parentModule = null,
        string? errorMessage = null,
        string? threadId = null)
    {
        _events.Add(new PythonRuntimeEvent(
            Kind: kind,
            Timestamp: timestamp,
            ModuleName: moduleName,
            ModulePath: modulePath,
            ModuleSpec: moduleSpec,
            ParentModule: parentModule,
            ErrorMessage: errorMessage,
            ProcessId: GetNumberText(root, "pid"),
            ThreadId: threadId));
    }

    // Stores scrubbed-path -> hash; the first hash seen for a scrubbed path wins.
    private void RecordPathHash(string? path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return;
        }

        var (scrubbed, hash) = PythonPathHasher.ScrubAndHash(path);
        if (!string.IsNullOrEmpty(hash))
        {
            _pathHashes.TryAdd(scrubbed, hash);
        }
    }

    // Returns the property's string value, or null when it is missing, JSON null or not a string.
    private static string? GetString(JsonElement root, string propertyName)
        => root.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.String
            ? value.GetString()
            : null;

    // Returns the property's numeric literal as written, or null when it is missing or not a number.
    // Using the raw text avoids overflow on identifiers wider than Int32/Int64.
    private static string? GetNumberText(JsonElement root, string propertyName)
        => root.TryGetProperty(propertyName, out var value) && value.ValueKind == JsonValueKind.Number
            ? value.GetRawText()
            : null;

    // Uses the event's own timestamp when present, otherwise the current UTC time.
    private static string GetTimestamp(JsonElement root)
        => GetString(root, "timestamp") ?? GetUtcTimestamp();

    private static string GetUtcTimestamp()
    {
        return DateTime.UtcNow.ToString("O", System.Globalization.CultureInfo.InvariantCulture);
    }
}

View File

@@ -40,6 +40,9 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
// Detect startup hooks (sitecustomize.py, usercustomize.py, .pth files)
var startupHooks = PythonStartupHookDetector.Detect(context.RootPath);
// Analyze zipapps in workspace and container layers
var zipappAnalysis = PythonZipappAdapter.AnalyzeAll(context.RootPath);
// Collect dist-info directories from both root and container layers
var distInfoDirectories = CollectDistInfoDirectories(context.RootPath);
@@ -91,6 +94,9 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
// Append startup hooks warnings
AppendStartupHooksMetadata(metadata, startupHooks);
// Append zipapp analysis
AppendZipappMetadata(metadata, zipappAnalysis);
// Collect evidence including startup hooks
var evidence = distribution.SortedEvidence.ToList();
evidence.AddRange(startupHooks.ToEvidence(context));
@@ -242,6 +248,45 @@ public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
}
}
private static void AppendZipappMetadata(List<KeyValuePair<string, string?>> metadata, PythonZipappAnalysis zipappAnalysis)
{
    // Nothing to report when the workspace contains no zipapps.
    if (!zipappAnalysis.HasZipapps)
    {
        return;
    }

    void Add(string key, string? value) => metadata.Add(new KeyValuePair<string, string?>(key, value));

    Add("zipapps.detected", "true");
    Add("zipapps.count", zipappAnalysis.Zipapps.Count.ToString());

    // Gather the distinct interpreter versions declared by the zipapps;
    // the sorted set keeps them unique and in ordinal order.
    var declaredVersions = new SortedSet<string>(StringComparer.Ordinal);
    foreach (var app in zipappAnalysis.Zipapps)
    {
        if (app.PythonVersion is { } version)
        {
            declaredVersions.Add(version);
        }
    }

    if (declaredVersions.Count > 0)
    {
        Add("zipapps.pythonVersions", string.Join(';', declaredVersions));
    }

    // Analysis-wide warnings first ...
    foreach (var message in zipappAnalysis.Warnings)
    {
        Add("zipapps.warning", message);
    }

    // ... then the warnings attached to each individual zipapp, keyed by its file name.
    foreach (var app in zipappAnalysis.Zipapps)
    {
        var key = $"zipapps.{app.FileName}.warning";
        foreach (var message in app.Warnings)
        {
            Add(key, message);
        }
    }
}
private static IReadOnlyCollection<string> CollectDistInfoDirectories(string rootPath)
{
var directories = new HashSet<string>(StringComparer.OrdinalIgnoreCase);