Restructure solution layout by module
This commit is contained in:
@@ -0,0 +1,32 @@
|
||||
# StellaOps.Scanner.Analyzers.Lang.Python — Agent Charter
|
||||
|
||||
## Role
|
||||
Implement the Python analyzer plug-in that inspects installed distributions, RECORD hashes, entry points, and editable installs to feed Scanner SBOM views.
|
||||
|
||||
## Scope
|
||||
- Parse `*.dist-info` and `*.data` directories, validating `METADATA`, `RECORD`, and `entry_points.txt`.
|
||||
- Detect editable installs and pip caches, reconciling metadata with actual files.
|
||||
- Integrate EntryTrace usage hints for runtime entry points and flag missing RECORD hashes.
|
||||
- Package plug-in manifest and ensure deterministic fixtures + benchmarks.
|
||||
|
||||
## Out of Scope
|
||||
- Language analyzers for other ecosystems.
|
||||
- Policy evaluation, vulnerability correlation, or packaging into UI flows.
|
||||
- Building Python interpreters or executing scripts (analysis is static only).
|
||||
|
||||
## Expectations
|
||||
- Deterministic RECORD hashing with streaming IO; fallback heuristics clearly flagged.
|
||||
- Performance target: ≥75 MB/s RECORD verification, end-to-end fixture <2.0 s.
|
||||
- Offline-first: no PyPI calls; relies on local metadata only.
|
||||
- Rich telemetry (components counted, hash mismatches) following Scanner metrics schema.
|
||||
- Keep `TASKS.md` and `SPRINTS_LANG_IMPLEMENTATION_PLAN.md` in sync.
|
||||
|
||||
## Dependencies
|
||||
- Shared language analyzer infrastructure.
|
||||
- EntryTrace usage hints (for script activation).
|
||||
- Worker dispatcher for plug-in loading.
|
||||
|
||||
## Testing & Artifacts
|
||||
- Golden fixtures for venv, virtualenv, pipx, and editable installs.
|
||||
- Benchmark results comparing hash-check throughput against competitor tools.
|
||||
- Offline Kit guidance for bundling standard library metadata if required.
|
||||
@@ -0,0 +1,8 @@
|
||||
global using System;
|
||||
global using System.Collections.Generic;
|
||||
global using System.IO;
|
||||
global using System.Linq;
|
||||
global using System.Threading;
|
||||
global using System.Threading.Tasks;
|
||||
|
||||
global using StellaOps.Scanner.Analyzers.Lang;
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,17 @@
|
||||
using System;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Plugin;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python;
|
||||
|
||||
public sealed class PythonAnalyzerPlugin : ILanguageAnalyzerPlugin
|
||||
{
|
||||
public string Name => "StellaOps.Scanner.Analyzers.Lang.Python";
|
||||
|
||||
public bool IsAvailable(IServiceProvider services) => services is not null;
|
||||
|
||||
public ILanguageAnalyzer CreateAnalyzer(IServiceProvider services)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(services);
|
||||
return new PythonLanguageAnalyzer();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,72 @@
|
||||
using System.Text.Json;
|
||||
using StellaOps.Scanner.Analyzers.Lang.Python.Internal;
|
||||
|
||||
namespace StellaOps.Scanner.Analyzers.Lang.Python;
|
||||
|
||||
public sealed class PythonLanguageAnalyzer : ILanguageAnalyzer
|
||||
{
|
||||
private static readonly EnumerationOptions Enumeration = new()
|
||||
{
|
||||
RecurseSubdirectories = true,
|
||||
IgnoreInaccessible = true,
|
||||
AttributesToSkip = FileAttributes.Device | FileAttributes.ReparsePoint
|
||||
};
|
||||
|
||||
public string Id => "python";
|
||||
|
||||
public string DisplayName => "Python Analyzer";
|
||||
|
||||
public ValueTask AnalyzeAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(context);
|
||||
ArgumentNullException.ThrowIfNull(writer);
|
||||
|
||||
return AnalyzeInternalAsync(context, writer, cancellationToken);
|
||||
}
|
||||
|
||||
private static async ValueTask AnalyzeInternalAsync(LanguageAnalyzerContext context, LanguageComponentWriter writer, CancellationToken cancellationToken)
|
||||
{
|
||||
var distInfoDirectories = Directory
|
||||
.EnumerateDirectories(context.RootPath, "*.dist-info", Enumeration)
|
||||
.OrderBy(static path => path, StringComparer.Ordinal)
|
||||
.ToArray();
|
||||
|
||||
foreach (var distInfoPath in distInfoDirectories)
|
||||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
PythonDistribution? distribution;
|
||||
try
|
||||
{
|
||||
distribution = await PythonDistributionLoader.LoadAsync(context, distInfoPath, cancellationToken).ConfigureAwait(false);
|
||||
}
|
||||
catch (IOException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (JsonException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
catch (UnauthorizedAccessException)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (distribution is null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
writer.AddFromPurl(
|
||||
analyzerId: "python",
|
||||
purl: distribution.Purl,
|
||||
name: distribution.Name,
|
||||
version: distribution.Version,
|
||||
type: "pypi",
|
||||
metadata: distribution.SortedMetadata,
|
||||
evidence: distribution.SortedEvidence,
|
||||
usedByEntrypoint: distribution.UsedByEntrypoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<LangVersion>preview</LangVersion>
|
||||
<Nullable>enable</Nullable>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
<EnableDefaultItems>false</EnableDefaultItems>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Compile Include="**\\*.cs" Exclude="obj\\**;bin\\**" />
|
||||
<EmbeddedResource Include="**\\*.json" Exclude="obj\\**;bin\\**" />
|
||||
<None Include="**\\*" Exclude="**\\*.cs;**\\*.json;bin\\**;obj\\**" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\StellaOps.Scanner.Analyzers.Lang\StellaOps.Scanner.Analyzers.Lang.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
@@ -0,0 +1,31 @@
|
||||
# Python Analyzer Task Flow
|
||||
|
||||
| Seq | ID | Status | Depends on | Description | Exit Criteria |
|
||||
|-----|----|--------|------------|-------------|---------------|
|
||||
| 1 | SCANNER-ANALYZERS-LANG-10-303A | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-307 | STREAM-based parser for `*.dist-info` (`METADATA`, `WHEEL`, `entry_points.txt`) with normalization + evidence capture. | Parser handles CPython 3.8–3.12 metadata variations; fixtures confirm canonical ordering and UTF-8 handling. |
|
||||
| 2 | SCANNER-ANALYZERS-LANG-10-303B | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-303A | RECORD hash verifier with chunked hashing, Zip64 support, and mismatch diagnostics. | Verifier processes 5 GB RECORD fixture without allocations >2 MB; mismatches produce deterministic evidence records. |
|
||||
| 3 | SCANNER-ANALYZERS-LANG-10-303C | DONE (2025-10-21) | SCANNER-ANALYZERS-LANG-10-303B | Editable install + pip cache detection; integrate EntryTrace hints for runtime usage flags. | Editable installs resolved to source path; usage flags propagated; regression tests cover mixed editable + wheel installs. |
|
||||
| 4 | SCANNER-ANALYZERS-LANG-10-307P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-303C | Shared helper integration (license metadata, quiet provenance, component merging). | Shared helpers reused; analyzer-specific metadata minimal; deterministic merge tests pass. |
|
||||
| 5 | SCANNER-ANALYZERS-LANG-10-308P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-307P | Golden fixtures + determinism harness for Python analyzer; add benchmark and hash throughput reporting. | Fixtures under `Fixtures/lang/python/`; determinism CI guard; benchmark CSV added with threshold alerts. |
|
||||
| 6 | SCANNER-ANALYZERS-LANG-10-309P | DONE (2025-10-23) | SCANNER-ANALYZERS-LANG-10-308P | Package plug-in (manifest, DI registration) and document Offline Kit bundling of Python stdlib metadata if needed. | Manifest copied to `plugins/scanner/analyzers/lang/`; Worker loads analyzer; Offline Kit doc updated. |
|
||||
|
||||
## Python Entry-Point Analyzer (Sprint 43)
|
||||
> **Imposed rule:** work of this type or tasks of this type on this component — and everywhere else it should be applied.
|
||||
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|
||||
|----|--------|----------|------------|-------------|---------------|
|
||||
| SCANNER-ANALYZERS-PYTHON-23-001 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-LANG-10-309P | Build input normalizer & virtual filesystem for wheels, sdists, editable installs, zipapps, site-packages trees, and container roots. Detect Python version targets (`pyproject.toml`, `runtime.txt`, Dockerfile) + virtualenv layout deterministically. | Normalizer ingests fixtures (venv, wheel, sdist, zipapp, container layer) without extraction; records python_version, root metadata, and namespace resolution hints; determinism harness updated. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-002 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-001 | Entrypoint discovery: module `__main__`, console_scripts entry points, `scripts`, zipapp main, `manage.py`/gunicorn/celery patterns. Capture invocation context (module vs package, argv wrappers). | Fixtures produce entrypoint list with kind (console, module, package, zipapp, framework) and deterministic ordering; warnings for missing targets recorded. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-003 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-001 | Static import graph builder using AST and bytecode fallback. Support `import`, `from ... import`, relative imports, `importlib.import_module`, `__import__` with literal args, `pkgutil.extend_path`. | AST scanner emits edges for explicit imports; literal importlib calls covered; unresolved/dynamic patterns yield `dynamic-import` warnings with candidate prefixes; regression fixtures pass. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-004 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-003 | Python resolver engine (importlib semantics) handling namespace packages (PEP 420), package discovery order, `.pth` files, `sys.path` composition, zipimport, and site-packages precedence across virtualenv/container roots. | Resolver reproduces importlib behaviour on fixture matrix (namespace pkg, zipimport, multi-site-dir); includes explain traces; determinism tests for path ordering succeed. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-005 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-004 | Packaging adapters: pip editable (`.egg-link`), Poetry/Flit layout, Conda prefix, `.dist-info/RECORD` cross-check, container layer overlays. | Adapters resolve editable links, conda pkgs, layered site-packages; edges capture provider path + metadata; warnings emitted for missing RECORD entries. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-006 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-003 | Detect native extensions (`*.so`, `*.pyd`), CFFI modules, ctypes loaders, embedded WASM, and runtime capability signals (subprocess, multiprocessing, ctypes, eval). | Fixtures with native/CFFI/ctypes emit `native-extension`, `cffi`, `ctypes` hints; capability flags recorded; metadata captures ABI/platform info. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-007 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-002 | Framework/config heuristics: Django, Flask, FastAPI, Celery, AWS Lambda handlers, Gunicorn, Click/Typer CLIs, logging configs, pyproject optional dependencies. Tagged as hints only. | Framework fixtures produce hint records with source files (settings.py, pyproject extras, celery app); no resolver impact; determinism maintained. |
|
||||
|
||||
## Python Observation & Runtime (Sprint 44)
|
||||
| ID | Status | Owner(s) | Depends on | Description | Exit Criteria |
|
||||
|----|--------|----------|------------|-------------|---------------|
|
||||
| SCANNER-ANALYZERS-PYTHON-23-008 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-004 | Produce AOC-compliant observations: entrypoints, components (modules/packages/native), edges (import, namespace, dynamic-hint, native-extension) with reason codes/confidence and resolver traces. | Observation JSON for fixtures deterministic; includes explain trace per edge and namespace resolution metadata; passes AOC compliance lint. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-009 | TODO | Python Analyzer Guild, QA Guild | SCANNER-ANALYZERS-PYTHON-23-008 | Fixture suite + perf benchmarks covering virtualenv, namespace packages, zipapp, editable installs, containers, lambda handler. | Fixture set committed under `fixtures/lang/python/ep`; determinism CI and perf (<250ms medium project) gates enabled. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-010 | TODO | Python Analyzer Guild, Signals Guild | SCANNER-ANALYZERS-PYTHON-23-008 | Optional runtime evidence: import hook capturing module load events with path scrubbing, optional bytecode instrumentation for `importlib` hooks, multiprocessing tracer. | Runtime harness records module loads for sample app; paths hashed; runtime edges merge without altering resolver precedence; privacy doc updated. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-011 | TODO | Python Analyzer Guild, DevOps Guild | SCANNER-ANALYZERS-PYTHON-23-008 | Package analyzer plug-in, add CLI commands (`stella python inspect|resolve|trace`), update Offline Kit guidance. | Plugin manifest deployed; CLI commands documented & smoke tested; Offline Kit instructions cover Python analyzer usage; worker restart verified. |
|
||||
| SCANNER-ANALYZERS-PYTHON-23-012 | TODO | Python Analyzer Guild | SCANNER-ANALYZERS-PYTHON-23-001 | Container/zipapp adapter enhancements: parse OCI layers for Python runtime, detect `PYTHONPATH`/`PYTHONHOME` env, record warnings for sitecustomize/startup hooks. | Container fixtures output runtime metadata (python binary, env vars) and warnings for startup hooks; zipapp fixture resolves internal modules; determinism retained. |
|
||||
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"schemaVersion": "1.0",
|
||||
"id": "stellaops.analyzer.lang.python",
|
||||
"displayName": "StellaOps Python Analyzer (preview)",
|
||||
"version": "0.1.0",
|
||||
"requiresRestart": true,
|
||||
"entryPoint": {
|
||||
"type": "dotnet",
|
||||
"assembly": "StellaOps.Scanner.Analyzers.Lang.Python.dll",
|
||||
"typeName": "StellaOps.Scanner.Analyzers.Lang.Python.PythonAnalyzerPlugin"
|
||||
},
|
||||
"capabilities": [
|
||||
"language-analyzer",
|
||||
"python",
|
||||
"pypi"
|
||||
],
|
||||
"metadata": {
|
||||
"org.stellaops.analyzer.language": "python",
|
||||
"org.stellaops.analyzer.kind": "language",
|
||||
"org.stellaops.restart.required": "true",
|
||||
"org.stellaops.analyzer.status": "preview"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user