feat(docs): Add comprehensive documentation for Vexer, Vulnerability Explorer, and Zastava modules
- Introduced AGENTS.md, README.md, TASKS.md, and implementation_plan.md for Vexer, detailing mission, responsibilities, key components, and operational notes. - Established similar documentation structure for Vulnerability Explorer and Zastava modules, including their respective workflows, integrations, and observability notes. - Created risk scoring profiles documentation outlining the core workflow, factor model, governance, and deliverables. - Ensured all modules adhere to the Aggregation-Only Contract and maintain determinism and provenance in outputs.
This commit is contained in:
		
							
								
								
									
										152
									
								
								docs/modules/scanner/operations/entrypoint-runtime-overview.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										152
									
								
								docs/modules/scanner/operations/entrypoint-runtime-overview.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,152 @@ | ||||
| # Runtime Detector Overview | ||||
|  | ||||
| Runtime classification converts a reduced command into a concrete language or framework identity with supporting evidence. This document describes the shared contracts, helper utilities, calibration strategy, and integration points; language-specific heuristics live in the `entrypoint-lang-*.md` files. | ||||
|  | ||||
| ## 1) Contracts | ||||
|  | ||||
| ```csharp | ||||
| /// <summary> | ||||
| /// Runtime families that the runtime detector can report. | ||||
| /// Members keep their declaration order, so the implicit numeric values are stable. | ||||
| /// </summary> | ||||
| public enum LanguageType { | ||||
|   Java, | ||||
|   DotNet, | ||||
|   Node, | ||||
|   Python, | ||||
|   /// <summary>PHP-FPM.</summary> | ||||
|   PhpFpm, | ||||
|   Ruby, | ||||
|   Go, | ||||
|   Rust, | ||||
|   /// <summary>C/C++.</summary> | ||||
|   CCpp, | ||||
|   Nginx, | ||||
|   Deno, | ||||
|   /// <summary>Elixir/Erlang (BEAM).</summary> | ||||
|   Elixir, | ||||
|   /// <summary>Process supervisors.</summary> | ||||
|   Supervisor, | ||||
|   /// <summary>Fallback reported when no sub-detector produces a hit.</summary> | ||||
|   Other | ||||
| } | ||||
|  | ||||
| /// <summary> | ||||
| /// Positional record describing the reduced command that is handed to every sub-detector. | ||||
| /// </summary> | ||||
| /// <param name="Argv">Full argument vector; <c>LanguageDetector.Detect</c> treats every element after the first as an argument.</param> | ||||
| /// <param name="Argv0">Program name as invoked; used as the binary name when <paramref name="AbsolutePath"/> is null.</param> | ||||
| /// <param name="AbsolutePath">Resolved path of the program, or null when it could not be resolved.</param> | ||||
| /// <param name="IsElf">Presumably true when the resolved file is an ELF binary (confirm against the reducer).</param> | ||||
| /// <param name="IsPe">Presumably true when the resolved file is a PE binary (confirm against the reducer).</param> | ||||
| /// <param name="IsScript">Presumably true when the resolved file is a script (confirm against the reducer).</param> | ||||
| /// <param name="Shebang">Shebang line of the script, or null when there is none.</param> | ||||
| /// <param name="WorkingDir">Working directory the command runs in.</param> | ||||
| public sealed record ResolvedCommand( | ||||
|   string[] Argv, | ||||
|   string Argv0, | ||||
|   string? AbsolutePath, | ||||
|   bool IsElf, | ||||
|   bool IsPe, | ||||
|   bool IsScript, | ||||
|   string? Shebang, | ||||
|   string WorkingDir); | ||||
|  | ||||
| /// <summary> | ||||
| /// Result produced by a sub-detector: the detected runtime family plus supporting evidence. | ||||
| /// </summary> | ||||
| /// <param name="Type">Detected runtime family.</param> | ||||
| /// <param name="RawScore">Uncalibrated detector score; the calibrator turns it into the reported confidence.</param> | ||||
| /// <param name="ResolvedBinary">Binary the hit refers to.</param> | ||||
| /// <param name="Args">Arguments passed to <paramref name="ResolvedBinary"/>.</param> | ||||
| /// <param name="Evidence">Mutable list of evidence notes; <c>LanguageDetector.Detect</c> appends "Alternative: …" entries to the winning hit.</param> | ||||
| /// <param name="AppArtifactPath">Optional application artefact path; null when not supplied.</param> | ||||
| /// <param name="MainModule">Optional main module name; null when not supplied.</param> | ||||
| /// <param name="Extra">Optional detector-specific key/value pairs; null when not supplied.</param> | ||||
| public sealed record LanguageHit( | ||||
|   LanguageType Type, | ||||
|   double RawScore, | ||||
|   string ResolvedBinary, | ||||
|   string[] Args, | ||||
|   List<string> Evidence, | ||||
|   string? AppArtifactPath = null, | ||||
|   string? MainModule = null, | ||||
|   Dictionary<string,string>? Extra = null); | ||||
| ``` | ||||
|  | ||||
| ### Interface | ||||
|  | ||||
| ```csharp | ||||
| /// <summary> | ||||
| /// Contract implemented by each runtime-family detector. | ||||
| /// </summary> | ||||
| public interface ILanguageSubDetector { | ||||
|   /// <summary> | ||||
|   /// Inspects the command and its surrounding context for signals of one runtime family. | ||||
|   /// </summary> | ||||
|   /// <param name="cmd">The reduced command to classify.</param> | ||||
|   /// <param name="vfs">Overlay file system used for file lookups.</param> | ||||
|   /// <param name="env">Environment variables available to the command.</param> | ||||
|   /// <param name="img">Image-level context.</param> | ||||
|   /// <param name="ct">Token used to cancel the detection.</param> | ||||
|   /// <returns>A hit describing the match, or null when this detector does not recognise the command.</returns> | ||||
|   LanguageHit? TryDetect( | ||||
|     ResolvedCommand cmd, | ||||
|     OverlayVfs vfs, | ||||
|     EnvBag env, | ||||
|     ImageContext img, | ||||
|     CancellationToken ct = default); | ||||
| } | ||||
|  | ||||
| /// <summary> | ||||
| /// Runs every registered sub-detector against a command and returns the hit with the | ||||
| /// highest calibrated score, annotated with the competing candidates. | ||||
| /// </summary> | ||||
| public sealed class LanguageDetector { | ||||
|   // Registration order doubles as the tie-breaker: the ranking sort below is stable, | ||||
|   // so among equal scores the detector listed first wins. | ||||
|   private readonly ILanguageSubDetector[] _detectors = { | ||||
|     new JavaDetector(), | ||||
|     new DotNetDetector(), | ||||
|     new NodeDetector(), | ||||
|     new PythonDetector(), | ||||
|     new PhpFpmDetector(), | ||||
|     new RubyDetector(), | ||||
|     new NginxDetector(), | ||||
|     new GoDetector(), | ||||
|     new RustDetector(), | ||||
|     new DenoDetector(), | ||||
|     new ElixirDetector(), | ||||
|     new CCppDetector(), | ||||
|     new SupervisorDetector() | ||||
|   }; | ||||
|   private readonly ScoreCalibrator _cal = ScoreCalibrator.Default; | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Classifies <paramref name="cmd"/> and reports the calibrated confidence of the result. | ||||
|   /// </summary> | ||||
|   /// <param name="cmd">The reduced command to classify.</param> | ||||
|   /// <param name="vfs">Overlay file system forwarded to the sub-detectors.</param> | ||||
|   /// <param name="env">Environment variables forwarded to the sub-detectors.</param> | ||||
|   /// <param name="img">Image context forwarded to the sub-detectors.</param> | ||||
|   /// <param name="confidence">Calibrated score of the returned hit.</param> | ||||
|   /// <param name="ct">Token checked before each sub-detector runs and forwarded to it.</param> | ||||
|   /// <returns> | ||||
|   /// The best-scoring hit, with one "Alternative: …" evidence line appended per competing hit; | ||||
|   /// or a <see cref="LanguageType.Other"/> hit with raw score 0.10 when no sub-detector fires. | ||||
|   /// </returns> | ||||
|   /// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled.</exception> | ||||
|   public LanguageHit Detect( | ||||
|     ResolvedCommand cmd, OverlayVfs vfs, EnvBag env, ImageContext img, | ||||
|     out double confidence, CancellationToken ct = default) { | ||||
|     // Calibrate each hit exactly once and carry the score alongside it, instead of | ||||
|     // re-running the calibrator inside every sort comparison and evidence line. | ||||
|     var scored = new List<(LanguageHit Hit, double Score)>(_detectors.Length); | ||||
|     foreach (var detector in _detectors) { | ||||
|       ct.ThrowIfCancellationRequested(); | ||||
|       if (detector.TryDetect(cmd, vfs, env, img, ct) is { } hit) { | ||||
|         scored.Add((hit, _cal.Calibrate(hit))); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     if (scored.Count == 0) { | ||||
|       var fallback = new LanguageHit( | ||||
|         LanguageType.Other, 0.10, cmd.AbsolutePath ?? cmd.Argv0, cmd.Argv.Skip(1).ToArray(), | ||||
|         new() { "No strong runtime family signals detected." }); | ||||
|       confidence = _cal.Calibrate(fallback); | ||||
|       return fallback; | ||||
|     } | ||||
|  | ||||
|     var ranked = scored.OrderByDescending(s => s.Score).ToList(); | ||||
|     var (best, bestScore) = ranked[0]; | ||||
|     confidence = bestScore; | ||||
|  | ||||
|     // Skip the winner by position rather than by record (value) equality, so a distinct | ||||
|     // hit that happens to compare equal to the winner is still listed as an alternative. | ||||
|     foreach (var (alt, altScore) in ranked.Skip(1)) { | ||||
|       best.Evidence.Add($"Alternative: {alt.Type} (score={altScore:0.00}) — {string.Join("; ", alt.Evidence.Take(2))}…"); | ||||
|     } | ||||
|     return best; | ||||
|   } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## 2) Helpers | ||||
|  | ||||
| ```csharp | ||||
| /// <summary>Thin convenience wrappers around <c>OverlayVfs</c> lookups and path joining.</summary> | ||||
| static class VfsHelpers { | ||||
|   /// <summary>Reports whether <paramref name="path"/> exists in <paramref name="vfs"/>.</summary> | ||||
|   public static bool FileExists(OverlayVfs vfs, string path) { | ||||
|     return vfs.Exists(path); | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Opens <paramref name="path"/> for reading when it exists. | ||||
|   /// </summary> | ||||
|   /// <returns>True when the path exists, with <paramref name="stream"/> set to the opened stream; otherwise false with a null stream.</returns> | ||||
|   public static bool TryOpen(OverlayVfs vfs, string path, out Stream? stream) { | ||||
|     var present = vfs.Exists(path); | ||||
|     stream = present ? vfs.OpenRead(path) : null; | ||||
|     return present; | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Returns <paramref name="maybeRel"/> unchanged when it is rooted; otherwise combines it | ||||
|   /// with <paramref name="cwd"/> and normalises the result. | ||||
|   /// </summary> | ||||
|   // NOTE(review): Path.GetFullPath applies the host OS path rules and resolves a relative | ||||
|   // cwd against the process working directory; confirm that is acceptable for in-image paths. | ||||
|   public static string Join(string cwd, string maybeRel) { | ||||
|     if (Path.IsPathRooted(maybeRel)) { | ||||
|       return maybeRel; | ||||
|     } | ||||
|     var combined = Path.Combine(cwd, maybeRel); | ||||
|     return Path.GetFullPath(combined); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /// <summary>Extension helpers for inspecting argument vectors and executable names.</summary> | ||||
| static class ArgvHelpers { | ||||
|   // Both separators are honoured regardless of host OS: Path.GetFileName alone only splits | ||||
|   // on '\' when running on Windows, so Windows-style paths would never match elsewhere. | ||||
|   private static readonly char[] PathSeparators = { '/', '\\' }; | ||||
|  | ||||
|   /// <summary>Returns the index of the first element equal to <paramref name="flag"/>, or -1 when absent.</summary> | ||||
|   public static int IndexOf(this string[] argv, string flag) => | ||||
|     Array.FindIndex(argv, a => a == flag); | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Returns the element following <paramref name="idx"/>, or null when <paramref name="idx"/> | ||||
|   /// is negative or already refers to the last element (or beyond). | ||||
|   /// </summary> | ||||
|   public static string? Next(this string[] argv, int idx) => | ||||
|     (idx >= 0 && idx + 1 < argv.Length) ? argv[idx + 1] : null; | ||||
|  | ||||
|   /// <summary>Reports whether any argument ends with <paramref name="suffix"/> (ordinal comparison).</summary> | ||||
|   /// <param name="args">Arguments to scan.</param> | ||||
|   /// <param name="suffix">Suffix to look for.</param> | ||||
|   /// <param name="ignoreCase">When true (the default) the comparison ignores case.</param> | ||||
|   public static bool AnyEndsWith(this IEnumerable<string> args, string suffix, bool ignoreCase = true) { | ||||
|     var comparison = ignoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal; | ||||
|     return args.Any(a => a.EndsWith(suffix, comparison)); | ||||
|   } | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Reports whether the file-name portion of <paramref name="candidate"/> equals one of | ||||
|   /// <paramref name="names"/>, ignoring case. | ||||
|   /// </summary> | ||||
|   /// <returns>False when <paramref name="candidate"/> is null or no name matches.</returns> | ||||
|   public static bool Is(this string? candidate, params string[] names) { | ||||
|     if (candidate is null) { | ||||
|       return false; | ||||
|     } | ||||
|     // Cut at the last '/' or '\' first, then let Path.GetFileName apply any remaining | ||||
|     // host-specific rules; the file name is computed once instead of once per name. | ||||
|     var tail = candidate.Substring(candidate.LastIndexOfAny(PathSeparators) + 1); | ||||
|     var fileName = Path.GetFileName(tail); | ||||
|     return names.Any(n => string.Equals(fileName, n, StringComparison.OrdinalIgnoreCase)); | ||||
|   } | ||||
| } | ||||
| ``` | ||||
|  | ||||
| ## 3) Scoring & calibration | ||||
|  | ||||
| - Each sub-detector returns a `RawScore` (0..1) based on family-specific heuristics. | ||||
| - Feed raw scores into a calibrator (Platt scaling or isotonic regression) trained on labelled corpora to get calibrated probabilities. | ||||
| - Persist calibration metadata per detector to avoid drift. | ||||
| - When no detector fires, return `LanguageType.Other` with low confidence and an evidence note. | ||||
|  | ||||
| ## 4) Cross-checks | ||||
|  | ||||
| Enhance precision by combining detector results with filesystem and configuration signals: | ||||
|  | ||||
| - Compare declared `EXPOSE` ports with runtime defaults (e.g., `80/443` for Nginx, `8080` for Java app servers). | ||||
| - Inspect service-specific configuration (`nginx.conf`, `php-fpm.conf`, `web.config`, `Gemfile`, `package.json`, `pyproject.toml`). | ||||
| - For Java and .NET, verify artefact presence and manifest metadata; for Go/Rust check static binary traits. | ||||
| - Re-run detectors after ShellFlow rewrites to ensure post-`exec` commands are analysed. | ||||
|  | ||||
| ## 5) Windows nuances | ||||
|  | ||||
| - Use `config.Shell` to detect PowerShell vs CMD; adjust interpreter lookup accordingly. | ||||
| - PE probing is mandatory—PowerShell scripts often front .NET or native binaries. | ||||
| - Treat paths as case-insensitive and accept `\` as well as `/` as a path separator. | ||||
|  | ||||
| ## 6) Integration points | ||||
|  | ||||
| - Static reducer passes `ResolvedCommand` → runtime detector. | ||||
| - Dynamic reducer pipes steady-state commands through the same interface. | ||||
| - The resulting `LanguageHit`, together with `confidence`, populates the `TerminalProcess`. | ||||
| - Downstream consumers (Policy Engine, Vuln Explorer) merge runtime type into their evidence trail. | ||||
|  | ||||
| ## 7) Next steps | ||||
|  | ||||
| Language-specific heuristics live in: | ||||
|  | ||||
| | Runtime | Document | | ||||
| | --- | --- | | ||||
| | Java | `entrypoint-lang-java.md` | | ||||
| | .NET / C# | `entrypoint-lang-dotnet.md` | | ||||
| | Node.js | `entrypoint-lang-node.md` | | ||||
| | Python | `entrypoint-lang-python.md` | | ||||
| | PHP-FPM | `entrypoint-lang-phpfpm.md` | | ||||
| | Ruby | `entrypoint-lang-ruby.md` | | ||||
| | Go | `entrypoint-lang-go.md` | | ||||
| | Rust | `entrypoint-lang-rust.md` | | ||||
| | C/C++ | `entrypoint-lang-ccpp.md` | | ||||
| | Nginx | `entrypoint-lang-nginx.md` | | ||||
| | Deno | `entrypoint-lang-deno.md` | | ||||
| | Elixir/Erlang (BEAM) | `entrypoint-lang-elixir.md` | | ||||
| | Supervisors | `entrypoint-lang-supervisor.md` | | ||||
|  | ||||
| Each runtime file documents the heuristics, artefacts, and edge cases specific to that family. | ||||
		Reference in New Issue
	
	Block a user