Align AOC tasks for Excititor and Concelier
This commit is contained in:
@@ -1,152 +1,152 @@
|
||||
# Runtime Detector Overview
|
||||
|
||||
Runtime classification converts a reduced command into a concrete language or framework identity with supporting evidence. This document describes the shared contracts, helper utilities, calibration strategy, and integration points; language-specific heuristics live in the `entrypoint-lang-*.md` files.
|
||||
|
||||
## 1) Contracts
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Runtime families the detector can report. Member order is significant:
/// the implicit numeric values follow declaration order.
/// </summary>
public enum LanguageType {
    Java,
    DotNet,      // .NET / C#
    Node,        // Node.js
    Python,
    PhpFpm,      // PHP-FPM
    Ruby,
    Go,
    Rust,
    CCpp,        // C/C++
    Nginx,
    Deno,
    Elixir,      // Elixir/Erlang (BEAM)
    Supervisor,
    Other        // returned when no sub-detector fires
}
|
||||
|
||||
/// <summary>
/// The reduced command handed to the runtime detector.
/// </summary>
/// <param name="Argv">Full argument vector; the detector treats everything after element 0 as arguments.</param>
/// <param name="Argv0">Command name; used as the binary when <paramref name="AbsolutePath"/> is null.</param>
/// <param name="AbsolutePath">Resolved path of the command, or null when it could not be resolved.</param>
/// <param name="IsElf">Presumably true when the target is an ELF binary — confirm with the producer.</param>
/// <param name="IsPe">Presumably true when the target is a Windows PE binary — confirm with the producer.</param>
/// <param name="IsScript">Presumably true when the target is a script file — confirm with the producer.</param>
/// <param name="Shebang">Shebang line, if any; null otherwise.</param>
/// <param name="WorkingDir">Working directory associated with the command.</param>
public sealed record ResolvedCommand(
    string[] Argv,
    string   Argv0,
    string?  AbsolutePath,
    bool     IsElf,
    bool     IsPe,
    bool     IsScript,
    string?  Shebang,
    string   WorkingDir
);
|
||||
|
||||
/// <summary>
/// A single sub-detector result: the runtime family it recognised plus supporting evidence.
/// </summary>
/// <param name="Type">Runtime family that was recognised.</param>
/// <param name="RawScore">Uncalibrated score (0..1) produced by the sub-detector's heuristics.</param>
/// <param name="ResolvedBinary">Binary the hit refers to.</param>
/// <param name="Args">Arguments passed to that binary.</param>
/// <param name="Evidence">
/// Human-readable evidence lines. The list is mutable on purpose:
/// <c>LanguageDetector.Detect</c> appends "Alternative: …" entries to the winning hit.
/// </param>
/// <param name="AppArtifactPath">Optional; presumably the application artefact (jar, dll, …) — confirm per detector.</param>
/// <param name="MainModule">Optional; presumably the entry module or class — confirm per detector.</param>
/// <param name="Extra">Optional detector-specific key/value data.</param>
public sealed record LanguageHit(
    LanguageType                Type,
    double                      RawScore,
    string                      ResolvedBinary,
    string[]                    Args,
    List<string>                Evidence,
    string?                     AppArtifactPath = null,
    string?                     MainModule      = null,
    Dictionary<string,string>?  Extra           = null
);
|
||||
```
|
||||
|
||||
### Interface
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Contract implemented by each runtime-family detector.
/// </summary>
public interface ILanguageSubDetector {
    /// <summary>
    /// Inspects the command and its image context for signals of one runtime family.
    /// </summary>
    /// <returns>A hit describing the match, or null when this detector does not recognise the command.</returns>
    LanguageHit? TryDetect(
        ResolvedCommand   cmd,
        OverlayVfs        vfs,
        EnvBag            env,
        ImageContext      img,
        CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Runs every registered sub-detector against a resolved command and returns the hit
/// with the highest calibrated score, annotated with the runners-up.
/// </summary>
public sealed class LanguageDetector {
    // Consulted in this order. The ranking sort is stable, so on equal calibrated
    // scores the detector listed first wins.
    private readonly ILanguageSubDetector[] _detectors = {
        new JavaDetector(),
        new DotNetDetector(),
        new NodeDetector(),
        new PythonDetector(),
        new PhpFpmDetector(),
        new RubyDetector(),
        new NginxDetector(),
        new GoDetector(),
        new RustDetector(),
        new DenoDetector(),
        new ElixirDetector(),
        new CCppDetector(),
        new SupervisorDetector()
    };

    private readonly ScoreCalibrator _cal = ScoreCalibrator.Default;

    /// <summary>
    /// Classifies <paramref name="cmd"/> into a runtime family.
    /// </summary>
    /// <param name="cmd">Reduced command to classify.</param>
    /// <param name="vfs">Image filesystem view passed through to each sub-detector.</param>
    /// <param name="env">Environment passed through to each sub-detector.</param>
    /// <param name="img">Image context passed through to each sub-detector.</param>
    /// <param name="confidence">Calibrated score of the returned hit.</param>
    /// <param name="ct">Optional token; checked before each sub-detector and forwarded to it.</param>
    /// <returns>
    /// The best-scoring hit, whose <c>Evidence</c> list gains one "Alternative: …" line per
    /// other hit; or a <see cref="LanguageType.Other"/> hit with raw score 0.10 when nothing fires.
    /// </returns>
    /// <exception cref="OperationCanceledException">When <paramref name="ct"/> is cancelled.</exception>
    public LanguageHit Detect(ResolvedCommand cmd, OverlayVfs vfs, EnvBag env, ImageContext img, out double confidence, CancellationToken ct = default) {
        // Calibrate each hit exactly once and carry the score alongside it.
        // NOTE(review): assumes ScoreCalibrator.Calibrate is deterministic for a given hit — confirm.
        var scored = new List<(LanguageHit Hit, double Score)>(_detectors.Length);
        foreach (var detector in _detectors) {
            ct.ThrowIfCancellationRequested();
            if (detector.TryDetect(cmd, vfs, env, img, ct) is { } hit) {
                scored.Add((hit, _cal.Calibrate(hit)));
            }
        }

        if (scored.Count == 0) {
            var fallback = new LanguageHit(LanguageType.Other, 0.10, cmd.AbsolutePath ?? cmd.Argv0, cmd.Argv.Skip(1).ToArray(),
                new() { "No strong runtime family signals detected." });
            confidence = _cal.Calibrate(fallback);
            return fallback;
        }

        var ranked = scored.OrderByDescending(s => s.Score).ToList();
        var (best, bestScore) = ranked[0];
        confidence = bestScore;

        // Exclude the winner by identity: record '!=' is value equality and could also
        // drop a distinct hit that merely compares equal.
        foreach (var (alt, score) in ranked.Where(r => !ReferenceEquals(r.Hit, best))) {
            best.Evidence.Add($"Alternative: {alt.Type} (score={score:0.00}) — {string.Join("; ", alt.Evidence.Take(2))}…");
        }

        return best;
    }
}
|
||||
```
|
||||
|
||||
## 2) Helpers
|
||||
|
||||
```csharp
|
||||
/// <summary>Small wrappers over <c>OverlayVfs</c> lookups and path joining.</summary>
static class VfsHelpers {
    /// <summary>Returns whether <paramref name="path"/> exists in <paramref name="vfs"/>.</summary>
    public static bool FileExists(OverlayVfs vfs, string path) => vfs.Exists(path);

    /// <summary>
    /// Opens <paramref name="path"/> for reading if it exists.
    /// </summary>
    /// <param name="stream">The opened stream on success; null when the path does not exist.</param>
    /// <returns>True when the file was opened. The caller owns and must dispose the stream.</returns>
    public static bool TryOpen(
        OverlayVfs vfs,
        string path,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out Stream? stream) {
        // NOTE(review): Exists followed by OpenRead is not atomic, and this assumes OpenRead
        // never returns null — confirm both against OverlayVfs.
        if (vfs.Exists(path)) {
            stream = vfs.OpenRead(path);
            return true;
        }

        stream = null;
        return false;
    }

    /// <summary>
    /// Resolves <paramref name="maybeRel"/> against <paramref name="cwd"/>; rooted paths are returned unchanged.
    /// </summary>
    // NOTE(review): System.IO.Path applies the host OS path rules, and GetFullPath resolves a
    // relative cwd against the host's current directory — confirm this is intended for image paths.
    public static string Join(string cwd, string maybeRel) =>
        Path.IsPathRooted(maybeRel) ? maybeRel : Path.GetFullPath(Path.Combine(cwd, maybeRel));
}
|
||||
|
||||
/// <summary>Extension helpers for inspecting argument vectors and command names.</summary>
static class ArgvHelpers {
    /// <summary>Index of the first element equal (ordinal) to <paramref name="flag"/>, or -1.</summary>
    public static int IndexOf(this string[] argv, string flag) {
        return Array.IndexOf(argv, flag);
    }

    /// <summary>The element following <paramref name="idx"/>, or null when idx is negative or last.</summary>
    public static string? Next(this string[] argv, int idx) {
        if (idx < 0) {
            return null;
        }

        int following = idx + 1;
        return following < argv.Length ? argv[following] : null;
    }

    /// <summary>True when any argument ends with <paramref name="suffix"/> (case-insensitive by default).</summary>
    public static bool AnyEndsWith(this IEnumerable<string> args, string suffix, bool ignoreCase = true) {
        StringComparison mode = ignoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal;
        return args.Any(arg => arg.EndsWith(suffix, mode));
    }

    /// <summary>
    /// True when the file-name part of <paramref name="candidate"/> equals one of
    /// <paramref name="names"/>, ignoring case. A null candidate never matches.
    /// </summary>
    public static bool Is(this string? candidate, params string[] names) {
        if (candidate is null) {
            return false;
        }

        return Array.Exists(
            names,
            name => string.Equals(Path.GetFileName(candidate), name, StringComparison.OrdinalIgnoreCase));
    }
}
|
||||
```
|
||||
|
||||
## 3) Scoring & calibration
|
||||
|
||||
- Each sub-detector returns a `RawScore` (0..1) based on family-specific heuristics.
|
||||
- Feed raw scores into a calibrator (Platt scaling or isotonic regression) trained on labelled corpora to get calibrated probabilities.
|
||||
- Persist calibration metadata per detector to avoid drift.
|
||||
- When no detector fires, return `LanguageType.Other` with low confidence and an evidence note.
|
||||
|
||||
## 4) Cross-checks
|
||||
|
||||
Enhance precision by combining detector results with filesystem and configuration signals:
|
||||
|
||||
- Compare declared `EXPOSE` ports with runtime defaults (e.g., `80/443` for Nginx, `8080` for Java app servers).
|
||||
- Inspect service-specific configuration (`nginx.conf`, `php-fpm.conf`, `web.config`, `Gemfile`, `package.json`, `pyproject.toml`).
|
||||
- For Java and .NET, verify artefact presence and manifest metadata; for Go/Rust check static binary traits.
|
||||
- Re-run detectors after ShellFlow rewrites to ensure post-`exec` commands are analysed.
|
||||
|
||||
## 5) Windows nuances
|
||||
|
||||
- Use `config.Shell` to detect PowerShell vs CMD; adjust interpreter lookup accordingly.
|
||||
- PE probing is mandatory—PowerShell scripts often front .NET or native binaries.
|
||||
- Consider case-insensitive paths and `\` separators.
|
||||
|
||||
## 6) Integration points
|
||||
|
||||
- Static reducer passes `ResolvedCommand` → runtime detector.
|
||||
- Dynamic reducer pipes steady-state commands through the same interface.
|
||||
- The resulting `LanguageHit`, together with its calibrated `confidence`, populates the `TerminalProcess`.
|
||||
- Downstream consumers (Policy Engine, Vuln Explorer) merge runtime type into their evidence trail.
|
||||
|
||||
## 7) Next steps
|
||||
|
||||
Language-specific heuristics live in:
|
||||
|
||||
| Runtime | Document |
|
||||
| --- | --- |
|
||||
| Java | `entrypoint-lang-java.md` |
|
||||
| .NET / C# | `entrypoint-lang-dotnet.md` |
|
||||
| Node.js | `entrypoint-lang-node.md` |
|
||||
| Python | `entrypoint-lang-python.md` |
|
||||
| PHP-FPM | `entrypoint-lang-phpfpm.md` |
|
||||
| Ruby | `entrypoint-lang-ruby.md` |
|
||||
| Go | `entrypoint-lang-go.md` |
|
||||
| Rust | `entrypoint-lang-rust.md` |
|
||||
| C/C++ | `entrypoint-lang-ccpp.md` |
|
||||
| Nginx | `entrypoint-lang-nginx.md` |
|
||||
| Deno | `entrypoint-lang-deno.md` |
|
||||
| Elixir/Erlang (BEAM) | `entrypoint-lang-elixir.md` |
|
||||
| Supervisors | `entrypoint-lang-supervisor.md` |
|
||||
|
||||
Each runtime file documents the heuristics, artefacts, and edge cases specific to that family.
|
||||
# Runtime Detector Overview
|
||||
|
||||
Runtime classification converts a reduced command into a concrete language or framework identity with supporting evidence. This document describes the shared contracts, helper utilities, calibration strategy, and integration points; language-specific heuristics live in the `entrypoint-lang-*.md` files.
|
||||
|
||||
## 1) Contracts
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Runtime families the detector can report. Member order is significant:
/// the implicit numeric values follow declaration order.
/// </summary>
public enum LanguageType {
    Java,
    DotNet,      // .NET / C#
    Node,        // Node.js
    Python,
    PhpFpm,      // PHP-FPM
    Ruby,
    Go,
    Rust,
    CCpp,        // C/C++
    Nginx,
    Deno,
    Elixir,      // Elixir/Erlang (BEAM)
    Supervisor,
    Other        // returned when no sub-detector fires
}
|
||||
|
||||
/// <summary>
/// The reduced command handed to the runtime detector.
/// </summary>
/// <param name="Argv">Full argument vector; the detector treats everything after element 0 as arguments.</param>
/// <param name="Argv0">Command name; used as the binary when <paramref name="AbsolutePath"/> is null.</param>
/// <param name="AbsolutePath">Resolved path of the command, or null when it could not be resolved.</param>
/// <param name="IsElf">Presumably true when the target is an ELF binary — confirm with the producer.</param>
/// <param name="IsPe">Presumably true when the target is a Windows PE binary — confirm with the producer.</param>
/// <param name="IsScript">Presumably true when the target is a script file — confirm with the producer.</param>
/// <param name="Shebang">Shebang line, if any; null otherwise.</param>
/// <param name="WorkingDir">Working directory associated with the command.</param>
public sealed record ResolvedCommand(
    string[] Argv,
    string   Argv0,
    string?  AbsolutePath,
    bool     IsElf,
    bool     IsPe,
    bool     IsScript,
    string?  Shebang,
    string   WorkingDir
);
|
||||
|
||||
/// <summary>
/// A single sub-detector result: the runtime family it recognised plus supporting evidence.
/// </summary>
/// <param name="Type">Runtime family that was recognised.</param>
/// <param name="RawScore">Uncalibrated score (0..1) produced by the sub-detector's heuristics.</param>
/// <param name="ResolvedBinary">Binary the hit refers to.</param>
/// <param name="Args">Arguments passed to that binary.</param>
/// <param name="Evidence">
/// Human-readable evidence lines. The list is mutable on purpose:
/// <c>LanguageDetector.Detect</c> appends "Alternative: …" entries to the winning hit.
/// </param>
/// <param name="AppArtifactPath">Optional; presumably the application artefact (jar, dll, …) — confirm per detector.</param>
/// <param name="MainModule">Optional; presumably the entry module or class — confirm per detector.</param>
/// <param name="Extra">Optional detector-specific key/value data.</param>
public sealed record LanguageHit(
    LanguageType                Type,
    double                      RawScore,
    string                      ResolvedBinary,
    string[]                    Args,
    List<string>                Evidence,
    string?                     AppArtifactPath = null,
    string?                     MainModule      = null,
    Dictionary<string,string>?  Extra           = null
);
|
||||
```
|
||||
|
||||
### Interface
|
||||
|
||||
```csharp
|
||||
/// <summary>
/// Contract implemented by each runtime-family detector.
/// </summary>
public interface ILanguageSubDetector {
    /// <summary>
    /// Inspects the command and its image context for signals of one runtime family.
    /// </summary>
    /// <returns>A hit describing the match, or null when this detector does not recognise the command.</returns>
    LanguageHit? TryDetect(
        ResolvedCommand   cmd,
        OverlayVfs        vfs,
        EnvBag            env,
        ImageContext      img,
        CancellationToken ct = default);
}
|
||||
|
||||
/// <summary>
/// Runs every registered sub-detector against a resolved command and returns the hit
/// with the highest calibrated score, annotated with the runners-up.
/// </summary>
public sealed class LanguageDetector {
    // Consulted in this order. The ranking sort is stable, so on equal calibrated
    // scores the detector listed first wins.
    private readonly ILanguageSubDetector[] _detectors = {
        new JavaDetector(),
        new DotNetDetector(),
        new NodeDetector(),
        new PythonDetector(),
        new PhpFpmDetector(),
        new RubyDetector(),
        new NginxDetector(),
        new GoDetector(),
        new RustDetector(),
        new DenoDetector(),
        new ElixirDetector(),
        new CCppDetector(),
        new SupervisorDetector()
    };

    private readonly ScoreCalibrator _cal = ScoreCalibrator.Default;

    /// <summary>
    /// Classifies <paramref name="cmd"/> into a runtime family.
    /// </summary>
    /// <param name="cmd">Reduced command to classify.</param>
    /// <param name="vfs">Image filesystem view passed through to each sub-detector.</param>
    /// <param name="env">Environment passed through to each sub-detector.</param>
    /// <param name="img">Image context passed through to each sub-detector.</param>
    /// <param name="confidence">Calibrated score of the returned hit.</param>
    /// <param name="ct">Optional token; checked before each sub-detector and forwarded to it.</param>
    /// <returns>
    /// The best-scoring hit, whose <c>Evidence</c> list gains one "Alternative: …" line per
    /// other hit; or a <see cref="LanguageType.Other"/> hit with raw score 0.10 when nothing fires.
    /// </returns>
    /// <exception cref="OperationCanceledException">When <paramref name="ct"/> is cancelled.</exception>
    public LanguageHit Detect(ResolvedCommand cmd, OverlayVfs vfs, EnvBag env, ImageContext img, out double confidence, CancellationToken ct = default) {
        // Calibrate each hit exactly once and carry the score alongside it.
        // NOTE(review): assumes ScoreCalibrator.Calibrate is deterministic for a given hit — confirm.
        var scored = new List<(LanguageHit Hit, double Score)>(_detectors.Length);
        foreach (var detector in _detectors) {
            ct.ThrowIfCancellationRequested();
            if (detector.TryDetect(cmd, vfs, env, img, ct) is { } hit) {
                scored.Add((hit, _cal.Calibrate(hit)));
            }
        }

        if (scored.Count == 0) {
            var fallback = new LanguageHit(LanguageType.Other, 0.10, cmd.AbsolutePath ?? cmd.Argv0, cmd.Argv.Skip(1).ToArray(),
                new() { "No strong runtime family signals detected." });
            confidence = _cal.Calibrate(fallback);
            return fallback;
        }

        var ranked = scored.OrderByDescending(s => s.Score).ToList();
        var (best, bestScore) = ranked[0];
        confidence = bestScore;

        // Exclude the winner by identity: record '!=' is value equality and could also
        // drop a distinct hit that merely compares equal.
        foreach (var (alt, score) in ranked.Where(r => !ReferenceEquals(r.Hit, best))) {
            best.Evidence.Add($"Alternative: {alt.Type} (score={score:0.00}) — {string.Join("; ", alt.Evidence.Take(2))}…");
        }

        return best;
    }
}
|
||||
```
|
||||
|
||||
## 2) Helpers
|
||||
|
||||
```csharp
|
||||
/// <summary>Small wrappers over <c>OverlayVfs</c> lookups and path joining.</summary>
static class VfsHelpers {
    /// <summary>Returns whether <paramref name="path"/> exists in <paramref name="vfs"/>.</summary>
    public static bool FileExists(OverlayVfs vfs, string path) => vfs.Exists(path);

    /// <summary>
    /// Opens <paramref name="path"/> for reading if it exists.
    /// </summary>
    /// <param name="stream">The opened stream on success; null when the path does not exist.</param>
    /// <returns>True when the file was opened. The caller owns and must dispose the stream.</returns>
    public static bool TryOpen(
        OverlayVfs vfs,
        string path,
        [System.Diagnostics.CodeAnalysis.NotNullWhen(true)] out Stream? stream) {
        // NOTE(review): Exists followed by OpenRead is not atomic, and this assumes OpenRead
        // never returns null — confirm both against OverlayVfs.
        if (vfs.Exists(path)) {
            stream = vfs.OpenRead(path);
            return true;
        }

        stream = null;
        return false;
    }

    /// <summary>
    /// Resolves <paramref name="maybeRel"/> against <paramref name="cwd"/>; rooted paths are returned unchanged.
    /// </summary>
    // NOTE(review): System.IO.Path applies the host OS path rules, and GetFullPath resolves a
    // relative cwd against the host's current directory — confirm this is intended for image paths.
    public static string Join(string cwd, string maybeRel) =>
        Path.IsPathRooted(maybeRel) ? maybeRel : Path.GetFullPath(Path.Combine(cwd, maybeRel));
}
|
||||
|
||||
/// <summary>Extension helpers for inspecting argument vectors and command names.</summary>
static class ArgvHelpers {
    /// <summary>Index of the first element equal (ordinal) to <paramref name="flag"/>, or -1.</summary>
    public static int IndexOf(this string[] argv, string flag) {
        return Array.IndexOf(argv, flag);
    }

    /// <summary>The element following <paramref name="idx"/>, or null when idx is negative or last.</summary>
    public static string? Next(this string[] argv, int idx) {
        if (idx < 0) {
            return null;
        }

        int following = idx + 1;
        return following < argv.Length ? argv[following] : null;
    }

    /// <summary>True when any argument ends with <paramref name="suffix"/> (case-insensitive by default).</summary>
    public static bool AnyEndsWith(this IEnumerable<string> args, string suffix, bool ignoreCase = true) {
        StringComparison mode = ignoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal;
        return args.Any(arg => arg.EndsWith(suffix, mode));
    }

    /// <summary>
    /// True when the file-name part of <paramref name="candidate"/> equals one of
    /// <paramref name="names"/>, ignoring case. A null candidate never matches.
    /// </summary>
    public static bool Is(this string? candidate, params string[] names) {
        if (candidate is null) {
            return false;
        }

        return Array.Exists(
            names,
            name => string.Equals(Path.GetFileName(candidate), name, StringComparison.OrdinalIgnoreCase));
    }
}
|
||||
```
|
||||
|
||||
## 3) Scoring & calibration
|
||||
|
||||
- Each sub-detector returns a `RawScore` (0..1) based on family-specific heuristics.
|
||||
- Feed raw scores into a calibrator (Platt scaling or isotonic regression) trained on labelled corpora to get calibrated probabilities.
|
||||
- Persist calibration metadata per detector to avoid drift.
|
||||
- When no detector fires, return `LanguageType.Other` with low confidence and an evidence note.
|
||||
|
||||
## 4) Cross-checks
|
||||
|
||||
Enhance precision by combining detector results with filesystem and configuration signals:
|
||||
|
||||
- Compare declared `EXPOSE` ports with runtime defaults (e.g., `80/443` for Nginx, `8080` for Java app servers).
|
||||
- Inspect service-specific configuration (`nginx.conf`, `php-fpm.conf`, `web.config`, `Gemfile`, `package.json`, `pyproject.toml`).
|
||||
- For Java and .NET, verify artefact presence and manifest metadata; for Go/Rust check static binary traits.
|
||||
- Re-run detectors after ShellFlow rewrites to ensure post-`exec` commands are analysed.
|
||||
|
||||
## 5) Windows nuances
|
||||
|
||||
- Use `config.Shell` to detect PowerShell vs CMD; adjust interpreter lookup accordingly.
|
||||
- PE probing is mandatory—PowerShell scripts often front .NET or native binaries.
|
||||
- Consider case-insensitive paths and `\` separators.
|
||||
|
||||
## 6) Integration points
|
||||
|
||||
- Static reducer passes `ResolvedCommand` → runtime detector.
|
||||
- Dynamic reducer pipes steady-state commands through the same interface.
|
||||
- The resulting `LanguageHit`, together with its calibrated `confidence`, populates the `TerminalProcess`.
|
||||
- Downstream consumers (Policy Engine, Vuln Explorer) merge runtime type into their evidence trail.
|
||||
|
||||
## 7) Next steps
|
||||
|
||||
Language-specific heuristics live in:
|
||||
|
||||
| Runtime | Document |
|
||||
| --- | --- |
|
||||
| Java | `entrypoint-lang-java.md` |
|
||||
| .NET / C# | `entrypoint-lang-dotnet.md` |
|
||||
| Node.js | `entrypoint-lang-node.md` |
|
||||
| Python | `entrypoint-lang-python.md` |
|
||||
| PHP-FPM | `entrypoint-lang-phpfpm.md` |
|
||||
| Ruby | `entrypoint-lang-ruby.md` |
|
||||
| Go | `entrypoint-lang-go.md` |
|
||||
| Rust | `entrypoint-lang-rust.md` |
|
||||
| C/C++ | `entrypoint-lang-ccpp.md` |
|
||||
| Nginx | `entrypoint-lang-nginx.md` |
|
||||
| Deno | `entrypoint-lang-deno.md` |
|
||||
| Elixir/Erlang (BEAM) | `entrypoint-lang-elixir.md` |
|
||||
| Supervisors | `entrypoint-lang-supervisor.md` |
|
||||
|
||||
Each runtime file documents the heuristics, artefacts, and edge cases specific to that family.
|
||||
|
||||
Reference in New Issue
Block a user