Resolve Concelier/Excititor merge conflicts

2025-10-20 14:19:25 +03:00
parent 67d581d2e8 5fd4032c7c
commit 09b6a28172
2687 changed files with 212646 additions and 85913 deletions
--- a/bench/Scanner.Analyzers/README.md
+++ b/bench/Scanner.Analyzers/README.md
@@ -0,0 +1,36 @@
+# Scanner Analyzer Microbench Harness
+
+The bench harness exercises the language analyzers against representative filesystem layouts so that regressions are caught before they ship.
+
+## Layout
+- `run-bench.js` – Node.js script that traverses the sample `node_modules/` and `site-packages/` trees, replicating the package discovery work performed by the upcoming analyzers.
+- `config.json` – Declarative list of scenarios the harness executes. Each scenario points at a directory in `samples/`.
+- `baseline.csv` – Reference numbers captured on the 4 vCPU warm rig described in `docs/12_PERFORMANCE_WORKBOOK.md`. CI publishes fresh CSVs so perf trends stay visible.
+
+## Running locally
+
+```bash
+cd bench/Scanner.Analyzers
+node run-bench.js --out baseline.csv --samples ../..
+```
+
+The harness prints a table to stdout and writes the CSV (if `--out` is specified) with the following headers:
+
+```
+scenario,iterations,sample_count,mean_ms,p95_ms,max_ms
+```
+
+Use `--iterations` to override the default (5 passes per scenario) and `--threshold-ms` to customize the failure budget. Budgets default to 5 000 ms, aligned with the SBOM compose objective; a scenario fails the run when its slowest iteration exceeds the budget.
+
+## Adding scenarios
+1. Drop the fixture tree under `samples/<area>/...`.
+2. Append a new scenario entry to `config.json` describing:
+   - `id` – snake_case scenario name (also used in CSV).
+   - `label` – human-friendly description shown in logs.
+   - `root` – path to the directory that will be scanned, resolved against the repository root (it must not resolve outside it).
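+   - `matcher` – glob selecting the files that will be parsed, matched against each file's `/`-separated path relative to `root` (`*` stays within one path segment, `**` spans directories).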
+   - `parser` – `node` or `python` to choose the metadata reader.
+3. Re-run `node run-bench.js --out baseline.csv`.
+4. Commit both the fixture and updated baseline.
+
+The harness is intentionally dependency-free to remain runnable inside minimal CI runners.
--- a/bench/Scanner.Analyzers/baseline.csv
+++ b/bench/Scanner.Analyzers/baseline.csv
@@ -0,0 +1,3 @@
+scenario,iterations,sample_count,mean_ms,p95_ms,max_ms
+node_monorepo_walk,5,4,233.9428,319.8564,344.4611
+python_site_packages_walk,5,3,72.9166,74.8970,74.9884
--- a/bench/Scanner.Analyzers/config.json
+++ b/bench/Scanner.Analyzers/config.json
@@ -0,0 +1,20 @@
+{
+  "thresholdMs": 5000,
+  "iterations": 5,
+  "scenarios": [
+    {
+      "id": "node_monorepo_walk",
+      "label": "Node.js monorepo package.json harvest",
+      "root": "samples/runtime/npm-monorepo/node_modules",
+      "matcher": "**/package.json",
+      "parser": "node"
+    },
+    {
+      "id": "python_site_packages_walk",
+      "label": "Python site-packages dist-info crawl",
+      "root": "samples/runtime/python-venv/lib/python3.11/site-packages",
+      "matcher": "**/*.dist-info/METADATA",
+      "parser": "python"
+    }
+  ]
+}
--- a/bench/Scanner.Analyzers/lang/README.md
+++ b/bench/Scanner.Analyzers/lang/README.md
@@ -0,0 +1,12 @@
+# Scanner Language Analyzer Benchmarks
+
+This directory will capture benchmark results for language analyzers (Node, Python, Go, .NET, Rust).
+
+Pending tasks:
+- LA1: Node analyzer microbench CSV + flamegraph.
+- LA2: Python hash throughput CSV.
+- LA3: Go build info extraction benchmarks.
+- LA4: .NET RID dedupe performance matrix.
+- LA5: Rust heuristic coverage comparisons.
+
+Results should be committed as deterministic CSV/JSON outputs with accompanying methodology notes.
--- a/bench/Scanner.Analyzers/run-bench.js
+++ b/bench/Scanner.Analyzers/run-bench.js
@@ -0,0 +1,249 @@
+#!/usr/bin/env node
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+const { performance } = require('perf_hooks');
+
+function globToRegExp(pattern) { // Compiles a glob containing `*` / `**` into an anchored RegExp; walkFiles() tests the result against '/'-separated paths relative to the scan root.
+  let working = pattern
+    .replace(/\*\*/g, ':::DOUBLE_WILDCARD:::') // swap wildcards for placeholders first so the escaping pass below cannot touch them
+    .replace(/\*/g, ':::SINGLE_WILDCARD:::');
+  working = working.replace(/([.+^${}()|[\]\\])/g, '\\$1'); // backslash-escape regex metacharacters; NOTE(review): `?` is not in this class, so a `?` in the glob reaches the RegExp as a quantifier - confirm that is intended
+  working = working
+    .replace(/:::DOUBLE_WILDCARD:::\//g, '(?:.*/)?') // `**/` becomes an optional prefix of any depth, so it also matches zero directories
+    .replace(/:::DOUBLE_WILDCARD:::/g, '.*') // any remaining `**` matches anything, '/' included
+    .replace(/:::SINGLE_WILDCARD:::/g, '[^/]*'); // a single `*` never crosses a '/'
+  return new RegExp(`^${working}$`); // anchored at both ends: the whole path has to match
+}
+
+function walkFiles(root, matcher) { // Returns the paths of every regular file under `root` whose root-relative path satisfies `matcher` (anything exposing .test(), here a RegExp).
+  const out = [];
+  const stack = [root]; // explicit stack instead of recursion
+  while (stack.length) {
+    const current = stack.pop();
+    const stat = fs.statSync(current, { throwIfNoEntry: true }); // NOTE(review): statSync resolves symlinks, so a link cycle under root would keep the walk descending - confirm fixtures contain none
+    if (stat.isDirectory()) {
+      const entries = fs.readdirSync(current);
+      for (const entry of entries) {
+        stack.push(path.join(current, entry));
+      }
+    } else if (stat.isFile()) { // anything that is neither a directory nor a regular file is ignored
+      const relativePath = path.relative(root, current).replace(/\\/g, '/'); // forward slashes so the matcher sees the same shape whatever the platform separator is
+      if (matcher.test(relativePath)) {
+        out.push(current);
+      }
+    }
+  }
+  return out; // order follows the stack traversal; nothing here sorts it
+}
+
+function parseArgs(argv) { // Parses a process.argv-style array into an options object; throws Error on any flag it does not recognise.
+  const args = {
+    config: path.join(__dirname, 'config.json'), // default: config.json next to this script
+    iterations: undefined, // undefined lets main() fall back to the config value
+    thresholdMs: undefined, // undefined lets main() fall back to the config value
+    out: undefined, // no CSV is written unless --out is given
+    repoRoot: path.join(__dirname, '..', '..'), // default: two directories above this script
+  };
+
+  for (let i = 2; i < argv.length; i++) { // starts at index 2; main() passes process.argv, whose first two entries are the node binary and the script path
+    const current = argv[i];
+    switch (current) {
+      case '--config':
+        args.config = argv[++i]; // each flag consumes the following token as its value; NOTE(review): a trailing flag with no value stores undefined
+        break;
+      case '--iterations':
+        args.iterations = Number(argv[++i]); // NOTE(review): a missing or non-numeric value becomes NaN and is not rejected here
+        break;
+      case '--threshold-ms':
+        args.thresholdMs = Number(argv[++i]); // NOTE(review): same unvalidated Number() conversion as --iterations
+        break;
+      case '--out':
+        args.out = argv[++i];
+        break;
+      case '--repo-root':
+      case '--samples': // alias: both flags set repoRoot
+        args.repoRoot = argv[++i];
+        break;
+      default:
+        throw new Error(`Unknown argument: ${current}`);
+    }
+  }
+
+  return args;
+}
+
+function loadConfig(configPath) { // Reads and parses the JSON config at configPath and returns the parsed object; read and parse errors propagate to the caller.
+  const json = fs.readFileSync(configPath, 'utf8');
+  const cfg = JSON.parse(json);
+  if (!Array.isArray(cfg.scenarios) || cfg.scenarios.length === 0) { // the only shape check performed: scenarios must be a non-empty array
+    throw new Error('config.scenarios must be a non-empty array');
+  }
+  return cfg; // individual scenario fields are not validated here
+}
+
+function ensureWithinRepo(repoRoot, target) { // Predicate despite the name: returns true when target is repoRoot itself or sits below it, false otherwise; it never throws on its own.
+  const relative = path.relative(repoRoot, target);
+  if (relative === '' || relative === '.') { // target is the repo root itself
+    return true;
+  }
+  return !relative.startsWith('..') && !path.isAbsolute(relative); // NOTE(review): startsWith('..') also rejects in-repo names such as '..cache'; the isAbsolute check presumably covers path.relative returning an absolute path (e.g. another Windows drive) - confirm
+}
+
+function parseNodePackage(contents) { // Parses package.json text and returns { name, version }; JSON.parse errors propagate to the caller.
+  const parsed = JSON.parse(contents);
+  if (!parsed.name || !parsed.version) { // falsy values (missing field, empty string) count as missing
+    throw new Error('package.json missing name/version');
+  }
+  return { name: parsed.name, version: parsed.version }; // every other manifest field is dropped
+}
+
+function parsePythonMetadata(contents) { // Extracts the first non-empty `Name:` and `Version:` values from METADATA text and returns { name, version }.
+  let name;
+  let version;
+  for (const line of contents.split(/\r?\n/)) { // accepts LF and CRLF line endings
+    if (!name && line.startsWith('Name:')) { // case-sensitive prefix match at the start of the line
+      name = line.slice(5).trim(); // 5 === 'Name:'.length
+    } else if (!version && line.startsWith('Version:')) {
+      version = line.slice(8).trim(); // 8 === 'Version:'.length
+    }
+    if (name && version) { // stop reading as soon as both values are known
+      break;
+    }
+  }
+  if (!name || !version) { // NOTE(review): the scan does not stop at the blank line ending the header block, so a matching line later in the file could satisfy this
+    throw new Error('METADATA missing Name/Version headers');
+  }
+  return { name, version };
+}
+
+function formatRow(row) { // Renders one result object as a ' | '-separated console table line.
+  const cols = [
+    row.id.padEnd(28), // left-aligned scenario id, 28 characters wide
+    row.sampleCount.toString().padStart(5), // right-aligned file count
+    row.meanMs.toFixed(2).padStart(9), // timings: two decimals, right-aligned to 9 characters
+    row.p95Ms.toFixed(2).padStart(9),
+    row.maxMs.toFixed(2).padStart(9), // throws a TypeError if maxMs is not a number
+  ];
+  return cols.join(' | ');
+}
+
+function percentile(sortedDurations, percentile) { // Linearly interpolated percentile of an ascending-sorted numeric array; the second parameter shadows the function's own name inside the body.
+  if (sortedDurations.length === 0) {
+    return 0; // empty input yields 0 rather than NaN or an exception
+  }
+  const rank = (percentile / 100) * (sortedDurations.length - 1); // fractional index into the array
+  const lower = Math.floor(rank);
+  const upper = Math.ceil(rank);
+  const weight = rank - lower; // how far rank sits between lower and upper
+  if (upper >= sortedDurations.length) { // only reachable when percentile is above 100
+    return sortedDurations[lower];
+  }
+  return sortedDurations[lower] + weight * (sortedDurations[upper] - sortedDurations[lower]);
+}
+
+function main() { // Runs every configured scenario, prints a results table, optionally writes a CSV, and sets a non-zero exit code when a scenario exceeds the threshold.
+  const args = parseArgs(process.argv);
+  const cfg = loadConfig(args.config);
+  const iterations = args.iterations ?? cfg.iterations ?? 5; // precedence: CLI flag, then config file, then the built-in default
+  const thresholdMs = args.thresholdMs ?? cfg.thresholdMs ?? 5000; // ?? only skips null/undefined, so a NaN from parseArgs is used as-is
+
+  const results = [];
+  const failures = [];
+
+  for (const scenario of cfg.scenarios) {
+    const scenarioRoot = path.resolve(args.repoRoot, scenario.root); // scenario roots are interpreted relative to the repo root
+    if (!ensureWithinRepo(args.repoRoot, scenarioRoot)) { // refuse roots that resolve outside the repo root
+      throw new Error(`Scenario root ${scenario.root} escapes repo root ${args.repoRoot}`);
+    }
+    if (!fs.existsSync(scenarioRoot)) {
+      throw new Error(`Scenario root ${scenarioRoot} does not exist`);
+    }
+
+    const matcher = globToRegExp(scenario.matcher.replace(/\\/g, '/')); // backslashes in the configured glob are treated as path separators
+    const durations = [];
+    let sampleCount = 0;
+
+    for (let attempt = 0; attempt < iterations; attempt++) { // NOTE(review): when iterations is 0 or NaN this body never runs, leaving durations empty and max undefined, so formatRow's toFixed call would throw
+      const start = performance.now(); // the timed region covers the directory walk plus reading and parsing every match
+      const files = walkFiles(scenarioRoot, matcher);
+      if (files.length === 0) {
+        throw new Error(`Scenario ${scenario.id} matched no files`);
+      }
+
+      for (const filePath of files) {
+        const contents = fs.readFileSync(filePath, 'utf8');
+        if (scenario.parser === 'node') {
+          parseNodePackage(contents); // return value is discarded; only the work and any thrown error matter
+        } else if (scenario.parser === 'python') {
+          parsePythonMetadata(contents);
+        } else {
+          throw new Error(`Unknown parser ${scenario.parser} for scenario ${scenario.id}`);
+        }
+      }
+      const end = performance.now();
+      durations.push(end - start);
+      sampleCount = files.length; // overwritten on every pass, so the reported count comes from the last iteration
+    }
+
+    durations.sort((a, b) => a - b); // ascending numeric sort; percentile() and the max lookup below rely on it
+    const mean = durations.reduce((acc, value) => acc + value, 0) / durations.length;
+    const p95 = percentile(durations, 95);
+    const max = durations[durations.length - 1];
+
+    if (max > thresholdMs) { // the budget is checked against the slowest iteration, not the mean or p95
+      failures.push(`${scenario.id} exceeded threshold: ${(max).toFixed(2)} ms > ${thresholdMs} ms`);
+    }
+
+    results.push({
+      id: scenario.id,
+      label: scenario.label, // stored on the result but not printed or written to the CSV below
+      sampleCount,
+      meanMs: mean,
+      p95Ms: p95,
+      maxMs: max,
+      iterations,
+    });
+  }
+
+  console.log('Scenario                     | Count |   Mean(ms) |    P95(ms) |     Max(ms)'); // NOTE(review): the numeric header cells appear wider than the 9-character cells formatRow emits, so columns may not line up
+  console.log('---------------------------- | ----- | --------- | --------- | ----------');
+  for (const row of results) {
+    console.log(formatRow(row));
+  }
+
+  if (args.out) {
+    const header = 'scenario,iterations,sample_count,mean_ms,p95_ms,max_ms\n';
+    const csvRows = results
+      .map((row) =>
+        [
+          row.id, // written verbatim; no CSV quoting or escaping is applied
+          row.iterations,
+          row.sampleCount,
+          row.meanMs.toFixed(4), // the CSV keeps four decimals, the console table two
+          row.p95Ms.toFixed(4),
+          row.maxMs.toFixed(4),
+        ].join(',')
+      )
+      .join('\n');
+    fs.writeFileSync(args.out, header + csvRows + '\n', 'utf8'); // replaces any existing file at that path
+  }
+
+  if (failures.length > 0) { // reported only after the table and CSV have been produced
+    console.error('\nPerformance threshold exceeded:');
+    for (const failure of failures) {
+      console.error(` - ${failure}`);
+    }
+    process.exitCode = 1; // sets the exit status without terminating the process at this point
+  }
+}
+
+if (require.main === module) { // run main() only when this file is executed directly, not when it is require()d
+  try {
+    main();
+  } catch (err) {
+    console.error(err instanceof Error ? err.message : err); // print only the message for Error instances, so no stack trace is shown
+    process.exit(1); // any thrown error ends the process with status 1
+  }
+}