Resolve Concelier/Excititor merge conflicts
This commit is contained in:
		
							
								
								
									
										36
									
								
								bench/Scanner.Analyzers/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								bench/Scanner.Analyzers/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | ||||
| # Scanner Analyzer Microbench Harness | ||||
|  | ||||
| The bench harness exercises the language analyzers against representative filesystem layouts so that regressions are caught before they ship. | ||||
|  | ||||
| ## Layout | ||||
| - `run-bench.js` – Node.js script that traverses the sample `node_modules/` and `site-packages/` trees, replicating the package discovery work performed by the upcoming analyzers. | ||||
| - `config.json` – Declarative list of scenarios the harness executes. Each scenario points at a directory in `samples/`. | ||||
| - `baseline.csv` – Reference numbers captured on the 4 vCPU warm rig described in `docs/12_PERFORMANCE_WORKBOOK.md`. CI publishes fresh CSVs so perf trends stay visible. | ||||
|  | ||||
| ## Running locally | ||||
|  | ||||
| ```bash | ||||
| cd bench/Scanner.Analyzers | ||||
| node run-bench.js --out baseline.csv --samples ../.. | ||||
| ``` | ||||
|  | ||||
| The harness prints a table to stdout and writes the CSV (if `--out` is specified) with the following headers: | ||||
|  | ||||
| ``` | ||||
| scenario,iterations,sample_count,mean_ms,p95_ms,max_ms | ||||
| ``` | ||||
|  | ||||
| Use `--iterations` to override the default (5 passes per scenario) and `--threshold-ms` to customize the failure budget. Budgets default to 5 000 ms, aligned with the SBOM compose objective. | ||||
|  | ||||
| ## Adding scenarios | ||||
| 1. Drop the fixture tree under `samples/<area>/...`. | ||||
| 2. Append a new scenario entry to `config.json` describing: | ||||
|    - `id` – snake_case scenario name (also used in CSV). | ||||
|    - `label` – human-friendly description shown in logs. | ||||
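|    - `root` – path to the directory that will be scanned, resolved relative to the repository root (override the root with `--repo-root` / `--samples`); it must stay inside that root. | ||||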
|    - `matcher` – glob selecting the files to parse, matched against `/`-separated paths relative to `root` (`*` stays within one path segment, `**` spans directories). | ||||
|    - `parser` – `node` or `python` to choose the metadata reader. | ||||
| 3. Re-run `node run-bench.js --out baseline.csv`. | ||||
| 4. Commit both the fixture and updated baseline. | ||||
|  | ||||
| The harness is intentionally dependency-free to remain runnable inside minimal CI runners. | ||||
							
								
								
									
										3
									
								
								bench/Scanner.Analyzers/baseline.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								bench/Scanner.Analyzers/baseline.csv
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| scenario,iterations,sample_count,mean_ms,p95_ms,max_ms | ||||
| node_monorepo_walk,5,4,233.9428,319.8564,344.4611 | ||||
| python_site_packages_walk,5,3,72.9166,74.8970,74.9884 | ||||
| 
 | 
							
								
								
									
										20
									
								
								bench/Scanner.Analyzers/config.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								bench/Scanner.Analyzers/config.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | ||||
| { | ||||
|   "thresholdMs": 5000, | ||||
|   "iterations": 5, | ||||
|   "scenarios": [ | ||||
|     { | ||||
|       "id": "node_monorepo_walk", | ||||
|       "label": "Node.js monorepo package.json harvest", | ||||
|       "root": "samples/runtime/npm-monorepo/node_modules", | ||||
|       "matcher": "**/package.json", | ||||
|       "parser": "node" | ||||
|     }, | ||||
|     { | ||||
|       "id": "python_site_packages_walk", | ||||
|       "label": "Python site-packages dist-info crawl", | ||||
|       "root": "samples/runtime/python-venv/lib/python3.11/site-packages", | ||||
|       "matcher": "**/*.dist-info/METADATA", | ||||
|       "parser": "python" | ||||
|     } | ||||
|   ] | ||||
| } | ||||
							
								
								
									
										12
									
								
								bench/Scanner.Analyzers/lang/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								bench/Scanner.Analyzers/lang/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,12 @@ | ||||
| # Scanner Language Analyzer Benchmarks | ||||
|  | ||||
| This directory will capture benchmark results for language analyzers (Node, Python, Go, .NET, Rust). | ||||
|  | ||||
| Pending tasks: | ||||
| - LA1: Node analyzer microbench CSV + flamegraph. | ||||
| - LA2: Python hash throughput CSV. | ||||
| - LA3: Go build info extraction benchmarks. | ||||
| - LA4: .NET RID dedupe performance matrix. | ||||
| - LA5: Rust heuristic coverage comparisons. | ||||
|  | ||||
| Results should be committed as deterministic CSV/JSON outputs with accompanying methodology notes. | ||||
							
								
								
									
										249
									
								
								bench/Scanner.Analyzers/run-bench.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										249
									
								
								bench/Scanner.Analyzers/run-bench.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,249 @@ | ||||
| #!/usr/bin/env node | ||||
| 'use strict'; | ||||
|  | ||||
| const fs = require('fs'); | ||||
| const path = require('path'); | ||||
| const { performance } = require('perf_hooks'); | ||||
|  | ||||
// Compile a glob into an anchored regular expression.
//
// Supported wildcards:
//   "**" followed by "/"  matches zero or more whole directory segments
//   "**" on its own       matches any run of characters, slashes included
//   "*"                   matches any run of characters within one path segment
//   "?"                   matches exactly one character within one path segment
// Every other character, including regex metacharacters, is matched literally.
//
// The pattern is translated in a single pass so that the text produced for one
// wildcard can never be re-interpreted by a later replacement step.
//
// @param {string} pattern  Glob using "/" as the path separator.
// @returns {RegExp}        Expression that must match the entire relative path.
function globToRegExp(pattern) {
  // Alternation order matters: the longest wildcard form has to be tried first.
  const tokens = /\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g;
  const source = pattern.replace(tokens, (token) => {
    switch (token) {
      case '**/':
        return '(?:.*/)?';
      case '**':
        return '.*';
      case '*':
        return '[^/]*';
      case '?':
        return '[^/]';
      default:
        // Regex metacharacter that must be taken literally.
        return `\\${token}`;
    }
  });
  return new RegExp(`^${source}$`);
}
|  | ||||
/**
 * Collect the regular files found under `root` whose path, taken relative to
 * `root` and normalised to forward slashes, satisfies `matcher`.
 *
 * The traversal is iterative (explicit stack) and stats every entry it visits.
 * Entries that are neither directories nor regular files are ignored.
 *
 * @param {string} root - Directory (or file) where the walk starts.
 * @param {RegExp} matcher - Tested against each file's root-relative path.
 * @returns {string[]} Matching paths, each built by joining onto `root`.
 */
function walkFiles(root, matcher) {
  const matches = [];
  const pending = [root];

  while (pending.length > 0) {
    const candidate = pending.pop();
    const info = fs.statSync(candidate, { throwIfNoEntry: true });

    if (info.isDirectory()) {
      // Queue the children; they are visited on later turns of the loop.
      for (const child of fs.readdirSync(candidate)) {
        pending.push(path.join(candidate, child));
      }
      continue;
    }

    if (!info.isFile()) {
      continue;
    }

    // Normalise Windows separators so the matcher only ever sees "/".
    const relativePath = path.relative(root, candidate).replace(/\\/g, '/');
    if (matcher.test(relativePath)) {
      matches.push(candidate);
    }
  }

  return matches;
}
|  | ||||
/**
 * Parse the harness command line.
 *
 * Recognised flags (each takes exactly one value):
 *   --config <path>        config file (default: config.json next to this script)
 *   --iterations <n>       passes per scenario; must be a positive integer
 *   --threshold-ms <ms>    failure budget; must be a non-negative number
 *   --out <path>           CSV destination
 *   --repo-root <path>     base directory for scenario roots (alias: --samples)
 *
 * `iterations`, `thresholdMs` and `out` stay `undefined` when their flag is absent.
 *
 * @param {string[]} argv - Full argument vector; parsing starts at index 2.
 * @returns {{config: string, iterations: (number|undefined), thresholdMs: (number|undefined), out: (string|undefined), repoRoot: string}}
 * @throws {Error} On an unknown flag, a flag without a value, or an invalid number.
 */
function parseArgs(argv) {
  const args = {
    config: path.join(__dirname, 'config.json'),
    iterations: undefined,
    thresholdMs: undefined,
    out: undefined,
    repoRoot: path.join(__dirname, '..', '..'),
  };

  // Value belonging to the flag at `index`; a flag that ends the command line is an error.
  const valueAfter = (index) => {
    const value = argv[index + 1];
    if (value === undefined) {
      throw new Error(`Missing value for ${argv[index]}`);
    }
    return value;
  };

  // Numeric value for the flag at `index`. Without this check a bad value would
  // become NaN, which `??` does not replace with the configured default.
  const numberAfter = (index, isValid, requirement) => {
    const raw = valueAfter(index);
    // Number('') is 0, so blank input has to be rejected explicitly.
    const parsed = String(raw).trim() === '' ? Number.NaN : Number(raw);
    if (!isValid(parsed)) {
      throw new Error(`${argv[index]} must be ${requirement} (got "${raw}")`);
    }
    return parsed;
  };

  // Every flag consumes exactly one value, hence the stride of two.
  for (let i = 2; i < argv.length; i += 2) {
    const flag = argv[i];
    switch (flag) {
      case '--config':
        args.config = valueAfter(i);
        break;
      case '--iterations':
        args.iterations = numberAfter(i, (n) => Number.isInteger(n) && n > 0, 'a positive integer');
        break;
      case '--threshold-ms':
        args.thresholdMs = numberAfter(i, (n) => !Number.isNaN(n) && n >= 0, 'a non-negative number');
        break;
      case '--out':
        args.out = valueAfter(i);
        break;
      case '--repo-root':
      case '--samples':
        args.repoRoot = valueAfter(i);
        break;
      default:
        throw new Error(`Unknown argument: ${flag}`);
    }
  }

  return args;
}
|  | ||||
/**
 * Read and validate the benchmark configuration file.
 *
 * The document must be a JSON object with a non-empty `scenarios` array, and
 * every scenario must carry string `id`, `root` and `matcher` fields. Checking
 * this here reports a broken config before any scenario is measured.
 *
 * @param {string} configPath - Path of the JSON config file.
 * @returns {object} The parsed configuration, unmodified.
 * @throws {SyntaxError} When the file is not valid JSON (message names the file).
 * @throws {Error} When the file cannot be read or the structure is invalid.
 */
function loadConfig(configPath) {
  const json = fs.readFileSync(configPath, 'utf8');

  let cfg;
  try {
    cfg = JSON.parse(json);
  } catch (err) {
    // Keep the SyntaxError type but say which file was malformed.
    throw new SyntaxError(`Invalid JSON in ${configPath}: ${err.message}`, { cause: err });
  }

  // `null` and primitives parse successfully, so guard before touching properties.
  const isObject = cfg !== null && typeof cfg === 'object';
  if (!isObject || !Array.isArray(cfg.scenarios) || cfg.scenarios.length === 0) {
    throw new Error('config.scenarios must be a non-empty array');
  }

  cfg.scenarios.forEach((scenario, index) => {
    for (const field of ['id', 'root', 'matcher']) {
      if (typeof scenario?.[field] !== 'string') {
        throw new Error(`config.scenarios[${index}].${field} must be a string`);
      }
    }
  });

  return cfg;
}
|  | ||||
/**
 * Report whether `target` is `repoRoot` itself or lies somewhere beneath it.
 *
 * The check is purely lexical: it looks at the path from `repoRoot` to `target`
 * and does not touch the filesystem.
 *
 * @param {string} repoRoot - Directory that must contain the target.
 * @param {string} target - Path to check.
 * @returns {boolean} `true` when the target does not escape the root.
 */
function ensureWithinRepo(repoRoot, target) {
  const relative = path.relative(repoRoot, target);
  if (relative === '' || relative === '.') {
    return true;
  }
  // An absolute result means no relative route exists (e.g. another Windows drive).
  if (path.isAbsolute(relative)) {
    return false;
  }
  // Compare the whole first segment: a plain prefix test would also reject
  // legitimate names that merely begin with two dots, such as "..cache".
  const [firstSegment] = relative.split(path.sep);
  return firstSegment !== '..';
}
|  | ||||
/**
 * Extract the package identity from the text of a `package.json` file.
 *
 * @param {string} contents - Raw JSON text of the manifest.
 * @returns {{name: *, version: *}} The manifest's `name` and `version` values.
 * @throws {SyntaxError} When `contents` is not valid JSON.
 * @throws {Error} When the manifest lacks a truthy `name` or `version`.
 */
function parseNodePackage(contents) {
  const manifest = JSON.parse(contents);
  // `?.` covers a manifest whose entire content is `null`, so that case reports
  // the same "missing name/version" error as any other incomplete manifest.
  if (!manifest?.name || !manifest.version) {
    throw new Error('package.json missing name/version');
  }
  return { name: manifest.name, version: manifest.version };
}
|  | ||||
/**
 * Pull the distribution name and version out of a dist-info `METADATA` file.
 *
 * Lines are scanned top to bottom. The first `Name:` and `Version:` lines that
 * yield a non-empty value win, and scanning stops once both are known.
 *
 * @param {string} contents - Raw text of the METADATA file (LF or CRLF).
 * @returns {{name: string, version: string}} Trimmed header values.
 * @throws {Error} When either header is absent or empty.
 */
function parsePythonMetadata(contents) {
  const NAME_PREFIX = 'Name:';
  const VERSION_PREFIX = 'Version:';
  let name;
  let version;

  for (const headerLine of contents.split(/\r?\n/)) {
    if (headerLine.startsWith(NAME_PREFIX)) {
      if (!name) {
        name = headerLine.slice(NAME_PREFIX.length).trim();
      }
    } else if (headerLine.startsWith(VERSION_PREFIX)) {
      if (!version) {
        version = headerLine.slice(VERSION_PREFIX.length).trim();
      }
    }

    // Both fields found: the remainder of the file is irrelevant.
    if (name && version) {
      return { name, version };
    }
  }

  throw new Error('METADATA missing Name/Version headers');
}
|  | ||||
/**
 * Render one result as a fixed-width table line for stdout.
 *
 * Layout: id left-aligned in 28 columns, sample count right-aligned in 5, then
 * mean / p95 / max with two decimals, each right-aligned in 9. Cells are
 * separated by " | ".
 *
 * @param {{id: string, sampleCount: number, meanMs: number, p95Ms: number, maxMs: number}} row
 * @returns {string} The formatted line, without a trailing newline.
 */
function formatRow(row) {
  const millis = (value) => value.toFixed(2).padStart(9);

  const idCell = row.id.padEnd(28);
  const countCell = row.sampleCount.toString().padStart(5);
  const meanCell = millis(row.meanMs);
  const p95Cell = millis(row.p95Ms);
  const maxCell = millis(row.maxMs);

  return `${idCell} | ${countCell} | ${meanCell} | ${p95Cell} | ${maxCell}`;
}
|  | ||||
/**
 * Linearly interpolated percentile of an ascending list of numbers.
 *
 * The fractional rank is `percentile / 100 * (length - 1)`. The result is the
 * value at the floor of that rank plus the fractional share of the gap to the
 * next value.
 *
 * @param {number[]} sortedDurations - Values sorted ascending by the caller.
 * @param {number} percentile - Requested percentile on a 0-100 scale.
 * @returns {number} The interpolated value, or 0 for an empty list.
 */
function percentile(sortedDurations, percentile) {
  const count = sortedDurations.length;
  if (count === 0) {
    return 0;
  }

  const rank = (percentile / 100) * (count - 1);
  const lower = Math.floor(rank);
  const upper = Math.ceil(rank);
  const floorValue = sortedDurations[lower];

  // No neighbour above to interpolate towards.
  if (upper >= count) {
    return floorValue;
  }

  const weight = rank - lower;
  return floorValue + weight * (sortedDurations[upper] - floorValue);
}
|  | ||||
/**
 * Time `iterations` complete passes over one scenario. A pass walks the tree,
 * reads every matching file and runs the scenario's parser on it.
 *
 * @param {{id: string, matcher: string, parser: string}} scenario - Config entry.
 * @param {string} scenarioRoot - Resolved directory to scan.
 * @param {number} iterations - Number of timed passes.
 * @returns {{durations: number[], sampleCount: number}} Per-pass wall-clock
 *   times in milliseconds, sorted ascending, and the file count of the last pass.
 * @throws {Error} When a pass matches no files or the parser name is unknown.
 */
function measureScenario(scenario, scenarioRoot, iterations) {
  const matcher = globToRegExp(scenario.matcher.replace(/\\/g, '/'));
  const durations = [];
  let sampleCount = 0;

  for (let attempt = 0; attempt < iterations; attempt++) {
    const start = performance.now();
    const files = walkFiles(scenarioRoot, matcher);
    if (files.length === 0) {
      throw new Error(`Scenario ${scenario.id} matched no files`);
    }

    for (const filePath of files) {
      const contents = fs.readFileSync(filePath, 'utf8');
      if (scenario.parser === 'node') {
        parseNodePackage(contents);
      } else if (scenario.parser === 'python') {
        parsePythonMetadata(contents);
      } else {
        throw new Error(`Unknown parser ${scenario.parser} for scenario ${scenario.id}`);
      }
    }

    durations.push(performance.now() - start);
    sampleCount = files.length;
  }

  durations.sort((a, b) => a - b);
  return { durations, sampleCount };
}

/**
 * Serialise benchmark results as CSV text: a header line plus one line per
 * result, timings with four decimals, terminated by a newline.
 *
 * @param {Array<{id: string, iterations: number, sampleCount: number, meanMs: number, p95Ms: number, maxMs: number}>} results
 * @returns {string} CSV document.
 */
function formatCsv(results) {
  const header = 'scenario,iterations,sample_count,mean_ms,p95_ms,max_ms';
  const lines = results.map((row) =>
    [
      row.id,
      row.iterations,
      row.sampleCount,
      row.meanMs.toFixed(4),
      row.p95Ms.toFixed(4),
      row.maxMs.toFixed(4),
    ].join(',')
  );
  return `${[header, ...lines].join('\n')}\n`;
}

/**
 * Harness entry point: run every configured scenario, print a summary table to
 * stdout, optionally write the CSV named by `--out`, and set a non-zero exit
 * code when any scenario's slowest pass exceeds the threshold.
 *
 * Command-line values take precedence over the config file, which takes
 * precedence over the built-in defaults (5 iterations, 5000 ms).
 *
 * @throws {Error} On invalid settings, a scenario root outside the repo root or
 *   missing on disk, or any error raised while measuring a scenario.
 */
function main() {
  const args = parseArgs(process.argv);
  const cfg = loadConfig(args.config);
  const iterations = args.iterations ?? cfg.iterations ?? 5;
  const thresholdMs = args.thresholdMs ?? cfg.thresholdMs ?? 5000;

  // With zero (or NaN) iterations there are no durations, so mean/max would be
  // NaN/undefined and formatting the results would crash much later.
  if (!Number.isInteger(iterations) || iterations < 1) {
    throw new Error(`iterations must be a positive integer (got ${iterations})`);
  }
  if (typeof thresholdMs !== 'number' || Number.isNaN(thresholdMs) || thresholdMs < 0) {
    throw new Error(`thresholdMs must be a non-negative number (got ${thresholdMs})`);
  }

  const results = [];
  const failures = [];

  for (const scenario of cfg.scenarios) {
    const scenarioRoot = path.resolve(args.repoRoot, scenario.root);
    if (!ensureWithinRepo(args.repoRoot, scenarioRoot)) {
      throw new Error(`Scenario root ${scenario.root} escapes repo root ${args.repoRoot}`);
    }
    if (!fs.existsSync(scenarioRoot)) {
      throw new Error(`Scenario root ${scenarioRoot} does not exist`);
    }

    const { durations, sampleCount } = measureScenario(scenario, scenarioRoot, iterations);
    const mean = durations.reduce((acc, value) => acc + value, 0) / durations.length;
    const p95 = percentile(durations, 95);
    const max = durations[durations.length - 1];

    // The budget applies to the slowest pass, not to the mean.
    if (max > thresholdMs) {
      failures.push(`${scenario.id} exceeded threshold: ${(max).toFixed(2)} ms > ${thresholdMs} ms`);
    }

    results.push({
      id: scenario.id,
      label: scenario.label,
      sampleCount,
      meanMs: mean,
      p95Ms: p95,
      maxMs: max,
      iterations,
    });
  }

  // Header and rule are built from the column widths formatRow uses for the
  // data rows (28 / 5 / 9 / 9 / 9) so that all three line up.
  const widths = [28, 5, 9, 9, 9];
  const titles = ['Scenario', 'Count', 'Mean(ms)', 'P95(ms)', 'Max(ms)'];
  console.log(
    titles
      .map((title, column) => (column === 0 ? title.padEnd(widths[column]) : title.padStart(widths[column])))
      .join(' | ')
  );
  console.log(widths.map((width) => '-'.repeat(width)).join(' | '));
  for (const row of results) {
    console.log(formatRow(row));
  }

  if (args.out) {
    fs.writeFileSync(args.out, formatCsv(results), 'utf8');
  }

  if (failures.length > 0) {
    console.error('\nPerformance threshold exceeded:');
    for (const failure of failures) {
      console.error(` - ${failure}`);
    }
    process.exitCode = 1;
  }
}
|  | ||||
// Run the harness only when this file is executed directly, not when it is
// loaded with require() from another module.
if (require.main === module) {
  try {
    main();
  } catch (err) {
    // Print just the message for Error instances; anything else is printed as-is.
    console.error(err instanceof Error ? err.message : err);
    // Set the exit code instead of calling process.exit(): a forced exit can cut
    // off output that is still being written to stdout/stderr. This also matches
    // how main() reports a threshold failure.
    process.exitCode = 1;
  }
}
		Reference in New Issue
	
	Block a user