Files
StellaOps Bot 909d9b6220
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Policy Lint & Smoke / policy-lint (push) Has been cancelled
up
2025-12-01 21:16:22 +02:00

148 lines
4.9 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Stella Ops · Reachability Benchmark</title>
<style>
/* ---- Theme tokens: colours and font stacks referenced through var() below ---- */
:root {
  --bg: #0f172a;
  --panel: #111827;
  --accent: #22d3ee;
  --muted: #9ca3af;
  --text: #e5e7eb;
  --mono: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace;
  --sans: "Inter", "Segoe UI", system-ui, -apple-system, sans-serif;
}

/* ---- Base elements ---- */
* {
  box-sizing: border-box;
}

body {
  margin: 0;
  background: var(--bg);
  color: var(--text);
  font-family: var(--sans);
  line-height: 1.5;
  padding: 24px;
}

header {
  margin-bottom: 24px;
}

h1 {
  margin: 0 0 8px;
  font-size: 28px;
}

h2 {
  margin-top: 32px;
  margin-bottom: 12px;
  font-size: 20px;
}

/* Paragraphs use the muted colour rather than the body text colour. */
p {
  margin: 6px 0;
  color: var(--muted);
}

code,
pre {
  font-family: var(--mono);
}

/* ---- Layout: bordered card and the grid that holds cards ---- */
.panel {
  background: var(--panel);
  border: 1px solid #1f2937;
  border-radius: 10px;
  padding: 16px;
  margin-bottom: 16px;
}

/* Single column by default; two equal columns from 720px viewport width. */
.grid {
  display: grid;
  gap: 12px;
}

@media (min-width: 720px) {
  .grid {
    grid-template-columns: repeat(2, minmax(0, 1fr));
  }
}

/* ---- Leaderboard table ---- */
.leaderboard table {
  width: 100%;
  border-collapse: collapse;
}

.leaderboard th,
.leaderboard td {
  padding: 8px;
  border-bottom: 1px solid #1f2937;
  text-align: left;
  font-size: 14px;
}

.leaderboard th {
  color: var(--muted);
  font-weight: 600;
}

/* ---- Small components ---- */
/* Rounded label tinted with the accent colour. */
.pill {
  display: inline-block;
  padding: 2px 8px;
  border-radius: 999px;
  background: rgba(34, 211, 238, 0.15);
  color: var(--accent);
  font-size: 12px;
  font-weight: 600;
}

/* Amber background/text pair. */
.badge-warning {
  background: rgba(234,179,8,0.18);
  color: #facc15;
}

/* Shared by the ordered and unordered lists inside panels. */
.list {
  padding-left: 18px;
  color: var(--muted);
}
</style>
</head>
<body>
<header>
  <!-- Page banner: status pill, title, and one-line summary. -->
  <div class="pill">Offline ready</div>
  <h1>Stella Ops · Reachability Benchmark</h1>
  <p>Deterministic, reproducible cases and scoring harness for reachability analysis tools.</p>
</header>
<section class="panel">
  <!-- Quick Start: ordered steps from building cases to comparing submissions. -->
  <h2>Quick Start</h2>
  <ol class="list">
    <li>Build cases deterministically: <code>python tools/build/build_all.py --cases cases</code></li>
    <li>Run your analyzer and emit <code>submission.json</code> in <code>schemas/submission.schema.json</code> format.</li>
    <li>Score: <code>tools/scorer/rb_score.py --truth benchmark/truth/&lt;aggregate&gt;.json --submission submission.json</code></li>
    <li>Compare: <code>tools/scorer/rb_compare.py --truth ... --submissions submission.json baselines/*/submission.json --output leaderboard.json</code></li>
  </ol>
  <p>All tooling is offline-friendly; no network calls or external fonts.</p>
</section>
<!-- Layout-only wrapper for the two cards below. It has no heading of its own,
     so it is a div rather than a section; styling comes from the .grid class. -->
<div class="grid">
  <!-- Card: repository paths for each benchmark artefact. -->
  <div class="panel">
    <h2>Downloads</h2>
    <ul class="list">
      <li>Cases: <code>cases/</code></li>
      <li>Schemas: <code>schemas/</code></li>
      <li>Truth: <code>benchmark/truth/</code></li>
      <li>Baselines: <code>baselines/</code> (Semgrep, Stella, CodeQL)</li>
      <li>CI script: <code>ci/run-ci.sh</code></li>
    </ul>
  </div>
  <!-- Card: practices for keeping runs reproducible. -->
  <div class="panel">
    <h2>Determinism Checklist</h2>
    <ul class="list">
      <li>Set <code>SOURCE_DATE_EPOCH</code> in builds.</li>
      <li>Disable tool telemetry/version checks.</li>
      <li>Sort cases and sinks before emission.</li>
      <li>Keep outputs local; no registry or network pulls.</li>
    </ul>
  </div>
</div>
<section class="panel leaderboard">
  <h2>Leaderboard</h2>
  <!-- Status line. The inline script replaces this text after it tries to fetch
       leaderboard.json; role="status" makes it a polite live region so assistive
       technology announces the change. -->
  <p id="lb-note" class="muted" role="status">Looking for <code>leaderboard.json</code> in this directory…</p>
  <!-- Mount point: the inline script inserts the leaderboard table here. -->
  <div id="lb-table"></div>
</section>
<script>
// Leaderboard loader: fetches leaderboard.json relative to this page and renders
// its entries as a table inside #lb-table, with a summary line in #lb-note.
// If the file cannot be fetched or parsed, #lb-note shows a "not found" hint.
const note = document.getElementById('lb-note');
const tableHost = document.getElementById('lb-table');
fetch('leaderboard.json')
  // Non-2xx responses (e.g. 404) are turned into rejections so they reach catch().
  .then(r => r.ok ? r.json() : Promise.reject(r.status))
  .then(data => {
    // A payload without an entries array is shown as an empty leaderboard
    // instead of throwing and being misreported as "file not found".
    const entries = Array.isArray(data && data.entries) ? data.entries : [];
    const truthVersion = (data && data.truth_version) || 'n/a';
    note.textContent = `Truth version: ${truthVersion} · Entries: ${entries.length}`;

    // Missing text fields render as empty cells; missing or non-numeric
    // scores render as "n/a" rather than aborting the whole table.
    const text = v => (v === undefined || v === null ? '' : String(v));
    const score = v => (typeof v === 'number' && Number.isFinite(v) ? v.toFixed(4) : 'n/a');

    // The table is built with DOM APIs and textContent so that values read
    // from the JSON file are never parsed as HTML.
    const table = document.createElement('table');
    const headRow = table.createTHead().insertRow();
    ['#', 'Name', 'Tool', 'F1', 'P', 'R', 'Det', 'Explain'].forEach(label => {
      const th = document.createElement('th');
      th.scope = 'col';
      th.textContent = label;
      headRow.appendChild(th);
    });

    const body = table.createTBody();
    entries.forEach((entry, i) => {
      const e = entry || {};
      const row = body.insertRow();
      [
        i + 1, // rank is the 1-based position in the entries array
        text(e.name),
        `${text(e.tool_name)} ${text(e.tool_version)}`,
        score(e.f1),
        score(e.precision),
        score(e.recall),
        score(e.determinism_rate),
        score(e.explainability_avg),
      ].forEach(value => {
        row.insertCell().textContent = value;
      });
    });

    // Replace whatever was rendered previously.
    tableHost.textContent = '';
    tableHost.appendChild(table);
  })
  .catch(err => {
    // Keep the user-facing hint, but leave a trace for debugging.
    console.warn('leaderboard.json could not be loaded or rendered:', err);
    note.innerHTML = 'No <code>leaderboard.json</code> found yet. Run <code>ci/run-ci.sh</code> to generate.';
  });
</script>
</body>
</html>