148 lines
4.9 KiB
HTML
148 lines
4.9 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>Stella Ops · Reachability Benchmark</title>
|
|
<style>
  /* ---- Design tokens -------------------------------------------------
     Dark colour palette and the two font stacks used across the page.
     The stacks only name fonts by family; this stylesheet declares no
     font files to download. */
  :root {
    --bg: #0f172a;
    --panel: #111827;
    --accent: #22d3ee;
    --muted: #9ca3af;
    --text: #e5e7eb;
    --mono: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, monospace;
    --sans: "Inter", "Segoe UI", system-ui, -apple-system, sans-serif;
  }

  /* Padding and borders count towards every element's declared size. */
  * {
    box-sizing: border-box;
  }

  /* ---- Page chrome and typography ------------------------------------ */
  body {
    margin: 0;
    background: var(--bg);
    color: var(--text);
    font-family: var(--sans);
    line-height: 1.5;
    padding: 24px;
  }

  header {
    margin-bottom: 24px;
  }

  h1 {
    margin: 0 0 8px;
    font-size: 28px;
  }

  h2 {
    margin-top: 32px;
    margin-bottom: 12px;
    font-size: 20px;
  }

  /* Body copy is rendered in the muted colour; headings keep --text. */
  p {
    margin: 6px 0;
    color: var(--muted);
  }

  code,
  pre {
    font-family: var(--mono);
  }

  /* ---- Layout containers --------------------------------------------- */
  /* Card surface shared by every content section. */
  .panel {
    background: var(--panel);
    border: 1px solid #1f2937;
    border-radius: 10px;
    padding: 16px;
    margin-bottom: 16px;
  }

  /* Single column by default ... */
  .grid {
    display: grid;
    gap: 12px;
  }

  /* ... two equal-width columns once the viewport is at least 720px wide. */
  @media (min-width: 720px) {
    .grid {
      grid-template-columns: repeat(2, minmax(0, 1fr));
    }
  }

  /* ---- Leaderboard table --------------------------------------------- */
  .leaderboard table {
    width: 100%;
    border-collapse: collapse;
  }

  .leaderboard th,
  .leaderboard td {
    padding: 8px;
    border-bottom: 1px solid #1f2937;
    text-align: left;
    font-size: 14px;
  }

  .leaderboard th {
    color: var(--muted);
    font-weight: 600;
  }

  /* ---- Small components ---------------------------------------------- */
  /* Rounded label tinted with the accent colour. */
  .pill {
    display: inline-block;
    padding: 2px 8px;
    border-radius: 999px;
    background: rgba(34, 211, 238, 0.15);
    color: var(--accent);
    font-size: 12px;
    font-weight: 600;
  }

  /* Yellow background/text colour pair for warning-style labels. */
  .badge-warning {
    background: rgba(234,179,8,0.18);
    color: #facc15;
  }

  /* Shared look for the ordered and unordered lists inside panels. */
  .list {
    padding-left: 18px;
    color: var(--muted);
  }
</style>
|
|
</head>
|
|
<body>
|
|
<!-- Page banner: status pill, page title and a one-line summary. -->
<header>
  <div class="pill">Offline ready</div>
  <h1>Stella Ops · Reachability Benchmark</h1>
  <p>Deterministic, reproducible cases and scoring harness for reachability analysis tools.</p>
</header>
|
|
|
|
<!-- Quick Start: the ordered steps to build the cases, run an analyzer,
     score a submission and compare several submissions. -->
<section class="panel">
  <h2>Quick Start</h2>
  <ol class="list">
    <li>Build cases deterministically: <code>python tools/build/build_all.py --cases cases</code></li>
    <li>Run your analyzer and emit <code>submission.json</code> in <code>schemas/submission.schema.json</code> format.</li>
    <!-- The file-name placeholder below is written with &lt; and &gt; entities.
         A raw <aggregate> would be parsed as an unknown element and the
         placeholder would disappear from the displayed command. -->
    <li>Score: <code>tools/scorer/rb_score.py --truth benchmark/truth/&lt;aggregate&gt;.json --submission submission.json</code></li>
    <li>Compare: <code>tools/scorer/rb_compare.py --truth ... --submissions submission.json baselines/*/submission.json --output leaderboard.json</code></li>
  </ol>
  <p>All tooling is offline-friendly; no network calls or external fonts.</p>
</section>
|
|
|
|
<!-- Two cards laid out by the .grid rules: stacked on narrow viewports,
     side by side from 720px upwards. -->
<section class="grid">
  <!-- Card 1: repository paths that make up the benchmark. -->
  <div class="panel">
    <h2>Downloads</h2>
    <ul class="list">
      <li>Cases: <code>cases/</code></li>
      <li>Schemas: <code>schemas/</code></li>
      <li>Truth: <code>benchmark/truth/</code></li>
      <li>Baselines: <code>baselines/</code> (Semgrep, Stella, CodeQL)</li>
      <li>CI script: <code>ci/run-ci.sh</code></li>
    </ul>
  </div>

  <!-- Card 2: rules for keeping benchmark runs reproducible. -->
  <div class="panel">
    <h2>Determinism Checklist</h2>
    <ul class="list">
      <li>Set <code>SOURCE_DATE_EPOCH</code> in builds.</li>
      <li>Disable tool telemetry/version checks.</li>
      <li>Sort cases and sinks before emission.</li>
      <li>Keep outputs local; no registry or network pulls.</li>
    </ul>
  </div>
</section>
|
|
|
|
<!-- Leaderboard: #lb-note is the status line and #lb-table is the empty
     mount point; the inline script at the end of the page fills both once
     its fetch of leaderboard.json settles. -->
<section class="panel leaderboard">
  <h2>Leaderboard</h2>
  <!-- role="status" makes the note a polite live region, so the text that
       the script swaps in asynchronously is announced by screen readers. -->
  <p id="lb-note" class="muted" role="status">Looking for <code>leaderboard.json</code> in this directory…</p>
  <div id="lb-table"></div>
</section>
|
|
|
|
<script>
  // Leaderboard loader.
  //
  // Fetches leaderboard.json (resolved relative to this page) and renders its
  // `entries` array as a table inside #lb-table; #lb-note carries the status
  // line. Every value taken from the JSON file is written through
  // textContent, never innerHTML, so markup inside a name or tool string is
  // displayed literally instead of being parsed and executed.
  const note = document.getElementById('lb-note');
  const tableHost = document.getElementById('lb-table');

  // Format a metric with four decimals. Anything that is not a finite number
  // becomes "n/a" so one incomplete entry cannot abort the whole render.
  const formatMetric = value =>
    typeof value === 'number' && Number.isFinite(value) ? value.toFixed(4) : 'n/a';

  // Turn an optional field into display text; null/undefined become "n/a".
  const formatText = value =>
    value === undefined || value === null ? 'n/a' : String(value);

  // Tool name and version joined by a space, skipping whichever is missing.
  const formatTool = entry =>
    [entry.tool_name, entry.tool_version]
      .filter(part => part !== undefined && part !== null && part !== '')
      .join(' ') || 'n/a';

  // One item per table column: [heading, (entry, index) => cell text].
  const columns = [
    ['#', (entry, index) => String(index + 1)],
    ['Name', entry => formatText(entry.name)],
    ['Tool', formatTool],
    ['F1', entry => formatMetric(entry.f1)],
    ['P', entry => formatMetric(entry.precision)],
    ['R', entry => formatMetric(entry.recall)],
    ['Det', entry => formatMetric(entry.determinism_rate)],
    ['Explain', entry => formatMetric(entry.explainability_avg)]
  ];

  // Build the <table> element for the given entries using DOM APIs only.
  // Entries are shown in the order they appear in the file.
  const buildTable = entries => {
    const table = document.createElement('table');

    const headRow = table.createTHead().insertRow();
    columns.forEach(([label]) => {
      const th = document.createElement('th');
      th.scope = 'col'; // associates the heading with its column for assistive tech
      th.textContent = label;
      headRow.appendChild(th);
    });

    const tbody = table.createTBody();
    entries.forEach((raw, index) => {
      // A null or non-object entry is rendered as a row of "n/a" cells.
      const entry = raw !== null && typeof raw === 'object' ? raw : {};
      const row = tbody.insertRow();
      columns.forEach(([, cellText]) => {
        row.insertCell().textContent = cellText(entry, index);
      });
    });

    return table;
  };

  fetch('leaderboard.json')
    // Non-2xx responses (e.g. 404) are routed to the catch handler below.
    .then(r => r.ok ? r.json() : Promise.reject(r.status))
    .then(data => {
      const entries = data && Array.isArray(data.entries) ? data.entries : null;
      if (!entries) {
        // The file exists and parsed, but lacks the expected shape. Say so
        // instead of claiming the file is missing. Static markup only.
        note.innerHTML = 'Found <code>leaderboard.json</code>, but it has no <code>entries</code> array. Re-run <code>ci/run-ci.sh</code> to regenerate.';
        return;
      }

      note.textContent = `Truth version: ${data.truth_version || 'n/a'} · Entries: ${entries.length}`;

      // Replace any previous content of the mount point with the new table.
      tableHost.textContent = '';
      tableHost.appendChild(buildTable(entries));
    })
    .catch(() => {
      // Reached on network failure, non-OK HTTP status or unparsable JSON.
      // The string is a fixed literal, so innerHTML is safe here.
      note.innerHTML = 'No <code>leaderboard.json</code> found yet. Run <code>ci/run-ci.sh</code> to generate.';
    });
</script>
|
|
</body>
|
|
</html>
|