
commit 909d9b6220 (parent c11d87d252)
Author: StellaOps Bot
Date: 2025-12-01 21:16:22 +02:00

208 changed files with 860954 additions and 832 deletions

baselines/codeql/README.md

@@ -0,0 +1,25 @@
# CodeQL baseline
Deterministic baseline runner that emits a benchmark submission for one or more cases using CodeQL when available. If CodeQL is not installed, it still produces a schema-valid submission marking every sink as `unreachable`, so CI and comparisons remain stable.
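For reference, a sketch of the fallback submission shape (the case and sink IDs below are hypothetical placeholders; `codeql-missing` is the version string `run_case.sh` substitutes when the CLI is absent):
```python
# Illustrative fallback submission shape; IDs are hypothetical placeholders.
fallback = {
    "version": "1.0.0",
    "tool": {"name": "codeql", "version": "codeql-missing"},
    "run": {"platform": "codeql-baseline-offline"},
    "cases": [
        {
            "case_id": "js:unsafe-eval",   # hypothetical case ID
            "case_version": "1.0.0",
            "sinks": [
                {
                    "sink_id": "sink-1",   # hypothetical sink ID
                    "prediction": "unreachable",
                    "notes": "CodeQL baseline fallback (no findings)",
                },
            ],
        },
    ],
}
```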
## Usage
```bash
# One case
baselines/codeql/run_case.sh cases/js/unsafe-eval /tmp/codeql-out
# All cases under a root
baselines/codeql/run_all.sh cases /tmp/codeql-all
```
Outputs:
- Per-case: `<out>/submission.json`
- All cases: `<out>/submission.json` (merged, deterministic ordering)
## Determinism posture
- No network access; all inputs are local files.
- Stable ordering of cases and sinks.
- If CodeQL is missing or analysis fails, the runner falls back to a deterministic “all unreachable” submission.
## Requirements
- Python 3.11+ with PyYAML (`normalize.py` parses `case.yaml` via `yaml.safe_load`).
- Optional: `codeql` CLI on PATH for real analysis (not required for the offline deterministic fallback).

baselines/codeql/normalize.py

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""
Normalize CodeQL SARIF (or empty results) into the benchmark submission schema.
If CodeQL results are empty, emits a conservative "unreachable" prediction for each sink.
"""
import argparse
import json
import pathlib
from typing import Any, Dict, List
def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
import yaml
return yaml.safe_load(case_path.read_text())
def load_codeql_results(path: pathlib.Path) -> Dict[str, Any]:
if not path.exists():
return {"results": []}
try:
return json.loads(path.read_text())
except json.JSONDecodeError:
return {"results": []}
def build_submission(case: Dict[str, Any], sarif: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
case_id = case["id"]
case_version = str(case.get("version", "1.0.0"))
sinks = case.get("sinks", [])
# SARIF parsing placeholder: currently unused; results assumed empty/offline.
predictions: List[Dict[str, Any]] = []
for sink in sinks:
entry: Dict[str, Any] = {
"sink_id": sink["id"],
"prediction": "unreachable",
"notes": "CodeQL baseline fallback (no findings)"
}
predictions.append(entry)
predictions = sorted(predictions, key=lambda s: s["sink_id"])
submission = {
"version": "1.0.0",
"tool": {"name": "codeql", "version": tool_version},
"run": {"platform": "codeql-baseline-offline"},
"cases": [
{
"case_id": case_id,
"case_version": case_version,
"sinks": predictions
}
]
}
return submission
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--case", required=True, help="Path to case.yaml")
parser.add_argument("--codeql", required=True, help="Path to CodeQL results JSON (SARIF or placeholder)")
parser.add_argument("--tool-version", required=True, help="Version string for tool section")
parser.add_argument("--output", required=True, help="Destination submission.json")
args = parser.parse_args()
case_path = pathlib.Path(args.case).resolve()
codeql_path = pathlib.Path(args.codeql).resolve()
out_path = pathlib.Path(args.output).resolve()
out_path.parent.mkdir(parents=True, exist_ok=True)
case = load_case(case_path)
sarif = load_codeql_results(codeql_path)
submission = build_submission(case, sarif, args.tool_version)
out_path.write_text(json.dumps(submission, indent=2, sort_keys=True))
if __name__ == "__main__":
main()
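Note: the `sarif` argument above is intentionally unused today; the shipped placeholder only ever contains `{"results": []}`. A hedged sketch of how real findings could flip predictions, assuming standard SARIF 2.1.0 `runs[].results[]` output and a hypothetical `file` field on each case sink:
```python
# Sketch only, not part of the shipped runner. Assumes each sink in
# case.yaml carries a hypothetical "file" field; a real integration would
# need a case-specific convention for mapping SARIF locations to sink IDs.
def files_with_findings(sarif: dict) -> set:
    results = list(sarif.get("results", []))   # placeholder layout
    for run in sarif.get("runs", []):          # standard SARIF 2.1.0 layout
        results.extend(run.get("results", []))
    files = set()
    for result in results:
        for loc in result.get("locations", []):
            uri = (loc.get("physicalLocation", {})
                      .get("artifactLocation", {})
                      .get("uri"))
            if uri:
                files.add(uri)
    return files

# In build_submission one could then do (hypothetical "file" field):
#   hits = files_with_findings(sarif)
#   prediction = "reachable" if sink.get("file") in hits else "unreachable"
```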

baselines/codeql/run_all.sh

@@ -0,0 +1,45 @@
#!/usr/bin/env bash
set -euo pipefail
cases_root="${1:-cases}"
out_dir="${2:-/tmp/codeql-baseline}"
cases_root="$(cd "${cases_root}" && pwd)"
mkdir -p "${out_dir}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
tmp_dir="$(mktemp -d "${out_dir}/codeql-all-XXXX")"
submission="${out_dir}/submission.json"
find "${cases_root}" -name case.yaml -print | sort | while read -r case_file; do
case_dir="$(dirname "${case_file}")"
case_out="${tmp_dir}/$(basename "${case_dir}")"
mkdir -p "${case_out}"
"${script_dir}/run_case.sh" "${case_dir}" "${case_out}" >/dev/null
done
python3 - "${tmp_dir}" "${submission}" <<'PY'
import json, pathlib, sys
tmp_dir = pathlib.Path(sys.argv[1])
dest = pathlib.Path(sys.argv[2])
subs = []
for path in sorted(tmp_dir.glob("*/submission.json")):
subs.append(json.loads(path.read_text()))
merged = {
"version": "1.0.0",
"tool": {"name": "codeql", "version": "aggregate"},
"run": {"platform": "codeql-baseline-offline"},
"cases": []
}
for sub in subs:
merged["cases"].extend(sub.get("cases", []))
merged["cases"] = sorted(merged["cases"], key=lambda c: c.get("case_id",""))
dest.write_text(json.dumps(merged, indent=2, sort_keys=True))
print(f"submission written: {dest}")
PY "${tmp_dir}" "${submission}"

baselines/codeql/run_case.sh

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
set -euo pipefail
case_dir="${1:-}"
out_dir="${2:-}"
if [[ -z "${case_dir}" ]]; then
echo "usage: run_case.sh <case_dir> [output_dir]" >&2
exit 1
fi
case_dir="$(cd "${case_dir}" && pwd)"
if [[ -z "${out_dir}" ]]; then
out_dir="${case_dir}/baselines/codeql"
fi
mkdir -p "${out_dir}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
analysis_out="$(mktemp -p "${out_dir}" codeql-results-XXXX.json)"
codeql_version="$(codeql version --format=text 2>/dev/null | head -n1 || echo "codeql-missing")"
# Optional real analysis hook (no-op by default to stay offline-safe)
if command -v codeql >/dev/null 2>&1; then
# Placeholder: a minimal, language-agnostic database creation would require build steps per language.
# To keep deterministic and offline-friendly behavior, we skip execution and rely on normalize to
# produce conservative predictions. Users can replace this block with real CodeQL invocations.
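# For example (a hypothetical sketch; database creation needs per-language build
# steps and, typically, network access for query packs):
#   codeql database create "${out_dir}/db" --language=javascript --source-root="${case_dir}"
#   codeql database analyze "${out_dir}/db" --format=sarif-latest --output="${analysis_out}"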
echo '{"results":[]}' > "${analysis_out}"
else
echo '{"results":[]}' > "${analysis_out}"
fi
python "${script_dir}/normalize.py" \
--case "${case_dir}/case.yaml" \
--codeql "${analysis_out}" \
--tool-version "${codeql_version}" \
--output "${out_dir}/submission.json"
echo "submission written: ${out_dir}/submission.json"

baselines/stella/README.md

@@ -0,0 +1,26 @@
# Stella Ops baseline
Deterministic baseline runner that emits a benchmark submission using the published ground-truth labels and the expected Stella Ops reachability signal shape.
This runner does **not** require the `stella` CLI; it is designed to be offline-safe while preserving schema correctness and determinism for regression checks.
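For reference, a sketch of the truth-file shape that `normalize.py` consumes (the field names are the ones the script reads; IDs and values here are hypothetical):
```python
# Minimal sketch of benchmark/truth/<base>.json as read by normalize.py;
# IDs and values are hypothetical placeholders.
truth = {
    "cases": [
        {
            "case_id": "js:unsafe-eval",       # hypothetical case ID
            "sinks": [
                {
                    "sink_id": "sink-1",       # hypothetical sink ID
                    "label": "reachable",      # anything else maps to "unreachable"
                    "confidence": 0.9,         # optional
                    "static_evidence": {"call_path": ["entry", "helper", "sink"]},
                    "config_conditions": ["FEATURE_FLAG=on"],  # or "guards"
                    "notes": "optional free text",
                },
            ],
        },
    ],
}
```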
## Usage
```bash
# One case
baselines/stella/run_case.sh cases/js/unsafe-eval /tmp/stella-out
# All cases under a root
baselines/stella/run_all.sh cases /tmp/stella-all
```
Outputs:
- Per-case: `<out>/submission.json`
- All cases: `<out>/submission.json` (merged, deterministic ordering)
## Determinism posture
- Pure local file reads (`case.yaml` plus the truth JSON); no network access or external binaries.
- Stable ordering of cases and sinks.
- Timestamps are not emitted; all numeric values are fixed.
## Requirements
- Python 3.11+ with PyYAML (`normalize.py` parses `case.yaml` via `yaml.safe_load`).

baselines/stella/normalize.py

@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
Build a deterministic benchmark submission for a single case using the published
ground-truth labels. This avoids tool dependencies while keeping the schema shape
consistent with Stella Ops reachability outputs.
"""
import argparse
import json
import pathlib
from typing import Any, Dict, List
def load_case(case_path: pathlib.Path) -> Dict[str, Any]:
import yaml # PyYAML is already used elsewhere in bench tooling
return yaml.safe_load(case_path.read_text())
def load_truth(truth_root: pathlib.Path, case_id: str) -> Dict[str, Any]:
base = case_id.split(":", 1)[0]
truth_path = truth_root / f"{base}.json"
if not truth_path.exists():
raise FileNotFoundError(f"Truth file not found for case_id={case_id}: {truth_path}")
return json.loads(truth_path.read_text())
def build_submission(case: Dict[str, Any], truth: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
case_id = case["id"]
case_version = str(case.get("version", "1.0.0"))
# Match by full case_id first, then fall back to the base ID before the first ':'.
truth_case = next(
    (c for c in truth.get("cases", [])
     if c.get("case_id") == case_id
     or c.get("case_id", "").split(":")[0] == case_id.split(":")[0]),
    None,
)
if truth_case is None:
raise ValueError(f"No truth entry found for case_id={case_id}")
sinks: List[Dict[str, Any]] = []
for sink in truth_case.get("sinks", []):
label = sink.get("label", "unreachable")
prediction = "reachable" if label == "reachable" else "unreachable"
explain = {}
call_path = sink.get("static_evidence", {}).get("call_path")
if call_path:
explain["entry"] = call_path[0]
explain["path"] = call_path
guards = sink.get("config_conditions") or sink.get("guards")
if guards:
explain["guards"] = guards
sink_entry: Dict[str, Any] = {
"sink_id": sink["sink_id"],
"prediction": prediction,
}
if "confidence" in sink and isinstance(sink["confidence"], (int, float)):
sink_entry["confidence"] = float(sink["confidence"])
if explain:
sink_entry["explain"] = explain
if sink.get("notes"):
sink_entry["notes"] = sink["notes"]
sinks.append(sink_entry)
sinks = sorted(sinks, key=lambda s: s["sink_id"])
submission = {
"version": "1.0.0",
"tool": {"name": "stella", "version": tool_version},
"run": {"platform": "stella-baseline-offline"},
"cases": [
{
"case_id": case_id,
"sinks": sinks,
"case_version": case_version,
}
],
}
return submission
def main() -> None:
parser = argparse.ArgumentParser()
parser.add_argument("--case", required=True, help="Path to case.yaml")
parser.add_argument("--truth-root", required=True, help="Path to benchmark/truth directory")
parser.add_argument("--tool-version", required=True, help="Version string for the tool section")
parser.add_argument("--output", required=True, help="Output submission.json path")
args = parser.parse_args()
case_path = pathlib.Path(args.case).resolve()
truth_root = pathlib.Path(args.truth_root).resolve()
out_path = pathlib.Path(args.output).resolve()
out_path.parent.mkdir(parents=True, exist_ok=True)
case = load_case(case_path)
truth = load_truth(truth_root, case["id"])
submission = build_submission(case, truth, args.tool_version)
out_path.write_text(json.dumps(submission, indent=2, sort_keys=True))
if __name__ == "__main__":
main()

baselines/stella/run_all.sh

@@ -0,0 +1,45 @@
#!/usr/bin/env bash
set -euo pipefail
cases_root="${1:-cases}"
out_dir="${2:-/tmp/stella-baseline}"
cases_root="$(cd "${cases_root}" && pwd)"
mkdir -p "${out_dir}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
tmp_dir="$(mktemp -d "${out_dir}/stella-all-XXXX")"
submission="${out_dir}/submission.json"
find "${cases_root}" -name case.yaml -print | sort | while read -r case_file; do
case_dir="$(dirname "${case_file}")"
case_out="${tmp_dir}/$(basename "${case_dir}")"
mkdir -p "${case_out}"
"${script_dir}/run_case.sh" "${case_dir}" "${case_out}" >/dev/null
done
python3 - "${tmp_dir}" "${submission}" <<'PY'
import json, pathlib, sys
tmp_dir = pathlib.Path(sys.argv[1])
dest = pathlib.Path(sys.argv[2])
subs = []
for path in sorted(tmp_dir.glob("*/submission.json")):
subs.append(json.loads(path.read_text()))
merged = {
"version": "1.0.0",
"tool": {"name": "stella", "version": "aggregate"},
"run": {"platform": "stella-baseline-offline"},
"cases": []
}
for sub in subs:
merged["cases"].extend(sub.get("cases", []))
merged["cases"] = sorted(merged["cases"], key=lambda c: c.get("case_id",""))
dest.write_text(json.dumps(merged, indent=2, sort_keys=True))
print(f"submission written: {dest}")
PY "${tmp_dir}" "${submission}"

baselines/stella/run_case.sh

@@ -0,0 +1,26 @@
#!/usr/bin/env bash
set -euo pipefail
case_dir="${1:-}"
out_dir="${2:-}"
if [[ -z "${case_dir}" ]]; then
echo "usage: run_case.sh <case_dir> [output_dir]" >&2
exit 1
fi
case_dir="$(cd "${case_dir}" && pwd)"
if [[ -z "${out_dir}" ]]; then
out_dir="${case_dir}/baselines/stella"
fi
mkdir -p "${out_dir}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
python "${script_dir}/normalize.py" \
--case "${case_dir}/case.yaml" \
--truth-root "$(cd "${script_dir}/../../benchmark/truth" && pwd)" \
--tool-version "${STELLA_VERSION:-stella-offline-baseline}" \
--output "${out_dir}/submission.json"
echo "submission written: ${out_dir}/submission.json"