feat: Add tests for RichGraphPublisher and RichGraphWriter
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled

- Implement unit tests for RichGraphPublisher to verify graph publishing to CAS.
- Implement unit tests for RichGraphWriter to ensure correct writing of canonical graphs and metadata.

feat: Implement AOC Guard validation logic

- Add AOC Guard validation logic to enforce document structure and field constraints.
- Introduce violation codes for various validation errors.
- Implement tests for AOC Guard to validate expected behavior.

feat: Create Console Status API client and service

- Implement ConsoleStatusClient for fetching console status and streaming run events.
- Create ConsoleStatusService to manage console status polling and event subscriptions.
- Add tests for ConsoleStatusClient to verify API interactions.

feat: Develop Console Status component

- Create ConsoleStatusComponent for displaying console status and run events.
- Implement UI for showing status metrics and handling user interactions.
- Add styles for console status display.

test: Add tests for Console Status store

- Implement tests for ConsoleStatusStore to verify event handling and state management.
This commit is contained in:
StellaOps Bot
2025-12-01 07:34:50 +02:00
parent 7df0677e34
commit c11d87d252
108 changed files with 4773 additions and 351 deletions

View File

@@ -0,0 +1,28 @@
# Semgrep baseline
Deterministic baseline runner that executes Semgrep against a single benchmark case and emits a submission payload in the benchmark schema.
## Usage
```bash
# Run for one case
SEMGREP_SEND_TELEMETRY=0 SEMGREP_ENABLE_VERSION_CHECK=0 \
baselines/semgrep/run_case.sh cases/js/unsafe-eval /tmp/semgrep-out
# Run for all cases under a root
SEMGREP_SEND_TELEMETRY=0 SEMGREP_ENABLE_VERSION_CHECK=0 \
baselines/semgrep/run_all.sh cases /tmp/semgrep-all
```
Outputs:
- Per-case: `<out>/submission.json`
- All cases: `<out>/submission.json` (merged, deterministic ordering)
## Requirements
- Semgrep CLI available on PATH. Tested with `semgrep >= 1.72`. Telemetry/version checks must be disabled for offline/deterministic runs.
- Python 3.11+ for normalization script.
## Determinism posture
- Telemetry and version checks are disabled by default through environment variables that the scripts export (see `run_case.sh`).
- Stable ordering of cases and sinks.
- No network access.
- If Semgrep is missing, the runner still produces a valid submission that marks every sink as `unreachable`, so the output remains schema-valid.

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
"""Normalize Semgrep JSON output into benchmark submission schema."""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Dict, List, Any
import yaml
def load_case(case_path: Path) -> Dict[str, Any]:
    """Load benchmark case metadata from a ``case.yaml`` file.

    Args:
        case_path: Path to the YAML case description.

    Returns:
        The parsed top-level mapping, or an empty dict when the document
        is empty.

    Raises:
        ValueError: If the top level of the document is not a mapping.
    """
    data = yaml.safe_load(case_path.read_text(encoding="utf-8"))
    if data is None:
        # An empty YAML document parses to None; callers expect a mapping.
        return {}
    if not isinstance(data, dict):
        raise ValueError(
            f"{case_path}: expected a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )
    return data
def load_semgrep(path: Path) -> Dict[str, Any]:
    """Read Semgrep's JSON report, falling back to an empty result set.

    Args:
        path: Location of the file holding Semgrep's ``--json`` output.

    Returns:
        The decoded JSON object. ``{"results": []}`` is returned instead
        when the file does not exist, cannot be decoded as UTF-8 JSON, or
        decodes to something other than a JSON object.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (FileNotFoundError, NotADirectoryError):
        # No report was produced at all.
        return {"results": []}
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; a truncated or garbled report counts as "no findings".
        return {"results": []}
    if not isinstance(data, dict):
        # Valid JSON such as `[]` or `null` is not a usable report.
        return {"results": []}
    return data
def sink_prediction(results: List[Dict[str, Any]], sink_file: str) -> Dict[str, Any]:
    """Predict reachability of a sink from Semgrep findings.

    Basic heuristic: the sink is "reachable" if any finding's ``path`` is
    the sink file, i.e. equals ``sink_file`` or ends with ``/sink_file``.

    Args:
        results: Entries of the ``results`` array from Semgrep's JSON output.
        sink_file: File name (or trailing path) of the sink's source file.

    Returns:
        ``{"prediction": "reachable", "confidence": 0.6, "explain": {...}}``
        when a finding matches, where the explain path points at the first
        matching finding's start (or end) line; otherwise
        ``{"prediction": "unreachable", "confidence": 0.4}``.
    """
    if not sink_file:
        # Every string ends with "", so an unknown sink file would otherwise
        # match every finding and be reported as reachable.
        return {"prediction": "unreachable", "confidence": 0.4}

    target = sink_file.replace("\\", "/")

    def touches_sink(finding: Any) -> bool:
        if not isinstance(finding, dict):
            return False
        found = str(finding.get("path") or "").replace("\\", "/")
        # Match on a path-component boundary so "notapp.js" != "app.js".
        return found == target or found.endswith("/" + target)

    first = next((r for r in results if touches_sink(r)), None)
    if first is None:
        return {"prediction": "unreachable", "confidence": 0.4}

    start = first.get("start") or {}
    end = first.get("end") or {}
    line = start.get("line") or end.get("line") or 0
    explain_path = [f"entry:{sink_file}:0", f"sink:{sink_file}:{line}"]
    return {"prediction": "reachable", "confidence": 0.6, "explain": {"path": explain_path}}
def build_submission(case_meta: Dict[str, Any], results: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    """Assemble the benchmark submission payload for a single case.

    Args:
        case_meta: Parsed ``case.yaml`` content. Reads ``sinks`` (a list of
            mappings with ``id`` and ``location.file``) and ``id`` or
            ``project`` for the case identifier.
        results: Parsed Semgrep JSON output; only ``results`` is read.
        tool_version: Version string recorded under ``tool.version``.

    Returns:
        A submission dict with one case whose sinks are ordered by sink id.
        Each sink carries ``sink_id``, ``prediction`` and ``confidence``,
        plus ``explain`` when the prediction provides one.
    """
    # An empty case.yaml or a missing report must not crash the runner.
    case_meta = case_meta or {}
    results = results or {}

    # Malformed (non-mapping) sink entries are treated as empty mappings so
    # the sort key and the lookups below cannot fail on them.
    sinks = [s if isinstance(s, dict) else {} for s in (case_meta.get("sinks") or [])]
    semgrep_results = results.get("results") or []

    sinks_out = []
    for sink in sorted(sinks, key=lambda s: s.get("id", "")):
        loc = sink.get("location")
        if not isinstance(loc, dict):
            loc = {}
        # Only the file name is compared against finding paths.
        sink_file = Path(str(loc.get("file") or "")).name
        pred = sink_prediction(semgrep_results, sink_file)
        entry = {
            "sink_id": sink.get("id", "unknown"),
            "prediction": pred["prediction"],
            "confidence": pred["confidence"],
        }
        if "explain" in pred:
            entry["explain"] = pred["explain"]
        sinks_out.append(entry)

    return {
        "version": "1.0.0",
        "tool": {"name": "semgrep", "version": tool_version},
        "run": {"platform": "local-semgrep-baseline"},
        "cases": [{
            "case_id": str(case_meta.get("id") or case_meta.get("project") or "unknown-case"),
            "sinks": sinks_out,
        }],
    }
def main() -> int:
    """Command-line entry point: normalize one Semgrep report.

    Parses ``--case``, ``--semgrep``, ``--tool-version`` and ``--output``,
    builds the submission for that case and writes it as indented,
    key-sorted JSON, creating the output's parent directory if needed.

    Returns:
        ``0`` on success.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--case", type=Path, required=True, help="Path to case.yaml")
    parser.add_argument("--semgrep", type=Path, required=True, help="Path to semgrep JSON output")
    parser.add_argument("--tool-version", type=str, default="unknown")
    parser.add_argument("--output", type=Path, required=True)
    opts = parser.parse_args()

    submission = build_submission(
        load_case(opts.case),
        load_semgrep(opts.semgrep),
        opts.tool_version,
    )

    destination = opts.output
    destination.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(submission, indent=2, sort_keys=True)
    destination.write_text(rendered, encoding="utf-8")
    return 0
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,34 @@
# Semgrep rules used by the baseline runner. Every rule is a purely
# syntactic match on call shapes; none of them performs data-flow analysis.
rules:
  # JavaScript / TypeScript: dynamic code evaluation calls.
  - id: semgrep.eval.js
    languages:
      - javascript
      - typescript
    message: "Potential eval / Function sink"
    severity: WARNING
    patterns:
      - pattern-either:
          - pattern: eval($EXPR)
          - pattern: Function($ARGS, $BODY)
          - pattern: vm.runInNewContext($EXPR, ...)

  # JavaScript / TypeScript: template rendering calls.
  - id: semgrep.template.js
    languages:
      - javascript
      - typescript
    message: "Template rendering with user-controlled input"
    severity: WARNING
    patterns:
      - pattern-either:
          - pattern: res.render($TEMPLATE, $CTX)
          - pattern: reply.view($TEMPLATE, $CTX)

  # Python: dynamic code evaluation calls.
  - id: semgrep.exec.py
    languages:
      - python
    message: "Potential exec/eval sink"
    severity: WARNING
    patterns:
      - pattern-either:
          - pattern: eval($EXPR)
          - pattern: exec($EXPR)

  # Python: template rendering calls.
  - id: semgrep.template.py
    languages:
      - python
    message: "Template rendering with user-controlled input"
    severity: WARNING
    patterns:
      - pattern-either:
          - pattern: render_template($NAME, **$KWARGS)
          - pattern: Template($X).render(...)

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Run the Semgrep baseline for every case under a root directory and merge
# the per-case submissions into a single, deterministically ordered
# submission.json.
#
# Usage: run_all.sh [cases_root] [out_dir]
#   cases_root  directory searched recursively for case.yaml (default: cases)
#   out_dir     directory receiving submission.json
#               (default: /tmp/semgrep-baseline)
# Environment:
#   PYTHON      interpreter used for the merge step (default: python3,
#               falling back to python)
set -euo pipefail

cases_root="${1:-cases}"
out_dir="${2:-/tmp/semgrep-baseline}"

python_bin="${PYTHON:-python3}"
command -v "${python_bin}" >/dev/null 2>&1 || python_bin="python"

cases_root="$(cd "${cases_root}" && pwd)"
mkdir -p "${out_dir}"
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
tmp_dir="$(mktemp -d "${out_dir}/semgrep-all-XXXX")"
# Per-case scratch output is only needed until the merge has completed.
trap 'rm -rf "${tmp_dir}"' EXIT
submission="${out_dir}/submission.json"

# Collect per-case submissions.
# LC_ALL=C keeps the case order independent of the caller's locale.
find "${cases_root}" -name case.yaml -print | LC_ALL=C sort | while IFS= read -r case_file; do
  case_dir="$(dirname "${case_file}")"
  # Key the scratch directory on the path relative to the root so that cases
  # sharing a leaf name (e.g. js/unsafe-eval and py/unsafe-eval) do not
  # overwrite each other.
  rel="${case_dir#"${cases_root}"}"
  rel="${rel#/}"
  case_out="${tmp_dir}/${rel:-_root}"
  mkdir -p "${case_out}"
  # stdin is redirected so the child cannot consume the list being read.
  "${script_dir}/run_case.sh" "${case_dir}" "${case_out}" >/dev/null </dev/null
done

# Merge deterministically.
# The arguments must precede the here-document: the terminator line has to
# consist of the delimiter alone.
"${python_bin}" - "${tmp_dir}" "${submission}" <<'PY'
import json
import pathlib
import sys

scratch = pathlib.Path(sys.argv[1])
dest = pathlib.Path(sys.argv[2])

merged = {
    "version": "1.0.0",
    "tool": {"name": "semgrep", "version": "aggregate"},
    "run": {"platform": "local-semgrep-baseline"},
    "cases": [],
}

# Scratch directories may be nested, so search recursively.
for path in sorted(scratch.rglob("submission.json")):
    sub = json.loads(path.read_text(encoding="utf-8"))
    merged["cases"].extend(sub.get("cases", []))

merged["cases"].sort(key=lambda c: c.get("case_id", ""))
dest.write_text(json.dumps(merged, indent=2, sort_keys=True), encoding="utf-8")
print(f"submission written: {dest}")
PY

View File

@@ -0,0 +1,39 @@
#!/usr/bin/env bash
# Run Semgrep with the baseline rules against one benchmark case and write a
# submission.json in the benchmark schema via normalize.py.
#
# Usage: run_case.sh <case_dir> [output_dir]
#   case_dir    directory containing case.yaml
#   output_dir  directory receiving submission.json
#               (default: <case_dir>/baselines/semgrep)
# Environment:
#   PYTHON      interpreter used to run normalize.py (default: python3,
#               falling back to python)
set -euo pipefail

case_dir="${1:-}"
out_dir="${2:-}"
if [[ -z "${case_dir}" ]]; then
  echo "usage: run_case.sh <case_dir> [output_dir]" >&2
  exit 1
fi
case_dir="$(cd "${case_dir}" && pwd)"
if [[ ! -f "${case_dir}/case.yaml" ]]; then
  echo "run_case.sh: ${case_dir}/case.yaml not found" >&2
  exit 1
fi
if [[ -z "${out_dir}" ]]; then
  out_dir="${case_dir}/baselines/semgrep"
fi
mkdir -p "${out_dir}"

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
rules="${script_dir}/rules.yaml"

python_bin="${PYTHON:-python3}"
command -v "${python_bin}" >/dev/null 2>&1 || python_bin="python"

# Raw Semgrep output is an intermediate artifact; remove it on exit so the
# output directory does not accumulate randomly named files.
# The template ends in X's so it works with both GNU and BSD mktemp.
semgrep_out="$(mktemp "${out_dir}/semgrep-results-XXXXXX")"
trap 'rm -f "${semgrep_out}"' EXIT

export SEMGREP_SEND_TELEMETRY=0
export SEMGREP_SEND_METRICS=off
export SEMGREP_ENABLE_VERSION_CHECK=0

# Run semgrep; if semgrep is unavailable, continue with empty results for
# deterministic output.
if command -v semgrep >/dev/null 2>&1; then
  # Take the first line without a pipe, so pipefail/SIGPIPE cannot turn a
  # successful version query into a failure.
  version_raw="$(semgrep --version 2>/dev/null || true)"
  semgrep_version="${version_raw%%$'\n'*}"
  semgrep_version="${semgrep_version:-unknown}"
  # Best effort: normalize.py treats an empty or invalid report as "no
  # findings", but the failure is reported instead of being hidden.
  semgrep --config "${rules}" --metrics=off --json --quiet "${case_dir}" > "${semgrep_out}" \
    || echo "run_case.sh: warning: semgrep exited non-zero for ${case_dir}; results may be empty" >&2
else
  semgrep_version="semgrep-missing"
  echo '{"results":[]}' > "${semgrep_out}"
fi

"${python_bin}" "${script_dir}/normalize.py" \
  --case "${case_dir}/case.yaml" \
  --semgrep "${semgrep_out}" \
  --tool-version "${semgrep_version}" \
  --output "${out_dir}/submission.json"

echo "submission written: ${out_dir}/submission.json"