feat: Add tests for RichGraphPublisher and RichGraphWriter
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
Some checks failed
AOC Guard CI / aoc-guard (push) Has been cancelled
AOC Guard CI / aoc-verify (push) Has been cancelled
Docs CI / lint-and-preview (push) Has been cancelled
Mirror Thin Bundle Sign & Verify / mirror-sign (push) Has been cancelled
Concelier Attestation Tests / attestation-tests (push) Has been cancelled
Export Center CI / export-ci (push) Has been cancelled
- Implement unit tests for RichGraphPublisher to verify graph publishing to CAS. - Implement unit tests for RichGraphWriter to ensure correct writing of canonical graphs and metadata. feat: Implement AOC Guard validation logic - Add AOC Guard validation logic to enforce document structure and field constraints. - Introduce violation codes for various validation errors. - Implement tests for AOC Guard to validate expected behavior. feat: Create Console Status API client and service - Implement ConsoleStatusClient for fetching console status and streaming run events. - Create ConsoleStatusService to manage console status polling and event subscriptions. - Add tests for ConsoleStatusClient to verify API interactions. feat: Develop Console Status component - Create ConsoleStatusComponent for displaying console status and run events. - Implement UI for showing status metrics and handling user interactions. - Add styles for console status display. test: Add tests for Console Status store - Implement tests for ConsoleStatusStore to verify event handling and state management.
This commit is contained in:
28
bench/reachability-benchmark/baselines/semgrep/README.md
Normal file
28
bench/reachability-benchmark/baselines/semgrep/README.md
Normal file
@@ -0,0 +1,28 @@
|
||||
# Semgrep baseline
|
||||
|
||||
Deterministic baseline runner that executes Semgrep against a single benchmark case and emits a submission payload in the benchmark schema.
|
||||
|
||||
## Usage
|
||||
```bash
|
||||
# Run for one case
|
||||
SEMGREP_SEND_TELEMETRY=0 SEMGREP_ENABLE_VERSION_CHECK=0 \
|
||||
baselines/semgrep/run_case.sh cases/js/unsafe-eval /tmp/semgrep-out
|
||||
|
||||
# Run for all cases under a root
|
||||
SEMGREP_SEND_TELEMETRY=0 SEMGREP_ENABLE_VERSION_CHECK=0 \
|
||||
baselines/semgrep/run_all.sh cases /tmp/semgrep-all
|
||||
```
|
||||
|
||||
Outputs:
|
||||
- Per-case: `<out>/submission.json`
|
||||
- All cases: `<out>/submission.json` (merged, deterministic ordering)
|
||||
|
||||
## Requirements
|
||||
- Semgrep CLI available on PATH. Tested with `semgrep >= 1.72`. Telemetry/version checks must be disabled for offline/deterministic runs.
|
||||
- Python 3.11+ with PyYAML installed (the normalization script imports `yaml` to read `case.yaml`).
|
||||
|
||||
## Determinism posture
|
||||
- Telemetry/version checks disabled by default via env (see scripts).
|
||||
- Stable ordering of cases and sinks.
|
||||
- No network access.
|
||||
- If Semgrep is missing, runner still produces a valid submission marking all sinks as `unreachable`, preserving schema validity.
|
||||
80
bench/reachability-benchmark/baselines/semgrep/normalize.py
Normal file
80
bench/reachability-benchmark/baselines/semgrep/normalize.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Normalize Semgrep JSON output into benchmark submission schema."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Any
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
def load_case(case_path: Path) -> Dict[str, Any]:
    """Parse the benchmark case description stored at ``case_path``.

    The file is read as UTF-8 text and passed to ``yaml.safe_load``; whatever
    that call produces is returned unchanged.
    """
    raw_text = case_path.read_text(encoding="utf-8")
    return yaml.safe_load(raw_text)
|
||||
|
||||
|
||||
def load_semgrep(path: Path) -> Dict[str, Any]:
    """Load Semgrep's JSON report, falling back to an empty result set.

    Args:
        path: Location of the file Semgrep wrote its ``--json`` output to.

    Returns:
        The parsed report when it is a JSON object. ``{"results": []}`` is
        returned instead when the file does not exist, is not valid JSON, or
        parses to anything other than a JSON object.
    """
    empty: Dict[str, Any] = {"results": []}
    if not path.exists():
        return empty
    try:
        parsed = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return empty
    # Callers treat the report as a mapping (``.get("results")``); a valid but
    # non-object payload such as ``[]`` or ``null`` must not leak through.
    if not isinstance(parsed, dict):
        return empty
    return parsed
|
||||
|
||||
|
||||
def sink_prediction(results: List[Dict[str, Any]], sink_file: str) -> Dict[str, Any]:
    """Predict reachability of a sink from Semgrep findings.

    Basic heuristic: the sink is reachable if any finding touches the sink
    file.

    Args:
        results: Entries of the ``results`` list from Semgrep's JSON report.
        sink_file: File name (or trailing path) of the file holding the sink.

    Returns:
        ``{"prediction": "reachable", "confidence": 0.6, "explain": {...}}``
        when a finding's path is ``sink_file`` or ends with ``/sink_file``;
        otherwise ``{"prediction": "unreachable", "confidence": 0.4}``. The
        ``explain`` path uses the first matching finding's start line, then
        its end line, then ``0``.
    """
    unreachable = {"prediction": "unreachable", "confidence": 0.4}
    # An empty name would be a suffix of every path and mark the sink
    # reachable on any finding at all, so treat it as "no match possible".
    if not sink_file:
        return unreachable

    wanted = sink_file.replace("\\", "/")
    suffix = "/" + wanted

    def touches(finding: Any) -> bool:
        if not isinstance(finding, dict):
            return False
        found = str(finding.get("path") or "").replace("\\", "/")
        # Match on a path-component boundary so "notapp.js" does not count as
        # a hit for "app.js".
        return found == wanted or found.endswith(suffix)

    first = next((r for r in results if touches(r)), None)
    if first is None:
        return unreachable

    # ``or {}`` also covers keys that are present but null.
    line = (first.get("start") or {}).get("line") or (first.get("end") or {}).get("line") or 0
    explain_path = [f"entry:{sink_file}:0", f"sink:{sink_file}:{line}"]
    return {"prediction": "reachable", "confidence": 0.6, "explain": {"path": explain_path}}
|
||||
|
||||
|
||||
def build_submission(case_meta: Dict[str, Any], results: Dict[str, Any], tool_version: str) -> Dict[str, Any]:
    """Assemble a single-case submission from case metadata and Semgrep output.

    Args:
        case_meta: Parsed ``case.yaml`` content; ``id``/``project`` and
            ``sinks`` are read from it.
        results: Parsed Semgrep report; its ``results`` list is consulted.
        tool_version: Version string recorded under ``tool.version``.

    Returns:
        A submission dict with ``version``, ``tool``, ``run`` and one entry in
        ``cases`` whose ``sinks`` are ordered by sink ``id``.
    """
    semgrep_results = results.get("results") or []
    # Sink entries that are not mappings carry neither an id nor a location,
    # so they cannot be scored; drop them instead of crashing in the sort key.
    sinks = [s for s in (case_meta.get("sinks") or []) if isinstance(s, dict)]

    sinks_out = []
    for sink in sorted(sinks, key=lambda s: s.get("id", "")):
        loc = sink.get("location")
        if not isinstance(loc, dict):
            loc = {}
        # Only the file's base name is compared against Semgrep paths.
        sink_file = Path(str(loc.get("file") or "")).name
        pred = sink_prediction(semgrep_results, sink_file)
        entry = {
            "sink_id": sink.get("id", "unknown"),
            "prediction": pred["prediction"],
            "confidence": pred["confidence"],
        }
        if "explain" in pred:
            entry["explain"] = pred["explain"]
        sinks_out.append(entry)

    case_id = str(case_meta.get("id") or case_meta.get("project") or "unknown-case")
    return {
        "version": "1.0.0",
        "tool": {"name": "semgrep", "version": tool_version},
        "run": {"platform": "local-semgrep-baseline"},
        "cases": [{"case_id": case_id, "sinks": sinks_out}],
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: normalize one Semgrep report into a submission.

    Reads ``--case`` and ``--semgrep``, builds the submission and writes it as
    indented, key-sorted JSON to ``--output`` (creating parent directories as
    needed). Always returns ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--case", type=Path, required=True, help="Path to case.yaml")
    parser.add_argument("--semgrep", type=Path, required=True, help="Path to semgrep JSON output")
    parser.add_argument("--tool-version", type=str, default="unknown")
    parser.add_argument("--output", type=Path, required=True)
    opts = parser.parse_args()

    submission = build_submission(
        load_case(opts.case),
        load_semgrep(opts.semgrep),
        opts.tool_version,
    )

    destination = opts.output
    destination.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(submission, indent=2, sort_keys=True)
    destination.write_text(payload, encoding="utf-8")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|
||||
34
bench/reachability-benchmark/baselines/semgrep/rules.yaml
Normal file
34
bench/reachability-benchmark/baselines/semgrep/rules.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
# Semgrep rules used by the reachability-benchmark baseline.
# Each rule fires when any one of its listed patterns matches.
rules:
  # JavaScript / TypeScript: dynamic code evaluation.
  - id: semgrep.eval.js
    languages:
      - javascript
      - typescript
    message: "Potential eval / Function sink"
    severity: WARNING
    pattern-either:
      - pattern: eval($EXPR)
      - pattern: Function($ARGS, $BODY)
      - pattern: vm.runInNewContext($EXPR, ...)

  # JavaScript / TypeScript: server-side template rendering.
  - id: semgrep.template.js
    languages:
      - javascript
      - typescript
    message: "Template rendering with user-controlled input"
    severity: WARNING
    pattern-either:
      - pattern: res.render($TEMPLATE, $CTX)
      - pattern: reply.view($TEMPLATE, $CTX)

  # Python: dynamic code evaluation.
  - id: semgrep.exec.py
    languages:
      - python
    message: "Potential exec/eval sink"
    severity: WARNING
    pattern-either:
      - pattern: eval($EXPR)
      - pattern: exec($EXPR)

  # Python: template rendering.
  - id: semgrep.template.py
    languages:
      - python
    message: "Template rendering with user-controlled input"
    severity: WARNING
    pattern-either:
      - pattern: render_template($NAME, **$KWARGS)
      - pattern: Template($X).render(...)
|
||||
44
bench/reachability-benchmark/baselines/semgrep/run_all.sh
Normal file
44
bench/reachability-benchmark/baselines/semgrep/run_all.sh
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env bash
# Run the Semgrep baseline for every case under a root and merge the results.
#
# Usage: run_all.sh [cases_root] [out_dir]
#   cases_root  directory searched recursively for case.yaml (default: cases)
#   out_dir     directory receiving the merged submission.json
#               (default: /tmp/semgrep-baseline)
set -euo pipefail

cases_root="${1:-cases}"
out_dir="${2:-/tmp/semgrep-baseline}"

cases_root="$(cd "${cases_root}" && pwd)"
mkdir -p "${out_dir}"

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Per-case outputs are staged in a fresh directory below out_dir.
tmp_dir="$(mktemp -d "${out_dir}/semgrep-all-XXXX")"
submission="${out_dir}/submission.json"

# Collect per-case submissions.
# LC_ALL=C keeps the ordering independent of the caller's locale; IFS= keeps
# leading/trailing whitespace in paths intact.
find "${cases_root}" -name case.yaml -print | LC_ALL=C sort | while IFS= read -r case_file; do
  case_dir="$(dirname "${case_file}")"
  # Key the staging directory on the path relative to cases_root (slashes
  # flattened) so that same-named cases under different parents, e.g.
  # js/unsafe-eval and py/unsafe-eval, do not overwrite each other.
  rel_dir="${case_dir#"${cases_root}"/}"
  case_out="${tmp_dir}/${rel_dir//\//__}"
  mkdir -p "${case_out}"
  "${script_dir}/run_case.sh" "${case_dir}" "${case_out}" >/dev/null
done

# Merge deterministically.
# The script arguments must precede the here-document: the terminator line has
# to consist of the delimiter alone, otherwise the here-document never closes
# and Python is started without arguments.
python - "${tmp_dir}" "${submission}" <<'PY'
import json, pathlib, sys

out_dir = pathlib.Path(sys.argv[1])
subs = []
for path in sorted(out_dir.glob("*/submission.json")):
    subs.append(json.loads(path.read_text(encoding="utf-8")))

merged = {
    "version": "1.0.0",
    "tool": {"name": "semgrep", "version": "aggregate"},
    "run": {"platform": "local-semgrep-baseline"},
    "cases": []
}
for sub in subs:
    merged["cases"].extend(sub.get("cases", []))
merged["cases"] = sorted(merged["cases"], key=lambda c: c.get("case_id", ""))

dest = pathlib.Path(sys.argv[2])
dest.write_text(json.dumps(merged, indent=2, sort_keys=True), encoding="utf-8")
print(f"submission written: {dest}")
PY
|
||||
39
bench/reachability-benchmark/baselines/semgrep/run_case.sh
Normal file
39
bench/reachability-benchmark/baselines/semgrep/run_case.sh
Normal file
@@ -0,0 +1,39 @@
|
||||
#!/usr/bin/env bash
# Run the Semgrep baseline against one benchmark case and emit a submission.
#
# Usage: run_case.sh <case_dir> [output_dir]
#   case_dir    directory containing case.yaml and the sources to scan
#   output_dir  directory receiving submission.json
#               (default: <case_dir>/baselines/semgrep)
set -euo pipefail

case_dir="${1:-}"
out_dir="${2:-}"

if [[ -z "${case_dir}" ]]; then
  echo "usage: run_case.sh <case_dir> [output_dir]" >&2
  exit 1
fi

case_dir="$(cd "${case_dir}" && pwd)"
# Fail with a clear message instead of a Python traceback from normalize.py.
if [[ ! -f "${case_dir}/case.yaml" ]]; then
  echo "run_case.sh: no case.yaml found in ${case_dir}" >&2
  exit 1
fi
if [[ -z "${out_dir}" ]]; then
  out_dir="${case_dir}/baselines/semgrep"
fi
mkdir -p "${out_dir}"

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
rules="${script_dir}/rules.yaml"
semgrep_out="$(mktemp -p "${out_dir}" semgrep-results-XXXX.json)"
# The raw report is only an intermediate; remove it on exit so repeated runs
# do not pile up randomly named files next to submission.json.
trap 'rm -f "${semgrep_out}"' EXIT

# NOTE(review): Semgrep's documented metrics switch is --metrics /
# SEMGREP_SEND_METRICS; confirm SEMGREP_SEND_TELEMETRY is honored. The scan
# below passes --metrics=off explicitly either way.
export SEMGREP_SEND_TELEMETRY=0
export SEMGREP_ENABLE_VERSION_CHECK=0

# With pipefail, a missing semgrep makes the pipeline fail and the fallback
# string is recorded as the tool version.
semgrep_version="$(semgrep --version 2>/dev/null | head -n1 || echo "semgrep-missing")"
# Run semgrep; if semgrep is unavailable, continue with empty results for deterministic output.
if command -v semgrep >/dev/null 2>&1; then
  # Best effort: a failed scan still yields a submission (normalize.py treats
  # unreadable output as "no findings"), but say so on stderr.
  semgrep --config "${rules}" --metrics=off --json --quiet "${case_dir}" > "${semgrep_out}" \
    || echo "run_case.sh: semgrep exited non-zero for ${case_dir}; continuing" >&2
else
  echo '{"results":[]}' > "${semgrep_out}"
fi

python "${script_dir}/normalize.py" \
  --case "${case_dir}/case.yaml" \
  --semgrep "${semgrep_out}" \
  --tool-version "${semgrep_version}" \
  --output "${out_dir}/submission.json"

echo "submission written: ${out_dir}/submission.json"
|
||||
Reference in New Issue
Block a user