diff --git a/bench/reachability-benchmark/README.md b/bench/reachability-benchmark/README.md index 1a05f8781..c9d833092 100644 --- a/bench/reachability-benchmark/README.md +++ b/bench/reachability-benchmark/README.md @@ -17,6 +17,24 @@ Deterministic, reproducible benchmark for reachability analysis tools. - `ci/` — deterministic CI workflows and scripts. - `website/` — static site (leaderboard/docs/downloads). +Sample cases added (JS track): +- `cases/js/unsafe-eval` (reachable sink) → `benchmark/truth/js-unsafe-eval.json`. +- `cases/js/guarded-eval` (unreachable by default) → `benchmark/truth/js-guarded-eval.json`. +- `cases/js/express-eval` (admin eval reachable) → `benchmark/truth/js-express-eval.json`. +- `cases/js/express-guarded` (admin eval gated by env) → `benchmark/truth/js-express-guarded.json`. +- `cases/js/fastify-template` (template rendering reachable) → `benchmark/truth/js-fastify-template.json`. + +Sample cases added (Python track): +- `cases/py/unsafe-exec` (reachable eval) → `benchmark/truth/py-unsafe-exec.json`. +- `cases/py/guarded-exec` (unreachable when FEATURE_ENABLE != 1) → `benchmark/truth/py-guarded-exec.json`. +- `cases/py/flask-template` (template rendering reachable) → `benchmark/truth/py-flask-template.json`. +- `cases/py/fastapi-guarded` (unreachable unless ALLOW_EXEC=true) → `benchmark/truth/py-fastapi-guarded.json`. +- `cases/py/django-ssti` (template rendering reachable, autoescape off) → `benchmark/truth/py-django-ssti.json`. + +Sample cases added (Java track): +- `cases/java/spring-deserialize` (reachable Java deserialization) → `benchmark/truth/java-spring-deserialize.json`. +- `cases/java/spring-guarded` (deserialization unreachable unless ALLOW_DESER=true) → `benchmark/truth/java-spring-guarded.json`. + ## Determinism & Offline Rules - No network during build/test; pin images/deps; set `SOURCE_DATE_EPOCH`. - Sort file lists; stable JSON/YAML emitters; fixed RNG seeds. 
diff --git a/bench/reachability-benchmark/benchmark/truth/java-spring-deserialize.json b/bench/reachability-benchmark/benchmark/truth/java-spring-deserialize.json new file mode 100644 index 000000000..4a59783b9 --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/java-spring-deserialize.json @@ -0,0 +1,32 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "java-spring-deserialize:201", + "case_version": "1.0.0", + "notes": "Java deserialization sink reachable", + "sinks": [ + { + "sink_id": "JavaDeserialize::handleRequest", + "label": "reachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "src/AppTest.java" + ], + "coverage_files": [] + }, + "static_evidence": { + "call_path": [ + "POST /api/upload", + "App.handleRequest", + "ObjectInputStream.readObject" + ] + }, + "config_conditions": [], + "notes": "No guard; base64 payload deserialized" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/java-spring-guarded.json b/bench/reachability-benchmark/benchmark/truth/java-spring-guarded.json new file mode 100644 index 000000000..b90b1fedc --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/java-spring-guarded.json @@ -0,0 +1,29 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "java-spring-guarded:202", + "case_version": "1.0.0", + "notes": "Deserialization unreachable by default", + "sinks": [ + { + "sink_id": "JavaDeserializeGuarded::handleRequest", + "label": "unreachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": ["src/AppTest.java"], + "coverage_files": [] + }, + "static_evidence": { + "call_path": [ + "POST /api/upload", + "App.handleRequest", + "guard: ALLOW_DESER!=true" + ] + }, + "config_conditions": ["ALLOW_DESER == 'true'"] + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/js-express-eval.json b/bench/reachability-benchmark/benchmark/truth/js-express-eval.json new file mode 100644 index 000000000..47752844d 
--- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/js-express-eval.json @@ -0,0 +1,34 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "js-express-eval:003", + "case_version": "1.0.0", + "notes": "Admin eval reachable", + "sinks": [ + { + "sink_id": "ExpressEval::exec", + "label": "reachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_reach.js" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /api/admin/exec", + "createServer.exec", + "eval(code)" + ] + }, + "config_conditions": [], + "notes": "No guard on admin path" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/js-express-guarded.json b/bench/reachability-benchmark/benchmark/truth/js-express-guarded.json new file mode 100644 index 000000000..950daf905 --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/js-express-guarded.json @@ -0,0 +1,36 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "js-express-guarded:004", + "case_version": "1.0.0", + "notes": "Admin exec unreachable when ALLOW_EXEC!=true", + "sinks": [ + { + "sink_id": "ExpressGuarded::exec", + "label": "unreachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_unreachable.js" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /api/admin/exec", + "createServer.exec", + "guard: ALLOW_EXEC!=true" + ] + }, + "config_conditions": [ + "ALLOW_EXEC == 'true'" + ], + "notes": "Only reachable when ALLOW_EXEC=true" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/js-fastify-template.json b/bench/reachability-benchmark/benchmark/truth/js-fastify-template.json new file mode 100644 index 000000000..6202059e5 --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/js-fastify-template.json @@ -0,0 +1,34 @@ +{ + "version": "1.0.0", + "cases": [ + { + 
"case_id": "js-fastify-template:005", + "case_version": "1.0.0", + "notes": "Template rendering reachable", + "sinks": [ + { + "sink_id": "FastifyTemplate::render", + "label": "reachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_reach.js" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /api/render", + "createServer.render", + "template replace" + ] + }, + "config_conditions": [], + "notes": "Simple template replace used as sink" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/js-guarded-eval.json b/bench/reachability-benchmark/benchmark/truth/js-guarded-eval.json new file mode 100644 index 000000000..cba79decf --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/js-guarded-eval.json @@ -0,0 +1,36 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "js-guarded-eval:002", + "case_version": "1.0.0", + "notes": "Eval sink guarded by FEATURE_ENABLE; unreachable when flag off", + "sinks": [ + { + "sink_id": "GuardedEval::handleRequest", + "label": "unreachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_unreachable.js" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /api/exec", + "app.js::handleRequest", + "guard: FEATURE_ENABLE != 1" + ] + }, + "config_conditions": [ + "FEATURE_ENABLE == '1'" + ], + "notes": "Sink only executes when FEATURE_ENABLE=1" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/js-unsafe-eval.json b/bench/reachability-benchmark/benchmark/truth/js-unsafe-eval.json new file mode 100644 index 000000000..1f6c26cd9 --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/js-unsafe-eval.json @@ -0,0 +1,34 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "js-unsafe-eval:001", + "case_version": "1.0.0", + "notes": "Unsafe eval sink reachable via POST 
/api/exec", + "sinks": [ + { + "sink_id": "UnsafeEval::handleRequest", + "label": "reachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_reach.js" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /api/exec", + "app.js::handleRequest", + "eval(code)" + ] + }, + "config_conditions": [], + "notes": "No guards; direct eval on user input" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/py-django-ssti.json b/bench/reachability-benchmark/benchmark/truth/py-django-ssti.json new file mode 100644 index 000000000..b59b11d73 --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/py-django-ssti.json @@ -0,0 +1,34 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "py-django-ssti:105", + "case_version": "1.0.0", + "notes": "Template rendering reachable (autoescape off)", + "sinks": [ + { + "sink_id": "DjangoSSTI::render", + "label": "reachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_reach.py" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /render", + "app.handle_request", + "render" + ] + }, + "config_conditions": [], + "notes": "Autoescape disabled" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/py-fastapi-guarded.json b/bench/reachability-benchmark/benchmark/truth/py-fastapi-guarded.json new file mode 100644 index 000000000..c152bdbe2 --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/py-fastapi-guarded.json @@ -0,0 +1,36 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "py-fastapi-guarded:104", + "case_version": "1.0.0", + "notes": "Eval unreachable unless ALLOW_EXEC=true", + "sinks": [ + { + "sink_id": "FastApiGuarded::handle_request", + "label": "unreachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_unreachable.py" + ], 
+ "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /exec", + "app.handle_request", + "guard: ALLOW_EXEC!=true" + ] + }, + "config_conditions": [ + "ALLOW_EXEC == 'true'" + ], + "notes": "Feature flag blocks sink by default" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/py-flask-template.json b/bench/reachability-benchmark/benchmark/truth/py-flask-template.json new file mode 100644 index 000000000..65d80f3ee --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/py-flask-template.json @@ -0,0 +1,34 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "py-flask-template:103", + "case_version": "1.0.0", + "notes": "Template rendering reachable", + "sinks": [ + { + "sink_id": "FlaskTemplate::render", + "label": "reachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_reach.py" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /render", + "app.handle_request", + "render" + ] + }, + "config_conditions": [], + "notes": "Simple template placeholder replacement" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/py-guarded-exec.json b/bench/reachability-benchmark/benchmark/truth/py-guarded-exec.json new file mode 100644 index 000000000..703eea620 --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/py-guarded-exec.json @@ -0,0 +1,36 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "py-guarded-exec:102", + "case_version": "1.0.0", + "notes": "Eval unreachable unless FEATURE_ENABLE=1", + "sinks": [ + { + "sink_id": "PyGuardedExec::handle_request", + "label": "unreachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_unreachable.py" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /api/exec", + "app.handle_request", + "guard: 
FEATURE_ENABLE != 1" + ] + }, + "config_conditions": [ + "FEATURE_ENABLE == '1'" + ], + "notes": "Feature flag required" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/benchmark/truth/py-unsafe-exec.json b/bench/reachability-benchmark/benchmark/truth/py-unsafe-exec.json new file mode 100644 index 000000000..b2802292b --- /dev/null +++ b/bench/reachability-benchmark/benchmark/truth/py-unsafe-exec.json @@ -0,0 +1,34 @@ +{ + "version": "1.0.0", + "cases": [ + { + "case_id": "py-unsafe-exec:101", + "case_version": "1.0.0", + "notes": "Eval reachable", + "sinks": [ + { + "sink_id": "PyUnsafeExec::handle_request", + "label": "reachable", + "confidence": "high", + "dynamic_evidence": { + "covered_by_tests": [ + "tests/test_reach.py" + ], + "coverage_files": [ + "outputs/coverage.json" + ] + }, + "static_evidence": { + "call_path": [ + "POST /api/exec", + "app.handle_request", + "eval(code)" + ] + }, + "config_conditions": [], + "notes": "No guards" + } + ] + } + ] +} diff --git a/bench/reachability-benchmark/cases/java/spring-deserialize/case.yaml b/bench/reachability-benchmark/cases/java/spring-deserialize/case.yaml new file mode 100644 index 000000000..4fff5c484 --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-deserialize/case.yaml @@ -0,0 +1,38 @@ +id: "java-spring-deserialize:201" +language: java +project: spring-deserialize +version: "1.0.0" +description: "Java deserialization sink reachable via POST /api/upload" +entrypoints: + - "POST /api/upload" +sinks: + - id: "JavaDeserialize::handleRequest" + path: "bench.reachability.App.handleRequest" + kind: "custom" + location: + file: src/App.java + line: 9 + notes: "java.io.ObjectInputStream on user-controlled payload" +environment: + os_image: "eclipse-temurin:21-jdk" + runtime: + java: "21" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + 
coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./build/build.sh" + expected_coverage: [] + expected_traces: [] + env: + JAVA_TOOL_OPTIONS: "-ea" +ground_truth: + summary: "Deserialization reachable" + evidence_files: + - "../benchmark/truth/java-spring-deserialize.json" diff --git a/bench/reachability-benchmark/cases/java/spring-deserialize/entrypoints.yaml b/bench/reachability-benchmark/cases/java/spring-deserialize/entrypoints.yaml new file mode 100644 index 000000000..5400b133f --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-deserialize/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "java-spring-deserialize:201" +entries: + http: + - id: "POST /api/upload" + route: "/api/upload" + method: "POST" + handler: "App.handleRequest" + description: "Binary payload base64-deserialized" diff --git a/bench/reachability-benchmark/cases/java/spring-deserialize/pom.xml b/bench/reachability-benchmark/cases/java/spring-deserialize/pom.xml new file mode 100644 index 000000000..9de740dbe --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-deserialize/pom.xml @@ -0,0 +1,12 @@ + + 4.0.0 + org.stellaops.bench + spring-deserialize + 1.0.0 + jar + + 17 + 17 + + diff --git a/bench/reachability-benchmark/cases/java/spring-deserialize/src/App.java b/bench/reachability-benchmark/cases/java/spring-deserialize/src/App.java new file mode 100644 index 000000000..1ac834bfb --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-deserialize/src/App.java @@ -0,0 +1,26 @@ +package bench.reachability; + +import java.util.Map; +import java.util.Base64; +import java.io.*; + +public class App { + // Unsafe Java deserialization sink (reachable) + public static Response handleRequest(Map body) { + String payload = body.get("payload"); + if (payload == null) { + return new Response(400, "bad request"); + } + try { + byte[] data = Base64.getDecoder().decode(payload); + ObjectInputStream ois = new ObjectInputStream(new 
ByteArrayInputStream(data)); + Object obj = ois.readObject(); + ois.close(); + return new Response(200, obj.toString()); + } catch (Exception ex) { + return new Response(500, ex.getClass().getSimpleName()); + } + } + + public record Response(int status, String body) {} +} diff --git a/bench/reachability-benchmark/cases/java/spring-deserialize/src/AppTest.java b/bench/reachability-benchmark/cases/java/spring-deserialize/src/AppTest.java new file mode 100644 index 000000000..1e4920230 --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-deserialize/src/AppTest.java @@ -0,0 +1,30 @@ +package bench.reachability; + +import java.io.*; +import java.util.*; +import java.util.Base64; + +// Simple hand-rolled test harness (no external deps) using Java assertions. +public class AppTest { + private static String serialize(Object obj) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(bos); + oos.writeObject(obj); + oos.close(); + return Base64.getEncoder().encodeToString(bos.toByteArray()); + } + + public static void main(String[] args) throws Exception { + String payload = serialize("hello"); + Map body = Map.of("payload", payload); + var res = App.handleRequest(body); + assert res.status() == 200 : "status"; + assert res.body().equals("hello") : "body"; + // Emit a simple marker file for trace/coverage stand-ins + File outDir = new File("outputs"); + outDir.mkdirs(); + try (FileWriter fw = new FileWriter(new File(outDir, "SINK_REACHED"))) { + fw.write("true"); + } + } +} diff --git a/bench/reachability-benchmark/cases/java/spring-guarded/case.yaml b/bench/reachability-benchmark/cases/java/spring-guarded/case.yaml new file mode 100644 index 000000000..263cc46ad --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-guarded/case.yaml @@ -0,0 +1,38 @@ +id: "java-spring-guarded:202" +language: java +project: spring-guarded +version: "1.0.0" +description: "Java deserialization 
guarded by ALLOW_DESER flag (unreachable by default)" +entrypoints: + - "POST /api/upload" +sinks: + - id: "JavaDeserializeGuarded::handleRequest" + path: "bench.reachability.App.handleRequest" + kind: "custom" + location: + file: src/App.java + line: 9 + notes: "ObjectInputStream gated by ALLOW_DESER" +environment: + os_image: "eclipse-temurin:21-jdk" + runtime: + java: "21" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./build/build.sh" + expected_coverage: [] + expected_traces: [] + env: + JAVA_TOOL_OPTIONS: "-ea" +ground_truth: + summary: "Guard blocks deserialization unless ALLOW_DESER=true" + evidence_files: + - "../benchmark/truth/java-spring-guarded.json" diff --git a/bench/reachability-benchmark/cases/java/spring-guarded/entrypoints.yaml b/bench/reachability-benchmark/cases/java/spring-guarded/entrypoints.yaml new file mode 100644 index 000000000..93af28cb2 --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-guarded/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "java-spring-guarded:202" +entries: + http: + - id: "POST /api/upload" + route: "/api/upload" + method: "POST" + handler: "App.handleRequest" + description: "Base64 payload deserialization guarded by ALLOW_DESER" diff --git a/bench/reachability-benchmark/cases/java/spring-guarded/pom.xml b/bench/reachability-benchmark/cases/java/spring-guarded/pom.xml new file mode 100644 index 000000000..3c3353948 --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-guarded/pom.xml @@ -0,0 +1,12 @@ + + 4.0.0 + org.stellaops.bench + spring-guarded + 1.0.0 + jar + + 17 + 17 + + diff --git a/bench/reachability-benchmark/cases/java/spring-guarded/src/App.java b/bench/reachability-benchmark/cases/java/spring-guarded/src/App.java new file mode 100644 index 
000000000..f9ff222c0 --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-guarded/src/App.java @@ -0,0 +1,29 @@ +package bench.reachability; + +import java.util.Map; +import java.util.Base64; +import java.io.*; + +public class App { + // Deserialization sink guarded by feature flag + public static Response handleRequest(Map body, Map env) { + if (!"true".equals(env.getOrDefault("ALLOW_DESER", "false"))) { + return new Response(403, "forbidden"); + } + String payload = body.get("payload"); + if (payload == null) { + return new Response(400, "bad request"); + } + try { + byte[] data = Base64.getDecoder().decode(payload); + ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data)); + Object obj = ois.readObject(); + ois.close(); + return new Response(200, obj.toString()); + } catch (Exception ex) { + return new Response(500, ex.getClass().getSimpleName()); + } + } + + public record Response(int status, String body) {} +} diff --git a/bench/reachability-benchmark/cases/java/spring-guarded/src/AppTest.java b/bench/reachability-benchmark/cases/java/spring-guarded/src/AppTest.java new file mode 100644 index 000000000..bbbb807a3 --- /dev/null +++ b/bench/reachability-benchmark/cases/java/spring-guarded/src/AppTest.java @@ -0,0 +1,29 @@ +package bench.reachability; + +import java.io.*; +import java.util.*; +import java.util.Base64; + +public class AppTest { + private static String serialize(Object obj) throws IOException { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(bos); + oos.writeObject(obj); + oos.close(); + return Base64.getEncoder().encodeToString(bos.toByteArray()); + } + + public static void main(String[] args) throws Exception { + String payload = serialize("hi"); + Map body = Map.of("payload", payload); + Map env = Map.of("ALLOW_DESER", "false"); + var res = App.handleRequest(body, env); + assert res.status() == 403 : "status"; + assert res.body().equals("forbidden") 
: "body"; + File outDir = new File("outputs"); + outDir.mkdirs(); + try (FileWriter fw = new FileWriter(new File(outDir, "SINK_BLOCKED"))) { + fw.write("true"); + } + } +} diff --git a/bench/reachability-benchmark/cases/js/express-eval/case.yaml b/bench/reachability-benchmark/cases/js/express-eval/case.yaml new file mode 100644 index 000000000..416edaf4b --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-eval/case.yaml @@ -0,0 +1,38 @@ +id: "js-express-eval:003" +language: js +project: express-eval +version: "1.0.0" +description: "Admin exec endpoint evaluates user code" +entrypoints: + - "POST /api/admin/exec" +sinks: + - id: "ExpressEval::exec" + path: "src/app.js::createServer" + kind: "process" + location: + file: src/app.js + line: 17 + notes: "eval(code) on admin path" +environment: + os_image: "node:20-alpine" + runtime: + node: "20.11.0" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Admin exec endpoint reachable and executes eval" + evidence_files: + - "../benchmark/truth/js-express-eval.json" diff --git a/bench/reachability-benchmark/cases/js/express-eval/entrypoints.yaml b/bench/reachability-benchmark/cases/js/express-eval/entrypoints.yaml new file mode 100644 index 000000000..679d6fffe --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-eval/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "js-express-eval:003" +entries: + http: + - id: "POST /api/admin/exec" + route: "/api/admin/exec" + method: "POST" + handler: "createServer.exec" + description: "Admin-only exec (reachable)" diff --git a/bench/reachability-benchmark/cases/js/express-eval/package.json 
b/bench/reachability-benchmark/cases/js/express-eval/package.json new file mode 100644 index 000000000..2d12d83c3 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-eval/package.json @@ -0,0 +1,9 @@ +{ + "name": "rb-case-express-eval", + "version": "1.0.0", + "description": "Reachability benchmark case: express-like admin eval endpoint", + "license": "Apache-2.0", + "scripts": { + "test": "./tests/run-tests.sh" + } +} diff --git a/bench/reachability-benchmark/cases/js/express-eval/src/app.js b/bench/reachability-benchmark/cases/js/express-eval/src/app.js new file mode 100644 index 000000000..2ce59be0c --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-eval/src/app.js @@ -0,0 +1,34 @@ +'use strict'; + +// Minimal express-like router. +function makeApp() { + const routes = {}; + return { + post(path, handler) { + routes[`POST ${path}`] = handler; + }, + handle(method, path, req, res) { + const key = `${method} ${path}`; + if (routes[key]) { + return routes[key](req, res); + } + return { status: 404, body: 'not found' }; + } + }; +} + +function createServer() { + const app = makeApp(); + app.post('/api/admin/exec', (req) => { + if (!req || typeof req.body?.code !== 'string') { + return { status: 400, body: 'bad request' }; + } + // Sink: eval on admin endpoint (reachable) + // eslint-disable-next-line no-eval + const result = eval(req.body.code); + return { status: 200, body: String(result) }; + }); + return app; +} + +module.exports = { createServer }; diff --git a/bench/reachability-benchmark/cases/js/express-eval/tests/run-tests.sh b/bench/reachability-benchmark/cases/js/express-eval/tests/run-tests.sh new file mode 100644 index 000000000..bb44c525d --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-eval/tests/run-tests.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +node test_reach.js diff --git 
a/bench/reachability-benchmark/cases/js/express-eval/tests/test_reach.js b/bench/reachability-benchmark/cases/js/express-eval/tests/test_reach.js new file mode 100644 index 000000000..45983d859 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-eval/tests/test_reach.js @@ -0,0 +1,54 @@ +'use strict'; + +const assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const { createServer } = require('../src/app'); + +const OUT_DIR = path.resolve(__dirname, '../outputs'); +const TRACE_DIR = path.join(OUT_DIR, 'traces'); +const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json'); +const TRACE_FILE = path.join(TRACE_DIR, 'traces.json'); + +function ensureDirs() { + fs.mkdirSync(OUT_DIR, { recursive: true }); + fs.mkdirSync(TRACE_DIR, { recursive: true }); +} + +function recordTrace(entry, pathNodes) { + fs.writeFileSync( + TRACE_FILE, + JSON.stringify({ + entry, + path: pathNodes, + sink: 'ExpressEval::exec', + notes: 'Admin exec reached' + }, null, 2) + ); +} + +function recordCoverage(filePath, lines) { + fs.writeFileSync( + COVERAGE_FILE, + JSON.stringify({ + files: { + [filePath]: { + lines_covered: lines, + lines_total: 40 + } + } + }, null, 2) + ); +} + +(function main() { + ensureDirs(); + const app = createServer(); + const res = app.handle('POST', '/api/admin/exec', { body: { code: '21*2' } }); + assert.strictEqual(res.status, 200); + assert.strictEqual(res.body, '42'); + + recordTrace('POST /api/admin/exec', ['app.js::createServer', 'handler', 'eval(code)']); + recordCoverage('src/app.js', [5, 6, 7, 13, 18, 19]); + fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true'); +})(); diff --git a/bench/reachability-benchmark/cases/js/express-guarded/case.yaml b/bench/reachability-benchmark/cases/js/express-guarded/case.yaml new file mode 100644 index 000000000..4443c0974 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-guarded/case.yaml @@ -0,0 +1,38 @@ +id: "js-express-guarded:004" +language: js 
+project: express-guarded +version: "1.0.0" +description: "Admin exec guarded by ALLOW_EXEC flag; unreachable by default" +entrypoints: + - "POST /api/admin/exec" +sinks: + - id: "ExpressGuarded::exec" + path: "src/app.js::createServer" + kind: "process" + location: + file: src/app.js + line: 16 + notes: "eval(code) gated by ALLOW_EXEC" +environment: + os_image: "node:20-alpine" + runtime: + node: "20.11.0" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Guard prevents sink unless ALLOW_EXEC=true" + evidence_files: + - "../benchmark/truth/js-express-guarded.json" diff --git a/bench/reachability-benchmark/cases/js/express-guarded/entrypoints.yaml b/bench/reachability-benchmark/cases/js/express-guarded/entrypoints.yaml new file mode 100644 index 000000000..17b85ed2b --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-guarded/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "js-express-guarded:004" +entries: + http: + - id: "POST /api/admin/exec" + route: "/api/admin/exec" + method: "POST" + handler: "createServer.exec" + description: "Admin exec blocked unless ALLOW_EXEC=true" diff --git a/bench/reachability-benchmark/cases/js/express-guarded/package.json b/bench/reachability-benchmark/cases/js/express-guarded/package.json new file mode 100644 index 000000000..42e9a32d7 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-guarded/package.json @@ -0,0 +1,9 @@ +{ + "name": "rb-case-express-guarded", + "version": "1.0.0", + "description": "Reachability benchmark case: express-like admin exec guarded by env flag", + "license": "Apache-2.0", + "scripts": { + "test": 
"./tests/run-tests.sh" + } +} diff --git a/bench/reachability-benchmark/cases/js/express-guarded/src/app.js b/bench/reachability-benchmark/cases/js/express-guarded/src/app.js new file mode 100644 index 000000000..486791b79 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-guarded/src/app.js @@ -0,0 +1,33 @@ +'use strict'; + +function makeApp() { + const routes = {}; + return { + post(path, handler) { + routes[`POST ${path}`] = handler; + }, + handle(method, path, req) { + const key = `${method} ${path}`; + if (routes[key]) return routes[key](req); + return { status: 404, body: 'not found' }; + } + }; +} + +function createServer() { + const app = makeApp(); + app.post('/api/admin/exec', (req) => { + if (req?.env?.ALLOW_EXEC !== 'true') { + return { status: 403, body: 'forbidden' }; + } + if (typeof req?.body?.code !== 'string') { + return { status: 400, body: 'bad request' }; + } + // eslint-disable-next-line no-eval + const result = eval(req.body.code); + return { status: 200, body: String(result) }; + }); + return app; +} + +module.exports = { createServer }; diff --git a/bench/reachability-benchmark/cases/js/express-guarded/tests/run-tests.sh b/bench/reachability-benchmark/cases/js/express-guarded/tests/run-tests.sh new file mode 100644 index 000000000..1ab17a138 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-guarded/tests/run-tests.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +node test_unreachable.js diff --git a/bench/reachability-benchmark/cases/js/express-guarded/tests/test_unreachable.js b/bench/reachability-benchmark/cases/js/express-guarded/tests/test_unreachable.js new file mode 100644 index 000000000..5f11c8648 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/express-guarded/tests/test_unreachable.js @@ -0,0 +1,53 @@ +'use strict'; + +const assert = require('assert'); +const fs = 
require('fs'); +const path = require('path'); +const { createServer } = require('../src/app'); + +const OUT_DIR = path.resolve(__dirname, '../outputs'); +const TRACE_DIR = path.join(OUT_DIR, 'traces'); +const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json'); +const TRACE_FILE = path.join(TRACE_DIR, 'traces.json'); + +function ensureDirs() { + fs.mkdirSync(OUT_DIR, { recursive: true }); + fs.mkdirSync(TRACE_DIR, { recursive: true }); +} + +function recordTrace(entry, pathNodes) { + fs.writeFileSync( + TRACE_FILE, + JSON.stringify({ + entry, + path: pathNodes, + sink: 'ExpressGuarded::exec', + notes: 'Guard blocked sink' + }, null, 2) + ); +} + +function recordCoverage(filePath, lines) { + fs.writeFileSync( + COVERAGE_FILE, + JSON.stringify({ + files: { + [filePath]: { + lines_covered: lines, + lines_total: 50 + } + } + }, null, 2) + ); +} + +(function main() { + ensureDirs(); + const app = createServer(); + const res = app.handle('POST', '/api/admin/exec', { body: { code: '2+2' }, env: { ALLOW_EXEC: 'false' } }); + assert.strictEqual(res.status, 403); + assert.strictEqual(res.body, 'forbidden'); + + recordTrace('POST /api/admin/exec', ['app.js::createServer', 'guard: ALLOW_EXEC!=true']); + recordCoverage('src/app.js', [5,6,7,12,13,14,15]); +})(); diff --git a/bench/reachability-benchmark/cases/js/fastify-template/case.yaml b/bench/reachability-benchmark/cases/js/fastify-template/case.yaml new file mode 100644 index 000000000..df9b9e06d --- /dev/null +++ b/bench/reachability-benchmark/cases/js/fastify-template/case.yaml @@ -0,0 +1,38 @@ +id: "js-fastify-template:005" +language: js +project: fastify-template +version: "1.0.0" +description: "Template rendering route replaces user placeholder" +entrypoints: + - "POST /api/render" +sinks: + - id: "FastifyTemplate::render" + path: "src/app.js::createServer" + kind: "http" + location: + file: src/app.js + line: 19 + notes: "Template rendering of user input" +environment: + os_image: "node:20-alpine" + runtime: + node: 
"20.11.0" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Template rendering reachable via POST /api/render" + evidence_files: + - "../benchmark/truth/js-fastify-template.json" diff --git a/bench/reachability-benchmark/cases/js/fastify-template/entrypoints.yaml b/bench/reachability-benchmark/cases/js/fastify-template/entrypoints.yaml new file mode 100644 index 000000000..739ccfa6b --- /dev/null +++ b/bench/reachability-benchmark/cases/js/fastify-template/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "js-fastify-template:005" +entries: + http: + - id: "POST /api/render" + route: "/api/render" + method: "POST" + handler: "createServer.render" + description: "Template rendering endpoint" diff --git a/bench/reachability-benchmark/cases/js/fastify-template/package.json b/bench/reachability-benchmark/cases/js/fastify-template/package.json new file mode 100644 index 000000000..4b164611d --- /dev/null +++ b/bench/reachability-benchmark/cases/js/fastify-template/package.json @@ -0,0 +1,9 @@ +{ + "name": "rb-case-fastify-template", + "version": "1.0.0", + "description": "Reachability benchmark case: fastify-like template rendering", + "license": "Apache-2.0", + "scripts": { + "test": "./tests/run-tests.sh" + } +} diff --git a/bench/reachability-benchmark/cases/js/fastify-template/src/app.js b/bench/reachability-benchmark/cases/js/fastify-template/src/app.js new file mode 100644 index 000000000..dcf6b0937 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/fastify-template/src/app.js @@ -0,0 +1,33 @@ +'use strict'; + +// Simulated Fastify route registration for template injection. 
+function buildServer() { + const routes = {}; + return { + post(path, handler) { + routes[`POST ${path}`] = handler; + }, + inject(method, path, payload) { + const key = `${method} ${path}`; + const handler = routes[key]; + if (!handler) return { status: 404, body: 'not found' }; + return handler({ body: payload }); + } + }; +} + +function createServer() { + const server = buildServer(); + server.post('/api/render', (req) => { + const template = req?.body?.template; + if (typeof template !== 'string') { + return { status: 400, body: 'bad request' }; + } + const compiled = template.replace('{{user}}', 'guest'); + // Sink: writes rendered content to log (simulated SSR) + return { status: 200, body: compiled }; + }); + return server; +} + +module.exports = { createServer }; diff --git a/bench/reachability-benchmark/cases/js/fastify-template/tests/run-tests.sh b/bench/reachability-benchmark/cases/js/fastify-template/tests/run-tests.sh new file mode 100644 index 000000000..bb44c525d --- /dev/null +++ b/bench/reachability-benchmark/cases/js/fastify-template/tests/run-tests.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +node test_reach.js diff --git a/bench/reachability-benchmark/cases/js/fastify-template/tests/test_reach.js b/bench/reachability-benchmark/cases/js/fastify-template/tests/test_reach.js new file mode 100644 index 000000000..ed4d9925e --- /dev/null +++ b/bench/reachability-benchmark/cases/js/fastify-template/tests/test_reach.js @@ -0,0 +1,54 @@ +'use strict'; + +const assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const { createServer } = require('../src/app'); + +const OUT_DIR = path.resolve(__dirname, '../outputs'); +const TRACE_DIR = path.join(OUT_DIR, 'traces'); +const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json'); +const TRACE_FILE = path.join(TRACE_DIR, 'traces.json'); + +function 
ensureDirs() { + fs.mkdirSync(OUT_DIR, { recursive: true }); + fs.mkdirSync(TRACE_DIR, { recursive: true }); +} + +function recordTrace(entry, pathNodes) { + fs.writeFileSync( + TRACE_FILE, + JSON.stringify({ + entry, + path: pathNodes, + sink: 'FastifyTemplate::render', + notes: 'Template rendered with user input' + }, null, 2) + ); +} + +function recordCoverage(filePath, lines) { + fs.writeFileSync( + COVERAGE_FILE, + JSON.stringify({ + files: { + [filePath]: { + lines_covered: lines, + lines_total: 45 + } + } + }, null, 2) + ); +} + +(function main() { + ensureDirs(); + const server = createServer(); + const res = server.inject('POST', '/api/render', { template: 'Hello {{user}}' }); + assert.strictEqual(res.status, 200); + assert.strictEqual(res.body, 'Hello guest'); + + recordTrace('POST /api/render', ['app.js::createServer', 'render template']); + recordCoverage('src/app.js', [5,6,7,13,18,20]); + fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true'); +})(); diff --git a/bench/reachability-benchmark/cases/js/guarded-eval/case.yaml b/bench/reachability-benchmark/cases/js/guarded-eval/case.yaml new file mode 100644 index 000000000..757feb2e9 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/guarded-eval/case.yaml @@ -0,0 +1,38 @@ +id: "js-guarded-eval:002" +language: js +project: guarded-eval +version: "1.0.0" +description: "Eval sink guarded by FEATURE_ENABLE flag; unreachable when flag is off" +entrypoints: + - "POST /api/exec" +sinks: + - id: "GuardedEval::handleRequest" + path: "src/app.js::handleRequest" + kind: "process" + location: + file: src/app.js + line: 13 + notes: "eval on user input guarded by FEATURE_ENABLE" +environment: + os_image: "node:20-alpine" + runtime: + node: "20.11.0" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces 
+test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Guard prevents sink when FEATURE_ENABLE != 1" + evidence_files: + - "../benchmark/truth/js-guarded-eval.json" diff --git a/bench/reachability-benchmark/cases/js/guarded-eval/entrypoints.yaml b/bench/reachability-benchmark/cases/js/guarded-eval/entrypoints.yaml new file mode 100644 index 000000000..a7dad05be --- /dev/null +++ b/bench/reachability-benchmark/cases/js/guarded-eval/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "js-guarded-eval:002" +entries: + http: + - id: "POST /api/exec" + route: "/api/exec" + method: "POST" + handler: "app.js::handleRequest" + description: "Feature-flagged code execution endpoint" diff --git a/bench/reachability-benchmark/cases/js/guarded-eval/package.json b/bench/reachability-benchmark/cases/js/guarded-eval/package.json new file mode 100644 index 000000000..8e35f55e8 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/guarded-eval/package.json @@ -0,0 +1,9 @@ +{ + "name": "rb-case-guarded-eval", + "version": "1.0.0", + "description": "Reachability benchmark case: eval guarded by feature flag", + "license": "Apache-2.0", + "scripts": { + "test": "./tests/run-tests.sh" + } +} diff --git a/bench/reachability-benchmark/cases/js/guarded-eval/src/app.js b/bench/reachability-benchmark/cases/js/guarded-eval/src/app.js new file mode 100644 index 000000000..5db9f8ded --- /dev/null +++ b/bench/reachability-benchmark/cases/js/guarded-eval/src/app.js @@ -0,0 +1,19 @@ +'use strict'; + +function handleRequest(body, env = process.env) { + if (env.FEATURE_ENABLE !== '1') { + return { status: 403, body: 'disabled' }; + } + + const code = body && body.code; + if (typeof code !== 'string') { + return { status: 400, body: 'bad request' }; + } + + // This sink is reachable only when FEATURE_ENABLE=1. 
+ // eslint-disable-next-line no-eval + const result = eval(code); + return { status: 200, body: String(result) }; +} + +module.exports = { handleRequest }; diff --git a/bench/reachability-benchmark/cases/js/guarded-eval/tests/run-tests.sh b/bench/reachability-benchmark/cases/js/guarded-eval/tests/run-tests.sh new file mode 100644 index 000000000..1ab17a138 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/guarded-eval/tests/run-tests.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +node test_unreachable.js diff --git a/bench/reachability-benchmark/cases/js/guarded-eval/tests/test_unreachable.js b/bench/reachability-benchmark/cases/js/guarded-eval/tests/test_unreachable.js new file mode 100644 index 000000000..1d348220a --- /dev/null +++ b/bench/reachability-benchmark/cases/js/guarded-eval/tests/test_unreachable.js @@ -0,0 +1,54 @@ +'use strict'; + +const assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const { handleRequest } = require('../src/app'); + +const OUT_DIR = path.resolve(__dirname, '../outputs'); +const TRACE_DIR = path.join(OUT_DIR, 'traces'); +const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json'); +const TRACE_FILE = path.join(TRACE_DIR, 'traces.json'); + +function ensureDirs() { + fs.mkdirSync(OUT_DIR, { recursive: true }); + fs.mkdirSync(TRACE_DIR, { recursive: true }); +} + +function recordTrace(entry, pathNodes) { + fs.writeFileSync( + TRACE_FILE, + JSON.stringify({ + entry, + path: pathNodes, + sink: 'GuardedEval::handleRequest', + notes: 'Guard prevented sink execution' + }, null, 2) + ); +} + +function recordCoverage(filePath, lines) { + fs.writeFileSync( + COVERAGE_FILE, + JSON.stringify({ + files: { + [filePath]: { + lines_covered: lines, + lines_total: 32 + } + } + }, null, 2) + ); +} + +(function main() { + ensureDirs(); + const payload = { code: '1 + 2' }; + const 
response = handleRequest(payload, { FEATURE_ENABLE: '0' }); + assert.strictEqual(response.status, 403); + assert.strictEqual(response.body, 'disabled'); + + // Record that the guard path was taken; no SINK_REACHED marker is written. + recordTrace('POST /api/exec', ['app.js:handleRequest', 'guard: FEATURE_ENABLE != 1']); + recordCoverage('src/app.js', [5, 6, 7, 9, 10, 11]); +})(); diff --git a/bench/reachability-benchmark/cases/js/unsafe-eval/case.yaml b/bench/reachability-benchmark/cases/js/unsafe-eval/case.yaml new file mode 100644 index 000000000..bf8a6bab8 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/unsafe-eval/case.yaml @@ -0,0 +1,38 @@ +id: "js-unsafe-eval:001" +language: js +project: unsafe-eval +version: "1.0.0" +description: "Minimal handler with unsafe eval sink reachable via POST /api/exec" +entrypoints: + - "POST /api/exec" +sinks: + - id: "UnsafeEval::handleRequest" + path: "src/app.js::handleRequest" + kind: "process" + location: + file: src/app.js + line: 12 + notes: "eval on user-controlled input" +environment: + os_image: "node:20-alpine" + runtime: + node: "20.11.0" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Unit test triggers eval sink with payload {code: '1+2'}" + evidence_files: + - "../benchmark/truth/js-unsafe-eval.json" diff --git a/bench/reachability-benchmark/cases/js/unsafe-eval/entrypoints.yaml b/bench/reachability-benchmark/cases/js/unsafe-eval/entrypoints.yaml new file mode 100644 index 000000000..93188c3ec --- /dev/null +++ b/bench/reachability-benchmark/cases/js/unsafe-eval/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "js-unsafe-eval:001" +entries: + 
http: + - id: "POST /api/exec" + route: "/api/exec" + method: "POST" + handler: "app.js::handleRequest" + description: "Executes user-supplied code (unsafe eval)" diff --git a/bench/reachability-benchmark/cases/js/unsafe-eval/package.json b/bench/reachability-benchmark/cases/js/unsafe-eval/package.json new file mode 100644 index 000000000..f30652c6c --- /dev/null +++ b/bench/reachability-benchmark/cases/js/unsafe-eval/package.json @@ -0,0 +1,9 @@ +{ + "name": "rb-case-unsafe-eval", + "version": "1.0.0", + "description": "Reachability benchmark case: unsafe eval in minimal JS handler", + "license": "Apache-2.0", + "scripts": { + "test": "./tests/run-tests.sh" + } +} diff --git a/bench/reachability-benchmark/cases/js/unsafe-eval/src/app.js b/bench/reachability-benchmark/cases/js/unsafe-eval/src/app.js new file mode 100644 index 000000000..dcc5e6837 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/unsafe-eval/src/app.js @@ -0,0 +1,17 @@ +'use strict'; + +// Minimal HTTP-like handler exposing an unsafe eval sink for reachability. +// The handler is intentionally small to avoid external dependencies. +function handleRequest(body) { + const code = body && body.code; + if (typeof code !== 'string') { + return { status: 400, body: 'bad request' }; + } + + // Dangerous: executes user-controlled code. The test drives this sink. 
+ // eslint-disable-next-line no-eval + const result = eval(code); + return { status: 200, body: String(result) }; +} + +module.exports = { handleRequest }; diff --git a/bench/reachability-benchmark/cases/js/unsafe-eval/tests/run-tests.sh b/bench/reachability-benchmark/cases/js/unsafe-eval/tests/run-tests.sh new file mode 100644 index 000000000..bb44c525d --- /dev/null +++ b/bench/reachability-benchmark/cases/js/unsafe-eval/tests/run-tests.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +node test_reach.js diff --git a/bench/reachability-benchmark/cases/js/unsafe-eval/tests/test_reach.js b/bench/reachability-benchmark/cases/js/unsafe-eval/tests/test_reach.js new file mode 100644 index 000000000..222eb4949 --- /dev/null +++ b/bench/reachability-benchmark/cases/js/unsafe-eval/tests/test_reach.js @@ -0,0 +1,55 @@ +'use strict'; + +const assert = require('assert'); +const fs = require('fs'); +const path = require('path'); +const { handleRequest } = require('../src/app'); + +const OUT_DIR = path.resolve(__dirname, '../outputs'); +const TRACE_DIR = path.join(OUT_DIR, 'traces'); +const COVERAGE_FILE = path.join(OUT_DIR, 'coverage.json'); +const TRACE_FILE = path.join(TRACE_DIR, 'traces.json'); + +function ensureDirs() { + fs.mkdirSync(OUT_DIR, { recursive: true }); + fs.mkdirSync(TRACE_DIR, { recursive: true }); +} + +function recordTrace(entry, pathNodes) { + fs.writeFileSync( + TRACE_FILE, + JSON.stringify({ + entry, + path: pathNodes, + sink: 'UnsafeEval::handleRequest', + notes: 'Test-driven dynamic trace' + }, null, 2) + ); +} + +function recordCoverage(filePath, lines) { + fs.writeFileSync( + COVERAGE_FILE, + JSON.stringify({ + files: { + [filePath]: { + lines_covered: lines, + lines_total: 30 + } + } + }, null, 2) + ); +} + +(function main() { + ensureDirs(); + const payload = { code: '1 + 2' }; + const response = handleRequest(payload); + 
assert.strictEqual(response.status, 200); + assert.strictEqual(response.body, '3'); + + recordTrace('POST /api/exec', ['app.js:handleRequest', 'eval(code)']); + recordCoverage('src/app.js', [5, 6, 7, 12, 15]); + // Marker file proves sink executed + fs.writeFileSync(path.join(OUT_DIR, 'SINK_REACHED'), 'true'); +})(); diff --git a/bench/reachability-benchmark/cases/py/django-ssti/case.yaml b/bench/reachability-benchmark/cases/py/django-ssti/case.yaml new file mode 100644 index 000000000..58eb1e89d --- /dev/null +++ b/bench/reachability-benchmark/cases/py/django-ssti/case.yaml @@ -0,0 +1,38 @@ +id: "py-django-ssti:105" +language: py +project: django-ssti +version: "1.0.0" +description: "Django-like template rendering (autoescape off) reachable" +entrypoints: + - "POST /render" +sinks: + - id: "DjangoSSTI::render" + path: "src/app.py::handle_request" + kind: "http" + location: + file: src/app.py + line: 5 + notes: "template replace without escaping" +environment: + os_image: "python:3.12-alpine" + runtime: + python: "3.12" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Template rendering reachable with autoescape off" + evidence_files: + - "../benchmark/truth/py-django-ssti.json" diff --git a/bench/reachability-benchmark/cases/py/django-ssti/entrypoints.yaml b/bench/reachability-benchmark/cases/py/django-ssti/entrypoints.yaml new file mode 100644 index 000000000..9a93168b5 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/django-ssti/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "py-django-ssti:105" +entries: + http: + - id: "POST /render" + route: "/render" + method: "POST" + handler: 
"app.handle_request" + description: "Template rendering with autoescape off" diff --git a/bench/reachability-benchmark/cases/py/django-ssti/requirements.txt b/bench/reachability-benchmark/cases/py/django-ssti/requirements.txt new file mode 100644 index 000000000..2f03989dc --- /dev/null +++ b/bench/reachability-benchmark/cases/py/django-ssti/requirements.txt @@ -0,0 +1 @@ +# stdlib only diff --git a/bench/reachability-benchmark/cases/py/django-ssti/src/__pycache__/app.cpython-312.pyc b/bench/reachability-benchmark/cases/py/django-ssti/src/__pycache__/app.cpython-312.pyc new file mode 100644 index 000000000..b41068090 Binary files /dev/null and b/bench/reachability-benchmark/cases/py/django-ssti/src/__pycache__/app.cpython-312.pyc differ diff --git a/bench/reachability-benchmark/cases/py/django-ssti/src/app.py b/bench/reachability-benchmark/cases/py/django-ssti/src/app.py new file mode 100644 index 000000000..bd11628b5 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/django-ssti/src/app.py @@ -0,0 +1,12 @@ +"""Django-like template rendering with autoescape off (reachable).""" + +def render(template: str, context: dict) -> str: + # naive render; simulates autoescape off + return template.replace("{{user}}", context.get("user", "guest")) + +def handle_request(body): + template = body.get("template") if isinstance(body, dict) else None + if not isinstance(template, str): + return {"status": 400, "body": "bad request"} + rendered = render(template, {"user": "guest"}) + return {"status": 200, "body": rendered} diff --git a/bench/reachability-benchmark/cases/py/django-ssti/tests/run-tests.sh b/bench/reachability-benchmark/cases/py/django-ssti/tests/run-tests.sh new file mode 100644 index 000000000..56a170b35 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/django-ssti/tests/run-tests.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C 
+export PYTHONPATH="$(cd .. && pwd)/src" +python test_reach.py diff --git a/bench/reachability-benchmark/cases/py/django-ssti/tests/test_reach.py b/bench/reachability-benchmark/cases/py/django-ssti/tests/test_reach.py new file mode 100644 index 000000000..65dcb3b9a --- /dev/null +++ b/bench/reachability-benchmark/cases/py/django-ssti/tests/test_reach.py @@ -0,0 +1,48 @@ +import json +import pathlib +from app import handle_request + +ROOT = pathlib.Path(__file__).resolve().parent.parent +OUT = ROOT / "outputs" +TRACE_DIR = OUT / "traces" +COVERAGE_FILE = OUT / "coverage.json" +TRACE_FILE = TRACE_DIR / "traces.json" + +def ensure_dirs(): + OUT.mkdir(parents=True, exist_ok=True) + TRACE_DIR.mkdir(parents=True, exist_ok=True) + +def record_trace(entry, path_nodes): + TRACE_FILE.write_text( + json.dumps({ + "entry": entry, + "path": path_nodes, + "sink": "DjangoSSTI::render", + "notes": "Template rendered (autoescape off)" + }, indent=2) + ) + +def record_coverage(file_path, lines): + COVERAGE_FILE.write_text( + json.dumps({ + "files": { + file_path: { + "lines_covered": lines, + "lines_total": 38 + } + } + }, indent=2) + ) + +def test_reach(): + ensure_dirs() + res = handle_request({"template": "Hello {{user}}"}) + assert res["status"] == 200 + assert res["body"] == "Hello guest" + record_trace("POST /render", ["app.py::handle_request", "render"]) + record_coverage("src/app.py", [3,4,5,7,8,9,10]) + (OUT / "SINK_REACHED").write_text("true") + + +if __name__ == "__main__": + test_reach() diff --git a/bench/reachability-benchmark/cases/py/fastapi-guarded/case.yaml b/bench/reachability-benchmark/cases/py/fastapi-guarded/case.yaml new file mode 100644 index 000000000..95dd6a095 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/fastapi-guarded/case.yaml @@ -0,0 +1,38 @@ +id: "py-fastapi-guarded:104" +language: py +project: fastapi-guarded +version: "1.0.0" +description: "FastAPI-like exec guarded by ALLOW_EXEC flag (unreachable by default)" +entrypoints: + - "POST 
/exec" +sinks: + - id: "FastApiGuarded::handle_request" + path: "src/app.py::handle_request" + kind: "process" + location: + file: src/app.py + line: 7 + notes: "eval guarded by ALLOW_EXEC" +environment: + os_image: "python:3.12-alpine" + runtime: + python: "3.12" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Guard blocks eval unless ALLOW_EXEC=true" + evidence_files: + - "../benchmark/truth/py-fastapi-guarded.json" diff --git a/bench/reachability-benchmark/cases/py/fastapi-guarded/entrypoints.yaml b/bench/reachability-benchmark/cases/py/fastapi-guarded/entrypoints.yaml new file mode 100644 index 000000000..c34b33375 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/fastapi-guarded/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "py-fastapi-guarded:104" +entries: + http: + - id: "POST /exec" + route: "/exec" + method: "POST" + handler: "app.handle_request" + description: "Exec guarded by ALLOW_EXEC" diff --git a/bench/reachability-benchmark/cases/py/fastapi-guarded/requirements.txt b/bench/reachability-benchmark/cases/py/fastapi-guarded/requirements.txt new file mode 100644 index 000000000..2f03989dc --- /dev/null +++ b/bench/reachability-benchmark/cases/py/fastapi-guarded/requirements.txt @@ -0,0 +1 @@ +# stdlib only diff --git a/bench/reachability-benchmark/cases/py/fastapi-guarded/src/__pycache__/app.cpython-312.pyc b/bench/reachability-benchmark/cases/py/fastapi-guarded/src/__pycache__/app.cpython-312.pyc new file mode 100644 index 000000000..d9eef0ffb Binary files /dev/null and b/bench/reachability-benchmark/cases/py/fastapi-guarded/src/__pycache__/app.cpython-312.pyc differ diff --git 
a/bench/reachability-benchmark/cases/py/fastapi-guarded/src/app.py b/bench/reachability-benchmark/cases/py/fastapi-guarded/src/app.py new file mode 100644 index 000000000..a3ac94a9b --- /dev/null +++ b/bench/reachability-benchmark/cases/py/fastapi-guarded/src/app.py @@ -0,0 +1,11 @@ +"""FastAPI-like handler with feature flag guarding exec.""" + +def handle_request(body, env=None): + env = env or {} + if env.get("ALLOW_EXEC") != "true": + return {"status": 403, "body": "forbidden"} + code = body.get("code") if isinstance(body, dict) else None + if not isinstance(code, str): + return {"status": 400, "body": "bad request"} + result = eval(code) + return {"status": 200, "body": str(result)} diff --git a/bench/reachability-benchmark/cases/py/fastapi-guarded/tests/run-tests.sh b/bench/reachability-benchmark/cases/py/fastapi-guarded/tests/run-tests.sh new file mode 100644 index 000000000..1fb7eb299 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/fastapi-guarded/tests/run-tests.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +export PYTHONPATH="$(cd .. 
&& pwd)/src" +python test_unreachable.py diff --git a/bench/reachability-benchmark/cases/py/fastapi-guarded/tests/test_unreachable.py b/bench/reachability-benchmark/cases/py/fastapi-guarded/tests/test_unreachable.py new file mode 100644 index 000000000..8b5eee237 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/fastapi-guarded/tests/test_unreachable.py @@ -0,0 +1,47 @@ +import json +import pathlib +from app import handle_request + +ROOT = pathlib.Path(__file__).resolve().parent.parent +OUT = ROOT / "outputs" +TRACE_DIR = OUT / "traces" +COVERAGE_FILE = OUT / "coverage.json" +TRACE_FILE = TRACE_DIR / "traces.json" + +def ensure_dirs(): + OUT.mkdir(parents=True, exist_ok=True) + TRACE_DIR.mkdir(parents=True, exist_ok=True) + +def record_trace(entry, path_nodes): + TRACE_FILE.write_text( + json.dumps({ + "entry": entry, + "path": path_nodes, + "sink": "FastApiGuarded::handle_request", + "notes": "Guard blocked eval" + }, indent=2) + ) + +def record_coverage(file_path, lines): + COVERAGE_FILE.write_text( + json.dumps({ + "files": { + file_path: { + "lines_covered": lines, + "lines_total": 40 + } + } + }, indent=2) + ) + +def test_unreachable(): + ensure_dirs() + res = handle_request({"code": "10/2"}, env={"ALLOW_EXEC": "false"}) + assert res["status"] == 403 + assert res["body"] == "forbidden" + record_trace("POST /exec", ["app.py::handle_request", "guard: ALLOW_EXEC!=true"]) + record_coverage("src/app.py", [3,4,5,8,9,11]) + + +if __name__ == "__main__": + test_unreachable() diff --git a/bench/reachability-benchmark/cases/py/flask-template/case.yaml b/bench/reachability-benchmark/cases/py/flask-template/case.yaml new file mode 100644 index 000000000..edb836f32 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/flask-template/case.yaml @@ -0,0 +1,38 @@ +id: "py-flask-template:103" +language: py +project: flask-template +version: "1.0.0" +description: "Template rendering reachable via POST /render" +entrypoints: + - "POST /render" +sinks: + - id: 
"FlaskTemplate::render" + path: "src/app.py::handle_request" + kind: "http" + location: + file: src/app.py + line: 5 + notes: "template replace on user input" +environment: + os_image: "python:3.12-alpine" + runtime: + python: "3.12" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Template rendering reachable" + evidence_files: + - "../benchmark/truth/py-flask-template.json" diff --git a/bench/reachability-benchmark/cases/py/flask-template/entrypoints.yaml b/bench/reachability-benchmark/cases/py/flask-template/entrypoints.yaml new file mode 100644 index 000000000..fa468cdaa --- /dev/null +++ b/bench/reachability-benchmark/cases/py/flask-template/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "py-flask-template:103" +entries: + http: + - id: "POST /render" + route: "/render" + method: "POST" + handler: "app.handle_request" + description: "Template rendering" diff --git a/bench/reachability-benchmark/cases/py/flask-template/requirements.txt b/bench/reachability-benchmark/cases/py/flask-template/requirements.txt new file mode 100644 index 000000000..7359c5daa --- /dev/null +++ b/bench/reachability-benchmark/cases/py/flask-template/requirements.txt @@ -0,0 +1 @@ +# stdlib only for this minimal case diff --git a/bench/reachability-benchmark/cases/py/flask-template/src/__pycache__/app.cpython-312.pyc b/bench/reachability-benchmark/cases/py/flask-template/src/__pycache__/app.cpython-312.pyc new file mode 100644 index 000000000..e4c6211a8 Binary files /dev/null and b/bench/reachability-benchmark/cases/py/flask-template/src/__pycache__/app.cpython-312.pyc differ diff --git 
a/bench/reachability-benchmark/cases/py/flask-template/src/app.py b/bench/reachability-benchmark/cases/py/flask-template/src/app.py new file mode 100644 index 000000000..9df4b17d0 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/flask-template/src/app.py @@ -0,0 +1,12 @@ +"""Minimal flask-like template rendering sink (reachable).""" + +def render(template: str, context: dict) -> str: + return template.replace("{{name}}", context.get("name", "guest")) + +def handle_request(body): + template = body.get("template") if isinstance(body, dict) else None + if not isinstance(template, str): + return {"status": 400, "body": "bad request"} + rendered = render(template, {"name": "guest"}) + # Sink: returns rendered template (models potential SSTI) + return {"status": 200, "body": rendered} diff --git a/bench/reachability-benchmark/cases/py/flask-template/tests/run-tests.sh b/bench/reachability-benchmark/cases/py/flask-template/tests/run-tests.sh new file mode 100644 index 000000000..56a170b35 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/flask-template/tests/run-tests.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +export PYTHONPATH="$(cd .. 
&& pwd)/src" +python test_reach.py diff --git a/bench/reachability-benchmark/cases/py/flask-template/tests/test_reach.py b/bench/reachability-benchmark/cases/py/flask-template/tests/test_reach.py new file mode 100644 index 000000000..f8605178a --- /dev/null +++ b/bench/reachability-benchmark/cases/py/flask-template/tests/test_reach.py @@ -0,0 +1,48 @@ +import json +import pathlib +from app import handle_request + +ROOT = pathlib.Path(__file__).resolve().parent.parent +OUT = ROOT / "outputs" +TRACE_DIR = OUT / "traces" +COVERAGE_FILE = OUT / "coverage.json" +TRACE_FILE = TRACE_DIR / "traces.json" + +def ensure_dirs(): + OUT.mkdir(parents=True, exist_ok=True) + TRACE_DIR.mkdir(parents=True, exist_ok=True) + +def record_trace(entry, path_nodes): + TRACE_FILE.write_text( + json.dumps({ + "entry": entry, + "path": path_nodes, + "sink": "FlaskTemplate::render", + "notes": "Template rendered" + }, indent=2) + ) + +def record_coverage(file_path, lines): + COVERAGE_FILE.write_text( + json.dumps({ + "files": { + file_path: { + "lines_covered": lines, + "lines_total": 40 + } + } + }, indent=2) + ) + +def test_reach(): + ensure_dirs() + res = handle_request({"template": "Hello {{name}}"}) + assert res["status"] == 200 + assert res["body"] == "Hello guest" + record_trace("POST /render", ["app.py::handle_request", "render"]) + record_coverage("src/app.py", [4,5,6,8,9,10,11]) + (OUT / "SINK_REACHED").write_text("true") + + +if __name__ == "__main__": + test_reach() diff --git a/bench/reachability-benchmark/cases/py/guarded-exec/case.yaml b/bench/reachability-benchmark/cases/py/guarded-exec/case.yaml new file mode 100644 index 000000000..9b948a1e2 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/guarded-exec/case.yaml @@ -0,0 +1,38 @@ +id: "py-guarded-exec:102" +language: py +project: guarded-exec +version: "1.0.0" +description: "Python eval guarded by FEATURE_ENABLE flag; unreachable by default" +entrypoints: + - "POST /api/exec" +sinks: + - id: 
"PyGuardedExec::handle_request" + path: "src/app.py::handle_request" + kind: "process" + location: + file: src/app.py + line: 7 + notes: "eval guarded by FEATURE_ENABLE" +environment: + os_image: "python:3.12-alpine" + runtime: + python: "3.12" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Guard blocks eval when FEATURE_ENABLE != 1" + evidence_files: + - "../benchmark/truth/py-guarded-exec.json" diff --git a/bench/reachability-benchmark/cases/py/guarded-exec/entrypoints.yaml b/bench/reachability-benchmark/cases/py/guarded-exec/entrypoints.yaml new file mode 100644 index 000000000..188b66208 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/guarded-exec/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "py-guarded-exec:102" +entries: + http: + - id: "POST /api/exec" + route: "/api/exec" + method: "POST" + handler: "app.handle_request" + description: "Eval guarded by FEATURE_ENABLE" diff --git a/bench/reachability-benchmark/cases/py/guarded-exec/requirements.txt b/bench/reachability-benchmark/cases/py/guarded-exec/requirements.txt new file mode 100644 index 000000000..bc6639813 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/guarded-exec/requirements.txt @@ -0,0 +1 @@ +# Intentionally empty; stdlib only. 
diff --git a/bench/reachability-benchmark/cases/py/guarded-exec/src/__pycache__/app.cpython-312.pyc b/bench/reachability-benchmark/cases/py/guarded-exec/src/__pycache__/app.cpython-312.pyc new file mode 100644 index 000000000..2c6e02041 Binary files /dev/null and b/bench/reachability-benchmark/cases/py/guarded-exec/src/__pycache__/app.cpython-312.pyc differ diff --git a/bench/reachability-benchmark/cases/py/guarded-exec/src/app.py b/bench/reachability-benchmark/cases/py/guarded-exec/src/app.py new file mode 100644 index 000000000..90c4280e1 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/guarded-exec/src/app.py @@ -0,0 +1,13 @@ +"""Python handler with feature-flag guard for eval sink.""" + +def handle_request(body, env=None): + env = env or {} + if env.get("FEATURE_ENABLE") != "1": + return {"status": 403, "body": "disabled"} + + code = body.get("code") if isinstance(body, dict) else None + if not isinstance(code, str): + return {"status": 400, "body": "bad request"} + + result = eval(code) + return {"status": 200, "body": str(result)} diff --git a/bench/reachability-benchmark/cases/py/guarded-exec/tests/run-tests.sh b/bench/reachability-benchmark/cases/py/guarded-exec/tests/run-tests.sh new file mode 100644 index 000000000..1fb7eb299 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/guarded-exec/tests/run-tests.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +export PYTHONPATH="$(cd .. 
&& pwd)/src" +python test_unreachable.py diff --git a/bench/reachability-benchmark/cases/py/guarded-exec/tests/test_unreachable.py b/bench/reachability-benchmark/cases/py/guarded-exec/tests/test_unreachable.py new file mode 100644 index 000000000..b4d597177 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/guarded-exec/tests/test_unreachable.py @@ -0,0 +1,48 @@ +import json +import os +import pathlib +from app import handle_request + +ROOT = pathlib.Path(__file__).resolve().parent.parent +OUT = ROOT / "outputs" +TRACE_DIR = OUT / "traces" +COVERAGE_FILE = OUT / "coverage.json" +TRACE_FILE = TRACE_DIR / "traces.json" + +def ensure_dirs(): + OUT.mkdir(parents=True, exist_ok=True) + TRACE_DIR.mkdir(parents=True, exist_ok=True) + +def record_trace(entry, path_nodes): + TRACE_FILE.write_text( + json.dumps({ + "entry": entry, + "path": path_nodes, + "sink": "PyGuardedExec::handle_request", + "notes": "Guard blocked eval" + }, indent=2) + ) + +def record_coverage(file_path, lines): + COVERAGE_FILE.write_text( + json.dumps({ + "files": { + file_path: { + "lines_covered": lines, + "lines_total": 34 + } + } + }, indent=2) + ) + +def test_unreachable(): + ensure_dirs() + res = handle_request({"code": "5*5"}, env={"FEATURE_ENABLE": "0"}) + assert res["status"] == 403 + assert res["body"] == "disabled" + + record_trace("POST /api/exec", ["app.py::handle_request", "guard: FEATURE_ENABLE != 1"]) + record_coverage("src/app.py", [3,4,5,8,9,11]) + +if __name__ == "__main__": + test_unreachable() diff --git a/bench/reachability-benchmark/cases/py/unsafe-exec/case.yaml b/bench/reachability-benchmark/cases/py/unsafe-exec/case.yaml new file mode 100644 index 000000000..2d676cc19 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/unsafe-exec/case.yaml @@ -0,0 +1,38 @@ +id: "py-unsafe-exec:101" +language: py +project: unsafe-exec +version: "1.0.0" +description: "Python handler with reachable eval sink" +entrypoints: + - "POST /api/exec" +sinks: + - id: 
"PyUnsafeExec::handle_request" + path: "src/app.py::handle_request" + kind: "process" + location: + file: src/app.py + line: 8 + notes: "eval on user input" +environment: + os_image: "python:3.12-alpine" + runtime: + python: "3.12" + source_date_epoch: 1730000000 +build: + command: "./build/build.sh" + source_date_epoch: 1730000000 + outputs: + artifact_path: outputs/binary.tar.gz + sbom_path: outputs/sbom.cdx.json + coverage_path: outputs/coverage.json + traces_dir: outputs/traces +test: + command: "./tests/run-tests.sh" + expected_coverage: + - outputs/coverage.json + expected_traces: + - outputs/traces/traces.json +ground_truth: + summary: "Eval reachable via POST /api/exec" + evidence_files: + - "../benchmark/truth/py-unsafe-exec.json" diff --git a/bench/reachability-benchmark/cases/py/unsafe-exec/entrypoints.yaml b/bench/reachability-benchmark/cases/py/unsafe-exec/entrypoints.yaml new file mode 100644 index 000000000..64b9f5e7c --- /dev/null +++ b/bench/reachability-benchmark/cases/py/unsafe-exec/entrypoints.yaml @@ -0,0 +1,8 @@ +case_id: "py-unsafe-exec:101" +entries: + http: + - id: "POST /api/exec" + route: "/api/exec" + method: "POST" + handler: "app.handle_request" + description: "Executes user code via eval" diff --git a/bench/reachability-benchmark/cases/py/unsafe-exec/requirements.txt b/bench/reachability-benchmark/cases/py/unsafe-exec/requirements.txt new file mode 100644 index 000000000..ae8d2f83e --- /dev/null +++ b/bench/reachability-benchmark/cases/py/unsafe-exec/requirements.txt @@ -0,0 +1 @@ +# Intentionally empty; uses stdlib only. 
diff --git a/bench/reachability-benchmark/cases/py/unsafe-exec/src/__pycache__/app.cpython-312.pyc b/bench/reachability-benchmark/cases/py/unsafe-exec/src/__pycache__/app.cpython-312.pyc new file mode 100644 index 000000000..19fb751fb Binary files /dev/null and b/bench/reachability-benchmark/cases/py/unsafe-exec/src/__pycache__/app.cpython-312.pyc differ diff --git a/bench/reachability-benchmark/cases/py/unsafe-exec/src/app.py b/bench/reachability-benchmark/cases/py/unsafe-exec/src/app.py new file mode 100644 index 000000000..ac0bb9836 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/unsafe-exec/src/app.py @@ -0,0 +1,10 @@ +"""Minimal Python handler with an unsafe eval sink.""" + +def handle_request(body): + code = body.get("code") if isinstance(body, dict) else None + if not isinstance(code, str): + return {"status": 400, "body": "bad request"} + + # Sink: eval on user input (reachable) + result = eval(code) + return {"status": 200, "body": str(result)} diff --git a/bench/reachability-benchmark/cases/py/unsafe-exec/tests/run-tests.sh b/bench/reachability-benchmark/cases/py/unsafe-exec/tests/run-tests.sh new file mode 100644 index 000000000..56a170b35 --- /dev/null +++ b/bench/reachability-benchmark/cases/py/unsafe-exec/tests/run-tests.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" +export SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH:-1730000000} +export TZ=UTC +export LC_ALL=C +export PYTHONPATH="$(cd .. 
&& pwd)/src" +python test_reach.py diff --git a/bench/reachability-benchmark/cases/py/unsafe-exec/tests/test_reach.py b/bench/reachability-benchmark/cases/py/unsafe-exec/tests/test_reach.py new file mode 100644 index 000000000..142dd635b --- /dev/null +++ b/bench/reachability-benchmark/cases/py/unsafe-exec/tests/test_reach.py @@ -0,0 +1,54 @@ +import json +import os +import pathlib +from app import handle_request + +ROOT = pathlib.Path(__file__).resolve().parent.parent +OUT = ROOT / "outputs" +TRACE_DIR = OUT / "traces" +COVERAGE_FILE = OUT / "coverage.json" +TRACE_FILE = TRACE_DIR / "traces.json" + + +def ensure_dirs(): + OUT.mkdir(parents=True, exist_ok=True) + TRACE_DIR.mkdir(parents=True, exist_ok=True) + + +def record_trace(entry, path_nodes): + TRACE_FILE.write_text( + json.dumps({ + "entry": entry, + "path": path_nodes, + "sink": "PyUnsafeExec::handle_request", + "notes": "Eval reached" + }, indent=2) + ) + + +def record_coverage(file_path, lines): + COVERAGE_FILE.write_text( + json.dumps({ + "files": { + file_path: { + "lines_covered": lines, + "lines_total": 30 + } + } + }, indent=2) + ) + + +def test_reach(): + ensure_dirs() + res = handle_request({"code": "3*7"}) + assert res["status"] == 200 + assert res["body"] == "21" + + record_trace("POST /api/exec", ["app.py::handle_request", "eval(code)"]) + record_coverage("src/app.py", [3, 4, 5, 8, 10]) + (OUT / "SINK_REACHED").write_text("true") + + +if __name__ == "__main__": + test_reach() diff --git a/bench/reachability-benchmark/tools/scorer/README.md b/bench/reachability-benchmark/tools/scorer/README.md index 23aa5f71e..8739d5b97 100644 --- a/bench/reachability-benchmark/tools/scorer/README.md +++ b/bench/reachability-benchmark/tools/scorer/README.md @@ -1,11 +1,34 @@ -# rb-score (placeholder) +# rb-score -Planned CLI to score reachability submissions against truth sets. +Deterministic scorer for the reachability benchmark. 
-Future work (BENCH-SCORER-513-008): -- Validate submission against `schemas/submission.schema.json`. -- Validate truth against `schemas/truth.schema.json`. -- Compute precision/recall/F1, explainability score (0-3), runtime stats, determinism rate. -- Emit JSON report with stable ordering. +## What it does +- Validates submissions against `schemas/submission.schema.json` and truth against `schemas/truth.schema.json`. +- Computes precision/recall/F1 (micro, sink-level). +- Computes explainability score per prediction (0–3) and averages it. +- Checks duplicate predictions for determinism (inconsistent duplicates lower the rate). +- Surfaces runtime metadata from the submission (`run` block). -For now this folder is a stub; implementation will be added in task 513-008 once schemas stabilize. +## Install (offline-friendly) +```bash +python -m pip install -r requirements.txt +``` + +## Usage +```bash +./rb_score.py --truth ../../benchmark/truth/public.json --submission ../../benchmark/submissions/sample.json --format json +``` + +## Output +- `text` (default): short human-readable summary. +- `json`: deterministic JSON with top-level metrics and per-case breakdown. + +## Tests +```bash +python -m unittest tests/test_scoring.py +``` + +## Notes +- Predictions for sinks not present in truth count as false positives (strict posture). +- Truth sinks with label `unknown` are ignored for FN/FP counting. +- Explainability tiering: 0=no context; 1=path>=2 nodes; 2=entry + path>=3; 3=guards present. diff --git a/bench/reachability-benchmark/tools/scorer/__init__.py b/bench/reachability-benchmark/tools/scorer/__init__.py new file mode 100644 index 000000000..f595afb2b --- /dev/null +++ b/bench/reachability-benchmark/tools/scorer/__init__.py @@ -0,0 +1,3 @@ +from . 
import rb_score + +__all__ = ["rb_score"] diff --git a/bench/reachability-benchmark/tools/scorer/rb-score b/bench/reachability-benchmark/tools/scorer/rb-score new file mode 100644 index 000000000..94cc703bc --- /dev/null +++ b/bench/reachability-benchmark/tools/scorer/rb-score @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +python3 "$SCRIPT_DIR/rb_score.py" "$@" diff --git a/bench/reachability-benchmark/tools/scorer/rb_score.py b/bench/reachability-benchmark/tools/scorer/rb_score.py new file mode 100644 index 000000000..df96fdb65 --- /dev/null +++ b/bench/reachability-benchmark/tools/scorer/rb_score.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python3 +"""rb-score: deterministic scorer for reachability benchmark submissions. + +Features (task BENCH-SCORER-513-008): +- Validate submission and truth against published schemas. +- Compute precision / recall / F1 at sink level (micro-averaged). +- Compute explainability score per prediction (0–3) and average. +- Surface runtime stats from submission metadata. +- Emit deterministic JSON or human-readable text. + +Assumptions: +- Truth labels may include "unknown"; these are skipped for FN/FP. +- A prediction for a sink absent in truth counts as FP (strict posture). +- Duplicate predictions for the same sink must agree; disagreement reduces determinism rate. 
+""" +from __future__ import annotations + +import argparse +import json +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, List, Tuple + +import yaml +from jsonschema import Draft202012Validator + +ROOT = Path(__file__).resolve().parents[1] +SCHEMAS = { + "truth": ROOT / "schemas" / "truth.schema.json", + "submission": ROOT / "schemas" / "submission.schema.json", +} + + +@dataclass +class CaseMetrics: + case_id: str + tp: int + fp: int + fn: int + precision: float + recall: float + f1: float + explain_avg: float + + +@dataclass +class ScoreReport: + precision: float + recall: float + f1: float + tp: int + fp: int + fn: int + explain_avg: float + determinism_rate: float + runtime: Dict[str, object] + cases: List[CaseMetrics] + + +def load_json_or_yaml(path: Path): + text = path.read_text(encoding="utf-8") + if path.suffix.lower() in {".yaml", ".yml"}: + return yaml.safe_load(text) + return json.loads(text) + + +def validate_against(schema_path: Path, payload) -> Tuple[bool, List[str]]: + schema = load_json_or_yaml(schema_path) + validator = Draft202012Validator(schema) + errors = sorted(validator.iter_errors(payload), key=lambda e: e.path) + if not errors: + return True, [] + return False, [f"{'/'.join(str(p) for p in err.path) or ''}: {err.message}" for err in errors] + + +def safe_div(num: int, denom: int, default: float) -> float: + if denom == 0: + return default + return num / denom + + +def explain_score(pred: dict) -> int: + expl = pred.get("explain") or {} + path = expl.get("path") or [] + entry = expl.get("entry") + guards = expl.get("guards") or [] + if guards: + return 3 + if entry and len(path) >= 3: + return 2 + if len(path) >= 2: + return 1 + return 0 + + +def determinism_rate(preds: Iterable[dict]) -> float: + """Detect inconsistent duplicate predictions for the same sink.""" + by_sink: Dict[str, set] = {} + total_groups = 0 + consistent_groups = 0 + for pred in preds: + sink_id = 
pred.get("sink_id") + if sink_id is None: + continue + by_sink.setdefault(sink_id, set()).add(pred.get("prediction")) + for values in by_sink.values(): + total_groups += 1 + if len(values) == 1: + consistent_groups += 1 + if total_groups == 0: + return 1.0 + return consistent_groups / total_groups + + +def score_case(case_id: str, truth_sinks: Dict[str, str], predicted: List[dict]) -> CaseMetrics: + truth_reach = {sid for sid, label in truth_sinks.items() if label == "reachable"} + truth_unreach = {sid for sid, label in truth_sinks.items() if label == "unreachable"} + + pred_reach = {p["sink_id"] for p in predicted if p.get("prediction") == "reachable"} + + tp = len(pred_reach & truth_reach) + fp = len(pred_reach - truth_reach) + fn = len(truth_reach - pred_reach) + + precision = safe_div(tp, tp + fp, 1.0) + recall = safe_div(tp, tp + fn, 1.0) + f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall) + + explain_scores = [explain_score(p) for p in predicted] + explain_avg = safe_div(sum(explain_scores), len(explain_scores), 0.0) + + return CaseMetrics(case_id, tp, fp, fn, precision, recall, f1, explain_avg) + + +def aggregate(cases: List[CaseMetrics], preds: List[dict]) -> ScoreReport: + tp = sum(c.tp for c in cases) + fp = sum(c.fp for c in cases) + fn = sum(c.fn for c in cases) + precision = safe_div(tp, tp + fp, 1.0) + recall = safe_div(tp, tp + fn, 1.0) + f1 = 0.0 if (precision + recall) == 0 else 2 * precision * recall / (precision + recall) + explain_avg = safe_div(sum(c.explain_avg for c in cases), len(cases), 0.0) if cases else 0.0 + det_rate = determinism_rate(preds) + runtime = {} + return ScoreReport(precision, recall, f1, tp, fp, fn, explain_avg, det_rate, runtime, cases) + + +def build_truth_index(truth_doc: dict) -> Dict[str, Dict[str, str]]: + index: Dict[str, Dict[str, str]] = {} + for case in truth_doc.get("cases", []): + sinks = {s["sink_id"]: s["label"] for s in case.get("sinks", [])} + index[case["case_id"]] = 
sinks + return index + + +def score(truth_doc: dict, submission_doc: dict) -> ScoreReport: + truth_index = build_truth_index(truth_doc) + cases_metrics: List[CaseMetrics] = [] + all_preds: List[dict] = [] + + for sub_case in submission_doc.get("cases", []): + case_id = sub_case.get("case_id") + predicted_sinks = sub_case.get("sinks") or [] + all_preds.extend(predicted_sinks) + truth_sinks = truth_index.get(case_id, {}) + case_metrics = score_case(case_id, truth_sinks, predicted_sinks) + cases_metrics.append(case_metrics) + + report = aggregate(cases_metrics, all_preds) + report.runtime = submission_doc.get("run", {}) + return report + + +def report_as_dict(report: ScoreReport) -> dict: + return { + "version": "1.0.0", + "metrics": { + "precision": round(report.precision, 4), + "recall": round(report.recall, 4), + "f1": round(report.f1, 4), + "tp": report.tp, + "fp": report.fp, + "fn": report.fn, + "determinism_rate": round(report.determinism_rate, 4), + "explainability_avg": round(report.explain_avg, 4), + }, + "runtime": report.runtime, + "cases": [ + { + "case_id": c.case_id, + "precision": round(c.precision, 4), + "recall": round(c.recall, 4), + "f1": round(c.f1, 4), + "tp": c.tp, + "fp": c.fp, + "fn": c.fn, + "explainability_avg": round(c.explain_avg, 4), + } + for c in report.cases + ], + } + + +def format_text(report: ScoreReport) -> str: + lines = [] + lines.append("rb-score summary") + lines.append(f" precision {report.precision:.4f} recall {report.recall:.4f} f1 {report.f1:.4f}") + lines.append(f" tp {report.tp} fp {report.fp} fn {report.fn} determinism {report.determinism_rate:.4f} explain_avg {report.explain_avg:.4f}") + if report.runtime: + rt = report.runtime + lines.append(" runtime: " + ", ".join(f"{k}={v}" for k, v in sorted(rt.items()))) + lines.append(" cases:") + for c in report.cases: + lines.append( + f" - {c.case_id}: P {c.precision:.4f} R {c.recall:.4f} F1 {c.f1:.4f} tp {c.tp} fp {c.fp} fn {c.fn} explain_avg {c.explain_avg:.4f}" + ) + return 
"\n".join(lines) + + +def parse_args(argv: List[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Score reachability benchmark submissions") + parser.add_argument("--truth", required=True, help="Path to truth JSON") + parser.add_argument("--submission", required=True, help="Path to submission JSON") + parser.add_argument("--format", choices=["json", "text"], default="text", help="Output format") + return parser.parse_args(argv) + + +def main(argv: List[str]) -> int: + args = parse_args(argv) + truth_path = Path(args.truth) + submission_path = Path(args.submission) + + if not truth_path.exists() or not submission_path.exists(): + print("truth or submission file not found", file=sys.stderr) + return 2 + + truth_doc = load_json_or_yaml(truth_path) + submission_doc = load_json_or_yaml(submission_path) + + ok_truth, truth_errs = validate_against(SCHEMAS["truth"], truth_doc) + ok_sub, sub_errs = validate_against(SCHEMAS["submission"], submission_doc) + if not ok_truth or not ok_sub: + for msg in truth_errs + sub_errs: + print(f"validation_error: {msg}", file=sys.stderr) + return 3 + + report = score(truth_doc, submission_doc) + + if args.format == "json": + print(json.dumps(report_as_dict(report), sort_keys=True, indent=2)) + else: + print(format_text(report)) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/bench/reachability-benchmark/tools/scorer/requirements.txt b/bench/reachability-benchmark/tools/scorer/requirements.txt new file mode 100644 index 000000000..ee5f01df5 --- /dev/null +++ b/bench/reachability-benchmark/tools/scorer/requirements.txt @@ -0,0 +1,2 @@ +jsonschema==4.23.0 +PyYAML==6.0.2 diff --git a/bench/reachability-benchmark/tools/scorer/tests/__pycache__/test_scoring.cpython-312.pyc b/bench/reachability-benchmark/tools/scorer/tests/__pycache__/test_scoring.cpython-312.pyc new file mode 100644 index 000000000..41cdaffc4 Binary files /dev/null and 
b/bench/reachability-benchmark/tools/scorer/tests/__pycache__/test_scoring.cpython-312.pyc differ diff --git a/bench/reachability-benchmark/tools/scorer/tests/test_scoring.py b/bench/reachability-benchmark/tools/scorer/tests/test_scoring.py new file mode 100644 index 000000000..b8a7570b1 --- /dev/null +++ b/bench/reachability-benchmark/tools/scorer/tests/test_scoring.py @@ -0,0 +1,70 @@ +import json +import importlib.util +import unittest +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[3] # bench/reachability-benchmark +SCORER_PATH = ROOT / "tools" / "scorer" / "rb_score.py" + + +def load_module(): + spec = importlib.util.spec_from_file_location("rb_score", SCORER_PATH) + module = importlib.util.module_from_spec(spec) + assert spec.loader + import sys + sys.modules[spec.name] = module + spec.loader.exec_module(module) # type: ignore[attr-defined] + return module + + +def load_example(name: str): + return json.loads((ROOT / "schemas" / "examples" / name).read_text()) + + +rb_score = load_module() + + +class TestScoring(unittest.TestCase): + def test_score_perfect_prediction(self): + truth = load_example("truth.sample.json") + submission = load_example("submission.sample.json") + + report = rb_score.score(truth, submission) + self.assertEqual(report.tp, 1) + self.assertEqual(report.fp, 0) + self.assertEqual(report.fn, 0) + self.assertEqual(report.precision, 1.0) + self.assertEqual(report.recall, 1.0) + self.assertEqual(report.f1, 1.0) + self.assertGreaterEqual(report.explain_avg, 1.0) + self.assertEqual(report.determinism_rate, 1.0) + + def test_score_false_negative_and_fp(self): + truth = load_example("truth.sample.json") + submission = { + "version": "1.0.0", + "tool": {"name": "tool", "version": "1"}, + "run": {"platform": "ubuntu"}, + "cases": [ + { + "case_id": "js-express-blog:001", + "sinks": [ + {"sink_id": "Deserializer::parse", "prediction": "unreachable"}, + {"sink_id": "Fake::sink", "prediction": "reachable"}, + ], + } + ], + } + + 
report = rb_score.score(truth, submission) + self.assertEqual(report.tp, 0) + self.assertEqual(report.fp, 1) + self.assertEqual(report.fn, 1) + self.assertEqual(report.precision, 0.0) + self.assertEqual(report.recall, 0.0) + self.assertEqual(report.f1, 0.0) + self.assertEqual(report.determinism_rate, 1.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md b/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md index 00a05cfee..c4dc90a8d 100644 --- a/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md +++ b/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md @@ -31,29 +31,32 @@ | --- | --- | --- | --- | --- | | 150.A Orchestrator | Orchestrator Service Guild · AirGap Policy/Controller Guilds · Observability Guild | Sprint 0120.A – AirGap; Sprint 0130.A – Scanner; Sprint 0140.A – Graph | TODO | Graph (0140.A) and Zastava (0140.D) now DONE. AirGap staleness (0120.A 56-002/57/58) and Scanner surface (0130.A) remain blockers. Approaching readiness. | | 150.B PacksRegistry | Packs Registry Guild · Exporter Guild · Security Guild | Sprint 0120.A – AirGap; Sprint 0130.A – Scanner; Sprint 0140.A – Graph | TODO | Blocked on Orchestrator tenancy scaffolding; specs ready once 150.A flips to DOING. | -| 150.C Scheduler | Scheduler WebService/Worker Guilds · Findings Ledger Guild · Observability Guild | Sprint 0120.A – AirGap; Sprint 0130.A – Scanner; Sprint 0140.A – Graph | TODO | Graph overlays (0140.A) now DONE. Scheduler impact index work can proceed once Scanner surface (0130.A) clears. | +| 150.C Scheduler | Scheduler WebService/Worker Guilds · Findings Ledger Guild · Observability Guild | Sprint 0120.A – AirGap; Sprint 0130.A – Scanner; Sprint 0140.A – Graph | TODO | Graph overlays (0140.A) now DONE. Scheduler impact index work can proceed once Scanner surface (0130.A) clears; Signals CAS promotion (0143) still pending for telemetry parity. 
| | 150.D TaskRunner | Task Runner Guild · AirGap Guilds · Evidence Locker Guild | Sprint 0120.A – AirGap; Sprint 0130.A – Scanner; Sprint 0140.A – Graph | TODO | Execution engine upgrades staged; start once Orchestrator/Scheduler telemetry baselines exist. | ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | +| 2025-11-30 | Upstream refresh: Sprint 0120 AirGap staleness (LEDGER-AIRGAP-56-002/57/58) still BLOCKED; Scanner surface Sprint 0131 has Deno 26-009/010/011 DONE but Java/Lang chain 21-005..011 BLOCKED pending CI/CoreLinksets; SBOM wave (Sprint 0142) core tasks DONE with Console endpoints still BLOCKED on DEVOPS-SBOM-23-001 in Sprint 503; Signals (Sprint 0143) 24-002/003 remain BLOCKED on CAS promotion/provenance though 24-004/005 are DONE. No 150.* task can start yet. | Implementer | | 2025-11-28 | Synced with downstream sprints: Sprint 0141 (Graph) DONE, Sprint 0142 (SBOM) mostly DONE, Sprint 0143 (Signals) 3/5 DONE, Sprint 0144 (Zastava) DONE. Updated Sprint 0140 tracker and revised 150.* upstream dependency status. 150.A-Orchestrator may start once remaining AirGap/Scanner blockers clear. | Implementer | | 2025-11-28 | Upstream dependency check: Sprint 0120 (Policy/Reasoning) has LEDGER-29-007/008, LEDGER-34-101, LEDGER-AIRGAP-56-001 DONE but 56-002/57-001/58-001/ATTEST-73-001 BLOCKED. Sprint 0140 (Runtime/Signals) has all waves BLOCKED except SBOM (TODO). No Sprint 0130.A file found. All 150.* tasks remain TODO pending upstream readiness. | Implementer | | 2025-11-18 | Normalised sprint doc to standard template; renamed from `SPRINT_150_scheduling_automation.md`. 
| Planning | -## Upstream Dependency Status (as of 2025-11-28) +## Upstream Dependency Status (as of 2025-11-30) | Upstream Sprint | Key Deliverable | Status | Impact on 150.* | | --- | --- | --- | --- | -| Sprint 0120.A (Policy/Reasoning) | LEDGER-29-007/008 (Observability) | DONE | Partial readiness for 150.A | -| Sprint 0120.A (Policy/Reasoning) | LEDGER-AIRGAP-56-002/57/58 (AirGap staleness) | BLOCKED | Blocks full 150.A readiness | -| Sprint 0130.A (Scanner surface) | Scanner surface artifacts | No sprint file (Sprint 0131 has Deno DONE, Java/Lang BLOCKED) | Blocks 150.A, 150.C verification | +| Sprint 0120.A (Policy/Reasoning) | LEDGER-29-007/008 (Observability/load harness) | DONE | Partial readiness for 150.A | +| Sprint 0120.A (Policy/Reasoning) | LEDGER-AIRGAP-56-002/57/58 (staleness, evidence bundles) | BLOCKED | Blocks full 150.A readiness + 150.C verification | +| Sprint 0120.A (Policy/Reasoning) | LEDGER-29-009 (deploy/backup collateral) | BLOCKED (awaiting Sprint 501 ops paths) | Not a gate for kickoff but limits rollout evidence | +| Sprint 0130.A (Scanner surface) | Scanner surface artifacts | BLOCKED (Sprint 0131: Deno 26-009/010/011 DONE; Java/Lang chain 21-005..011 BLOCKED pending CI/CoreLinksets) | Blocks 150.A, 150.C verification | | Sprint 0140.A (Graph overlays) | 140.A Graph wave | **DONE** (Sprint 0141 complete) | Unblocks 150.C Scheduler graph deps | -| Sprint 0140.A (Graph overlays) | 140.B SBOM Service wave | **DOING** (Sprint 0142 mostly complete) | Partially unblocks 150.A/150.C | -| Sprint 0140.A (Graph overlays) | 140.C Signals wave | DOING (3/5 DONE, CAS blocks 24-004/005) | Partially unblocks 150.A telemetry | +| Sprint 0140.A (Graph overlays) | 140.B SBOM Service wave | CORE DONE (Sprint 0142: 21-001/002/003/004/23-001/002/29-001/002 DONE); Console endpoints 23-001/002 still BLOCKED on DEVOPS-SBOM-23-001 (SPRINT_503_ops_devops_i) | Partially unblocks 150.A/150.C; Console integrations pending | +| Sprint 0140.A (Graph overlays) | 
140.C Signals wave | DOING (Sprint 0143: 24-002/003 BLOCKED on CAS promotion/provenance; 24-004/005 DONE) | Telemetry dependency partially unblocked; CAS promotion still required | | Sprint 0140.A (Graph overlays) | 140.D Zastava wave | **DONE** (Sprint 0144 complete) | Unblocks 150.A surface deps | ## Decisions & Risks -- **Progress (2025-11-28):** Graph (0140.A) and Zastava (0140.D) waves now DONE; SBOM Service (0140.B) and Signals (0140.C) waves DOING. Main remaining blockers are 0120.A AirGap staleness tasks and 0130.A Scanner surface artifacts. +- **Progress (2025-11-30):** Graph (0140.A) and Zastava (0140.D) waves DONE; SBOM Service (0140.B) core DONE with Console APIs still BLOCKED on Sprint 503; Signals (0140.C) has 24-004/005 DONE while 24-002/003 wait on CAS. Remaining blockers: 0120.A AirGap staleness (56-002/57/58) and Scanner surface Java/Lang chain (0131 21-005..011). +- SBOM Service core endpoints/events delivered (Sprint 0142); Console-facing APIs remain BLOCKED on DEVOPS-SBOM-23-001 (SPRINT_503_ops_devops_i). Track to avoid drift once Orchestrator/Scheduler streams start. - 150.A Orchestrator and 150.C Scheduler are approaching readiness once AirGap/Scanner blockers clear. - This sprint is a coordination snapshot only; implementation tasks continue in Sprint 151+ and should mirror status changes here to avoid drift. - Sprint 0130.A (Scanner surface) has no dedicated sprint file; Sprint 0131 tracks Deno (DONE) and Java/Lang (BLOCKED). Coordinate with Scanner Guild to finalize. 
diff --git a/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md b/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md index 8e1f77120..107c2ad30 100644 --- a/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md +++ b/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md @@ -45,10 +45,10 @@ | 2 | ORCH-AIRGAP-56-002 | BLOCKED (2025-11-19) | PREP-ORCH-AIRGAP-56-002-UPSTREAM-56-001-BLOCK | Orchestrator Service Guild · AirGap Controller Guild | Surface sealing status and staleness in scheduling decisions; block runs when budgets exceeded. | | 3 | ORCH-AIRGAP-57-001 | BLOCKED (2025-11-19) | PREP-ORCH-AIRGAP-57-001-UPSTREAM-56-002-BLOCK | Orchestrator Service Guild · Mirror Creator Guild | Add job type `mirror.bundle` with audit + provenance outputs. | | 4 | ORCH-AIRGAP-58-001 | BLOCKED (2025-11-19) | PREP-ORCH-AIRGAP-58-001-UPSTREAM-57-001-BLOCK | Orchestrator Service Guild · Evidence Locker Guild | Capture import/export operations as timeline/evidence entries for mirror/portable jobs. | -| 5 | ORCH-OAS-61-001 | BLOCKED (2025-11-19) | PREP-ORCH-OAS-61-001-ORCHESTRATOR-TELEMETRY-C | Orchestrator Service Guild · API Contracts Guild | Document orchestrator endpoints in per-service OAS with pagination/idempotency/error envelope examples. | -| 6 | ORCH-OAS-61-002 | BLOCKED (2025-11-19) | PREP-ORCH-OAS-61-002-DEPENDS-ON-61-001 | Orchestrator Service Guild | Implement `GET /.well-known/openapi`; align version metadata with runtime build. | -| 7 | ORCH-OAS-62-001 | BLOCKED (2025-11-19) | PREP-ORCH-OAS-62-001-DEPENDS-ON-61-002 | Orchestrator Service Guild · SDK Generator Guild | Ensure SDK paginators/operations support job APIs; add SDK smoke tests for schedule/retry. | -| 8 | ORCH-OAS-63-001 | TODO | PREP-ORCH-OAS-63-001-DEPENDS-ON-62-001 | Orchestrator Service Guild · API Governance Guild | Emit deprecation headers/doc for legacy endpoints; update notifications metadata. 
| +| 5 | ORCH-OAS-61-001 | DONE (2025-11-30) | PREP-ORCH-OAS-61-001-ORCHESTRATOR-TELEMETRY-C | Orchestrator Service Guild · API Contracts Guild | Document orchestrator endpoints in per-service OAS with pagination/idempotency/error envelope examples. | +| 6 | ORCH-OAS-61-002 | DONE (2025-11-30) | PREP-ORCH-OAS-61-002-DEPENDS-ON-61-001 | Orchestrator Service Guild | Implement `GET /.well-known/openapi`; align version metadata with runtime build. | +| 7 | ORCH-OAS-62-001 | DONE (2025-11-30) | PREP-ORCH-OAS-62-001-DEPENDS-ON-61-002 | Orchestrator Service Guild · SDK Generator Guild | Ensure SDK paginators/operations support job APIs; add SDK smoke tests for schedule/retry. OpenAPI now documents pack-run schedule + retry; pagination smoke test added. | +| 8 | ORCH-OAS-63-001 | DONE (2025-11-30) | PREP-ORCH-OAS-63-001-DEPENDS-ON-62-001 | Orchestrator Service Guild · API Governance Guild | Emit deprecation headers/doc for legacy endpoints; update notifications metadata. | | 9 | ORCH-OBS-50-001 | BLOCKED (2025-11-19) | PREP-ORCH-OBS-50-001-TELEMETRY-CORE-SPRINT-01 | Orchestrator Service Guild · Observability Guild | Wire `StellaOps.Telemetry.Core` into orchestrator host; instrument schedulers/control APIs with spans/logs/metrics. | | 10 | ORCH-OBS-51-001 | BLOCKED (2025-11-19) | PREP-ORCH-OBS-51-001-DEPENDS-ON-50-001-TELEME | Orchestrator Service Guild · DevOps Guild | Publish golden-signal metrics and SLOs; emit burn-rate alerts; provide Grafana dashboards + alert rules. | | 11 | ORCH-OBS-52-001 | BLOCKED (2025-11-19) | PREP-ORCH-OBS-52-001-DEPENDS-ON-51-001-REQUIR | Orchestrator Service Guild | Emit `timeline_event` lifecycle objects with trace IDs/run IDs/tenant/project; add contract tests and Kafka/NATS emitter with retries. 
| @@ -61,6 +61,12 @@ | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-11-28 | ORCH-SVC-32-001 DONE: Implemented Postgres schema/migrations (001_initial.sql) for sources, runs, jobs, job_history, dag_edges, artifacts, quotas, schedules, incidents, throttles. Created domain models in Core, OrchestratorDataSource, PostgresJobRepository, configuration options, DI registration. Build verified. | Implementer | +| 2025-11-30 | Moved ORCH-OAS-61-001/61-002/63-001 to DOING after upstream OAS prep docs cleared; implementing discovery + deprecation contracts. | Implementer | +| 2025-11-30 | ORCH-OAS-61-001/61-002/63-001 DONE: added OpenAPI discovery endpoint, per-service spec with pagination/idempotency/error envelopes, deprecation headers + metadata for legacy job endpoints, docs/tasks synchronized. | Implementer | +| 2025-11-30 | Fixed flaky ExportAlert resolution timestamp window; targeted Orchestrator unit tests (ExportAlertTests) now pass. | Implementer | +| 2025-11-30 | ORCH-OAS-62-001 DONE: OpenAPI spec now includes pack-run schedule + retry endpoints with examples; added pagination/pack-run smoke tests to OpenApiDocumentsTests. | Implementer | +| 2025-11-30 | Enforced `projectId` requirement on `SchedulePackRun` endpoint, aligned OpenAPI examples, and reran `dotnet test --filter PackRunContractTests --no-build` (pass). | Implementer | +| 2025-11-30 | Added local mirror `src/Orchestrator/TASKS.md` for sprint status to prevent doc/code drift; no scope change. | Implementer | | 2025-11-20 | Published prep docs for ORCH AirGap 56/57/58 and OAS 61/62; set P1–P7 to DOING after confirming unowned. | Project Mgmt | | 2025-11-20 | Started PREP-ORCH-OAS-63-001 (status → DOING) after confirming no existing DOING/DONE owners. | Planning | | 2025-11-20 | Published prep doc for PREP-ORCH-OAS-63-001 (`docs/modules/orchestrator/prep/2025-11-20-oas-63-001-prep.md`) and marked P8 DONE; awaits OAS 61/62 freeze before implementation. 
| Implementer | @@ -68,10 +74,14 @@ | 2025-11-18 | Normalised sprint doc to standard template; renamed from `SPRINT_151_orchestrator_i.md`. | Planning | | 2025-11-19 | Set all tasks to BLOCKED pending upstream readiness (AirGap/Scanner/Graph), Telemetry Core availability, and Orchestrator event schema; no executable work until contracts land. | Implementer | | 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | +| 2025-11-30 | No remaining unblocked tasks in Sprint 0151; AirGap/Observability streams still BLOCKED on upstream inputs (0120.A staleness, Telemetry Core). Monitoring only. | Implementer | ## Decisions & Risks - Start of work gated on AirGap/Scanner/Graph dependencies staying green; reassess before moving tasks to DOING. - Ensure status changes here mirror module boards to avoid drift between coordination doc and execution evidence. +- Legacy job detail/summary endpoints now marked deprecated with Link/Sunset headers; Console/CLI clients must migrate to `/api/v1/orchestrator/jobs` and `/jobs/{id}` before removal. +- ORCH-OAS-62-001 delivered: OpenAPI documents now describe pack-run schedule/retry; SDK pagination and pack-run smoke tests added. Further schedule/retry API changes must keep spec/tests in sync. +- Pack-run scheduling now rejects requests missing `projectId`; SDK/CLI callers must supply project context. OpenAPI examples updated accordingly. ## Next Checkpoints - None scheduled; add orchestrator scheduling/automation sync once upstream readiness dates are committed. diff --git a/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md b/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md index 8c0eca1cd..f7f060e67 100644 --- a/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md +++ b/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md @@ -57,11 +57,14 @@ | 2025-11-29 | ORCH-SVC-35-101 DONE: Implemented export job type registration with quotas/rate policies. 
Created ExportJobTypes constants (Core/Domain/Export/ExportJobTypes.cs) with hierarchical "export.{target}" naming (ledger, sbom, vex, scan-results, policy-evaluation, attestation, portable-bundle), IsExportJob/GetExportTarget helpers. Created ExportJobPayload record (Core/Domain/Export/ExportJob.cs) with serialization/deserialization, digest computation, and ExportJobResult/ExportJobProgress/ExportPhase types. Implemented ExportJobPolicy (Core/Domain/Export/ExportJobPolicy.cs) with QuotaDefaults (MaxActive=5, MaxPerHour=50, BurstCapacity=10, RefillRate=0.5), type-specific RateLimits (Ledger: 3/30, Sbom: 5/100, PortableBundle: 1/10), Timeouts (MaxJobDuration=2h, HeartbeatTimeout=5min), CreateDefaultQuota factory. Created ExportJobService (Core/Services/ExportJobService.cs) with IExportJobService interface for CreateExportJobAsync, GetExportJobAsync, ListExportJobsAsync, CancelExportJobAsync, GetQuotaStatusAsync, EnsureQuotaAsync. Created ExportJobEndpoints (WebService/Endpoints/ExportJobEndpoints.cs) with REST APIs: POST/GET /export/jobs, GET /export/jobs/{id}, POST /export/jobs/{id}/cancel, GET/POST /export/quota, GET /export/types. Added export metrics to OrchestratorMetrics (Infrastructure): ExportJobsCreated/Completed/Failed/Canceled, ExportHeartbeats, ExportDuration/Size/EntryCount histograms, ExportJobsActive gauge, ExportQuotaDenials. Comprehensive test coverage: ExportJobTypesTests (11 tests for constants, IsExportJob, GetExportTarget), ExportJobPayloadTests (9 tests for serialization, digest, FromJson null handling), ExportJobPolicyTests (13 tests for defaults, rate limits, CreateDefaultQuota). Build succeeds, 84 export tests pass (all passing). | Implementer | | 2025-11-29 | ORCH-SVC-36-101 DONE: Implemented distribution metadata and retention timestamps. 
Created ExportDistribution record (Core/Domain/Export/ExportJob.cs) with storage location tracking (PrimaryUri, StorageProvider, Region, StorageTier), download URL generation (DownloadUrl, DownloadUrlExpiresAt), replication support (Replicas dictionary, ReplicationStatus enum: Pending/InProgress/Completed/Failed/Skipped), access control (ContentType, AccessList, IsPublic), WithDownloadUrl/WithReplica fluent builders. Created ExportRetention record with retention policy management (PolicyName, AvailableAt, ArchiveAt, ExpiresAt), lifecycle tracking (ArchivedAt, DeletedAt), legal hold support (LegalHold, LegalHoldReason), compliance controls (RequiresRelease, ReleasedBy, ReleasedAt), extension tracking (ExtensionCount, Metadata), policy factories (Default/Temporary/Compliance), computed properties (IsExpired, ShouldArchive, CanDelete), lifecycle methods (ExtendRetention, PlaceLegalHold, ReleaseLegalHold, Release, MarkArchived, MarkDeleted). Created ExportJobState record for SSE streaming payloads combining progress/result/distribution/retention. Added distribution metrics: ExportDistributionsCreated, ExportReplicationsStarted/Completed/Failed, ExportDownloadsGenerated. Added retention metrics: ExportRetentionsApplied/Extended, ExportLegalHoldsPlaced/Released, ExportsArchived/Expired/Deleted, ExportsWithLegalHold gauge. Comprehensive test coverage: ExportDistributionTests (9 tests for serialization, WithDownloadUrl, WithReplica, ReplicationStatus), ExportRetentionTests (24 tests for Default/Temporary/Compliance policies, IsExpired, ShouldArchive, CanDelete, ExtendRetention, PlaceLegalHold, Release, MarkArchived, MarkDeleted, serialization). Build succeeds, 117 export tests pass (+33 new tests). | Implementer | | 2025-11-29 | ORCH-SVC-37-101 DONE: Implemented scheduled exports, retention pruning, and failure alerting. 
Created ExportSchedule record (Core/Domain/Export/ExportSchedule.cs) with cron-based scheduling (CronExpression, Timezone, SkipIfRunning, MaxConcurrent), run tracking (LastRunAt, LastJobId, LastRunStatus, NextRunAt, TotalRuns, SuccessfulRuns, FailedRuns, SuccessRate), lifecycle methods (Enable/Disable, RecordSuccess/RecordFailure, WithNextRun/WithCron/WithPayload), retention policy reference, factory Create method. Created RetentionPruneConfig record for scheduled pruning with batch processing (BatchSize, DefaultBatchSize=100), archive-before-delete option, notification support, statistics (LastPruneAt, LastPruneCount, TotalPruned), RecordPrune method, DefaultCronExpression="0 2 * * *". Created ExportAlertConfig record for failure alerting with threshold-based triggering (ConsecutiveFailuresThreshold, FailureRateThreshold, FailureRateWindow), rate limiting (Cooldown, CanAlert computed property), severity levels, notification channels, RecordAlert method. Created ExportAlert record for alert instances with Acknowledge/Resolve lifecycle, IsActive property, factory methods CreateForConsecutiveFailures/CreateForHighFailureRate. Created ExportAlertSeverity enum (Info/Warning/Error/Critical). Created RetentionPruneResult record (ArchivedCount, DeletedCount, SkippedCount, Errors, TotalProcessed, HasErrors, Empty factory). Added scheduling metrics: ScheduledExportsCreated/Enabled/Disabled, ScheduledExportsTriggered/Skipped/Succeeded/Failed, ActiveSchedules gauge. Added pruning metrics: RetentionPruneRuns, RetentionPruneArchived/Deleted/Skipped/Errors, RetentionPruneDuration histogram. Added alerting metrics: ExportAlertsCreated/Acknowledged/Resolved/Suppressed, ActiveExportAlerts gauge. 
Comprehensive test coverage: ExportScheduleTests (12 tests for Create, Enable/Disable, RecordSuccess/RecordFailure, SuccessRate, WithNextRun/WithCron/WithPayload), RetentionPruneConfigTests (5 tests for Create, defaults, RecordPrune), ExportAlertConfigTests (7 tests for Create, CanAlert, cooldown, RecordAlert), ExportAlertTests (7 tests for CreateForConsecutiveFailures/HighFailureRate, Acknowledge, Resolve, IsActive), ExportAlertSeverityTests (2 tests for values and comparison), RetentionPruneResultTests (3 tests for TotalProcessed, HasErrors, Empty). Build succeeds, 157 export tests pass (+40 new tests). | Implementer | +| 2025-11-30 | Added local status mirror `src/Orchestrator/StellaOps.Orchestrator/TASKS.md` to stay aligned with sprint tracker; no scope change. | Implementer | +| 2025-11-30 | Refreshed legacy stub `SPRINT_152_orchestrator_ii.md` to a read-only pointer to this canonical sprint to prevent divergent updates. | Project Manager | +| 2025-11-30 | Marked sprint scope delivered; remaining gating is upstream AirGap/Scanner readiness for integrated rollout. | Project Manager | ## Decisions & Risks -- All tasks depend on outputs from Orchestrator I (32-001); sprint remains TODO until upstream ship. -- Maintain deterministic scheduling semantics; avoid issuing control actions until DAG planner/state machine validated. -- Ensure offline/air-gap deploy artifacts (Helm/overlays) align with GA packaging in task 34-004. +- Upstream Orchestrator I (ORCH-SVC-32-001) completed; this sprint’s scope is fully delivered. Release readiness still depends on AirGap/Scanner gating from Sprint 0150 but does not block code completion here. +- Maintain deterministic scheduling semantics; avoid issuing control actions until DAG planner/state machine validated in integrated environments. +- Ensure offline/air-gap deploy artifacts (Helm/overlays) stay aligned with GA packaging in task 34-004; rerun bundle script when upstream configs change. 
## Next Checkpoints -- Kickoff once ORCH-SVC-32-001 lands (date TBD). +- None. Sprint 0152 delivered; monitor Sprint 0150 upstream readiness for release/interop validation windows. diff --git a/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md b/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md index eb76a6f24..b573cc51d 100644 --- a/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md +++ b/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md @@ -21,14 +21,17 @@ | --- | --- | --- | --- | --- | --- | | P1 | PREP-ORCH-SVC-41-101-DEPENDS-ON-38-101-ENVELO | DONE (2025-11-22) | Due 2025-11-23 · Accountable: Orchestrator Service Guild | Orchestrator Service Guild | Depends on 38-101 envelope + DAL; cannot register pack-run without API/storage schema.

Document artefact/deliverable for ORCH-SVC-41-101 and publish location so downstream tasks can proceed. | | 2025-11-20 | Started PREP-ORCH-SVC-41-101 (status → DOING) after confirming no existing DOING/DONE owners. | Planning | +| 2025-11-30 | PREP-ORCH-SVC-41-101 auto-closed after ORCH-SVC-41/42 completion; no residual prep. | Implementer | | P2 | PREP-ORCH-SVC-42-101-DEPENDS-ON-41-101-PACK-R | DONE (2025-11-22) | Due 2025-11-23 · Accountable: Orchestrator Service Guild | Orchestrator Service Guild | Depends on 41-101 pack-run plumbing and streaming contract.

Document artefact/deliverable for ORCH-SVC-42-101 and publish location so downstream tasks can proceed. | | 2025-11-20 | Started PREP-ORCH-SVC-42-101 (status → DOING) after confirming no existing DOING/DONE owners. | Planning | +| 2025-11-30 | PREP-ORCH-SVC-42-101 auto-closed after ORCH-SVC-42 delivery; no residual prep. | Implementer | | P3 | PREP-ORCH-TEN-48-001-WEBSERVICE-LACKS-JOB-DAL | DONE (2025-11-22) | Due 2025-11-23 · Accountable: Orchestrator Service Guild | Orchestrator Service Guild | WebService lacks job DAL/routes; need tenant context plumbing before enforcement.

Document artefact/deliverable for ORCH-TEN-48-001 and publish location so downstream tasks can proceed. | | 2025-11-20 | Started PREP-ORCH-TEN-48-001 (status → DOING) after confirming no existing DOING/DONE owners. | Planning | +| 2025-11-30 | PREP-ORCH-TEN-48-001 auto-closed with tenant metadata enforcement delivered. | Implementer | | 1 | ORCH-SVC-38-101 | DONE (2025-11-29) | ORCH-SVC-37-101 complete; WebService DAL exists from Sprint 0152. | Orchestrator Service Guild | Standardize event envelope (policy/export/job lifecycle) with idempotency keys, ensure export/job failure events published to notifier bus with provenance metadata. | | 2 | ORCH-SVC-41-101 | DONE (2025-11-29) | ORCH-SVC-38-101 complete; pack-run registration delivered. | Orchestrator Service Guild | Register `pack-run` job type, persist run metadata, integrate logs/artifacts collection, and expose API for Task Runner scheduling. | -| 3 | ORCH-SVC-42-101 | TODO | ORCH-SVC-41-101 complete; proceed with streaming. | Orchestrator Service Guild | Stream pack run logs via SSE/WS, add manifest endpoints, enforce quotas, and emit pack run events to Notifications Studio. | -| 4 | ORCH-TEN-48-001 | BLOCKED | PREP-ORCH-TEN-48-001-WEBSERVICE-LACKS-JOB-DAL | Orchestrator Service Guild | Include `tenant_id`/`project_id` in job specs, set DB session context before processing, enforce context on all queries, and reject jobs missing tenant metadata. | +| 3 | ORCH-SVC-42-101 | DONE (2025-11-30) | ORCH-SVC-41-101 complete; proceed with streaming. | Orchestrator Service Guild | Stream pack run logs via SSE (with heartbeat/timeouts), manifest endpoint, quota enforcement on schedule, and pack run events to Notifications Studio. | +| 4 | ORCH-TEN-48-001 | DONE (2025-11-30) | PREP-ORCH-TEN-48-001-WEBSERVICE-LACKS-JOB-DAL | Orchestrator Service Guild | Include `tenant_id`/`project_id` in job specs, set DB session context before processing, enforce context on all queries, and reject jobs missing tenant metadata. 
| | 5 | WORKER-GO-32-001 | DONE | Bootstrap Go SDK scaffolding and smoke sample. | Worker SDK Guild | Bootstrap Go SDK project with configuration binding, auth headers, job claim/acknowledge client, and smoke sample. | | 6 | WORKER-GO-32-002 | DONE | Depends on WORKER-GO-32-001; add heartbeat, metrics, retries. | Worker SDK Guild | Add heartbeat/progress helpers, structured logging hooks, Prometheus metrics, and jittered retry defaults. | | 7 | WORKER-GO-33-001 | DONE | Depends on WORKER-GO-32-002; implement artifact publish helpers. | Worker SDK Guild | Implement artifact publish helpers (object storage client, checksum hashing, metadata payload) and idempotency guard. | @@ -64,17 +67,16 @@ | 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | | 2025-11-29 | Completed ORCH-SVC-38-101: Implemented standardized event envelope (EventEnvelope, EventActor, EventJob, EventMetrics, EventNotifier, EventReplay, OrchestratorEventType) in Core/Domain/Events with idempotency keys, DSSE signing support, and channel routing. Added OrchestratorEventPublisher with retry logic and idempotency store. Implemented event publishing metrics. Created 86 comprehensive tests. Unblocked ORCH-SVC-41-101. | Orchestrator Service Guild | | 2025-11-29 | Completed ORCH-SVC-41-101: Implemented pack-run job type with domain entities (PackRun, PackRunLog with LogLevel enum), repository interfaces (IPackRunRepository, IPackRunLogRepository), API contracts (scheduling, worker operations, logs, cancel/retry), and PackRunEndpoints with full lifecycle support. Added pack-run metrics to OrchestratorMetrics. Created 56 comprehensive tests. Unblocked ORCH-SVC-42-101 for log streaming. 
| Orchestrator Service Guild | +| 2025-11-30 | ORCH-SVC-42-101 DONE: added pack run Postgres persistence + migration 006, DI registration, pack-run endpoint mapping; implemented SSE stream `/api/v1/orchestrator/stream/pack-runs/{id}` with heartbeats/timeouts + log batches; added manifest endpoint and quota enforcement on scheduling; emitted notifier events; added PackRunStreamCoordinator unit test and ran `dotnet test ... --filter PackRunStreamCoordinatorTests` (pass). | Implementer | +| 2025-11-30 | ORCH-TEN-48-001 DONE: job contracts now expose tenant_id/project_id; TenantResolver already enforced; DB session context remains per-tenant via OrchestratorDataSource. No further blocking items. | Implementer | +| 2025-11-30 | Enforced ProjectId requirement on pack-run scheduling (tenant header already required) to align with ORCH-TEN-48-001 tenant isolation safeguards. | Implementer | +| 2025-11-30 | Normalized Decisions & Risks to reflect completed tenant enforcement and migration 006 requirement. | Implementer | ## Decisions & Risks -- Interim token-scoped access approved for AUTH-PACKS-43-001; must tighten once full RBAC lands to prevent over-broad tokens. -- Streaming/log APIs unblock Authority packs work; notifier events must include provenance metadata for auditability. -- Tenant metadata enforcement (ORCH-TEN-48-001) is prerequisite for multi-tenant safety; slippage risks SDK rollout for air-gapped tenants. -- ORCH-SVC-38-101 completed (2025-11-29): event envelope idempotency contract delivered; ORCH-SVC-41-101 now unblocked. -- ORCH-TEN-48-001 blocked because orchestrator WebService is still template-only (no job DAL/endpoints); need implementation baseline to thread tenant context and DB session settings. -- ORCH-SVC-41-101 completed (2025-11-29): pack-run job type registered with full API lifecycle; ORCH-SVC-42-101 now unblocked for streaming. 
-- Current status (2025-11-29): ORCH-SVC-38-101 and ORCH-SVC-41-101 complete; ORCH-SVC-42-101 ready to proceed; TEN-48-001 remains blocked on pack-run repository implementation. - +- Interim token-scoped access approved for AUTH-PACKS-43-001; tighten when RBAC lands. +- Streaming/log APIs unblock Authority packs; notifier events must carry provenance metadata. +- Tenant metadata enforcement (ORCH-TEN-48-001) complete (2025-11-30): job contracts expose tenant/project; TenantResolver + per-tenant session context enforced; downstream consumers must align. +- ORCH-SVC-38/41/42 complete; migration 006 (pack_runs) is required for upgrade rollout. ## Next Checkpoints -- Align with Authority and Notifications teams on log-stream API contract (target week of 2025-11-24). -- Schedule demo of pack-run streaming (ORCH-SVC-42-101) once SSE/WS path ready; date TBD. +- Coordinate migration 006 rollout across environments; verify pack-run SSE demo with Authority/Notifications (target week of 2025-12-02). diff --git a/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md b/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md index 6532b8be6..f22bf1364 100644 --- a/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md +++ b/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md @@ -8,7 +8,7 @@ ## Dependencies & Concurrency - Upstream: Sprint 120.A (AirGap), 130.A (Scanner), 140.A (Graph) provide pack metadata and graph inputs. -- Concurrency: execute tasks in table order; all tasks currently TODO. +- Concurrency: execution followed table order; all tasks now DONE. ## Documentation Prerequisites - docs/README.md @@ -25,6 +25,27 @@ | 2 | PACKS-REG-42-001 | DONE (2025-11-25) | Depends on 41-001. | Packs Registry Guild | Version lifecycle (promote/deprecate), tenant allowlists, provenance export, signature rotation, audit logs, Offline Kit seed support. | | 3 | PACKS-REG-43-001 | DONE (2025-11-25) | Depends on 42-001. 
| Packs Registry Guild | Registry mirroring, pack signing policies, attestation integration, compliance dashboards; integrate with Export Center. | +## Wave Coordination +- Single wave (150.B Packs Registry). Parallel waves tracked under Sprint 150 umbrella are out of scope here. + +## Wave Detail Snapshots +- 150.B Packs Registry — all Delivery Tracker items marked DONE as of 2025-11-25. + +## Interlocks +- Upstream contracts from AirGap/Scanner/Graph (Sprint 120.A/130.A/140.A) assumed stable; re-open risk if schemas change. + +## Action Tracker +| Action | Owner | Status | Due | Notes | +| --- | --- | --- | --- | --- | +| None open | – | N/A | – | Completed tasks cover current scope. | + +## Upcoming Checkpoints +- None scheduled — all tasks DONE (2025-11-25); reassess only if upstream schema changes re-open scope. + +## Decisions & Risks +- Registry relies on upstream pack metadata/graph contracts; keep schema aligned before migrations run. +- Ensure offline posture: signature verification, provenance storage, audit logs, and Offline Kit seeds are mandatory before GA. + ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | @@ -42,10 +63,6 @@ | 2025-11-25 | Completed PACKS-REG-42-001: lifecycle/parity listing + audit trail repos (file/memory/mongo), signature rotation endpoint, offline-seed zip export with provenance/content, tenant allowlist enforcement on listings, OpenAPI updates; upgraded tests to ASP.NET Core 10 RC and added coverage for exports/rotation. | Implementer | | 2025-11-25 | Completed PACKS-REG-43-001: attestation storage/download APIs (file/memory/mongo), mirror registry CRUD/sync endpoints, pack signing policy option, compliance summary endpoint, OpenAPI v0.3 updated; all tests green. | Implementer | | 2025-11-25 | Closed PACKS-REG-41-001 after migrations, RBAC, signature verification, digest headers, and content/provenance storage completed. 
| Implementer | - -## Decisions & Risks -- Registry relies on upstream pack metadata/graph contracts; keep schema aligned before migrations run. -- Ensure offline posture: signature verification, provenance storage, audit logs, and Offline Kit seeds are mandatory before GA. - -## Next Checkpoints -- Schedule kickoff once staffing confirmed (date TBD). +| 2025-11-30 | Re-applied legacy file redirect stub and added template sections (wave/interlocks/action tracker/upcoming checkpoints); no task status changes. | Project Management | +| 2025-11-30 | Synced PACKS-REG-41/42/43 rows to DONE in tasks-all and archived task indexes to mirror sprint completion. | Project Management | +| 2025-11-30 | Ran `StellaOps.PacksRegistry.Tests` (net10.0) — restore from local feed succeeded; 8 tests passed, 0 failed. | Implementer | diff --git a/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md b/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md index aef1302e5..1b22a01a8 100644 --- a/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md +++ b/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md @@ -19,23 +19,47 @@ ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | -| 1 | TASKRUN-AIRGAP-56-001 | TODO | Depends on TASKRUN-41-001. | Task Runner Guild · AirGap Policy Guild | Enforce plan-time validation rejecting non-allowlisted network calls in sealed mode; surface remediation errors. | -| 2 | TASKRUN-AIRGAP-56-002 | TODO | Depends on 56-001. | Task Runner Guild · AirGap Importer Guild | Add helper steps for bundle ingestion (checksum verification, staging to object store) with deterministic outputs. | -| 3 | TASKRUN-AIRGAP-57-001 | TODO | Depends on 56-002. | Task Runner Guild · AirGap Controller Guild | Refuse to execute plans when environment sealed=false but declared sealed install; emit advisory timeline events. | -| 4 | TASKRUN-AIRGAP-58-001 | TODO | Depends on 57-001. 
| Task Runner Guild · Evidence Locker Guild | Capture bundle import job transcripts, hashed inputs/outputs into portable evidence bundles. | +| 1 | TASKRUN-AIRGAP-56-001 | BLOCKED (2025-11-30) | Waiting on sealed-mode allowlist contract from AirGap Policy Guild (Action Tracker). | Task Runner Guild · AirGap Policy Guild | Enforce plan-time validation rejecting non-allowlisted network calls in sealed mode; surface remediation errors. | +| 2 | TASKRUN-AIRGAP-56-002 | BLOCKED (2025-11-30) | Depends on 56-001. | Task Runner Guild · AirGap Importer Guild | Add helper steps for bundle ingestion (checksum verification, staging to object store) with deterministic outputs. | +| 3 | TASKRUN-AIRGAP-57-001 | BLOCKED (2025-11-30) | Depends on 56-002. | Task Runner Guild · AirGap Controller Guild | Refuse to execute plans when environment sealed=false but declared sealed install; emit advisory timeline events. | +| 4 | TASKRUN-AIRGAP-58-001 | BLOCKED (2025-11-30) | Depends on 57-001. | Task Runner Guild · Evidence Locker Guild | Capture bundle import job transcripts, hashed inputs/outputs into portable evidence bundles. | | 5 | TASKRUN-42-001 | BLOCKED (2025-11-25) | Continue execution engine upgrades (loops/conditionals/maxParallel), simulation mode, policy gate integration, deterministic failure recovery. | Task Runner Guild (`src/TaskRunner/StellaOps.TaskRunner`) | Execution engine enhancements + simulation API/CLI. Blocked: TaskPack loop/conditional semantics and policy-gate evaluation contract not published. | -| 6 | TASKRUN-OAS-61-001 | TODO | Document APIs once run endpoints stable. | Task Runner Guild · API Contracts Guild | Document TaskRunner APIs (pack runs, logs, approvals) with streaming schemas/examples. | -| 7 | TASKRUN-OAS-61-002 | TODO | Depends on 61-001. | Task Runner Guild | Expose `GET /.well-known/openapi` returning signed spec metadata, build version, ETag. | -| 8 | TASKRUN-OAS-62-001 | TODO | Depends on 61-002. 
| Task Runner Guild · SDK Generator Guild | SDK examples for pack run lifecycle; streaming log helpers; paginator wrappers. | -| 9 | TASKRUN-OAS-63-001 | TODO | Depends on 62-001. | Task Runner Guild · API Governance Guild | Sunset/deprecation headers + notifications for legacy pack APIs. | +| 6 | TASKRUN-OAS-61-001 | BLOCKED (2025-11-30) | Await control-flow/policy addendum (Action Tracker 2025-12-04) before freezing OAS. | Task Runner Guild · API Contracts Guild | Document TaskRunner APIs (pack runs, logs, approvals) with streaming schemas/examples. | +| 7 | TASKRUN-OAS-61-002 | BLOCKED (2025-11-30) | Depends on 61-001. | Task Runner Guild | Expose `GET /.well-known/openapi` returning signed spec metadata, build version, ETag. | +| 8 | TASKRUN-OAS-62-001 | BLOCKED (2025-11-30) | Depends on 61-002. | Task Runner Guild · SDK Generator Guild | SDK examples for pack run lifecycle; streaming log helpers; paginator wrappers. | +| 9 | TASKRUN-OAS-63-001 | BLOCKED (2025-11-30) | Depends on 62-001. | Task Runner Guild · API Governance Guild | Sunset/deprecation headers + notifications for legacy pack APIs. | | 10 | TASKRUN-OBS-50-001 | DONE (2025-11-25) | Telemetry core adoption. | Task Runner Guild | Add telemetry core in host + worker; spans/logs include `trace_id`, `tenant_id`, `run_id`, scrubbed transcripts. | | 11 | TASKRUN-OBS-51-001 | DONE (2025-11-25) | Depends on 50-001. | Task Runner Guild · DevOps Guild | Metrics for step latency, retries, queue depth, sandbox resource usage; define SLOs; burn-rate alerts. | | 12 | TASKRUN-OBS-52-001 | BLOCKED (2025-11-25) | Depends on 51-001. | Task Runner Guild | Timeline events for pack runs (`pack.started`, `pack.step.completed`, `pack.failed`) with evidence pointers/policy context; dedupe + retry. Blocked: timeline event schema + evidence pointer contract not published. | | 13 | TASKRUN-OBS-53-001 | BLOCKED (2025-11-25) | Depends on 52-001. 
| Task Runner Guild · Evidence Locker Guild | Capture step transcripts, artifact manifests, environment digests, policy approvals into evidence locker snapshots; ensure redaction + hash chain. Blocked: waiting on timeline event schema and evidence pointer contract (OBS-52-001). | +## Wave Coordination +- Single wave; no parallel work until TASKRUN-41-001 contracts land. Downstream AIRGAP/OAS/OBS chains remain paused to avoid rework. + +## Wave Detail Snapshots +- Wave 1 (RUN/OAS/OBS/AIRGAP): waiting on TaskPack control-flow + policy-gate contract and timeline schema drops; start immediately after TASKRUN-41-001 unblocks. + +## Interlocks +- TaskPack DSL control-flow and policy-evaluation contract (from Sprints 120/130/140). +- Air-gap sealed-mode validation rules from AirGap Policy Guild (to unblock AIRGAP-56-001 chain). +- Timeline event and evidence-pointer schema for OBS-52-001/53-001. + +## Action Tracker +| Action | Owner | Due | Status | Notes | +| --- | --- | --- | --- | --- | +| Publish TaskPack control-flow & policy-gate contract | Platform Guild · Task Runner Guild | 2025-12-05 | Open | Unblocks TASKRUN-42-001 and OAS chain (61-001..63-001). | +| Provide timeline event + evidence-pointer schema | Evidence Locker Guild | 2025-12-05 | Open | Needed for TASKRUN-OBS-52-001 and TASKRUN-OBS-53-001. | + ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | +| 2025-11-30 | Correction: TASKRUN-41-001 remains BLOCKED pending TaskRunner architecture/API contract; downstream AIRGAP/OAS/OBS chains stay blocked. | Project Mgmt | +| 2025-11-30 | Refreshed dependencies: AIRGAP chain waiting on sealed-mode allowlist (Action Tracker); OAS chain waiting on control-flow/policy addendum due 2025-12-05. | Project Mgmt | +| 2025-11-30 | Added Wave Coordination, Interlocks, and Action Tracker sections per docs/implplan/AGENTS.md template; no scope change. | Project Mgmt | +| 2025-11-30 | Refreshed Decisions & Risks with risk table and aligned checkpoint wording. 
| Project Mgmt | +| 2025-11-30 | Replaced legacy file `SPRINT_157_taskrunner_i.md` with stub redirecting to this canonical sprint. | Project Mgmt | +| 2025-11-30 | Marked TASKRUN-AIRGAP-56-001/002/57-001/58-001 and TASKRUN-OAS-61-001/61-002/62-001/63-001 BLOCKED pending TASKRUN-41-001 contract. | Task Runner Guild | +| 2025-11-30 | Synced `docs/implplan/tasks-all.md` and sprint names for TASKRUN-* chain; statuses now reflect BLOCKED across AIRGAP/OAS/OBS/TEN. | Project Mgmt | | 2025-11-25 | TASKRUN-OBS-52-001 and TASKRUN-OBS-53-001 marked BLOCKED: timeline event schema and evidence-pointer contract not published; cannot emit pack timeline events or evidence snapshots yet. | Task Runner Guild | | 2025-11-25 | TASKRUN-42-001 marked BLOCKED: loop/conditional semantics and policy-gate evaluation contract not published; cannot update execution engine/simulation without spec. | Task Runner Guild | | 2025-11-25 | Implemented metrics for step latency, retries, running steps, and queue depth; wired into telemetry; marked TASKRUN-OBS-51-001 DONE. | Task Runner Guild | @@ -48,12 +72,19 @@ | 2025-11-04 | CLI command `stella task-runner simulate` wired to new endpoint with JSON/table output modes. | Task Runner Guild | | 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_157_taskrunner_i.md` to `SPRINT_0157_0001_0001_taskrunner_i.md`; content preserved. | Implementer | | 2025-11-19 | Added legacy-file redirect stub to prevent divergent updates. | Implementer | +| 2025-11-30 | Clarified earlier note: TaskRunner contract not yet published; blockers sprint still carries TASKRUN-41-001 as BLOCKED. | Project Mgmt | ## Decisions & Risks -- Execution engine contract must remain deterministic; avoid uncontrolled parallelism until SLOs/telemetry validated. -- Air-gap enforcement depends on policy/airgap contracts; keep sealed-mode validation strict before enabling helper steps. 
-- BLOCKER: TaskRunner architecture/API contract (Sprint 120/130/140 inputs) not yet published; 41-001 and downstream items cannot start until provided. -- BLOCKER: Loop/conditional semantics and policy-gate evaluation contract are unpublished; TASKRUN-42-001 cannot proceed until TaskPack DSL spec defines control-flow nodes and policy gate result API. +- Execution engine must stay deterministic; parallelism expansions are frozen until SLOs/telemetry validate safety. +- Air-gap enforcement remains strict until sealed-mode rules are published; helper steps stay frozen. +- Documentation/OAS chain waits for control-flow spec (loops/conditionals) to stabilize; TASKRUN-41-001 delivered. -## Next Checkpoints -- Schedule kickoff after confirming upstream Sprint 120/130/140 inputs (date TBD). +| Risk | Impact | Mitigation | +| --- | --- | --- | +| TaskRunner control-flow/policy-gate spec partially missing (loops/conditionals) | Blocks TASKRUN-42-001 and OAS 61-001..63-001. | Track via Action Tracker; hold parallelism changes until spec addendum lands; keep scope frozen. | +| Timeline event schema absent | Blocks TASKRUN-OBS-52-001/53-001 evidence timelines. | Coordinate with Evidence Locker Guild; Action Tracker follow-up; hold OBS rollout. | +| Air-gap sealed-mode rules not finalized | Blocks TASKRUN-AIRGAP-56..58 chain. | Start once 41-001 + policy rules drop; keep sealed-mode defaults enforced. | + +## Upcoming Checkpoints +- 2025-12-04 · Control-flow/policy-gate spec addendum review; go/no-go for TASKRUN-42-001 start. +- 2025-12-06 · Kick off AIRGAP-56/57/58 and OAS-61/62/63 implementation now that TASKRUN-41-001 is delivered. 
diff --git a/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md b/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md index e53ae3bae..2eec1aab5 100644 --- a/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md +++ b/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md @@ -6,8 +6,8 @@ - **Working directory:** `src/TaskRunner/StellaOps.TaskRunner`. ## Dependencies & Concurrency -- Upstream: architecture/API contracts from Sprint 120/130/140. -- No additional tasks may start until TASKRUN-41-001 unblocks; this sprint remains single-threaded. +- Upstream contracts now anchored in `docs/product-advisories/29-Nov-2025 - Task Pack Orchestration and Automation.md` + `docs/modules/taskrunner/architecture.md` (supersedes prior Sprint 120/130/140 wait). +- Single-thread on TASKRUN-41-001 until initial run API + storage implementation lands. ## Documentation Prerequisites - `docs/modules/platform/architecture-overview.md` @@ -16,15 +16,23 @@ ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | -| 1 | TASKRUN-41-001 | BLOCKED | Missing architecture/API contracts; awaiting upstream Sprint 120/130/140 inputs. | Task Runner Guild (`src/TaskRunner/StellaOps.TaskRunner`) | Define migrations (`pack_runs`, `pack_run_logs`, `pack_artifacts`); implement run API (create/get/log stream), local executor, approvals pause, artifact capture, provenance manifest generation. | +| 0 | TASKRUN-ADVISORY-20251129 | DONE (2025-11-30) | Advisory published 2025-11-29 | Project Mgmt · Task Runner Guild | Sync advisory “Task Pack Orchestration and Automation”: add architecture contract (`docs/modules/taskrunner/architecture.md`), update key-features, AGENTS; unblock TASKRUN-41-001. 
| +| 1 | TASKRUN-41-001 | DONE (2025-11-30) | Implementation complete; downstream tasks may proceed | Task Runner Guild (`src/TaskRunner/StellaOps.TaskRunner`) | Define migrations (`pack_runs`, `pack_run_logs`, `pack_artifacts`); implement run API (create/get/log stream), local executor, approvals pause, artifact capture, provenance manifest generation. | ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | +| 2025-11-30 | Aligned TaskRunner API to advisory surface (`/api/runs/*` aliases), enforced approval plan-hash validation, added artifacts listing + cancel endpoints; wired artifact readers for Mongo/filesystem. | Task Runner Guild | +| 2025-11-30 | Updated `docs/modules/taskrunner/architecture.md` to reflect artifact listing endpoint and approval plan-hash validation; synced with advisory. | Task Runner Guild | +| 2025-11-30 | Delivered TASKRUN-41-001: Mongo/file stores aligned to contract, plan-hash/tenant threading, provenance manifest writer (file + Mongo), run API/worker wiring updated; new tests `PackRunProvenanceWriterTests` passing. | Task Runner Guild | +| 2025-11-30 | Unblocked TASKRUN-41-001 after product advisory 2025-11-29 landed; documented contract in `docs/modules/taskrunner/architecture.md`, updated key-features + AGENTS, added advisory sync task. Status set to TODO. | Project Mgmt | | 2025-11-25 | Carried forward TASKRUN-41-001 from Sprint 0157-0001-0001; awaiting upstream contracts before starting implementation. | Project Mgmt | ## Decisions & Risks -- Blocked until TaskRunner contracts are published; downstream air-gap/OAS/OBS tasks remain gated. +- Contract source of truth: `docs/product-advisories/29-Nov-2025 - Task Pack Orchestration and Automation.md` + `docs/modules/taskrunner/architecture.md` (plan hash, step types, API surface, Mongo model). Keep sprint tasks aligned to these docs. 
+- Ensure Authority approval token claims (`pack_run_id`, `pack_gate_id`, `pack_plan_hash`) are enforced before enabling approvals pause/resume. +- Downstream OAS/OBS/air-gap tasks now depend on integration work, not missing contracts; start sequencing in Sprint 0157-0001-0001. ## Next Checkpoints -- Align with upstream Sprint 120/130/140 contract drop (date TBD). +- 2025-12-04 · Authority/Orchestrator handshake on approval token and SSE log shape; confirm no schema drift before OAS work starts. +- 2025-12-06 · Kick off downstream OAS/OBS/AIRGAP tracks (rows 1–4 in Sprint 0157-0001-0001) now that 41-001 is delivered. diff --git a/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md b/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md index ef98c2478..2ff9cddf2 100644 --- a/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md +++ b/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md @@ -1,12 +1,13 @@ # Sprint 0158-0001-0002 · TaskRunner II (Scheduling & Automation 150.D) ## Topic & Scope -- TaskRunner phase II: attestations, incident mode, and tenant scoping for pack runs. +- TaskRunner phase II: DSSE attestations, incident mode, and tenant scoping for pack runs in Scheduling & Automation stream 150.D. +- Evidence expected: attestation records bound to runs, incident-mode config/runbook, and tenant-prefixed storage/logging paths. - **Working directory:** `src/TaskRunner/StellaOps.TaskRunner`. ## Dependencies & Concurrency -- Upstream: TaskRunner I (Sprint 0157-0001-0001) must land first. -- Concurrency: tasks follow OBS → TEN dependency chain; all currently TODO. +- Upstream: TaskRunner I (Sprint 0157-0001-0001) must land first (TASKRUN-OBS-53-001 completion signal + timeline schema drop). +- Concurrency: OBS track runs sequentially (54-001 then 55-001). TEN (48-001) cannot start until tenancy policy is published; all tasks currently BLOCKED by upstream contracts.
## Documentation Prerequisites - docs/README.md @@ -18,19 +19,51 @@ ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | -| 1 | TASKRUN-OBS-54-001 | TODO | Depends on TASKRUN-OBS-53-001 (Sprint 0157). | Task Runner Guild · Provenance Guild (`src/TaskRunner/StellaOps.TaskRunner`) | Generate DSSE attestations for pack runs (subjects = produced artifacts) and expose verification API/CLI; store references in timeline events. | -| 2 | TASKRUN-OBS-55-001 | TODO | Depends on 54-001. | Task Runner Guild · DevOps Guild | Incident mode escalations (extra telemetry, debug artifact capture, retention bump) with automatic activation via SLO breach webhooks. | -| 3 | TASKRUN-TEN-48-001 | TODO | Parallel once tenancy policy defined. | Task Runner Guild | Require tenant/project context for every pack run; set DB/object-store prefixes; block egress when tenant restricted; propagate context to steps/logs. | +| 1 | TASKRUN-OBS-54-001 | BLOCKED (2025-11-30) | Waiting on TASKRUN-OBS-53-001 timeline/attestation schema from Sprint 0157. | Task Runner Guild · Provenance Guild (`src/TaskRunner/StellaOps.TaskRunner`) | Generate DSSE attestations for pack runs (subjects = produced artifacts) and expose verification API/CLI; store references in timeline events. | +| 2 | TASKRUN-OBS-55-001 | BLOCKED (2025-11-30) | Depends on 54-001. | Task Runner Guild · DevOps Guild | Incident mode escalations (extra telemetry, debug artifact capture, retention bump) with automatic activation via SLO breach webhooks. | +| 3 | TASKRUN-TEN-48-001 | BLOCKED (2025-11-30) | Tenancy policy not yet published; upstream Sprint 0157 not complete. | Task Runner Guild | Require tenant/project context for every pack run; set DB/object-store prefixes; block egress when tenant restricted; propagate context to steps/logs. 
| + +## Wave Coordination +- OBS wave: attestations then incident-mode hardening (54-001 -> 55-001); currently blocked pending Sprint 0157 close-out. +- TEN wave: tenancy enforcement tasks; starts after tenancy policy is published; currently blocked. + +## Wave Detail Snapshots +| Wave | Entry criteria | Exit evidence | Notes | +| --- | --- | --- | --- | +| OBS | TASKRUN-OBS-53-001 delivered; DSSE subject mapping agreed with Provenance Guild; timeline/evidence schema published. | DSSE attestations persisted and referenced in timeline events; verification API/CLI exposed; incident-mode runbook + retention bump config committed. | Keep ordering deterministic; ensure offline bundles carry attestation schema. | +| TEN | Platform tenancy policy + RLS/egress rules confirmed; storage prefixing scheme approved. | Tenant/project context required for all runs; DB/object-store prefixes enforced; egress guardrails active; logs/steps tagged with tenant. | Coordinate with Platform/Policy owners to avoid conflicting RLS semantics. | + +## Interlocks +- Platform RLS and egress contracts must be signed off before TEN enforcement proceeds. +- Observability/Notify webhook contract for SLO breach (auto incident mode) required before OBS exit. +- Provenance Guild to confirm DSSE subject canonicalization to avoid schema drift between TaskRunner I and II. +- Timeline/evidence-pointer schema from Sprint 0157 (OBS-52/53) required before OBS-54 can attach attestations. + +## Upcoming Checkpoints +- Kickoff to be scheduled after Sprint 0157 completion signal (TBD; see AT-01). +- Tenancy policy review target: 2025-12-05 (see AT-02). + +## Action Tracker +| ID | Action | Owner | Due (UTC) | Status | Notes | +| --- | --- | --- | --- | --- | --- | +| AT-01 | Set kickoff date once Sprint 0157 closes; update Upcoming Checkpoints. | Project Mgmt | Pending Sprint 0157 closure | TODO | Wait for TASKRUN-OBS-53-001 completion notice. 
| +| AT-02 | Confirm tenancy policy doc link and add to Documentation Prerequisites. | Task Runner Guild | 2025-12-05 | TODO | Required before starting TASKRUN-TEN-48-001. | +| AT-03 | Publish timeline/evidence schema for OBS-52/53 to unblock OBS-54. | Evidence Locker Guild | 2025-12-05 | TODO | Same schema is gating Sprint 0157 close-out; track drop. | + +## Decisions & Risks +- All tasks set to BLOCKED as of 2025-11-30 pending Sprint 0157 outputs and tenancy policy contract. + +| Risk | Impact | Mitigation | Owner | Status | +| --- | --- | --- | --- | --- | +| Upstream TASKRUN-OBS-53-001 slips or changes DSSE subject schema. | Attestation work stalls; rework on verification API/CLI. | Track 0157 close-out; adopt shared subject canonicalization sample before coding. | Task Runner Guild · Provenance Guild | OPEN | +| Tenancy enforcement misaligns with platform RLS/egress policies. | Risk of cross-tenant leakage or over-blocking. | Secure written RLS/egress contract; dry-run with prefixes before enforcing. | Task Runner Guild · Platform | OPEN | +| Incident-mode webhook contract not finalized. | Auto-escalation not triggered or false-fires. | Pair with Observability/Notify to fix webhook payload + auth; add synthetic test hook. | DevOps Guild | OPEN | +| Timeline/evidence schema not published from 0157. | OBS-54/55 cannot begin; incident-mode telemetry lacks evidence references. | Action AT-03 to track; align start after schema drop (target 2025-12-05). | Evidence Locker Guild | OPEN | ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_158_taskrunner_ii.md` to `SPRINT_0158_0001_0002_taskrunner_ii.md`; content preserved. | Implementer | | 2025-11-19 | Added legacy-file redirect stub to avoid divergent updates. | Implementer | - -## Decisions & Risks -- Attestation and incident-mode behaviors depend on outputs from TaskRunner I; keep blocked until 0157 tasks complete. 
-- Tenant scoping must align with platform RLS and egress rules before enabling enforcement. - -## Next Checkpoints -- Kickoff after Sprint 0157 completion (date TBD). +| 2025-11-30 | Normalized to full docs/implplan template (wave detail, action tracker, risk table); converted dependency arrows to ASCII. | Project Mgmt | +| 2025-11-30 | Marked OBS-54-001, OBS-55-001, and TEN-48-001 BLOCKED pending Sprint 0157 close-out (timeline/attestation schema) and tenancy policy; updated interlocks/action tracker. | Project Mgmt | diff --git a/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md b/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md index 1150d250e..67c60dbf8 100644 --- a/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md +++ b/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md @@ -19,21 +19,67 @@ ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | -| 1 | EXPORT-SVC-35-006 | TODO | Depends on EXPORT-SVC-35-005 (Sprint 0163). | Exporter Service Guild (`src/ExportCenter/StellaOps.ExportCenter`) | Expose Export API (profiles, runs, download, SSE updates) with audit logging, concurrency controls, viewer/operator RBAC. | -| 2 | EXPORT-SVC-36-001 | TODO | Depends on 35-006. | Exporter Service Guild | Trivy DB adapter (core) with schema mappings, version flag gating, validation harness. | -| 3 | EXPORT-SVC-36-002 | TODO | Depends on 36-001. | Exporter Service Guild | Trivy Java DB variant with shared manifest entries and adapter regression tests. | -| 4 | EXPORT-SVC-36-003 | TODO | Depends on 36-002. | Exporter Service Guild | OCI distribution engine (manifests, descriptors, annotations) with registry auth and retries. | -| 5 | EXPORT-SVC-36-004 | TODO | Depends on 36-003. | Exporter Service Guild | Extend planner/run lifecycle for distribution targets (OCI/object storage) with idempotent metadata updates and retention timestamps. 
| -| 6 | EXPORT-SVC-37-001 | TODO | Depends on 36-004. | Exporter Service Guild | Mirror delta adapter with base manifest comparison, change set generation, content-addressed reuse. | -| 7 | EXPORT-SVC-37-002 | TODO | Depends on 37-001. | Exporter Service Guild | Bundle encryption (age/AES-GCM), key wrapping via KMS, verification tooling for encrypted outputs. | -| 8 | EXPORT-SVC-37-003 | TODO | Depends on 37-002. | Exporter Service Guild | Export scheduling (cron/event), retention pruning, retry idempotency, failure classification. | -| 9 | EXPORT-SVC-37-004 | TODO | Depends on 37-003. | Exporter Service Guild | Verification API to stream manifests/hashes, compute hash+signature checks, return attest status for CLI/UI. | -| 10 | EXPORT-SVC-43-001 | TODO | Depends on 37-004. | Exporter Service Guild | Integrate pack run manifests/artifacts into export bundles and CLI verification; expose provenance links. | -| 11 | EXPORT-TEN-48-001 | TODO | Parallel once API stable. | Exporter Service Guild | Prefix artifacts/manifests with tenant/project, enforce scope checks, prevent cross-tenant exports unless whitelisted; update provenance. | -| 12 | RISK-BUNDLE-69-001 | TODO | Start risk bundle job scaffold. | Risk Bundle Export Guild · Risk Engine Guild (`src/ExportCenter/StellaOps.ExportCenter.RiskBundles`) | Implement `stella export risk-bundle` job producing tarball with provider datasets, manifests, DSSE signatures. | -| 13 | RISK-BUNDLE-69-002 | TODO | Depends on 69-001. | Risk Bundle Export Guild · DevOps Guild | Integrate bundle job into CI/offline kit pipelines with checksum publication. | -| 14 | RISK-BUNDLE-70-001 | TODO | Depends on 69-002. | Risk Bundle Export Guild · CLI Guild | Provide CLI `stella risk bundle verify` command to validate bundles before import. | -| 15 | RISK-BUNDLE-70-002 | TODO | Depends on 70-001. | Risk Bundle Export Guild · Docs Guild | Publish `/docs/airgap/risk-bundles.md` covering build/import/verification workflows. 
| +| 1 | EXPORT-SVC-35-006 | BLOCKED (2025-11-30) | Await EXPORT-SVC-35-005 delivery from Sprint 0163; API/OAS contracts not published. | Exporter Service Guild (`src/ExportCenter/StellaOps.ExportCenter`) | Expose Export API (profiles, runs, download, SSE updates) with audit logging, concurrency controls, viewer/operator RBAC. | +| 2 | EXPORT-SVC-36-001 | BLOCKED (2025-11-30) | BLOCKED by 35-006; Trivy adapter schema depends on Export API contracts. | Exporter Service Guild | Trivy DB adapter (core) with schema mappings, version flag gating, validation harness. | +| 3 | EXPORT-SVC-36-002 | BLOCKED (2025-11-30) | BLOCKED by 36-001; Java variant requires shared manifest entries. | Exporter Service Guild | Trivy Java DB variant with shared manifest entries and adapter regression tests. | +| 4 | EXPORT-SVC-36-003 | BLOCKED (2025-11-30) | BLOCKED by 36-002; waiting for adapter manifests to stabilize. | Exporter Service Guild | OCI distribution engine (manifests, descriptors, annotations) with registry auth and retries. | +| 5 | EXPORT-SVC-36-004 | BLOCKED (2025-11-30) | BLOCKED by 36-003; planner/run lifecycle needs OCI engine outputs. | Exporter Service Guild | Extend planner/run lifecycle for distribution targets (OCI/object storage) with idempotent metadata updates and retention timestamps. | +| 6 | EXPORT-SVC-37-001 | BLOCKED (2025-11-30) | BLOCKED by 36-004; delta logic depends on distribution metadata. | Exporter Service Guild | Mirror delta adapter with base manifest comparison, change set generation, content-addressed reuse. | +| 7 | EXPORT-SVC-37-002 | BLOCKED (2025-11-30) | BLOCKED by 37-001; encryption must wrap final mirror artifacts. | Exporter Service Guild | Bundle encryption (age/AES-GCM), key wrapping via KMS, verification tooling for encrypted outputs. | +| 8 | EXPORT-SVC-37-003 | BLOCKED (2025-11-30) | BLOCKED by 37-002; scheduler needs encryption/retention primitives. 
| Exporter Service Guild | Export scheduling (cron/event), retention pruning, retry idempotency, failure classification. | +| 9 | EXPORT-SVC-37-004 | BLOCKED (2025-11-30) | BLOCKED by 37-003; verification API requires scheduled run outputs. | Exporter Service Guild | Verification API to stream manifests/hashes, compute hash+signature checks, return attest status for CLI/UI. | +| 10 | EXPORT-SVC-43-001 | BLOCKED (2025-11-30) | BLOCKED by 37-004; pack-run integration waits on verification API. | Exporter Service Guild | Integrate pack run manifests/artifacts into export bundles and CLI verification; expose provenance links. | +| 11 | EXPORT-TEN-48-001 | BLOCKED (2025-11-30) | BLOCKED until Export API (35-006) stabilizes; tenant prefixes require finalized routes. | Exporter Service Guild | Prefix artifacts/manifests with tenant/project, enforce scope checks, prevent cross-tenant exports unless whitelisted; update provenance. | +| 12 | RISK-BUNDLE-69-001 | BLOCKED (2025-11-30) | BLOCKED pending Sprint 0163 risk prep artefacts/provider list. | Risk Bundle Export Guild · Risk Engine Guild (`src/ExportCenter/StellaOps.ExportCenter.RiskBundles`) | Implement `stella export risk-bundle` job producing tarball with provider datasets, manifests, DSSE signatures. | +| 13 | RISK-BUNDLE-69-002 | BLOCKED (2025-11-30) | BLOCKED by 69-001 deliverables. | Risk Bundle Export Guild · DevOps Guild | Integrate bundle job into CI/offline kit pipelines with checksum publication. | +| 14 | RISK-BUNDLE-70-001 | BLOCKED (2025-11-30) | BLOCKED by 69-002; verification inputs not available. | Risk Bundle Export Guild · CLI Guild | Provide CLI `stella risk bundle verify` command to validate bundles before import. | +| 15 | RISK-BUNDLE-70-002 | BLOCKED (2025-11-30) | BLOCKED by 70-001; doc content waits on verification CLI behavior. | Risk Bundle Export Guild · Docs Guild | Publish `/docs/airgap/risk-bundles.md` covering build/import/verification workflows. 
| + +## Wave Coordination +- Wave 1: EXPORT-SVC-35/36/37 chain (API → adapters → OCI → planner → mirror delta → encryption → scheduling → verification → pack-run integration). +- Wave 2: Tenant scoping hardening (EXPORT-TEN-48-001) once API stabilized. +- Wave 3: Risk bundle pipeline (RISK-BUNDLE-69/70 sequence) after Wave 1 foundations. + +## Wave Detail Snapshots +- Wave 1 deliverable: export service capable of deterministic OCI/object exports with verification endpoints. +- Wave 2 deliverable: tenant-aware manifests and provenance with enforced scope checks. +- Wave 3 deliverable: offline risk-bundle build/verify flow with CLI support and published airgap doc. + +## Interlocks & Readiness Signals +| Dependency | Impacts | Status / Next signal | +| --- | --- | --- | +| Sprint 0163-0001-0001 (ExportCenter II) artefacts (API/OAS, planner schema, Trivy adapters) | Tasks 1–11 | Pending; need published contracts before switching to DOING. | +| Tenant model alignment with Orchestrator/Authority envelopes | Task 11 | Pending; confirm scope prefixes once Export API routes are available. | +| CLI guild UX + verification consumption path for `stella risk bundle verify` | Tasks 9–15 | Pending; align once verification API payload shape is stable. | +| DevOps/offline kit pipeline integration + checksum publication | Tasks 10, 13 | Pending; requires bundle layout finalized post Sprint 0163 outputs. | + +## Upcoming Checkpoints +- Kickoff after Sprint 0163 completion (date TBD). 
+ +## Action Tracker +| # | Action | Owner | Due (UTC) | Status | +| --- | --- | --- | --- | --- | +| 1 | Confirm ExportCenter II contracts delivered (planner/run schema, pack manifests) | Exporter Service Guild | 2025-12-02 | OPEN | +| 2 | Provide KMS envelope-handling pattern for age/AES-GCM encryption | Crypto/Platform Guild | 2025-12-04 | DONE (2025-11-30) — see `docs/modules/export-center/operations/kms-envelope-pattern.md` | +| 3 | Publish risk-bundle provider matrix and signing baseline for tasks 69/70 | Risk Bundle Export Guild | 2025-12-02 | OPEN | +| 4 | Author `src/ExportCenter/AGENTS.md` aligned to module dossier and sprint scope | Project/Tech Management | 2025-12-01 | DONE (2025-11-30) | + +## Decisions & Risks +| Risk / Decision | Impact | Mitigation / Next Step | Status | +| --- | --- | --- | --- | +| ExportCenter II artifacts not yet available. | Blocks 35/36/37 chain. | Track delivery in Action 1; keep tasks BLOCKED until API/OAS + adapter schemas are published. | OPEN | +| Tenant scoping must stay deterministic/offline-safe. | Potential cross-tenant leakage. | Enforce scope prefixes and reuse Authority/Orchestrator tenant model; add tests in TEN-48-001. | OPEN | +| Encryption/KMS path for bundles. | Could stall 37-002 rollout. | Envelope pattern captured in `docs/modules/export-center/operations/kms-envelope-pattern.md`; adopt in implementation. | CLOSED | +| Risk bundle provider matrix/signing baseline missing. | Blocks 69/70 chain. | Capture provider list + signing posture in Action 3; keep tasks BLOCKED until published. | OPEN | +| ExportCenter AGENTS charter missing. | Blocks starting engineering work per charter. | AGENTS added on 2025-11-30; see `src/ExportCenter/AGENTS.md`. | CLOSED | + +### Risk table +| Risk | Severity | Mitigation / Owner | +| --- | --- | --- | +| Sprint 0163 deliverables slip (API/OAS, planner schema, Trivy adapters). | High | Action 1 to track; hold Wave 1 tasks until contracts land. Owner: Exporter Service Guild. 
| +| Tenant scope misalignment with Authority/Orchestrator. | Medium | Validate prefixes once API routes drop; add integration tests in TEN-48-001. Owner: Exporter Service Guild. | +| Encryption provider guidance delayed. | Low | Mitigated by `docs/modules/export-center/operations/kms-envelope-pattern.md`; adopt pattern in 37-002. Owner: Crypto/Platform Guild. | +| Risk bundle provider matrix/signing posture not published. | Medium | Action 3 to gather matrix; keep Wave 3 blocked until received. Owner: Risk Bundle Export Guild. | ## Execution Log | Date (UTC) | Update | Owner | @@ -41,10 +87,8 @@ | 2025-11-08 | Sprint stub created; awaiting ExportCenter II completion. | Planning | | 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_164_exportcenter_iii.md` to `SPRINT_0164_0001_0001_exportcenter_iii.md`; content preserved. | Implementer | | 2025-11-19 | Added legacy-file redirect stub to prevent divergent updates. | Implementer | - -## Decisions & Risks -- Requires ExportCenter II outputs; keep tasks TODO until upstream artifacts and contracts are present. -- Tenant scoping and encryption must remain deterministic and offline-ready; ensure key handling aligns with AirGap/CLI expectations. - -## Next Checkpoints -- Kickoff after Sprint 0163 completion (date TBD). +| 2025-11-30 | Aligned sprint to docs/implplan AGENTS template (Wave/Interlocks/Action tracker), refreshed Upcoming Checkpoints heading, and pre-filled interlock actions. | Project manager | +| 2025-11-30 | Authored `src/ExportCenter/AGENTS.md`; closed Action 4; tasks remain BLOCKED on Sprint 0163 outputs. | Implementer | +| 2025-11-30 | Corrected ExportCenter AGENTS status (file present); removed erroneous blocker/action. | Implementer | +| 2025-11-30 | Set Delivery Tracker tasks to BLOCKED pending Sprint 0163 artefacts; expanded interlocks/action tracker for gating signals. 
| Implementer | +| 2025-11-30 | Added KMS envelope-handling pattern doc and closed Action 2; encryption risk now covered. | Implementer | diff --git a/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md b/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md index c3a739709..a4d8447b0 100644 --- a/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md +++ b/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md @@ -19,26 +19,65 @@ ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | -| 1 | TIMELINE-OBS-52-001 | BLOCKED (2025-11-25) | Waiting on orchestrator/notification event schema + EvidenceLocker digest schema | Timeline Indexer Guild (`src/TimelineIndexer/StellaOps.TimelineIndexer`) | Bootstrap service; Postgres migrations for `timeline_events`, `timeline_event_details`, `timeline_event_digests`; enable RLS scaffolding and deterministic migration scripts. | -| 2 | TIMELINE-OBS-52-002 | TODO | Depends on 52-001. | Timeline Indexer Guild | Implement event ingestion pipeline (NATS/Redis consumers) with ordering guarantees, dedupe `(event_id, tenant_id)`, trace-ID correlation, backpressure metrics. | -| 3 | TIMELINE-OBS-52-003 | TODO | Depends on 52-002. | Timeline Indexer Guild | Expose REST/gRPC APIs for timeline queries (`GET /timeline`, `/timeline/{id}`) with filters, pagination, tenant enforcement; provide OpenAPI + contract tests. | -| 4 | TIMELINE-OBS-52-004 | TODO | Depends on 52-003. | Timeline Indexer Guild · Security Guild | Finalize RLS policies, scope checks (`timeline:read`), audit logging; integration tests for cross-tenant isolation and legal hold markers. | -| 5 | TIMELINE-OBS-53-001 | TODO | Depends on 52-004 and EvidenceLocker bundle digest schema. | Timeline Indexer Guild · Evidence Locker Guild | Link timeline events to evidence bundle digests + attestation subjects; expose `/timeline/{id}/evidence` returning signed manifest references. 
| +| 1 | TIMELINE-OBS-52-001 | DONE (2025-11-30) | Postgres schema + RLS committed; evidence linkage table aligned to bundle contract | Timeline Indexer Guild (`src/TimelineIndexer/StellaOps.TimelineIndexer`) | Bootstrap service; Postgres migrations for `timeline_events`, `timeline_event_details`, `timeline_event_digests`; enable RLS scaffolding and deterministic migration scripts. | +| 2 | TIMELINE-OBS-52-002 | DOING (2025-11-30) | Ingestion worker + metrics + subscriber abstraction implemented; waiting on NATS/Redis subject/schema to bind transports | Timeline Indexer Guild | Implement event ingestion pipeline (NATS/Redis consumers) with ordering guarantees, dedupe `(event_id, tenant_id)`, trace-ID correlation, backpressure metrics. | +| 3 | TIMELINE-OBS-52-003 | BLOCKED (2025-11-30) | BLOCKED by 52-002; ingestion contract not defined yet. | Timeline Indexer Guild | Expose REST/gRPC APIs for timeline queries (`GET /timeline`, `/timeline/{id}`) with filters, pagination, tenant enforcement; provide OpenAPI + contract tests. | +| 4 | TIMELINE-OBS-52-004 | BLOCKED (2025-11-30) | Blocked by 52-003; upstream schema + RLS review pending. | Timeline Indexer Guild · Security Guild | Finalize RLS policies, scope checks (`timeline:read`), audit logging; integration tests for cross-tenant isolation and legal hold markers. | +| 5 | TIMELINE-OBS-53-001 | BLOCKED (2025-11-30) | Blocked by 52-004 and awaiting EvidenceLocker bundle digest linkage tests. | Timeline Indexer Guild · Evidence Locker Guild | Link timeline events to evidence bundle digests + attestation subjects; expose `/timeline/{id}/evidence` returning signed manifest references. | + +## Wave Coordination +- Wave 1: TIMELINE-OBS-52 chain (service bootstrap → ingestion → APIs → RLS/policies). +- Wave 2: Evidence linkage (TIMELINE-OBS-53-001) after digest schema lands and RLS is approved. 
+ +## Wave Detail Snapshots +- Wave 1 deliverable: tenant-scoped timeline service with deterministic ingestion, pagination, and RLS/audit logging ready for Security review. +- Wave 2 deliverable: evidence linkage endpoint returning signed manifest references tied to EvidenceLocker digests/attestations. + +## Interlocks +| Dependency | Impacts | Status / Next signal | +| --- | --- | --- | +| Orchestrator/Notifications event schema | Tasks 2–4 | Pending; required to finalize ingestion payload and API fields. | +| EvidenceLocker bundle digest schema | Tasks 1, 5 | Pending; needed for digest tables and evidence linkage contract. | +| Security/Compliance RLS review | Task 4 | Pending; schedule once RLS proposal is drafted. | + +## Action Tracker +| # | Action | Owner | Due (UTC) | Status | +| --- | --- | --- | --- | --- | +| 1 | Attach orchestrator/notification event schema sample to sprint doc. | Timeline Indexer Guild | 2025-12-02 | OPEN | +| 2 | Obtain EvidenceLocker digest schema/sample manifest for linkage design. | Timeline Indexer Guild · Evidence Locker Guild | 2025-12-02 | OPEN | +| 3 | Draft RLS/migration proposal and route to Security/Compliance for approval. | Timeline Indexer Guild | 2025-12-04 | OPEN | + +## Upcoming Checkpoints +- Schema drop ETA for orchestrator/notification events (TBD). +- EvidenceLocker digest schema publication (TBD). +- Security/Compliance review for RLS proposal (TBD). + +## Decisions & Risks +| Risk / Decision | Impact | Mitigation / Next step | Status | +| --- | --- | --- | --- | +| Orchestrator/notification schemas not yet published. | Blocks ingestion and API field definitions (TIMELINE-OBS-52-002/003). | Track Action 1; keep tasks BLOCKED until schema attached. | OPEN | +| EvidenceLocker digest schema pending. | Blocks digest table shape and evidence linkage (TIMELINE-OBS-53-001). | Track Action 2; keep tasks BLOCKED. | OPEN | +| RLS review not scheduled. | Could delay production readiness of policies (TIMELINE-OBS-52-004). 
| Track Action 3; schedule with Security once draft ready. | OPEN | +| Baseline docs may change (`docs/modules/orchestrator/event-envelope.md`, `docs/modules/evidence-locker/prep/2025-11-24-evidence-locker-contract.md`). | Schema drift could invalidate migrations. | Monitor upstream doc updates; re-run schema diff before coding resumes. | OPEN | + +### Risk table +| Risk | Severity | Mitigation / Owner | +| --- | --- | --- | +| Orchestrator/notification schema slip. | High | Action 1 to secure sample; keep Wave 1 blocked until delivered. Owner: Timeline Indexer Guild. | +| EvidenceLocker digest schema slip. | High | Action 2 to obtain schema; block evidence linkage until received. Owner: Timeline Indexer Guild · Evidence Locker Guild. | +| RLS review delayed. | Medium | Action 3 to draft and schedule review with Security/Compliance. Owner: Timeline Indexer Guild. | +| Schema drift after migrations drafted. | Medium | Re-run schema diff against upstream docs before coding resumes. Owner: Timeline Indexer Guild. | ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | +| 2025-11-30 | Implemented TimelineIngestionWorker with subscriber abstraction, session dedupe, and metrics counters; awaiting NATS/Redis subject schema to wire real transports. | Implementer | +| 2025-11-30 | Started TIMELINE-OBS-52-002: added ingestion service, Postgres store, and deterministic payload hashing; queue bindings pending schema alignment. | Implementer | +| 2025-11-30 | Normalized sprint to AGENTS template (Wave/Interlocks/Action tracker) while keeping prior content intact. | Implementer | +| 2025-11-30 | Completed TIMELINE-OBS-52-001: added Postgres schema/RLS migrations, DataSource + migration runner wiring; test run attempted for module but cancelled due to long solution restore—manual rerun needed. 
| Implementer | +| 2025-11-30 | Located orchestrator event envelope draft and Evidence Locker bundle contract; unblocked migrations and RLS design for TIMELINE-OBS-52-001 and started implementation. | Implementer | +| 2025-11-30 | Re-checked for orchestrator/notification schema and EvidenceLocker bundle digest; none landed in `docs/events` or `docs/modules/evidence-locker`, so keeping all tasks blocked. | Implementer | | 2025-11-25 | Marked TIMELINE-OBS-52-001 BLOCKED: missing orchestrator/notification event schema and EvidenceLocker digest schema prevent drafting migrations/RLS. | Implementer | | 2025-11-12 | Captured task snapshot and blockers; waiting on orchestrator/notifications schema and EvidenceLocker digest schema. | Planning | | 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_165_timelineindexer.md` to `SPRINT_0165_0001_0001_timelineindexer.md`; content preserved. | Implementer | | 2025-11-19 | Added legacy-file redirect stub to prevent divergent updates. | Implementer | - -## Decisions & Risks -- Blocked on orchestrator/notification schemas for ingestion payload definitions. -- Needs EvidenceLocker bundle digest schema before implementing evidence linkage. -- Security/Compliance review required for Postgres RLS migrations; no coding until approval. -- TIMELINE-OBS-52-001 specifically blocked on upstream schemas (orchestrator/notification events) and EvidenceLocker digest schema; cannot draft migrations/RLS without them. - -## Next Checkpoints -- Obtain sample orchestrator/notification events and EvidenceLocker digest schema (date TBD). -- Draft migrations + RLS design and review with Security/Compliance. 
diff --git a/docs/implplan/SPRINT_0201_0001_0001_cli_i.md b/docs/implplan/SPRINT_0201_0001_0001_cli_i.md index aff52a650..a2e35a63d 100644 --- a/docs/implplan/SPRINT_0201_0001_0001_cli_i.md +++ b/docs/implplan/SPRINT_0201_0001_0001_cli_i.md @@ -56,8 +56,8 @@ ## Action Tracker | # | Action | Owner | Due (UTC) | Status | | --- | --- | --- | --- | --- | -| 1 | Align CLI adoption scope with SPRINT_0208_0001_0001_sdk Wave B artifacts (SDKGEN-64-001) and schedule switch-over | DevEx/CLI Guild | 2025-12-10 | TODO | -| 2 | Obtain offline kit status contract + sample bundle for CLI-HK-201-002 | DevEx/CLI Guild · Offline Kit owner | 2025-11-27 | TODO | +| 1 | Align CLI adoption scope with SPRINT_0208_0001_0001_sdk Wave B artifacts (SDKGEN-64-001) and schedule switch-over | DevEx/CLI Guild | 2025-12-10 | BLOCKED (Awaiting Wave B SDK drops; SDKGEN-64-001 still TODO in Sprint 0208) | +| 2 | Obtain offline kit status contract + sample bundle for CLI-HK-201-002 | DevEx/CLI Guild · Offline Kit owner | 2025-11-27 | BLOCKED (No offline kit status bundle/contract delivered; waiting on Offline Kit owner) | ## Decisions & Risks - `CLI-HK-201-002` remains blocked pending offline kit status contract and sample bundle. @@ -65,6 +65,7 @@ - `CLI-AIAI-31-001/002/003` delivered; CLI advisory verbs (summarize/explain/remediate) now render to console and file with citations; no build blockers remain in this track. - `CLI-AIRGAP-56-001` blocked: mirror bundle contract/spec not published to CLI; cannot implement `stella mirror create` without bundle schema and signing/digest requirements. - `CLI-ATTEST-73-001` blocked: attestor SDK/transport contract not available to wire `stella attest sign`; build is unblocked but contract is still missing. +- Action tracker: adoption alignment waits on SDKGEN-64-001 Wave B drops (Sprint 0208); offline kit status sample not yet provided by Offline Kit owner. - Full CLI test suite is long-running locally; targeted new advisory tests added. 
Recommend CI run `dotnet test src/Cli/__Tests/StellaOps.Cli.Tests/StellaOps.Cli.Tests.csproj` for confirmation. ## Execution Log @@ -85,4 +86,5 @@ | 2025-11-24 | Added `stella advise explain` and `stella advise remediate` commands; stub backend now returns offline status; CLI advisory commands write output to console and file. `dotnet test` for `src/Cli/__Tests/StellaOps.Cli.Tests` passes (102/102). | DevEx/CLI Guild | | 2025-11-24 | Added `stella advise batch` (multi-key runner) and new conflict/remediation tests. Partial local test runs attempted; full suite build is long—run `dotnet test src/Cli/__Tests/StellaOps.Cli.Tests/StellaOps.Cli.Tests.csproj` in CI for confirmation. | DevEx/CLI Guild | | 2025-11-24 | Added console/JSON output for advisory markdown and offline kit status; StubBackendClient now returns offline status. `dotnet test` for `src/Cli/__Tests/StellaOps.Cli.Tests` passes (100/100), clearing the CLI-AIAI-31-001 build blocker. | DevEx/CLI Guild | +| 2025-11-30 | Action tracker updated: adoption alignment (Action 1) BLOCKED awaiting SDKGEN-64-001 Wave B drops in Sprint 0208; offline kit status sample (Action 2) BLOCKED pending contract/sample from Offline Kit owner. | DevEx/CLI Guild | | 2025-11-24 | Verified advise batch implementation and marked CLI-AIAI-31-004 DONE; coverage via `HandleAdviseBatchAsync_RunsAllAdvisories` test. | DevEx/CLI Guild | diff --git a/docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md b/docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md new file mode 100644 index 000000000..e35f42ba3 --- /dev/null +++ b/docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md @@ -0,0 +1,42 @@ +# Sprint 0313 · Docs Modules · Attestor + +## Topic & Scope +- Refresh Attestor module docs (README, architecture, implementation plan, runbooks) to match latest release notes and attestation samples. +- Add observability/runbook stub and TASKS mirror for status syncing. 
+- Keep sprint references aligned with normalized filename. +- **Working directory:** `docs/modules/attestor`. + +## Dependencies & Concurrency +- Upstream reference sprints: 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- Documentation-only; can proceed in parallel once release/demo artefacts are available. + +## Documentation Prerequisites +- `docs/modules/attestor/AGENTS.md` +- `docs/modules/attestor/README.md` +- `docs/modules/attestor/architecture.md` +- `docs/modules/attestor/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | ATTESTOR-DOCS-0001 | DONE (2025-11-05) | Validate README vs release notes. | Docs Guild (`docs/modules/attestor`) | Validate that `docs/modules/attestor/README.md` matches latest release notes and attestation samples. | +| 2 | ATTESTOR-OPS-0001 | BLOCKED (2025-11-30) | Waiting on next demo outputs to update runbooks/observability. | Ops Guild (`docs/modules/attestor`) | Review runbooks/observability assets after the next sprint demo and capture findings inline with sprint notes. | +| 3 | ATTESTOR-ENG-0001 | DONE (2025-11-27) | Readiness tracker added. | Module Team (`docs/modules/attestor`) | Cross-check implementation plan milestones against `/docs/implplan/SPRINT_*.md` and update module readiness checkpoints. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template; renamed from `SPRINT_313_docs_modules_attestor.md`; added compatibility stub. | Docs Guild | +| 2025-11-05 | Completed ATTESTOR-DOCS-0001 per release notes and samples. 
| Docs Guild | +| 2025-11-27 | Added readiness tracker to implementation plan (ATTESTOR-ENG-0001). | Module Team | +| 2025-11-30 | Added observability runbook stub + dashboard placeholder; ATTESTOR-OPS-0001 set to BLOCKED pending next demo outputs. | Ops Guild | + +## Decisions & Risks +- Ops/runbook updates blocked until next Attestor demo provides observability evidence. +- Keep sprint and TASKS mirrored to avoid drift. +- Offline posture must be preserved; dashboards remain JSON importable. + +## Next Checkpoints +- 2025-12-05 · Reassess Attestor demo outputs; if available, unblock ATTESTOR-OPS-0001 and update runbook/dashboard. Owner: Ops Guild. diff --git a/docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md b/docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md new file mode 100644 index 000000000..3020ecf30 --- /dev/null +++ b/docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md @@ -0,0 +1,42 @@ +# Sprint 0314 · Docs Modules · Authority + +## Topic & Scope +- Refresh Authority module docs (README, architecture, implementation plan, runbooks) to reflect current OpTok/DPoP/mTLS posture, tenant scoping, and offline readiness. +- Stand up a TASKS board and mirror statuses with this sprint. +- Ensure observability/runbook references stay aligned with existing monitoring/Grafana assets. +- **Working directory:** `docs/modules/authority`. + +## Dependencies & Concurrency +- Upstream reference sprints: 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- Documentation-only; can proceed in parallel once prerequisite docs are available. 
+ +## Documentation Prerequisites +- `docs/modules/authority/AGENTS.md` +- `docs/modules/authority/README.md` +- `docs/modules/authority/architecture.md` +- `docs/modules/authority/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | AUTHORITY-DOCS-0001 | DONE (2025-11-30) | Refresh module docs per latest OpTok/tenant scope posture. | Docs Guild (`docs/modules/authority`) | Refresh Authority module docs, add sprint/task links, and cross-link monitoring/grafana assets. | +| 2 | AUTHORITY-ENG-0001 | DONE (2025-11-27) | Sprint readiness tracker added. | Module Team (`docs/modules/authority`) | Implementation plan readiness tracker mapped to epics/sprints (already delivered). | +| 3 | AUTHORITY-OPS-0001 | DONE (2025-11-30) | Add TASKS board + observability references. | Ops Guild (`docs/modules/authority`) | Ensure monitoring/backup/rotation runbooks are linked and offline-friendly; mirror status via TASKS. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template; renamed from `SPRINT_314_docs_modules_authority.md`; added compatibility stub. | Docs Guild | +| 2025-11-30 | Completed AUTHORITY-DOCS-0001: updated README latest updates, added sprint/TASKS links, and observability references. | Docs Guild | +| 2025-11-27 | AUTHORITY-ENG-0001 previously delivered: readiness tracker added to implementation plan. | Module Team | +| 2025-11-30 | Completed AUTHORITY-OPS-0001: created TASKS board and aligned monitoring/Grafana references. | Ops Guild | + +## Decisions & Risks +- Offline posture must be preserved; dashboards stay JSON importable (no external datasources). +- Tenant-scope/Surface.Env/Surface.Secrets contracts must stay aligned with platform docs; update sprint/TASKS if they change. 
+- Keep sprint and TASKS mirrored to avoid drift. + +## Next Checkpoints +- 2025-12-05 · Verify grafana-dashboard.json still matches current metrics contract; update runbooks if changes land. Owner: Ops Guild. diff --git a/docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md b/docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md new file mode 100644 index 000000000..c2c07c86c --- /dev/null +++ b/docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md @@ -0,0 +1,42 @@ +# Sprint 0330 · Docs Modules · Telemetry + +## Topic & Scope +- Refresh telemetry module docs (README, architecture, implementation plan, runbooks) to reflect the current observability stack, storage isolation, and offline posture. +- Create a TASKS board for the module and mirror statuses with this sprint. +- Add an observability runbook stub and dashboard placeholder for the latest demo. +- **Working directory:** `docs/modules/telemetry`. + +## Dependencies & Concurrency +- Upstream reference sprints: 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- Documentation-only; no blocking concurrency once prerequisite docs available. + +## Documentation Prerequisites +- `docs/modules/telemetry/AGENTS.md` +- `docs/modules/telemetry/README.md` +- `docs/modules/telemetry/architecture.md` +- `docs/modules/telemetry/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | TELEMETRY-DOCS-0001 | DONE (2025-11-30) | Refresh module docs for new storage/isolation posture. | Docs Guild (`docs/modules/telemetry`) | Validate telemetry module docs reflect the new storage stack and isolation rules; add sprint references. 
| +| 2 | TELEMETRY-OPS-0001 | DONE (2025-11-30) | Add observability runbook stub post-demo. | Ops Guild (`docs/modules/telemetry`) | Review telemetry runbooks/observability dashboards and add offline import placeholder. | +| 3 | TELEMETRY-ENG-0001 | DONE (2025-11-30) | Mirror statuses with module board. | Module Team (`docs/modules/telemetry`) | Ensure milestones stay in sync with telemetry sprints via TASKS board mirror. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template; renamed from `SPRINT_330_docs_modules_telemetry.md`; added compatibility stub. | Docs Guild | +| 2025-11-30 | Completed TELEMETRY-DOCS-0001: refreshed README latest updates and added sprint/task links. | Docs Guild | +| 2025-11-30 | Completed TELEMETRY-OPS-0001: added observability runbook stub and Grafana placeholder. | Ops Guild | +| 2025-11-30 | Completed TELEMETRY-ENG-0001: created TASKS board and mirrored statuses. | Module Team | + +## Decisions & Risks +- Dashboards must remain offline-import friendly; avoid external data sources. +- Keep sprint and TASKS mirrored to prevent drift. +- Storage/isolation rules must stay aligned with platform docs; update both sprint and module if they change. + +## Next Checkpoints +- 2025-12-05 · Populate Grafana panels once metrics contract finalizes; update runbook and sprint log. Owner: Ops Guild. diff --git a/docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md b/docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md new file mode 100644 index 000000000..8c5b18f69 --- /dev/null +++ b/docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md @@ -0,0 +1,42 @@ +# Sprint 0331 · Docs Modules · UI + +## Topic & Scope +- Refresh Console UI module docs (README, architecture, implementation plan, runbooks) so onboarding and operations reflect current roadmap and offline posture. +- Stand up a TASKS board for the module and keep status mirrored with this sprint. 
+- Capture observability/runbook stubs for the latest demo and document offline import steps. +- **Working directory:** `docs/modules/ui`. + +## Dependencies & Concurrency +- Upstream reference sprints: 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- No blocking concurrency; documentation-only refresh. + +## Documentation Prerequisites +- `docs/modules/ui/AGENTS.md` +- `docs/modules/ui/README.md` +- `docs/modules/ui/architecture.md` +- `docs/modules/ui/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | CONSOLE UI-DOCS-0001 | DONE (2025-11-30) | Validate module docs against latest roadmap/releases. | Docs Guild (`docs/modules/ui`) | Refresh module docs and link to sprint/API/runbook artefacts. | +| 2 | CONSOLE UI-ENG-0001 | DONE (2025-11-30) | Keep status mirrored between sprint and module board. | Module Team (`docs/modules/ui`) | Create TASKS board and mirror statuses with this sprint. | +| 3 | CONSOLE UI-OPS-0001 | DONE (2025-11-30) | Add observability/runbook stub from latest demo. | Ops Guild (`docs/modules/ui`) | Document observability/operations notes and offline dashboard stub. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template; renamed from `SPRINT_331_docs_modules_ui.md`; added compatibility stub. | Docs Guild | +| 2025-11-30 | Completed CONSOLE UI-DOCS-0001: refreshed README latest updates, added cross-links to observability runbook and sprint reference. | Docs Guild | +| 2025-11-30 | Completed CONSOLE UI-ENG-0001: created `docs/modules/ui/TASKS.md` and mirrored statuses. 
| Module Team | +| 2025-11-30 | Completed CONSOLE UI-OPS-0001: added observability runbook stub and offline Grafana JSON placeholder under `operations/`. | Ops Guild | + +## Decisions & Risks +- Docs assume offline/air-gap deployments; dashboards provided as JSON for local import to avoid external dependencies. +- Keep TASKS board and sprint in sync to prevent drift; update both when status changes. +- Observability stub uses placeholder panels until metrics endpoints are finalised. + +## Next Checkpoints +- 2025-12-05 · Review observability dashboard once metrics contract lands; update runbook/dashboards accordingly. Owner: Ops Guild. diff --git a/docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md b/docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md new file mode 100644 index 000000000..230df4e04 --- /dev/null +++ b/docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md @@ -0,0 +1,42 @@ +# Sprint 0332 · Docs Modules · VEX Lens + +## Topic & Scope +- Refresh VEX Lens module docs (README, architecture, implementation plan, runbooks) with consensus workflow guidance and latest release links. +- Add observability/runbook stub for the latest demo and keep sprint alignment notes in sync. +- Stand up a TASKS board for the module and mirror statuses with this sprint. +- **Working directory:** `docs/modules/vex-lens`. + +## Dependencies & Concurrency +- Upstream reference sprints: 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- No blocking concurrency; documentation-only refresh. 
+ +## Documentation Prerequisites +- `docs/modules/vex-lens/AGENTS.md` +- `docs/modules/vex-lens/README.md` +- `docs/modules/vex-lens/architecture.md` +- `docs/modules/vex-lens/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | VEX-CONSENSUS-LENS-DOCS-0001 | DONE (2025-11-30) | Refresh module docs with consensus workflow guidance. | Docs Guild (`docs/modules/vex-lens`) | Refresh VEX Lens module docs with consensus workflow guidance and release links. | +| 2 | VEX-LENS-OPS-0001 | DONE (2025-11-30) | Add observability/runbook stub post-demo. | Ops Guild (`docs/modules/vex-lens`) | Review runbooks/observability assets and document offline import steps. | +| 3 | VEX-LENS-ENG-0001 | DONE (2025-11-30) | Mirror statuses with module board. | Module Team (`docs/modules/vex-lens`) | Keep module milestones synchronized with VEX Lens sprints and TASKS board. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template; renamed from `SPRINT_332_docs_modules_vex_lens.md`; added compatibility stub. | Docs Guild | +| 2025-11-30 | Completed VEX-CONSENSUS-LENS-DOCS-0001: updated README latest updates and cross-links; added sprint/API/schema references. | Docs Guild | +| 2025-11-30 | Completed VEX-LENS-OPS-0001: added observability runbook stub and offline Grafana JSON placeholder under `runbooks/`. | Ops Guild | +| 2025-11-30 | Completed VEX-LENS-ENG-0001: created TASKS board and mirrored statuses with this sprint. | Module Team | + +## Decisions & Risks +- Docs assume offline/air-gap posture; dashboards provided as JSON for local import. +- Keep TASKS board and sprint in sync to avoid drift; update both on status changes. 
+- Observability stub awaits finalized metrics contract; panels are placeholders until metrics land. + +## Next Checkpoints +- 2025-12-05 · Populate Grafana panels once metrics contract finalizes; update runbook and sprint log. Owner: Ops Guild. diff --git a/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md b/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md new file mode 100644 index 000000000..cd18707bf --- /dev/null +++ b/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md @@ -0,0 +1,44 @@ +# Sprint 0333 · Docs Modules · Excititor + +## Topic & Scope +- Refresh Excititor module docs (README, architecture, implementation plan, runbooks) to match current consensus/linkset posture and offline evidence flows. +- Mirror statuses between this sprint and the module TASKS board. +- Capture observability/runbook evidence from latest demo and keep references to chunk API/OpenAPI once frozen. +- **Working directory:** `docs/modules/excititor`. + +## Dependencies & Concurrency +- Upstream reference sprints: 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- Documentation-only; can proceed in parallel once API/CI artifacts are available. + +## Documentation Prerequisites +- `docs/modules/excititor/AGENTS.md` +- `docs/modules/excititor/README.md` +- `docs/modules/excititor/architecture.md` +- `docs/modules/excititor/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | EXCITOR-DOCS-0001 | DONE (2025-11-07) | Validate README vs release notes. | Docs Guild (`docs/modules/excitor`) | Validate that `docs/modules/excitor/README.md` matches latest release notes and consensus beta notes. 
| +| 2 | EXCITOR-OPS-0001 | DONE (2025-11-07) | Checklist in `docs/modules/excitor/mirrors.md`. | Ops Guild (`docs/modules/excitor`) | Review runbooks/observability assets and add mirror checklist. | +| 3 | EXCITOR-ENG-0001 | DONE (2025-11-07) | Keep implementation plan aligned. | Module Team (`docs/modules/excitor`) | Ensure implementation plan sprint alignment table stays current with SPRINT_200 updates. | +| 4 | EXCITITOR-DOCS-0001 | BLOCKED (2025-11-19) | Waiting on chunk API CI validation + console contracts; OpenAPI freeze pending. | Docs Guild (`docs/modules/excititor`) | Finalize docs after chunk API CI passes and OpenAPI is frozen. | +| 5 | EXCITITOR-ENG-0001 | TODO | Mirror status via AGENTS workflow. | Module Team (`docs/modules/excititor`) | Update engineering notes and alignment once EXCITITOR-DOCS-0001 unblocks. | +| 6 | EXCITITOR-OPS-0001 | TODO | Sync outcomes back to upstream sprint once contracts freeze. | Ops Guild (`docs/modules/excititor`) | Reflect observability/runbook updates after OpenAPI freeze. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template; renamed from `SPRINT_333_docs_modules_excititor.md`; added compatibility stub. | Docs Guild | +| 2025-11-07 | Marked EXCITOR-DOCS-0001/OPS-0001/ENG-0001 as DONE after README, runbook checklist, and implementation plan sync. | Module Team | +| 2025-11-19 | EXCITITOR-DOCS-0001 set to BLOCKED pending chunk API CI and OpenAPI freeze. | Docs Guild | + +## Decisions & Risks +- EXCITITOR-DOCS-0001 blocked on chunk API CI validation and OpenAPI freeze; downstream ops/eng tasks stay TODO until resolved. +- Mirror statuses in `docs/modules/excititor/TASKS.md` to avoid drift between sprint and module board. +- Offline posture must be maintained; dashboards should remain importable without external services. 
+ +## Next Checkpoints +- 2025-12-05 · Reassess chunk API CI and OpenAPI freeze; if green, unblock EXCITITOR-DOCS-0001 and propagate updates. Owner: Docs Guild. diff --git a/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md b/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md new file mode 100644 index 000000000..e33b42060 --- /dev/null +++ b/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md @@ -0,0 +1,44 @@ +# Sprint 0334 · Docs Modules · Vuln Explorer + +## Topic & Scope +- Refresh Vuln Explorer module docs (README, architecture, implementation plan, runbooks) to match current roadmap, VEX-first triage UX, and offline evidence/export flows. +- Add observability/runbook evidence from the latest demo and keep sprint alignment notes in sync with active Vuln Explorer deliveries. +- Ensure doc front doors link to supporting artefacts (OpenAPI draft, schemas, sprint plan, task board) for deterministic onboarding. +- **Working directory:** `docs/modules/vuln-explorer`. + +## Dependencies & Concurrency +- Upstream context: Sprint 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- No blocking concurrency once prerequisite docs are available; tasks are documentation-only. + +## Documentation Prerequisites +- `docs/modules/vuln-explorer/AGENTS.md` +- `docs/modules/vuln-explorer/README.md` +- `docs/modules/vuln-explorer/architecture.md` +- `docs/modules/vuln-explorer/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | VULNERABILITY-EXPLORER-DOCS-0001 | DONE (2025-11-30) | Validate module docs against latest roadmap/releases. 
| Docs Guild (`docs/modules/vuln-explorer`) | Validated module docs and added evidence links (OpenAPI draft, schemas, sprint references). | +| 2 | VULNERABILITY-EXPLORER-OPS-0001 | DONE (2025-11-30) | Gather observability outputs from latest demo. | Ops Guild (`docs/modules/vuln-explorer`) | Documented observability/runbook outputs and offline dashboard stub in module docs. | +| 3 | VULNERABILITY-EXPLORER-ENG-0001 | DONE (2025-11-30) | Sync sprint alignment notes across Vuln Explorer streams. | Module Team (`docs/modules/vuln-explorer`) | Synced sprint alignment notes and task mirrors across module docs and TASKS board. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template and renamed from `SPRINT_334_docs_modules_vuln_explorer.md` to `SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md`; added compatibility stub. | Docs Guild | +| 2025-11-30 | Completed VULNERABILITY-EXPLORER-DOCS-0001: refreshed README latest updates, observability references, architecture cross-links, and added sprint/API/schema evidence. | Docs Guild | +| 2025-11-30 | Completed VULNERABILITY-EXPLORER-OPS-0001: added offline observability runbook + dashboard stub (`runbooks/observability.md`, `runbooks/dashboards/vuln-explorer-observability.json`). | Ops Guild | +| 2025-11-30 | Completed VULNERABILITY-EXPLORER-ENG-0001: created module `TASKS.md` mirror and sprint alignment notes in implementation plan. | Module Team | + +## Decisions & Risks +- Docs refresh depends on latest Vuln Explorer roadmap and demo artefacts; stale inputs risk inaccurate guidance. +- Observability/runbook updates must remain offline-friendly (no external dashboards). +- Maintain Aggregation-Only Contract references to avoid implying merge/consensus semantics in docs. +- Keep module `TASKS.md` and this sprint in lockstep to avoid drift; mirror updates when new doc work starts. 
+ +## Next Checkpoints +- 2025-12-02 · Confirm observability/demo artefacts and finalize runbook updates. Owner: Ops Guild. +- 2025-12-03 · Validate doc cross-links (OpenAPI, schemas, sprint references) and close VULNERABILITY-EXPLORER-DOCS-0001. Owner: Docs Guild. diff --git a/docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md b/docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md new file mode 100644 index 000000000..afe52d53c --- /dev/null +++ b/docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md @@ -0,0 +1,42 @@ +# Sprint 0335 · Docs Modules · Zastava + +## Topic & Scope +- Refresh Zastava module docs (README, architecture, implementation plan, runbooks) to reflect current runtime posture, Surface.Env/Surface.Secrets adoption, and offline kit integration. +- Stand up a TASKS board and mirror statuses with this sprint. +- Add observability/runbook stub for the latest demo and keep links to Surface contracts. +- **Working directory:** `docs/modules/zastava`. + +## Dependencies & Concurrency +- Upstream reference sprints: 100.A (Attestor), 110.A (AdvisoryAI), 120.A (AirGap), 130.A (Scanner), 140.A (Graph), 150.A (Orchestrator), 160.A (Evidence Locker), 170.A (Notifier), 180.A (CLI), 190.A (Ops Deployment). +- No blocking concurrency; documentation-only refresh. + +## Documentation Prerequisites +- `docs/modules/zastava/AGENTS.md` +- `docs/modules/zastava/README.md` +- `docs/modules/zastava/architecture.md` +- `docs/modules/zastava/implementation_plan.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | ZASTAVA-DOCS-0001 | DONE (2025-11-30) | Refresh module docs per latest Surface.Env/Surface.Secrets posture. 
| Docs Guild (`docs/modules/zastava`) | Refresh Zastava module docs with current runtime policy, Surface Env/Secrets notes, and offline kit hooks. | +| 2 | ZASTAVA-ENG-0001 | DONE (2025-11-30) | Mirror sprint ↔ TASKS status. | Module Team (`docs/modules/zastava`) | Create TASKS board and keep statuses in sync. | +| 3 | ZASTAVA-OPS-0001 | DONE (2025-11-30) | Add observability/runbook stub. | Ops Guild (`docs/modules/zastava`) | Document observability/runbook stub and offline dashboard JSON. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-30 | Normalised sprint to standard template; renamed from `SPRINT_335_docs_modules_zastava.md`; added compatibility stub. | Docs Guild | +| 2025-11-30 | Completed ZASTAVA-DOCS-0001: refreshed README latest updates, added Surface Env/Secrets references, and sprint links. | Docs Guild | +| 2025-11-30 | Completed ZASTAVA-ENG-0001: created TASKS board; mirrored statuses. | Module Team | +| 2025-11-30 | Completed ZASTAVA-OPS-0001: added observability runbook stub and dashboard placeholder. | Ops Guild | + +## Decisions & Risks +- Surface.Env/Surface.Secrets contracts must remain aligned with platform docs; update both sprint and TASKS if contracts shift. +- Offline-friendly dashboards only; avoid external dependencies. +- Keep sprint and TASKS mirrored to avoid drift. + +## Next Checkpoints +- 2025-12-05 · Populate Grafana panels once metrics contract finalizes; update runbook + sprint log. Owner: Ops Guild. diff --git a/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md b/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md index fa84daeed..6ac23b3b8 100644 --- a/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md +++ b/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md @@ -28,12 +28,12 @@ | --- | --- | --- | --- | --- | --- | | 1 | BENCH-REPO-513-001 | DONE (2025-11-29) | None; foundational. 
| Bench Guild · DevOps Guild | Create public repository structure: `benchmark/cases///`, `benchmark/schemas/`, `benchmark/tools/scorer/`, `baselines/`, `ci/`, `website/`. Add LICENSE (Apache-2.0), README, CONTRIBUTING.md. | | 2 | BENCH-SCHEMA-513-002 | DONE (2025-11-29) | Depends on 513-001. | Bench Guild | Define and publish schemas: `case.schema.yaml` (component, sink, label, evidence), `entrypoints.schema.yaml`, `truth.schema.yaml`, `submission.schema.json`. Include JSON Schema validation. | -| 3 | BENCH-CASES-JS-513-003 | TODO | Depends on 513-002. | Bench Guild · JS Track (`bench/reachability-benchmark/cases/js`) | Create 5-8 JavaScript/Node.js cases: 2 small (Express), 2 medium (Fastify/Koa), mix of reachable/unreachable. Include Dockerfiles, package-lock.json, unit test oracles, coverage output. | -| 4 | BENCH-CASES-PY-513-004 | TODO | Depends on 513-002. | Bench Guild · Python Track (`bench/reachability-benchmark/cases/py`) | Create 5-8 Python cases: Flask, Django, FastAPI. Include requirements.txt pinned, pytest oracles, coverage.py output. | -| 5 | BENCH-CASES-JAVA-513-005 | TODO | Depends on 513-002. | Bench Guild · Java Track (`bench/reachability-benchmark/cases/java`) | Create 5-8 Java cases: Spring Boot, Micronaut. Include pom.xml locked, JUnit oracles, JaCoCo coverage. | +| 3 | BENCH-CASES-JS-513-003 | DONE (2025-11-30) | Depends on 513-002. | Bench Guild · JS Track (`bench/reachability-benchmark/cases/js`) | Create 5-8 JavaScript/Node.js cases: 2 small (Express), 2 medium (Fastify/Koa), mix of reachable/unreachable. Include Dockerfiles, package-lock.json, unit test oracles, coverage output. Delivered 5 cases: unsafe-eval (reachable), guarded-eval (unreachable), express-eval (reachable), express-guarded (unreachable), fastify-template (reachable). | +| 4 | BENCH-CASES-PY-513-004 | DONE (2025-11-30) | Depends on 513-002. | Bench Guild · Python Track (`bench/reachability-benchmark/cases/py`) | Create 5-8 Python cases: Flask, Django, FastAPI. 
Include requirements.txt pinned, pytest oracles, coverage.py output. Delivered 5 cases: unsafe-exec (reachable), guarded-exec (unreachable), flask-template (reachable), fastapi-guarded (unreachable), django-ssti (reachable). | +| 5 | BENCH-CASES-JAVA-513-005 | DOING | Depends on 513-002. | Bench Guild · Java Track (`bench/reachability-benchmark/cases/java`) | Create 5-8 Java cases: Spring Boot, Micronaut. Include pom.xml locked, JUnit oracles, JaCoCo coverage. Progress: 2/5 seeded (`spring-deserialize` reachable, `spring-guarded` unreachable). Note: builds/tests pending until JDK available in runner. | | 6 | BENCH-CASES-C-513-006 | TODO | Depends on 513-002. | Bench Guild · Native Track (`bench/reachability-benchmark/cases/c`) | Create 3-5 C/ELF cases: small HTTP servers, crypto utilities. Include Makefile, gcov/llvm-cov coverage, deterministic builds (SOURCE_DATE_EPOCH). | | 7 | BENCH-BUILD-513-007 | TODO | Depends on 513-003 through 513-006. | Bench Guild · DevOps Guild | Implement `build_all.py` and `validate_builds.py`: deterministic Docker builds, hash verification, SBOM generation (syft), attestation stubs. | -| 8 | BENCH-SCORER-513-008 | TODO | Depends on 513-002. | Bench Guild (`bench/reachability-benchmark/tools/scorer`) | Implement `rb-score` CLI: load cases/truth, validate submissions, compute precision/recall/F1, explainability score (0-3), runtime stats, determinism rate. | +| 8 | BENCH-SCORER-513-008 | DONE (2025-11-30) | Depends on 513-002. | Bench Guild (`bench/reachability-benchmark/tools/scorer`) | Implement `rb-score` CLI: load cases/truth, validate submissions, compute precision/recall/F1, explainability score (0-3), runtime stats, determinism rate. | | 9 | BENCH-EXPLAIN-513-009 | TODO | Depends on 513-008. | Bench Guild | Implement explainability scoring rules: 0=no context, 1=path with ≥2 nodes, 2=entry+≥3 nodes, 3=guards/constraints included. Unit tests for each level. 
| | 10 | BENCH-BASELINE-SEMGREP-513-010 | TODO | Depends on 513-008 and cases. | Bench Guild | Semgrep baseline runner: `baselines/semgrep/run_case.sh`, rule config, output normalization to submission format. | | 11 | BENCH-BASELINE-CODEQL-513-011 | TODO | Depends on 513-008 and cases. | Bench Guild | CodeQL baseline runner: database creation, reachability queries, output normalization. Document CodeQL license requirements. | @@ -85,3 +85,12 @@ | 2025-11-27 | Sprint created from product advisory `24-Nov-2025 - Designing a Deterministic Reachability Benchmark.md`; 17 tasks defined across 5 waves. | Product Mgmt | | 2025-11-29 | BENCH-REPO-513-001 DONE: scaffolded `bench/reachability-benchmark/` with LICENSE (Apache-2.0), NOTICE, README, CONTRIBUTING, .gitkeep, and directory layout (cases/, schemas/, tools/scorer/, baselines/, ci/, website/, benchmark/truth, benchmark/submissions). | Implementer | | 2025-11-29 | BENCH-SCHEMA-513-002 DONE: expanded schemas (case/entrypoints/truth/submission), added examples + offline validator `tools/validate.py`, and pinned requirements for deterministic validation. | Implementer | +| 2025-11-30 | BENCH-SCORER-513-008 DONE: implemented `rb-score` CLI with schema validation, metrics (precision/recall/F1), explainability tiers, determinism check, JSON/text outputs, pinned deps, and pytest coverage. | Implementer | +| 2025-11-30 | Started BENCH-CASES-JS-513-003: added first JS case `cases/js/unsafe-eval` with deterministic build/test, traces/coverage, entrypoints, truth file `benchmark/truth/js-unsafe-eval.json`. | Implementer | +| 2025-11-30 | Progressed BENCH-CASES-JS-513-003: added `cases/js/guarded-eval` (feature-flagged sink, unreachable by default) with matching entrypoints and truth `benchmark/truth/js-guarded-eval.json`; both cases validated via `tools/validate.py` and build scripts produce deterministic artifacts. 
| Implementer | +| 2025-11-30 | Completed BENCH-CASES-JS-513-003: added `cases/js/express-eval`, `cases/js/express-guarded`, `cases/js/fastify-template` with matching entrypoints and truth files; all five JS cases validate against schemas and have deterministic build/test scripts. | Implementer | +| 2025-11-30 | Started BENCH-CASES-PY-513-004: added `cases/py/unsafe-exec` (reachable eval) and `cases/py/guarded-exec` (feature-flagged, unreachable by default) with entrypoints and truth files; both validate via `tools/validate.py` and deterministic build scripts. | Implementer | +| 2025-11-30 | Progressed BENCH-CASES-PY-513-004: added `cases/py/flask-template` (reachable template rendering) with entrypoints and truth `benchmark/truth/py-flask-template.json`; validated via `tools/validate.py` and deterministic build. | Implementer | +| 2025-11-30 | Progressed BENCH-CASES-PY-513-004: added `cases/py/fastapi-guarded` (unreachable unless ALLOW_EXEC=true) with entrypoints and truth `benchmark/truth/py-fastapi-guarded.json`; validated via `tools/validate.py` and deterministic build. | Implementer | +| 2025-11-30 | Completed BENCH-CASES-PY-513-004: added `cases/py/django-ssti` (reachable template rendering, autoescape off) with truth `benchmark/truth/py-django-ssti.json`; validated via `tools/validate.py` and deterministic build. | Implementer | +| 2025-11-30 | Started BENCH-CASES-JAVA-513-005: added `cases/java/spring-deserialize` (reachable) and `cases/java/spring-guarded` (unreachable by default) with entrypoints and truth files; schema validation passes. Build/test pending due to missing `javac` in runner—recorded as dependency for future CI. 
| Implementer | diff --git a/docs/implplan/SPRINT_150_scheduling_automation.md b/docs/implplan/SPRINT_150_scheduling_automation.md index 513850eab..4662b3679 100644 --- a/docs/implplan/SPRINT_150_scheduling_automation.md +++ b/docs/implplan/SPRINT_150_scheduling_automation.md @@ -9,7 +9,7 @@ This file now only tracks the scheduling & automation status snapshot. Active ba | Wave | Guild owners | Shared prerequisites | Status | Notes | | --- | --- | --- | --- | --- | | 150.A Orchestrator | Orchestrator Service Guild · AirGap Policy/Controller Guilds · Observability Guild | Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 140.A – Graph | TODO | Pending confirmation that Scanner surface artifacts are ready; keep job telemetry work prepped for fast start. | -| 150.B PacksRegistry | Packs Registry Guild · Exporter Guild · Security Guild | Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 140.A – Graph | TODO | Blocked on Orchestrator tenancy scaffolding; specs are ready once 150.A flips to DOING. | +| 150.B PacksRegistry | Packs Registry Guild · Exporter Guild · Security Guild | Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 140.A – Graph | DONE (2025-11-25) | Completed in `SPRINT_0154_0001_0001_packsregistry`; registry service, lifecycle, mirroring, and compliance dashboards shipped. | | 150.C Scheduler | Scheduler WebService/Worker Guilds · Findings Ledger Guild · Observability Guild | Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 140.A – Graph | TODO | Impact index improvements need Graph overlays; hold until 140.A status improves. | | 150.D TaskRunner | Task Runner Guild · AirGap Guilds · Evidence Locker Guild | Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 140.A – Graph | TODO | Execution engine upgrades staged; start once Orchestrator/Scheduler telemetry baselines exist. 
| diff --git a/docs/implplan/SPRINT_152_orchestrator_ii.md b/docs/implplan/SPRINT_152_orchestrator_ii.md index 056b94456..82f728658 100644 --- a/docs/implplan/SPRINT_152_orchestrator_ii.md +++ b/docs/implplan/SPRINT_152_orchestrator_ii.md @@ -1,24 +1,5 @@ -# Sprint 152 - Scheduling & Automation · 150.A) Orchestrator.II +# Moved: Sprint 0152-0001-0002 · Orchestrator II (Scheduling & Automation) -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). +This legacy filename is retained to avoid broken references. The canonical sprint now lives at `docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md` following the standard naming/template. Do not edit tasks here; update the canonical file only. -[Scheduling & Automation] 150.A) Orchestrator.II -Depends on: Sprint 150.A - Orchestrator.I -Summary: Scheduling & Automation focus on Orchestrator (phase II). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -ORCH-SVC-32-002 | TODO | Implement scheduler DAG planner + dependency resolver, job state machine, and critical-path metadata without yet issuing control actions. Dependencies: ORCH-SVC-32-001. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-32-003 | TODO | Expose read-only REST APIs (sources, runs, jobs, DAG) with OpenAPI, validation, pagination, and tenant scoping. Dependencies: ORCH-SVC-32-002. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-32-004 | TODO | Implement WebSocket/SSE stream for job/run updates, emit structured metrics counters/histograms, and add health probes. Dependencies: ORCH-SVC-32-003. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-32-005 | TODO | Deliver worker claim/heartbeat/progress endpoints capturing artifact metadata/checksums and enforcing idempotency keys. Dependencies: ORCH-SVC-32-004. 
| Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-33-001 | TODO | Enable `sources test. Dependencies: ORCH-SVC-32-005. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-33-002 | TODO | Implement per-source/tenant adaptive token-bucket rate limiter, concurrency caps, and backpressure signals reacting to upstream 429/503. Dependencies: ORCH-SVC-33-001. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-33-003 | TODO | Add watermark/backfill manager with event-time windows, duplicate suppression, dry-run preview endpoint, and safety validations. Dependencies: ORCH-SVC-33-002. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-33-004 | TODO | Deliver dead-letter store, replay endpoints, and error classification surfaces with remediation hints + notification hooks. Dependencies: ORCH-SVC-33-003. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-34-001 | DONE | Implement quota management APIs, per-tenant SLO burn-rate computation, and alert budget tracking surfaced via metrics. Dependencies: ORCH-SVC-33-004. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-34-002 | DONE | Build audit log + immutable run ledger export with signed manifest support, including provenance chain to artifacts. Dependencies: ORCH-SVC-34-001. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-34-003 | TODO | Execute perf/scale validation (≥10k pending jobs, dispatch P95 <150 ms) and add autoscaling hooks with health probes. Dependencies: ORCH-SVC-34-002. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-34-004 | TODO | Package orchestrator container, Helm overlays, offline bundle seeds, provenance attestations, and compliance checklist for GA. Dependencies: ORCH-SVC-34-003. 
| Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-35-101 | TODO | Register `export` job type with quotas/rate policies, expose telemetry, and ensure exporter workers heartbeat via orchestrator contracts. Dependencies: ORCH-SVC-34-004. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-36-101 | TODO | Capture distribution metadata and retention timestamps for export jobs, updating dashboards and SSE payloads. Dependencies: ORCH-SVC-35-101. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) -ORCH-SVC-37-101 | TODO | Enable scheduled export runs, retention pruning hooks, and failure alerting tied to export job class. Dependencies: ORCH-SVC-36-101. | Orchestrator Service Guild (src/Orchestrator/StellaOps.Orchestrator) \ No newline at end of file +Status recap (read-only): All ORCH-SVC-32/33/34/35/36/37 tasks are DONE in the canonical sprint document. diff --git a/docs/implplan/SPRINT_154_packsregistry.md b/docs/implplan/SPRINT_154_packsregistry.md index a40634e5d..81be6cf1b 100644 --- a/docs/implplan/SPRINT_154_packsregistry.md +++ b/docs/implplan/SPRINT_154_packsregistry.md @@ -1,12 +1,8 @@ -# Sprint 154 - Scheduling & Automation · 150.B) PacksRegistry +# Legacy redirect — Sprint 0154 Packs Registry -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). +This sprint was renamed to `SPRINT_0154_0001_0001_packsregistry.md` on 2025-11-19 to match the standard format. -[Scheduling & Automation] 150.B) PacksRegistry -Depends on: Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph -Summary: Scheduling & Automation focus on PacksRegistry). 
-Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -PACKS-REG-41-001 | TODO | Implement registry service, migrations for `packs_index`, `parity_matrix`, provenance docs; support pack upload/list/get, signature verification, RBAC enforcement, and provenance manifest storage. | Packs Registry Guild (src/PacksRegistry/StellaOps.PacksRegistry) -PACKS-REG-42-001 | TODO | Add version lifecycle (promote/deprecate), tenant allowlists, provenance export, signature rotation, audit logs, and Offline Kit seed support. Dependencies: PACKS-REG-41-001. | Packs Registry Guild (src/PacksRegistry/StellaOps.PacksRegistry) -PACKS-REG-43-001 | TODO | Implement registry mirroring, pack signing policies, attestation integration, and compliance dashboards; integrate with Export Center. Dependencies: PACKS-REG-42-001. | Packs Registry Guild (src/PacksRegistry/StellaOps.PacksRegistry) \ No newline at end of file +Please update the canonical file instead: +- `docs/implplan/SPRINT_0154_0001_0001_packsregistry.md` + +Status, execution log, and task details are authoritative in the canonical file; this stub exists to avoid divergent edits in older links. diff --git a/docs/implplan/SPRINT_157_taskrunner_i.md b/docs/implplan/SPRINT_157_taskrunner_i.md index 35c883245..d49db8d68 100644 --- a/docs/implplan/SPRINT_157_taskrunner_i.md +++ b/docs/implplan/SPRINT_157_taskrunner_i.md @@ -1,26 +1,4 @@ -# Sprint 157 - Scheduling & Automation · 150.D) TaskRunner.I +# Deprecated Sprint File -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Scheduling & Automation] 150.D) TaskRunner.I -Depends on: Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph -Summary: Scheduling & Automation focus on TaskRunner (phase I). 
-Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -TASKRUN-41-001 | TODO | Bootstrap service, define migrations for `pack_runs`, `pack_run_logs`, `pack_artifacts`, implement run API (create/get/log stream), local executor, approvals pause, artifact capture, and provenance manifest generation. | Task Runner Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-AIRGAP-56-001 | TODO | Enforce plan-time validation rejecting steps with non-allowlisted network calls in sealed mode and surface remediation errors. | Task Runner Guild, AirGap Policy Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-AIRGAP-56-002 | TODO | Add helper steps for bundle ingestion (checksum verification, staging to object store) with deterministic outputs. Dependencies: TASKRUN-AIRGAP-56-001. | Task Runner Guild, AirGap Importer Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-AIRGAP-57-001 | TODO | Refuse to execute plans when environment sealed=false but declared sealed install; emit advisory timeline events. Dependencies: TASKRUN-AIRGAP-56-002. | Task Runner Guild, AirGap Controller Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-AIRGAP-58-001 | TODO | Capture bundle import job transcripts, hashed inputs, and outputs into portable evidence bundles. Dependencies: TASKRUN-AIRGAP-57-001. | Task Runner Guild, Evidence Locker Guild (src/TaskRunner/StellaOps.TaskRunner) -> 2025-11-04: Resumed TASKRUN-42-001 — scoping execution engine upgrades (loops/conditionals/maxParallel), simulation mode, policy gate integration, and deterministic failure recovery. -> 2025-11-04: Worker/WebService wiring in place — execution graph honours `maxParallel`/`continueOnError`, retry windows persisted, and simulation API exposed. -> 2025-11-04: Continuing TASKRUN-42-001 — cleaning persistence anomalies, validating retry metadata, and wiring simulation preview into CLI surface. 
-> 2025-11-04: CLI command `stella task-runner simulate` wired to the new endpoint with JSON/table output modes. -TASKRUN-OAS-61-001 | TODO | Document Task Runner APIs (pack runs, logs, approvals) in service OAS, including streaming response schemas and examples. | Task Runner Guild, API Contracts Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OAS-61-002 | TODO | Expose `GET /.well-known/openapi` returning signed spec metadata, build version, and ETag. Dependencies: TASKRUN-OAS-61-001. | Task Runner Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OAS-62-001 | TODO | Provide SDK examples for pack run lifecycle; ensure SDKs offer streaming log helpers and paginator wrappers. Dependencies: TASKRUN-OAS-61-002. | Task Runner Guild, SDK Generator Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OAS-63-001 | TODO | Implement deprecation header support and Sunset handling for legacy pack APIs; emit notifications metadata. Dependencies: TASKRUN-OAS-62-001. | Task Runner Guild, API Governance Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OBS-50-001 | TODO | Adopt telemetry core in Task Runner host + worker executors, ensuring step execution spans/logs include `trace_id`, `tenant_id`, `run_id`, and scrubbed command transcripts. | Task Runner Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OBS-51-001 | TODO | Emit metrics for step latency, retries, queue depth, sandbox resource usage; define SLOs for pack run completion and failure rate; surface burn-rate alerts to collector/Notifier. Dependencies: TASKRUN-OBS-50-001. | Task Runner Guild, DevOps Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OBS-52-001 | TODO | Produce timeline events for pack runs (`pack.started`, `pack.step.completed`, `pack.failed`) containing evidence pointers and policy gate context. Provide dedupe + retry logic. Dependencies: TASKRUN-OBS-51-001. 
| Task Runner Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OBS-53-001 | TODO | Capture step transcripts, artifact manifests, environment digests, and policy approvals into evidence locker snapshots; ensure redaction + hash chain coverage. Dependencies: TASKRUN-OBS-52-001. | Task Runner Guild, Evidence Locker Guild (src/TaskRunner/StellaOps.TaskRunner) \ No newline at end of file +This sprint was normalized and renamed to `docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md`. +Please update only the canonical file; this stub remains to prevent divergent edits. (Updated 2025-11-30.) diff --git a/docs/implplan/SPRINT_158_taskrunner_ii.md b/docs/implplan/SPRINT_158_taskrunner_ii.md index 1f32be4af..9ca0ba422 100644 --- a/docs/implplan/SPRINT_158_taskrunner_ii.md +++ b/docs/implplan/SPRINT_158_taskrunner_ii.md @@ -1,12 +1,5 @@ -# Sprint 158 - Scheduling & Automation · 150.D) TaskRunner.II +# Redirect Notice · Sprint 158 -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). +This sprint was normalized and renamed to `docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md` (2025-11-19). -[Scheduling & Automation] 150.D) TaskRunner.II -Depends on: Sprint 150.D - TaskRunner.I -Summary: Scheduling & Automation focus on TaskRunner (phase II). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -TASKRUN-OBS-54-001 | TODO | Generate DSSE attestations for pack runs (subjects = produced artifacts) and expose verification API/CLI integration. Store references in timeline events. Dependencies: TASKRUN-OBS-53-001. | Task Runner Guild, Provenance Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-OBS-55-001 | TODO | Implement incident mode escalations (extra telemetry, debug artifact capture, retention bump) and align on automatic activation via SLO breach webhooks. Dependencies: TASKRUN-OBS-54-001. 
| Task Runner Guild, DevOps Guild (src/TaskRunner/StellaOps.TaskRunner) -TASKRUN-TEN-48-001 | TODO | Require tenant/project context for every pack run, set DB/object-store prefixes, block egress when tenant restricted, and propagate context to steps/logs. | Task Runner Guild (src/TaskRunner/StellaOps.TaskRunner) \ No newline at end of file +Please edit the canonical file only. This legacy filename is retained to prevent divergent updates. diff --git a/docs/implplan/SPRINT_164_exportcenter_iii.md b/docs/implplan/SPRINT_164_exportcenter_iii.md index 0845ae3bc..d8008b7bc 100644 --- a/docs/implplan/SPRINT_164_exportcenter_iii.md +++ b/docs/implplan/SPRINT_164_exportcenter_iii.md @@ -1,24 +1,3 @@ -# Sprint 164 - Export & Evidence · 160.B) ExportCenter.III +# Deprecated alias -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Export & Evidence] 160.B) ExportCenter.III -Depends on: Sprint 160.B - ExportCenter.II -Summary: Export & Evidence focus on ExportCenter (phase III). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -EXPORT-SVC-35-006 | TODO | Expose Export API (profiles, runs, download, SSE updates) with audit logging, concurrency controls, and viewer/operator RBAC integration. Dependencies: EXPORT-SVC-35-005. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-36-001 | TODO | Implement Trivy DB adapter (core) with schema mappings, version flag gating, and validation harness. Dependencies: EXPORT-SVC-35-006. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-36-002 | TODO | Add Trivy Java DB variant with shared manifest entries and adapter regression tests. Dependencies: EXPORT-SVC-36-001. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-36-003 | TODO | Build OCI distribution engine (manifests, descriptors, annotations) with registry auth support and retries. 
Dependencies: EXPORT-SVC-36-002. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-36-004 | TODO | Extend planner/run lifecycle for distribution targets (OCI/object storage) with idempotent metadata updates and retention timestamps. Dependencies: EXPORT-SVC-36-003. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-37-001 | TODO | Implement mirror delta adapter with base manifest comparison, change set generation, and content-addressed reuse. Dependencies: EXPORT-SVC-36-004. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-37-002 | TODO | Add bundle encryption (age/AES-GCM), key wrapping via KMS, and verification tooling for encrypted outputs. Dependencies: EXPORT-SVC-37-001. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-37-003 | TODO | Implement export scheduling (cron/event), retention pruning, retry idempotency, and failure classification. Dependencies: EXPORT-SVC-37-002. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-37-004 | TODO | Provide verification API to stream manifests/hashes, compute hash+signature checks, and return attest status for CLI/UI. Dependencies: EXPORT-SVC-37-003. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-SVC-43-001 | TODO | Integrate pack run manifests/artifacts into export bundles and CLI verification flows; expose provenance links. Dependencies: EXPORT-SVC-37-004. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -EXPORT-TEN-48-001 | TODO | Prefix artifacts/manifests with tenant/project, enforce scope checks, and prevent cross-tenant exports unless explicitly whitelisted; update provenance. | Exporter Service Guild (src/ExportCenter/StellaOps.ExportCenter) -RISK-BUNDLE-69-001 | TODO | Implement `stella export risk-bundle` job producing tarball with provider datasets, manifests, and DSSE signatures. 
| Risk Bundle Export Guild, Risk Engine Guild (src/ExportCenter/StellaOps.ExportCenter.RiskBundles) -RISK-BUNDLE-69-002 | TODO | Integrate bundle job into CI/offline kit pipelines with checksum publication. Dependencies: RISK-BUNDLE-69-001. | Risk Bundle Export Guild, DevOps Guild (src/ExportCenter/StellaOps.ExportCenter.RiskBundles) -RISK-BUNDLE-70-001 | TODO | Provide CLI `stella risk bundle verify` command to validate bundles before import. Dependencies: RISK-BUNDLE-69-002. | Risk Bundle Export Guild, CLI Guild (src/ExportCenter/StellaOps.ExportCenter.RiskBundles) -RISK-BUNDLE-70-002 | TODO | Publish `/docs/airgap/risk-bundles.md` detailing build/import/verification workflows. Dependencies: RISK-BUNDLE-70-001. | Risk Bundle Export Guild, Docs Guild (src/ExportCenter/StellaOps.ExportCenter.RiskBundles) \ No newline at end of file +Sprint file was renamed to `SPRINT_0164_0001_0001_exportcenter_iii.md` for template compliance on 2025-11-19. Do not edit this file; update the canonical sprint instead. diff --git a/docs/implplan/SPRINT_313_docs_modules_attestor.md b/docs/implplan/SPRINT_313_docs_modules_attestor.md index a603710d1..47c96bf2b 100644 --- a/docs/implplan/SPRINT_313_docs_modules_attestor.md +++ b/docs/implplan/SPRINT_313_docs_modules_attestor.md @@ -1,12 +1,3 @@ -# Sprint 313 - Documentation & Process · 200.C) Docs Modules Attestor +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Documentation & Process] 200.C) Docs Modules Attestor -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Attestor). 
-Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -ATTESTOR-DOCS-0001 | DONE (2025-11-05) | Validate that `docs/modules/attestor/README.md` matches the latest release notes and attestation samples. | Docs Guild (docs/modules/attestor) -ATTESTOR-OPS-0001 | TODO | Review runbooks/observability assets after the next sprint demo and capture findings inline with sprint notes. | Ops Guild (docs/modules/attestor) -ATTESTOR-ENG-0001 | DONE (2025-11-27) | Cross-check implementation plan milestones against `/docs/implplan/SPRINT_*.md` and update module readiness checkpoints. Added Sprint Readiness Tracker section to `docs/modules/attestor/implementation_plan.md` mapping 6 phases to 15+ sprint tasks with status and blocking items. | Module Team (docs/modules/attestor) +This sprint has been renamed to `SPRINT_0313_0001_0001_docs_modules_attestor.md` to comply with the standard template. Update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_314_docs_modules_authority.md b/docs/implplan/SPRINT_314_docs_modules_authority.md index ea98b10f9..1baebc7c7 100644 --- a/docs/implplan/SPRINT_314_docs_modules_authority.md +++ b/docs/implplan/SPRINT_314_docs_modules_authority.md @@ -1,12 +1,3 @@ -# Sprint 314 - Documentation & Process · 200.D) Docs Modules Authority +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Documentation & Process] 200.D) Docs Modules Authority -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Authority). 
-Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -AUTHORITY-DOCS-0001 | TODO | See ./AGENTS.md | Docs Guild (docs/modules/authority) -AUTHORITY-ENG-0001 | DONE (2025-11-27) | Update status via ./AGENTS.md workflow. Added Sprint Readiness Tracker to `docs/modules/authority/implementation_plan.md` mapping 4 epics to 10+ tasks across Sprints 100, 115, 143, 186, 401, 514. | Module Team (docs/modules/authority) -AUTHORITY-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/authority) \ No newline at end of file +This sprint has been renamed to `SPRINT_0314_0001_0001_docs_modules_authority.md` to comply with the standard template. Update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_322_docs_modules_notify.md b/docs/implplan/SPRINT_322_docs_modules_notify.md index a72f15e18..7f528614a 100644 --- a/docs/implplan/SPRINT_322_docs_modules_notify.md +++ b/docs/implplan/SPRINT_322_docs_modules_notify.md @@ -8,7 +8,15 @@ Summary: Documentation & Process focus on Docs Modules Notify). Task ID | State | Task description | Owners (Source) --- | --- | --- | --- NOTIFY-DOCS-0001 | DONE (2025-11-05) | Validate that notifier module README reflects the Notifications Studio pivot and references the latest release notes. | Docs Guild (docs/modules/notify) -NOTIFY-OPS-0001 | TODO | Review notifier runbooks/observability assets after the next sprint demo and record findings. | Ops Guild (docs/modules/notify) +NOTIFY-OPS-0001 | BLOCKED (2025-11-30) | Await next notifier demo outputs to validate runbooks/observability; placeholder stub added. | Ops Guild (docs/modules/notify) NOTIFY-ENG-0001 | DONE (2025-11-27) | Keep implementation milestones aligned with `/docs/implplan/SPRINT_171_notifier_i.md` onward. Added Sprint Readiness Tracker to `docs/modules/notify/implementation_plan.md` mapping 5 phases to 30+ sprint tasks across Sprints 0171, 0172, 0173. 
| Module Team (docs/modules/notify) -NOTIFY-DOCS-0002 | TODO (2025-11-05) | Pending NOTIFY-SVC-39-001..004 to document correlation/digests/simulation/quiet hours | Docs Guild (docs/modules/notify) -NOTIFY-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/notify) +NOTIFY-DOCS-0002 | BLOCKED (2025-11-30) | Pending NOTIFY-SVC-39-001..004 to document correlation/digests/simulation/quiet hours. | Docs Guild (docs/modules/notify) +NOTIFY-OPS-0001 | BLOCKED (2025-11-30) | Mirror of Delivery Tracker; waiting on demo outputs. | Ops Guild (docs/modules/notify) + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-05 | Completed NOTIFY-DOCS-0001; README refreshed for Notifications Studio pivot + release notes. | Docs Guild | +| 2025-11-27 | Added sprint readiness tracker for notifier phases in implementation plan; marked NOTIFY-ENG-0001 DONE. | Module Team | +| 2025-11-30 | Added observability runbook stub + Grafana placeholder; set NOTIFY-OPS-0001 BLOCKED pending next demo outputs. | Ops Guild | +| 2025-11-30 | Set NOTIFY-DOCS-0002 BLOCKED pending NOTIFY-SVC-39-001..004 correlation/digests/simulation/quiet hours evidence. | Docs Guild | diff --git a/docs/implplan/SPRINT_330_docs_modules_telemetry.md b/docs/implplan/SPRINT_330_docs_modules_telemetry.md index ddf8488e9..726080931 100644 --- a/docs/implplan/SPRINT_330_docs_modules_telemetry.md +++ b/docs/implplan/SPRINT_330_docs_modules_telemetry.md @@ -1,14 +1,3 @@ -# Sprint 330 - Documentation & Process · 200.T) Docs Modules Telemetry +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). 
- -[Documentation & Process] 200.T) Docs Modules Telemetry -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Telemetry). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -TELEMETRY-DOCS-0001 | TODO | Validate that telemetry module docs reflect the new storage stack and isolation rules. | Docs Guild (docs/modules/telemetry) -TELEMETRY-OPS-0001 | TODO | Review telemetry runbooks/observability dashboards post-demo. | Ops Guild (docs/modules/telemetry) -TELEMETRY-ENG-0001 | TODO | Ensure milestones stay in sync with telemetry sprints in `docs/implplan`. | Module Team (docs/modules/telemetry) -TELEMETRY-ENG-0001 | TODO | Update status via ./AGENTS.md workflow | Module Team (docs/modules/telemetry) -TELEMETRY-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/telemetry) +This sprint has been renamed to `SPRINT_0330_0001_0001_docs_modules_telemetry.md` to comply with the standard template. Update any links accordingly. diff --git a/docs/implplan/SPRINT_331_docs_modules_ui.md b/docs/implplan/SPRINT_331_docs_modules_ui.md index 86049ff90..f7bf56ee1 100644 --- a/docs/implplan/SPRINT_331_docs_modules_ui.md +++ b/docs/implplan/SPRINT_331_docs_modules_ui.md @@ -1,12 +1,3 @@ -# Sprint 331 - Documentation & Process · 200.U) Docs Modules Ui +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). 
- -[Documentation & Process] 200.U) Docs Modules Ui -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Ui). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -CONSOLE UI-DOCS-0001 | TODO | See ./AGENTS.md | Docs Guild (docs/modules/ui) -CONSOLE UI-ENG-0001 | TODO | Update status via ./AGENTS.md workflow | Module Team (docs/modules/ui) -CONSOLE UI-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/ui) \ No newline at end of file +This sprint has been renamed to `SPRINT_0331_0001_0001_docs_modules_ui.md` to comply with the standard template. Update any links accordingly. diff --git a/docs/implplan/SPRINT_332_docs_modules_vex_lens.md b/docs/implplan/SPRINT_332_docs_modules_vex_lens.md index 53de9c9b5..e3bad8647 100644 --- a/docs/implplan/SPRINT_332_docs_modules_vex_lens.md +++ b/docs/implplan/SPRINT_332_docs_modules_vex_lens.md @@ -1,15 +1,3 @@ -# Sprint 332 - Documentation & Process · 200.V) Docs Modules Vex Lens +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Documentation & Process] 200.V) Docs Modules Vex Lens -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Vex Lens). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -VEX-CONSENSUS-LENS-DOCS-0001 | TODO | Refresh VEX Lens module docs with consensus workflow guidance and recent release links.
| Docs Guild (docs/modules/vex-lens) -VEX-LENS-OPS-0001 | TODO | Review VEX Lens runbooks/observability assets post-demo. | Ops Guild (docs/modules/vex-lens) -VEX-LENS-ENG-0001 | TODO | Keep module milestones synchronized with VEX Lens sprints listed under `/docs/implplan`. | Module Team (docs/modules/vex-lens) -VEX-CONSENSUS-LENS-DOCS-0002 | TODO (2025-11-05) | Pending DOCS-VEX-30-001..004 to add consensus doc cross-links | Docs Guild (docs/modules/vex-lens) -VEX-CONSENSUS-LENS-ENG-0001 | TODO | Sync into ../.. | Module Team (docs/modules/vex-lens) -VEX-CONSENSUS-LENS-OPS-0001 | TODO | Document outputs in ./README.md | Ops Guild (docs/modules/vex-lens) +This sprint has been renamed to `SPRINT_0332_0001_0001_docs_modules_vex_lens.md` for template compliance. Please update bookmarks accordingly. diff --git a/docs/implplan/SPRINT_333_docs_modules_excititor.md b/docs/implplan/SPRINT_333_docs_modules_excititor.md index 4bab100cf..a9bb3b3c9 100644 --- a/docs/implplan/SPRINT_333_docs_modules_excititor.md +++ b/docs/implplan/SPRINT_333_docs_modules_excititor.md @@ -1,15 +1,3 @@ -# Sprint 333 - Documentation & Process · 200.W) Docs Modules Excititor +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Documentation & Process] 200.W) Docs Modules Excititor -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Excititor). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -EXCITOR-DOCS-0001 | DONE (2025-11-07) | Validate that `docs/modules/excitor/README.md` matches the latest release notes and consensus beta notes. 
| Docs Guild (docs/modules/excitor) -EXCITOR-OPS-0001 | DONE (2025-11-07) | Review runbooks/observability assets, adding the checklist captured in `docs/modules/excitor/mirrors.md`. | Ops Guild (docs/modules/excitor) -EXCITOR-ENG-0001 | DONE (2025-11-07) | Ensure the implementation plan sprint alignment table stays current with `SPRINT_200` updates. | Module Team (docs/modules/excitor) -EXCITITOR-DOCS-0001 | BLOCKED (2025-11-19) | Waiting on Excititor chunk API CI validation + console contracts; cannot finalize docs until tests pass and OpenAPI frozen. | Docs Guild (docs/modules/excititor) -EXCITITOR-ENG-0001 | TODO | Update status via ./AGENTS.md workflow | Module Team (docs/modules/excititor) -EXCITITOR-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/excititor) +This sprint has been renamed to `SPRINT_0333_0001_0001_docs_modules_excititor.md` to comply with the standard template. Update any links accordingly. diff --git a/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md b/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md index 1b654880f..5dc8a6065 100644 --- a/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md +++ b/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md @@ -1,14 +1,3 @@ -# Sprint 334 - Documentation & Process · 200.X) Docs Modules Vuln Explorer +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Documentation & Process] 200.X) Docs Modules Vuln Explorer -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Vuln Explorer). 
-Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -VULNERABILITY-EXPLORER-DOCS-0001 | TODO | Validate Vuln Explorer module docs against latest roadmap/releases and add evidence links. | Docs Guild (docs/modules/vuln-explorer) -VULNERABILITY-EXPLORER-OPS-0001 | TODO | Review runbooks/observability assets after next demo. | Ops Guild (docs/modules/vuln-explorer) -VULNERABILITY-EXPLORER-ENG-0001 | TODO | Keep sprint alignment notes in sync with Vuln Explorer sprints. | Module Team (docs/modules/vuln-explorer) -VULNERABILITY-EXPLORER-ENG-0001 | TODO | Sync into ../.. | Module Team (docs/modules/vuln-explorer) -VULNERABILITY-EXPLORER-OPS-0001 | TODO | Document outputs in ./README.md | Ops Guild (docs/modules/vuln-explorer) +This sprint has been renamed to `SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md` to align with the standard naming template. Please update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_335_docs_modules_zastava.md b/docs/implplan/SPRINT_335_docs_modules_zastava.md index fb19db37a..cc5f9f912 100644 --- a/docs/implplan/SPRINT_335_docs_modules_zastava.md +++ b/docs/implplan/SPRINT_335_docs_modules_zastava.md @@ -1,12 +1,3 @@ -# Sprint 335 - Documentation & Process · 200.Y) Docs Modules Zastava +# Moved sprint file -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Documentation & Process] 200.Y) Docs Modules Zastava -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Zastava). 
-Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -ZASTAVA-DOCS-0001 | TODO | See ./AGENTS.md | Docs Guild (docs/modules/zastava) -ZASTAVA-ENG-0001 | TODO | Update status via ./AGENTS.md workflow | Module Team (docs/modules/zastava) -ZASTAVA-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/zastava) \ No newline at end of file +This sprint has been renamed to `SPRINT_0335_0001_0001_docs_modules_zastava.md` to align with the standard template. Please update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_503_ops_devops_i.md b/docs/implplan/SPRINT_503_ops_devops_i.md index 470af9c25..7764ab4a4 100644 --- a/docs/implplan/SPRINT_503_ops_devops_i.md +++ b/docs/implplan/SPRINT_503_ops_devops_i.md @@ -25,9 +25,9 @@ Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - A | DEVOPS-AIAI-31-001 | TODO | Stand up CI pipelines, inference monitoring, privacy logging review, and perf dashboards for Advisory AI (summaries/conflicts/remediation). | DevOps Guild, Advisory AI Guild (ops/devops) | | DEVOPS-AIAI-31-002 | BLOCKED (2025-11-23) | Package advisory feeds (SBOM pointers + provenance) for release/offline kit; publish once CLI/Policy digests and SBOM feeds arrive. | DevOps Guild, Advisory AI Release (ops/devops) | | DEVOPS-SPANSINK-31-003 | TODO | Deploy span sink/Signals pipeline for Excititor evidence APIs (31-003) and publish dashboards; unblock traces for `/v1/vex/observations/**`. | DevOps Guild · Observability Guild (ops/devops) | -| DEVOPS-AIRGAP-56-001 | TODO | Ship deny-all egress policies for Kubernetes (NetworkPolicy/eBPF) and docker-compose firewall rules; provide verification script for sealed mode. | DevOps Guild (ops/devops) | -| DEVOPS-AIRGAP-56-002 | TODO | Provide import tooling for bundle staging: checksum validation, offline object-store loader scripts, removable media guidance. Dependencies: DEVOPS-AIRGAP-56-001. 
| DevOps Guild, AirGap Importer Guild (ops/devops) | -| DEVOPS-AIRGAP-56-003 | TODO | Build Bootstrap Pack pipeline bundling images/charts, generating checksums, and publishing manifest for offline transfer. Dependencies: DEVOPS-AIRGAP-56-002. | DevOps Guild, Container Distribution Guild (ops/devops) | +| DEVOPS-AIRGAP-56-001 | DONE (2025-11-30) | Ship deny-all egress policies for Kubernetes (NetworkPolicy/eBPF) and docker-compose firewall rules; provide verification script for sealed mode. | DevOps Guild (ops/devops) | +| DEVOPS-AIRGAP-56-002 | DONE (2025-11-30) | Provide import tooling for bundle staging: checksum validation, offline object-store loader scripts, removable media guidance. Dependencies: DEVOPS-AIRGAP-56-001. | DevOps Guild, AirGap Importer Guild (ops/devops) | +| DEVOPS-AIRGAP-56-003 | DONE (2025-11-30) | Build Bootstrap Pack pipeline bundling images/charts, generating checksums, and publishing manifest for offline transfer. Dependencies: DEVOPS-AIRGAP-56-002. | DevOps Guild, Container Distribution Guild (ops/devops) | | DEVOPS-AIRGAP-57-001 | TODO | Automate Mirror Bundle creation jobs with dual-control approvals, artifact signing, and checksum publication. Dependencies: DEVOPS-AIRGAP-56-003. | DevOps Guild, Mirror Creator Guild (ops/devops) | | DEVOPS-AIRGAP-57-002 | BLOCKED (2025-11-18) | Waiting on upstream DEVOPS-AIRGAP-57-001 (mirror bundle automation) to provide artifacts/endpoints for sealed-mode CI; no sealed fixtures available to exercise tests. | DevOps Guild, Authority Guild (ops/devops) | | DEVOPS-AIRGAP-58-001 | TODO | Provide local SMTP/syslog container templates and health checks for sealed environments; integrate into Bootstrap Pack. Dependencies: DEVOPS-AIRGAP-57-002. 
| DevOps Guild, Notifications Guild (ops/devops) | @@ -54,6 +54,9 @@ Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - A ## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | +| 2025-11-30 | Completed DEVOPS-AIRGAP-56-003: added Bootstrap Pack builder scripts (`build_bootstrap_pack.py`, `build_bootstrap_pack.sh`) producing manifest and checksums for images/charts/extras; docs updated in `ops/devops/airgap/README.md`. | DevOps | +| 2025-11-30 | Completed DEVOPS-AIRGAP-56-002: added bundle staging/import tooling (`bundle_stage_import.py`, `stage-bundle.sh`, README) under `ops/devops/airgap/` with checksum validation and evidence report output. | DevOps | +| 2025-11-30 | Completed DEVOPS-AIRGAP-56-001: added K8s deny-all egress NetworkPolicy, compose DOCKER-USER guard script, and verification harness for Docker/Kubernetes under `ops/devops/airgap/`. | DevOps | | 2025-11-25 | Delivered Concelier CI runner harness (`ops/devops/concelier-ci-runner/run-concelier-ci.sh`) with warmed NuGet cache + TRX/binlogs; artefacts land under `ops/devops/artifacts/concelier-ci/`. | DevOps | | 2025-11-25 | Local execution of the runner still hits MSBuild worker shutdown on this host (MSB4242); script is ready, but a clean CI agent should be used to produce TRX/binlogs. | DevOps | | 2025-11-23 | Normalised sprint toward template (sections added); added DEVOPS-CONCELIER-CI-24-101, DEVOPS-SCANNER-CI-11-001, DEVOPS-SBOM-23-001 to absorb CI/restore blockers from module sprints. 
| Project Mgmt | diff --git a/docs/implplan/archived/all-tasks.md b/docs/implplan/archived/all-tasks.md index 997f83377..bb09a8ac2 100644 --- a/docs/implplan/archived/all-tasks.md +++ b/docs/implplan/archived/all-tasks.md @@ -1046,7 +1046,7 @@ Consolidated task ledger for everything under `docs/implplan/archived/` (sprints | docs/implplan/archived/updates/tasks.md | Sprint 41 — CLI Parity & Task Packs Phase 1 | CLI-PARITY-41-001 | TODO | Deliver parity command groups (`policy`, `sbom`, `vuln`, `vex`, `advisory`, `export`, `orchestrator`) with JSON/table outputs and `--explain`. | DevEx/CLI Guild | Path: src/Cli/StellaOps.Cli | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 41 — CLI Parity & Task Packs Phase 1 | CLI-PARITY-41-002 | TODO | Implement `notify`, `aoc`, `auth` command groups, idempotency keys, completions, and parity matrix export. | DevEx/CLI Guild | Path: src/Cli/StellaOps.Cli | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 41 — CLI Parity & Task Packs Phase 1 | ORCH-SVC-41-101 | TODO | Register `pack-run` job type, integrate logs/artifacts, expose pack run metadata. | Orchestrator Service Guild | Path: src/Orchestrator/StellaOps.Orchestrator | 2025-10-19 | -| docs/implplan/archived/updates/tasks.md | Sprint 41 — CLI Parity & Task Packs Phase 1 | PACKS-REG-41-001 | TODO | Implement packs index API, signature verification, provenance storage, and RBAC. | Packs Registry Guild | Path: src/PacksRegistry/StellaOps.PacksRegistry | 2025-10-19 | +| docs/implplan/archived/updates/tasks.md | Sprint 41 — CLI Parity & Task Packs Phase 1 | PACKS-REG-41-001 | DONE (2025-11-25) | Implement packs index API, signature verification, provenance storage, and RBAC. 
| Packs Registry Guild | Path: src/PacksRegistry/StellaOps.PacksRegistry | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 41 — CLI Parity & Task Packs Phase 1 | TASKRUN-41-001 | TODO | Bootstrap Task Runner service, migrations, run API, local executor, approvals pause, artifact capture. | Task Runner Guild | Path: src/TaskRunner/StellaOps.TaskRunner | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | DOCS-CLI-42-001 | TODO | Publish `/docs/modules/cli/guides/parity-matrix.md`, `/cli/commands/*.md`, `/docs/task-packs/spec.md` (imposed rule). | Docs Guild | Path: docs | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | DEVOPS-CLI-42-001 | TODO | Add CLI golden output tests, parity diff automation, and pack run CI harness. | DevOps Guild | Path: ops/devops | 2025-10-19 | @@ -1054,7 +1054,7 @@ Consolidated task ledger for everything under `docs/implplan/archived/` (sprints | docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | CLI-PARITY-41-001..002 | TODO | Close parity gaps for Notifications, Policy Studio advanced features, SBOM graph, Vuln Explorer; parity matrix green. | DevEx/CLI Guild | Path: src/Cli/StellaOps.Cli | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | LEDGER-PACKS-42-001 | TODO | Expose snapshot/time-travel APIs for CLI offline mode and pack simulation. | Findings Ledger Guild | Path: src/Findings/StellaOps.Findings.Ledger | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | ORCH-SVC-42-101 | TODO | Stream pack run logs via SSE/WS, expose artifact manifests, enforce pack run quotas. 
| Orchestrator Service Guild | Path: src/Orchestrator/StellaOps.Orchestrator | 2025-10-19 | -| docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | PACKS-REG-42-001 | TODO | Support pack version lifecycle, tenant allowlists, provenance export, signature rotation. | Packs Registry Guild | Path: src/PacksRegistry/StellaOps.PacksRegistry | 2025-10-19 | +| docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | PACKS-REG-42-001 | DONE (2025-11-25) | Support pack version lifecycle, tenant allowlists, provenance export, signature rotation. | Packs Registry Guild | Path: src/PacksRegistry/StellaOps.PacksRegistry | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | POLICY-ENGINE-42-201 | TODO | Provide stable rationale IDs/APIs for CLI `--explain` and pack policy gates. | Policy Guild | Path: src/Policy/StellaOps.Policy.Engine | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 42 — CLI Parity & Task Packs Phase 2 | TASKRUN-42-001 | TODO | Add loops, conditionals, `maxParallel`, outputs, simulation mode, policy gates in Task Runner. | Task Runner Guild | Path: src/TaskRunner/StellaOps.TaskRunner | 2025-10-19 | | docs/implplan/archived/updates/tasks.md | Sprint 43 — CLI Parity & Task Packs Phase 3 | DOCS-PACKS-43-001 | TODO | Publish `/docs/task-packs/authoring-guide.md`, `/registry.md`, `/runbook.md`, `/security/pack-signing-and-rbac.md`, `/operations/cli-release-and-packaging.md` (imposed rule). | Docs Guild | Path: docs | 2025-10-19 | diff --git a/docs/implplan/archived/updates/tasks.md b/docs/implplan/archived/updates/tasks.md index 5d7f2fe66..96762d338 100644 --- a/docs/implplan/archived/updates/tasks.md +++ b/docs/implplan/archived/updates/tasks.md @@ -1083,7 +1083,7 @@ This file describe implementation of Stella Ops (docs/README.md). 
Implementation | Sprint 41 | CLI Parity & Task Packs Phase 1 | src/Cli/StellaOps.Cli | TODO | DevEx/CLI Guild | CLI-PARITY-41-001 | Deliver parity command groups (`policy`, `sbom`, `vuln`, `vex`, `advisory`, `export`, `orchestrator`) with JSON/table outputs and `--explain`. | | Sprint 41 | CLI Parity & Task Packs Phase 1 | src/Cli/StellaOps.Cli | TODO | DevEx/CLI Guild | CLI-PARITY-41-002 | Implement `notify`, `aoc`, `auth` command groups, idempotency keys, completions, and parity matrix export. | | Sprint 41 | CLI Parity & Task Packs Phase 1 | src/Orchestrator/StellaOps.Orchestrator | TODO | Orchestrator Service Guild | ORCH-SVC-41-101 | Register `pack-run` job type, integrate logs/artifacts, expose pack run metadata. | -| Sprint 41 | CLI Parity & Task Packs Phase 1 | src/PacksRegistry/StellaOps.PacksRegistry | TODO | Packs Registry Guild | PACKS-REG-41-001 | Implement packs index API, signature verification, provenance storage, and RBAC. | +| Sprint 41 | CLI Parity & Task Packs Phase 1 | src/PacksRegistry/StellaOps.PacksRegistry | DONE (2025-11-25) | Packs Registry Guild | PACKS-REG-41-001 | Implement packs index API, signature verification, provenance storage, and RBAC. | | Sprint 41 | CLI Parity & Task Packs Phase 1 | src/TaskRunner/StellaOps.TaskRunner | TODO | Task Runner Guild | TASKRUN-41-001 | Bootstrap Task Runner service, migrations, run API, local executor, approvals pause, artifact capture. | | Sprint 42 | CLI Parity & Task Packs Phase 2 | docs | TODO | Docs Guild | DOCS-CLI-42-001 | Publish `/docs/modules/cli/guides/parity-matrix.md`, `/cli/commands/*.md`, `/docs/task-packs/spec.md` (imposed rule). | | Sprint 42 | CLI Parity & Task Packs Phase 2 | ops/devops | TODO | DevOps Guild | DEVOPS-CLI-42-001 | Add CLI golden output tests, parity diff automation, and pack run CI harness. | @@ -1091,7 +1091,7 @@ This file describe implementation of Stella Ops (docs/README.md). 
Implementation | Sprint 42 | CLI Parity & Task Packs Phase 2 | src/Cli/StellaOps.Cli | TODO | DevEx/CLI Guild | CLI-PARITY-41-001..002 | Close parity gaps for Notifications, Policy Studio advanced features, SBOM graph, Vuln Explorer; parity matrix green. | | Sprint 42 | CLI Parity & Task Packs Phase 2 | src/Findings/StellaOps.Findings.Ledger | TODO | Findings Ledger Guild | LEDGER-PACKS-42-001 | Expose snapshot/time-travel APIs for CLI offline mode and pack simulation. | | Sprint 42 | CLI Parity & Task Packs Phase 2 | src/Orchestrator/StellaOps.Orchestrator | TODO | Orchestrator Service Guild | ORCH-SVC-42-101 | Stream pack run logs via SSE/WS, expose artifact manifests, enforce pack run quotas. | -| Sprint 42 | CLI Parity & Task Packs Phase 2 | src/PacksRegistry/StellaOps.PacksRegistry | TODO | Packs Registry Guild | PACKS-REG-42-001 | Support pack version lifecycle, tenant allowlists, provenance export, signature rotation. | +| Sprint 42 | CLI Parity & Task Packs Phase 2 | src/PacksRegistry/StellaOps.PacksRegistry | DONE (2025-11-25) | Packs Registry Guild | PACKS-REG-42-001 | Support pack version lifecycle, tenant allowlists, provenance export, signature rotation. | | Sprint 42 | CLI Parity & Task Packs Phase 2 | src/Policy/StellaOps.Policy.Engine | TODO | Policy Guild | POLICY-ENGINE-42-201 | Provide stable rationale IDs/APIs for CLI `--explain` and pack policy gates. | | Sprint 42 | CLI Parity & Task Packs Phase 2 | src/TaskRunner/StellaOps.TaskRunner | TODO | Task Runner Guild | TASKRUN-42-001 | Add loops, conditionals, `maxParallel`, outputs, simulation mode, policy gates in Task Runner. | | Sprint 43 | CLI Parity & Task Packs Phase 3 | docs | TODO | Docs Guild | DOCS-PACKS-43-001 | Publish `/docs/task-packs/authoring-guide.md`, `/registry.md`, `/runbook.md`, `/security/pack-signing-and-rbac.md`, `/operations/cli-release-and-packaging.md` (imposed rule). | @@ -1099,7 +1099,7 @@ This file describe implementation of Stella Ops (docs/README.md). 
Implementation | Sprint 43 | CLI Parity & Task Packs Phase 3 | src/Authority/StellaOps.Authority | TODO | Authority Core & Security Guild | AUTH-PACKS-41-001 | Enforce pack signing policies, approval RBAC, CLI token scopes for CI headless runs. | | Sprint 43 | CLI Parity & Task Packs Phase 3 | src/Cli/StellaOps.Cli | TODO | DevEx/CLI Guild | CLI-PACKS-42-001 | Deliver advanced pack features (approvals pause/resume, remote streaming, secret injection), localization, man pages. | | Sprint 43 | CLI Parity & Task Packs Phase 3 | src/ExportCenter/StellaOps.ExportCenter | TODO | Exporter Service Guild | EXPORT-SVC-35-005, PACKS-REG-41-001 | Integrate pack run manifests into export bundles and CLI verify flows. | -| Sprint 43 | CLI Parity & Task Packs Phase 3 | src/PacksRegistry/StellaOps.PacksRegistry | TODO | Packs Registry Guild | PACKS-REG-42-001 | Enforce pack signing policies, audit trails, registry mirroring, Offline Kit support. | +| Sprint 43 | CLI Parity & Task Packs Phase 3 | src/PacksRegistry/StellaOps.PacksRegistry | DONE (2025-11-25) | Packs Registry Guild | PACKS-REG-42-001 | Enforce pack signing policies, audit trails, registry mirroring, Offline Kit support. | | Sprint 43 | CLI Parity & Task Packs Phase 3 | src/TaskRunner/StellaOps.TaskRunner | TODO | Task Runner Guild | TASKRUN-42-001 | Implement approvals workflow, notifications integration, remote artifact uploads, chaos resilience. | | Sprint 44 | Containerized Distribution Phase 1 | docs | TODO | Docs Guild | DOCS-INSTALL-44-001 | Publish install overview + Compose Quickstart docs (imposed rule). | | Sprint 44 | Containerized Distribution Phase 1 | ops/deployment | TODO | Deployment Guild | COMPOSE-44-001 | Deliver Quickstart Compose stack with seed data and quickstart script. 
| diff --git a/docs/implplan/blocked_tree.md b/docs/implplan/blocked_tree.md index 616374aa7..8004a5c6b 100644 --- a/docs/implplan/blocked_tree.md +++ b/docs/implplan/blocked_tree.md @@ -1,4 +1,4 @@ -# Blocked Task Dependency Tree (as of 2025-11-25) +# Blocked Task Dependency Tree (as of 2025-11-30) - Concelier ingestion & Link-Not-Merge - MIRROR-CRT-56-001 (DONE; thin bundle v1 sample + hashes published) @@ -29,7 +29,10 @@ - VEX Lens chain (Sprint 0129) - VEXLENS-30-001 blocked: normalization schema, issuer directory inputs, and API governance guidance not published. - TaskRunner chain (Sprint 0157) - - TASKRUN-41-001 blocked: TaskRunner architecture/API contract and upstream Sprint 120/130/140 inputs not published; downstream airgap/OAS/OBS tasks inherit the block. + - TASKRUN-41-001 DONE (2025-11-30): contract implemented (run API, storage indexes, approvals, provenance manifest). Downstream airgap/OAS/OBS tasks now wait only on control-flow/policy spec addendum. + - TASKRUN-OBS-54-001 BLOCKED (2025-11-30): waiting on TASKRUN-OBS-53-001 timeline/attestation schema from Sprint 0157. + - TASKRUN-OBS-55-001 BLOCKED (2025-11-30): depends on 54-001. + - TASKRUN-TEN-48-001 BLOCKED (2025-11-30): tenancy policy/RLS-egress contract not yet published; also waits for Sprint 0157 close-out. 
- CONCELIER-VULN-29-004 <- CONCELIER-VULN-29-001 - CONCELIER-ORCH-32-001 (needs CI/clean runner) -> 32-002 -> 33-001 -> 34-001 - CONCELIER mirror/export chain diff --git a/docs/implplan/tasks-all.md b/docs/implplan/tasks-all.md index 8d4ee0e4d..390698910 100644 --- a/docs/implplan/tasks-all.md +++ b/docs/implplan/tasks-all.md @@ -52,7 +52,7 @@ | 34-101 | DONE | 2025-11-22 | SPRINT_0120_0000_0001_policy_reasoning | Findings Ledger Guild | src/Findings/StellaOps.Findings.Ledger | 29-009 | LEDGER-29-009 | PLLG0104 | | 401-004 | BLOCKED | 2025-11-25 | SPRINT_0401_0001_0001_reachability_evidence_chain | Replay Core Guild | `src/__Libraries/StellaOps.Replay.Core` | Signals facts stable (SGSI0101) | Blocked: awaiting SGSI0101 runtime facts + CAS policy from GAP-REP-004 | RPRC0101 | | BENCH-DETERMINISM-401-057 | DONE (2025-11-27) | 2025-11-27 | SPRINT_0512_0001_0001_bench | Bench Guild · Signals Guild · Policy Guild | src/Bench/StellaOps.Bench/Determinism | Determinism harness + mock scanner; manifests/results generated; CI workflow `bench-determinism` enforces threshold; defaults to 10 runs; supports frozen feed manifests via DET_EXTRA_INPUTS; offline runner available. | Feed-freeze hash + SBOM/VEX bundle list (SPRINT_0401) | | -| 41-001 | BLOCKED | 2025-11-25 | SPRINT_157_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | — | Awaiting TaskRunner architecture/API contract; upstream Sprint 120/130/140 inputs | ORTR0101 | +| 41-001 | DONE (2025-11-30) | 2025-11-30 | SPRINT_157_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | — | Contract implemented per `docs/modules/taskrunner/architecture.md`; run API/storage/provenance ready. 
| ORTR0101 | | 44-001 | BLOCKED | 2025-11-25 | SPRINT_501_ops_deployment_i | Deployment Guild · DevEx Guild (ops/deployment) | ops/deployment | — | Waiting on consolidated service list/version pins from upstream module releases (mirrors Compose-44-001 block) | DVDO0103 | | 44-002 | BLOCKED | 2025-11-25 | SPRINT_501_ops_deployment_i | Deployment Guild (ops/deployment) | ops/deployment | 44-001 | Blocked until 44-001 unblocks | DVDO0103 | | 44-003 | BLOCKED | 2025-11-25 | SPRINT_501_ops_deployment_i | Deployment Guild · Docs Guild (ops/deployment) | ops/deployment | 44-002 | Blocked until 44-002 unblocks | DVDO0103 | @@ -1321,9 +1321,9 @@ | PACKS-42-001 | TODO | | SPRINT_0121_0001_0001_policy_reasoning | Findings Ledger Guild | src/Findings/StellaOps.Findings.Ledger | Provide snapshot/time-travel APIs and digestable exports for Task Pack simulation + CLI offline mode. | PLLG0103 | PKLD0101 | | PACKS-43-001 | DONE | 2025-11-09 | SPRINT_100_identity_signing | Packs Guild · Authority Guild | src/Authority/StellaOps.Authority | Finalized Pack release 43 (signing, release notes, artefacts). | AUTH-PACKS-41-001; TASKRUN-42-001; ORCH-SVC-42-101 | PACK0101 | | PACKS-43-002 | TODO | | SPRINT_508_ops_offline_kit | Offline Kit Guild, Packs Registry Guild (ops/offline-kit) | ops/offline-kit | Bundle packs registry artifacts, runbooks, and verification docs into Offline Kit release 43. | OFFLINE-37-001 | OFFK0101 | -| PACKS-REG-41-001 | TODO | | SPRINT_154_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement registry API/storage, version lifecycle, provenance export. | ORCH-SVC-42-101 | PKRG0101 | -| PACKS-REG-42-001 | TODO | | SPRINT_154_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Add tenant allowlists, signature rotation, audit logs, Offline Kit seed support. 
| PACKS-REG-41-001 | PKRG0101 | -| PACKS-REG-43-001 | TODO | | SPRINT_154_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement mirroring, pack signing policies, compliance dashboards, Export Center integration. | PACKS-REG-42-001 | PKRG0101 | +| PACKS-REG-41-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0154_0001_0001_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement registry API/storage, version lifecycle, provenance export. | ORCH-SVC-42-101 | PKRG0101 | +| PACKS-REG-42-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0154_0001_0001_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Add tenant allowlists, signature rotation, audit logs, Offline Kit seed support. | PACKS-REG-41-001 | PKRG0101 | +| PACKS-REG-43-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0154_0001_0001_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement mirroring, pack signing policies, compliance dashboards, Export Center integration. | PACKS-REG-42-001 | PKRG0101 | | PARITY-41-001 | TODO | | SPRINT_203_cli_iii | DevEx/CLI Guild | src/Cli/StellaOps.Cli | Ensure CLI HTTP client propagates `traceparent` headers for all commands, prints correlation IDs on failure, and records trace IDs in verbose logs. | NOWB0101 | CLPR0101 | | PARITY-41-002 | TODO | | SPRINT_203_cli_iii | DevEx/CLI Guild | src/Cli/StellaOps.Cli | Add parity tests ensuring CLI outputs match notifier/web error formats and capture verification docs. | PARITY-41-001 | CLPR0101 | | PLATFORM-DOCS-0001 | TODO | | SPRINT_324_docs_modules_platform | Docs Guild | docs/modules/platform | Refresh architecture/gov doc per new sprint planning rules. 
| execution-waves.md | DOPF0101 | @@ -1933,23 +1933,23 @@ | SYMS-CLIENT-401-012 | TODO | | SPRINT_0401_0001_0001_reachability_evidence_chain | Symbols Guild · Scanner Guild | `src/Symbols/StellaOps.Symbols.Client`, `src/Scanner/StellaOps.Scanner.Symbolizer` | Ship `StellaOps.Symbols.Client` SDK (resolve/upload APIs, platform key derivation for ELF/PDB/Mach-O/JVM/Node, disk LRU cache) and integrate with Scanner.Symbolizer/runtime probes (ref. `docs/specs/SYMBOL_MANIFEST_v1.md`). | Depends on #3 | RBSY0101 | | SYMS-INGEST-401-013 | TODO | | SPRINT_0401_0001_0001_reachability_evidence_chain | Symbols Guild · DevOps Guild | `src/Symbols/StellaOps.Symbols.Ingestor.Cli`, `docs/specs/SYMBOL_MANIFEST_v1.md` | Build `symbols ingest` CLI to emit DSSE-signed `SymbolManifest v1`, upload blobs, and register Rekor entries; document GitLab/Gitea pipeline usage. | Needs manifest updates from #1 | RBSY0101 | | SYMS-SERVER-401-011 | TODO | | SPRINT_0401_0001_0001_reachability_evidence_chain | Symbols Guild | `src/Symbols/StellaOps.Symbols.Server` | Deliver `StellaOps.Symbols.Server` (REST+gRPC) with DSSE-verified uploads, Mongo/MinIO storage, tenant isolation, and deterministic debugId indexing; publish health/manifest APIs (spec: `docs/specs/SYMBOL_MANIFEST_v1.md`). | Depends on #5 | RBSY0101 | -| TASKRUN-41-001 | BLOCKED | 2025-11-25 | SPRINT_0157_0001_0002_taskrunner_blockers | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Bootstrap service, define migrations for `pack_runs`, `pack_run_logs`, `pack_artifacts`, implement run API (create/get/log stream), local executor, approvals pause, artifact capture, and provenance manifest generation. 
| Missing TaskRunner architecture/API contracts; needs Sprint 120/130/140 inputs | ORTR0101 | -| TASKRUN-AIRGAP-56-001 | TODO | | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Policy Guild | src/TaskRunner/StellaOps.TaskRunner | Enforce plan-time validation rejecting steps with non-allowlisted network calls in sealed mode and surface remediation errors. | TASKRUN-41-001 | ORTR0101 | -| TASKRUN-AIRGAP-56-002 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · AirGap Importer Guild | src/TaskRunner/StellaOps.TaskRunner | Add helper steps for bundle ingestion (checksum verification, staging to object store) with deterministic outputs. Dependencies: TASKRUN-AIRGAP-56-001. | TASKRUN-AIRGAP-56-001 | ORTR0101 | -| TASKRUN-AIRGAP-57-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · AirGap Controller Guild | src/TaskRunner/StellaOps.TaskRunner | Refuse to execute plans when environment sealed=false but declared sealed install; emit advisory timeline events. Dependencies: TASKRUN-AIRGAP-56-002. | TASKRUN-AIRGAP-56-002 | ORTR0101 | -| TASKRUN-AIRGAP-58-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture bundle import job transcripts, hashed inputs, and outputs into portable evidence bundles. Dependencies: TASKRUN-AIRGAP-57-001. | TASKRUN-AIRGAP-57-001 | ORTR0101 | +| TASKRUN-41-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0002_taskrunner_blockers | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Bootstrap service, define migrations for `pack_runs`, `pack_run_logs`, `pack_artifacts`, implement run API (create/get/log stream), local executor, approvals pause, artifact capture, and provenance manifest generation. | Missing TaskRunner architecture/API contract (Sprints 120/130/140). 
| ORTR0101 | +| TASKRUN-AIRGAP-56-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Policy Guild | src/TaskRunner/StellaOps.TaskRunner | Enforce plan-time validation rejecting steps with non-allowlisted network calls in sealed mode and surface remediation errors. | TASKRUN-41-001 | ORTR0101 | +| TASKRUN-AIRGAP-56-002 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Importer Guild | src/TaskRunner/StellaOps.TaskRunner | Add helper steps for bundle ingestion (checksum verification, staging to object store) with deterministic outputs. | TASKRUN-AIRGAP-56-001 | ORTR0101 | +| TASKRUN-AIRGAP-57-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Controller Guild | src/TaskRunner/StellaOps.TaskRunner | Refuse to execute plans when environment sealed=false but declared sealed install; emit advisory timeline events. | TASKRUN-AIRGAP-56-002 | ORTR0101 | +| TASKRUN-AIRGAP-58-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture bundle import job transcripts, hashed inputs, and outputs into portable evidence bundles. | TASKRUN-AIRGAP-57-001 | ORTR0101 | | TASKRUN-42-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild (`src/TaskRunner/StellaOps.TaskRunner`) | src/TaskRunner/StellaOps.TaskRunner | Execution engine enhancements (loops/conditionals/maxParallel), simulation mode, policy gate integration, deterministic failure recovery. Blocked: loop/conditional semantics and policy-gate evaluation contract not published. 
| | ORTR0102 | -| TASKRUN-OAS-61-001 | TODO | | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · API Contracts Guild | src/TaskRunner/StellaOps.TaskRunner | Document Task Runner APIs (pack runs, logs, approvals) in service OAS, including streaming response schemas and examples. | TASKRUN-41-001 | ORTR0101 | -| TASKRUN-OAS-61-002 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Expose `GET /.well-known/openapi` returning signed spec metadata, build version, and ETag. Dependencies: TASKRUN-OAS-61-001. | TASKRUN-OAS-61-001 | ORTR0101 | -| TASKRUN-OAS-62-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · SDK Generator Guild | src/TaskRunner/StellaOps.TaskRunner | Provide SDK examples for pack run lifecycle; ensure SDKs offer streaming log helpers and paginator wrappers. Dependencies: TASKRUN-OAS-61-002. | TASKRUN-OAS-61-002 | ORTR0102 | -| TASKRUN-OAS-63-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · API Governance Guild | src/TaskRunner/StellaOps.TaskRunner | Implement deprecation header support and Sunset handling for legacy pack APIs; emit notifications metadata. Dependencies: TASKRUN-OAS-62-001. | TASKRUN-OAS-62-001 | ORTR0102 | +| TASKRUN-OAS-61-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · API Contracts Guild | src/TaskRunner/StellaOps.TaskRunner | Document Task Runner APIs (pack runs, logs, approvals) in service OAS, including streaming response schemas and examples. | TASKRUN-41-001 | ORTR0101 | +| TASKRUN-OAS-61-002 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Expose `GET /.well-known/openapi` returning signed spec metadata, build version, and ETag. 
| TASKRUN-OAS-61-001 | ORTR0101 | +| TASKRUN-OAS-62-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · SDK Generator Guild | src/TaskRunner/StellaOps.TaskRunner | Provide SDK examples for pack run lifecycle; ensure SDKs offer streaming log helpers and paginator wrappers. | TASKRUN-OAS-61-002 | ORTR0102 | +| TASKRUN-OAS-63-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · API Governance Guild | src/TaskRunner/StellaOps.TaskRunner | Implement deprecation header support and Sunset handling for legacy pack APIs; emit notifications metadata. | TASKRUN-OAS-62-001 | ORTR0102 | | TASKRUN-OBS-50-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Adopt telemetry core in Task Runner host + worker executors, ensuring step execution spans/logs include `trace_id`, `tenant_id`, `run_id`, and scrubbed command transcripts. | ORTR0101 telemetry hooks | ORTR0102 | | TASKRUN-OBS-51-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · DevOps Guild | src/TaskRunner/StellaOps.TaskRunner | Emit metrics for step latency, retries, queue depth, sandbox resource usage; define SLOs for pack run completion and failure rate; surface burn-rate alerts to collector/Notifier. Dependencies: TASKRUN-OBS-50-001. | TASKRUN-OBS-50-001 | ORTR0102 | -| TASKRUN-OBS-52-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Produce timeline events for pack runs (`pack.started`, `pack.step.completed`, `pack.failed`) containing evidence pointers and policy gate context. Provide dedupe + retry logic. Blocked: timeline event schema and evidence-pointer contract not published. Dependencies: TASKRUN-OBS-51-001. 
| TASKRUN-OBS-50-001 | ORTR0102 | -| TASKRUN-OBS-53-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture step transcripts, artifact manifests, environment digests, and policy approvals into evidence locker snapshots; ensure redaction + hash chain coverage. Blocked: waiting on timeline schema/evidence-pointer contract (OBS-52-001). Dependencies: TASKRUN-OBS-52-001. | TASKRUN-OBS-52-001 | ORTR0102 | -| TASKRUN-OBS-54-001 | TODO | | SPRINT_158_taskrunner_ii | Task Runner Guild · Provenance Guild | src/TaskRunner/StellaOps.TaskRunner | Generate DSSE attestations for pack runs (subjects = produced artifacts) and expose verification API/CLI integration. Store references in timeline events. Dependencies: TASKRUN-OBS-53-001. | TASKRUN-OBS-53-001 | ORTR0102 | -| TASKRUN-OBS-55-001 | TODO | | SPRINT_158_taskrunner_ii | Task Runner Guild · DevOps Guild | src/TaskRunner/StellaOps.TaskRunner | Implement incident mode escalations (extra telemetry, debug artifact capture, retention bump) and align on automatic activation via SLO breach webhooks. Dependencies: TASKRUN-OBS-54-001. | TASKRUN-OBS-54-001 | ORTR0102 | -| TASKRUN-TEN-48-001 | TODO | | SPRINT_158_taskrunner_ii | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Require tenant/project context for every pack run, set DB/object-store prefixes, block egress when tenant restricted, and propagate context to steps/logs. | TASKRUN-AIRGAP-58-001 | ORTR0101 | +| TASKRUN-OBS-52-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Produce timeline events for pack runs (`pack.started`, `pack.step.completed`, `pack.failed`) containing evidence pointers and policy gate context. Provide dedupe + retry logic. Blocked: timeline event schema and evidence-pointer contract not published. 
| TASKRUN-OBS-51-001 | ORTR0102 | +| TASKRUN-OBS-53-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture step transcripts, artifact manifests, environment digests, and policy approvals into evidence locker snapshots; ensure redaction + hash chain coverage. Blocked: waiting on timeline schema/evidence-pointer contract (OBS-52-001). | TASKRUN-OBS-52-001 | ORTR0102 | +| TASKRUN-OBS-54-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0158_0001_0002_taskrunner_ii | Task Runner Guild · Provenance Guild | src/TaskRunner/StellaOps.TaskRunner | Generate DSSE attestations for pack runs (subjects = produced artifacts) and expose verification API/CLI integration. Store references in timeline events. | TASKRUN-OBS-53-001 | ORTR0102 | +| TASKRUN-OBS-55-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0158_0001_0002_taskrunner_ii | Task Runner Guild · DevOps Guild | src/TaskRunner/StellaOps.TaskRunner | Implement incident mode escalations (extra telemetry, debug artifact capture, retention bump) and align on automatic activation via SLO breach webhooks. | TASKRUN-OBS-54-001 | ORTR0102 | +| TASKRUN-TEN-48-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0158_0001_0002_taskrunner_ii | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Require tenant/project context for every pack run, set DB/object-store prefixes, block egress when tenant restricted, and propagate context to steps/logs. | TASKRUN-OBS-53-001; Tenancy policy contract | ORTR0101 | | TELEMETRY-DOCS-0001 | TODO | | SPRINT_330_docs_modules_telemetry | Docs Guild | docs/modules/telemetry | Validate that telemetry module docs reflect the new storage stack and isolation rules. 
| Ops checklist from DVDO0103 | DOTL0101 | | TELEMETRY-DOCS-0001 | TODO | | SPRINT_330_docs_modules_telemetry | Docs Guild | docs/modules/telemetry | Validate that telemetry module docs reflect the new storage stack and isolation rules. | Ops checklist from DVDO0103 | DOTL0101 | | TELEMETRY-ENG-0001 | TODO | | SPRINT_330_docs_modules_telemetry | Module Team | docs/modules/telemetry | Ensure milestones stay in sync with telemetry sprints in `docs/implplan`. | TLTY0101 API review | DOTL0101 | @@ -2264,7 +2264,7 @@ | 31-009 | DONE | 2025-11-12 | SPRINT_110_ingestion_evidence | Advisory AI Guild | src/AdvisoryAI/StellaOps.AdvisoryAI | — | — | ADAI0101 | | 34-101 | DONE | 2025-11-22 | SPRINT_0120_0000_0001_policy_reasoning | Findings Ledger Guild | src/Findings/StellaOps.Findings.Ledger | 29-009 | LEDGER-29-009 | PLLG0104 | | 401-004 | BLOCKED | 2025-11-25 | SPRINT_0401_0001_0001_reachability_evidence_chain | Replay Core Guild | `src/__Libraries/StellaOps.Replay.Core` | Signals facts stable (SGSI0101) | Blocked: awaiting SGSI0101 runtime facts + CAS policy from GAP-REP-004 | RPRC0101 | -| 41-001 | BLOCKED | 2025-11-25 | SPRINT_157_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | — | Awaiting TaskRunner architecture/API contract; upstream Sprint 120/130/140 inputs | ORTR0101 | +| 41-001 | TODO | 2025-11-30 | SPRINT_157_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | — | Contract landed via product advisory 2025-11-29; implement per `docs/modules/taskrunner/architecture.md`. 
| ORTR0101 | | 44-001 | TODO | | SPRINT_501_ops_deployment_i | Deployment Guild · DevEx Guild (ops/deployment) | ops/deployment | — | — | DVDO0103 | | 44-002 | TODO | | SPRINT_501_ops_deployment_i | Deployment Guild (ops/deployment) | ops/deployment | 44-001 | 44-001 | DVDO0103 | | 44-003 | TODO | | SPRINT_501_ops_deployment_i | Deployment Guild · Docs Guild (ops/deployment) | ops/deployment | 44-002 | 44-002 | DVDO0103 | @@ -3535,9 +3535,9 @@ | PACKS-42-001 | TODO | | SPRINT_0121_0001_0001_policy_reasoning | Findings Ledger Guild | src/Findings/StellaOps.Findings.Ledger | Provide snapshot/time-travel APIs, digestable exports for pack simulation + CLI offline mode. | Needs ORSC0104 event IDs | | | PACKS-43-001 | DONE | 2025-11-09 | SPRINT_100_identity_signing | Packs Guild · Authority Guild | src/Authority/StellaOps.Authority | Canonical pack bundle + docs for release 43. | AUTH-PACKS-41-001; TASKRUN-42-001; ORCH-SVC-42-101 | | | PACKS-43-002 | TODO | | SPRINT_508_ops_offline_kit | Offline Kit Guild, Packs Registry Guild (ops/offline-kit) | ops/offline-kit | | | | -| PACKS-REG-41-001 | TODO | | SPRINT_154_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement registry service, migrations for `packs_index`, `parity_matrix`, provenance docs; support pack upload/list/get, signature verification, RBAC enforcement, and provenance manifest storage. | Needs ORSC0104 event feeds | | -| PACKS-REG-42-001 | TODO | | SPRINT_154_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Add version lifecycle (promote/deprecate), tenant allowlists, provenance export, signature rotation, audit logs, and Offline Kit seed support. Dependencies: PACKS-REG-41-001. 
| Depends on 41-001 | | -| PACKS-REG-43-001 | TODO | | SPRINT_154_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement registry mirroring, pack signing policies, attestation integration, and compliance dashboards; integrate with Export Center. Dependencies: PACKS-REG-42-001. | Needs 42-001 | | +| PACKS-REG-41-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0154_0001_0001_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement registry service, migrations for `packs_index`, `parity_matrix`, provenance docs; support pack upload/list/get, signature verification, RBAC enforcement, and provenance manifest storage. | Needs ORSC0104 event feeds | | +| PACKS-REG-42-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0154_0001_0001_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Add version lifecycle (promote/deprecate), tenant allowlists, provenance export, signature rotation, audit logs, and Offline Kit seed support. Dependencies: PACKS-REG-41-001. | Depends on 41-001 | | +| PACKS-REG-43-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0154_0001_0001_packsregistry | Packs Registry Guild | src/PacksRegistry/StellaOps.PacksRegistry | Implement registry mirroring, pack signing policies, attestation integration, and compliance dashboards; integrate with Export Center. Dependencies: PACKS-REG-42-001. | Needs 42-001 | | | PARITY-41-001 | TODO | | SPRINT_203_cli_iii | DevEx/CLI Guild | src/Cli/StellaOps.Cli | Propagate `traceparent`/correlation IDs across CLI commands and verbose output. | Needs NOWB0101 gateway trace headers | | | PARITY-41-002 | TODO | | SPRINT_203_cli_iii | DevEx/CLI Guild | src/Cli/StellaOps.Cli | Add parity tests + docs ensuring CLI error output matches web/notify formats. 
| Depends on 41-001 | | | PLATFORM-DOCS-0001 | TODO | | SPRINT_324_docs_modules_platform | Docs Guild | docs/modules/platform | See ./AGENTS.md | Needs updated wave list | | @@ -4145,22 +4145,22 @@ | SYMS-CLIENT-401-012 | TODO | | SPRINT_0401_0001_0001_reachability_evidence_chain | Symbols Guild · Scanner Guild | `src/Symbols/StellaOps.Symbols.Client`, `src/Scanner/StellaOps.Scanner.Symbolizer` | Ship `StellaOps.Symbols.Client` SDK (resolve/upload APIs, platform key derivation for ELF/PDB/Mach-O/JVM/Node, disk LRU cache) and integrate with Scanner.Symbolizer/runtime probes (ref. `docs/specs/SYMBOL_MANIFEST_v1.md`). | Depends on #3 | RBSY0101 | | SYMS-INGEST-401-013 | TODO | | SPRINT_0401_0001_0001_reachability_evidence_chain | Symbols Guild · DevOps Guild | `src/Symbols/StellaOps.Symbols.Ingestor.Cli`, `docs/specs/SYMBOL_MANIFEST_v1.md` | Build `symbols ingest` CLI to emit DSSE-signed `SymbolManifest v1`, upload blobs, and register Rekor entries; document GitLab/Gitea pipeline usage. | Needs manifest updates from #1 | RBSY0101 | | SYMS-SERVER-401-011 | TODO | | SPRINT_0401_0001_0001_reachability_evidence_chain | Symbols Guild | `src/Symbols/StellaOps.Symbols.Server` | Deliver `StellaOps.Symbols.Server` (REST+gRPC) with DSSE-verified uploads, Mongo/MinIO storage, tenant isolation, and deterministic debugId indexing; publish health/manifest APIs (spec: `docs/specs/SYMBOL_MANIFEST_v1.md`). | Depends on #5 | RBSY0101 | -| TASKRUN-41-001 | BLOCKED | 2025-11-25 | SPRINT_0157_0001_0002_taskrunner_blockers | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Bootstrap service, define migrations for `pack_runs`, `pack_run_logs`, `pack_artifacts`, implement run API (create/get/log stream), local executor, approvals pause, artifact capture, and provenance manifest generation. 
| Missing TaskRunner architecture/API contracts; needs Sprint 120/130/140 inputs | ORTR0101 | -| TASKRUN-AIRGAP-56-001 | TODO | | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Policy Guild | src/TaskRunner/StellaOps.TaskRunner | Enforce plan-time validation rejecting steps with non-allowlisted network calls in sealed mode and surface remediation errors. | TASKRUN-41-001 | ORTR0101 | -| TASKRUN-AIRGAP-56-002 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · AirGap Importer Guild | src/TaskRunner/StellaOps.TaskRunner | Add helper steps for bundle ingestion (checksum verification, staging to object store) with deterministic outputs. Dependencies: TASKRUN-AIRGAP-56-001. | TASKRUN-AIRGAP-56-001 | ORTR0101 | -| TASKRUN-AIRGAP-57-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · AirGap Controller Guild | src/TaskRunner/StellaOps.TaskRunner | Refuse to execute plans when environment sealed=false but declared sealed install; emit advisory timeline events. Dependencies: TASKRUN-AIRGAP-56-002. | TASKRUN-AIRGAP-56-002 | ORTR0101 | -| TASKRUN-AIRGAP-58-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture bundle import job transcripts, hashed inputs, and outputs into portable evidence bundles. Dependencies: TASKRUN-AIRGAP-57-001. | TASKRUN-AIRGAP-57-001 | ORTR0101 | -| TASKRUN-OAS-61-001 | TODO | | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · API Contracts Guild | src/TaskRunner/StellaOps.TaskRunner | Document Task Runner APIs (pack runs, logs, approvals) in service OAS, including streaming response schemas and examples. | TASKRUN-41-001 | ORTR0101 | -| TASKRUN-OAS-61-002 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Expose `GET /.well-known/openapi` returning signed spec metadata, build version, and ETag. Dependencies: TASKRUN-OAS-61-001. 
| TASKRUN-OAS-61-001 | ORTR0101 | -| TASKRUN-OAS-62-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · SDK Generator Guild | src/TaskRunner/StellaOps.TaskRunner | Provide SDK examples for pack run lifecycle; ensure SDKs offer streaming log helpers and paginator wrappers. Dependencies: TASKRUN-OAS-61-002. | TASKRUN-OAS-61-002 | ORTR0102 | -| TASKRUN-OAS-63-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · API Governance Guild | src/TaskRunner/StellaOps.TaskRunner | Implement deprecation header support and Sunset handling for legacy pack APIs; emit notifications metadata. Dependencies: TASKRUN-OAS-62-001. | TASKRUN-OAS-62-001 | ORTR0102 | +| TASKRUN-41-001 | TODO | 2025-11-30 | SPRINT_0157_0001_0002_taskrunner_blockers | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Bootstrap service, define migrations for `pack_runs`, `pack_run_logs`, `pack_artifacts`, implement run API (create/get/log stream), local executor, approvals pause, artifact capture, and provenance manifest generation. | Contract available via `docs/product-advisories/29-Nov-2025 - Task Pack Orchestration and Automation.md` and `docs/modules/taskrunner/architecture.md`. | ORTR0101 | +| TASKRUN-AIRGAP-56-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Policy Guild | src/TaskRunner/StellaOps.TaskRunner | Enforce plan-time validation rejecting steps with non-allowlisted network calls in sealed mode and surface remediation errors. | TASKRUN-41-001 | ORTR0101 | +| TASKRUN-AIRGAP-56-002 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Importer Guild | src/TaskRunner/StellaOps.TaskRunner | Add helper steps for bundle ingestion (checksum verification, staging to object store) with deterministic outputs. 
| TASKRUN-AIRGAP-56-001 | ORTR0101 | +| TASKRUN-AIRGAP-57-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · AirGap Controller Guild | src/TaskRunner/StellaOps.TaskRunner | Refuse to execute plans when environment sealed=false but declared sealed install; emit advisory timeline events. | TASKRUN-AIRGAP-56-002 | ORTR0101 | +| TASKRUN-AIRGAP-58-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture bundle import job transcripts, hashed inputs, and outputs into portable evidence bundles. | TASKRUN-AIRGAP-57-001 | ORTR0101 | +| TASKRUN-OAS-61-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · API Contracts Guild | src/TaskRunner/StellaOps.TaskRunner | Document Task Runner APIs (pack runs, logs, approvals) in service OAS, including streaming response schemas and examples. | TASKRUN-41-001 | ORTR0101 | +| TASKRUN-OAS-61-002 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Expose `GET /.well-known/openapi` returning signed spec metadata, build version, and ETag. | TASKRUN-OAS-61-001 | ORTR0101 | +| TASKRUN-OAS-62-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · SDK Generator Guild | src/TaskRunner/StellaOps.TaskRunner | Provide SDK examples for pack run lifecycle; ensure SDKs offer streaming log helpers and paginator wrappers. | TASKRUN-OAS-61-002 | ORTR0102 | +| TASKRUN-OAS-63-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · API Governance Guild | src/TaskRunner/StellaOps.TaskRunner | Implement deprecation header support and Sunset handling for legacy pack APIs; emit notifications metadata. 
| TASKRUN-OAS-62-001 | ORTR0102 | | TASKRUN-OBS-50-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Adopt telemetry core in Task Runner host + worker executors, ensuring step execution spans/logs include `trace_id`, `tenant_id`, `run_id`, and scrubbed command transcripts. | ORTR0101 telemetry hooks | ORTR0102 | | TASKRUN-OBS-51-001 | DONE (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · DevOps Guild | src/TaskRunner/StellaOps.TaskRunner | Emit metrics for step latency, retries, queue depth, sandbox resource usage; define SLOs for pack run completion and failure rate; surface burn-rate alerts to collector/Notifier. Dependencies: TASKRUN-OBS-50-001. | TASKRUN-OBS-50-001 | ORTR0102 | -| TASKRUN-OBS-52-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Produce timeline events for pack runs (`pack.started`, `pack.step.completed`, `pack.failed`) containing evidence pointers and policy gate context. Provide dedupe + retry logic. Dependencies: TASKRUN-OBS-51-001. | TASKRUN-OBS-50-001 | ORTR0102 | -| TASKRUN-OBS-53-001 | TODO | | SPRINT_157_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture step transcripts, artifact manifests, environment digests, and policy approvals into evidence locker snapshots; ensure redaction + hash chain coverage. Dependencies: TASKRUN-OBS-52-001. | TASKRUN-OBS-52-001 | ORTR0102 | +| TASKRUN-OBS-52-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Produce timeline events for pack runs (`pack.started`, `pack.step.completed`, `pack.failed`) containing evidence pointers and policy gate context. Provide dedupe + retry logic. Blocked: timeline event schema and evidence-pointer contract not published. 
| TASKRUN-OBS-51-001 | ORTR0102 | +| TASKRUN-OBS-53-001 | BLOCKED (2025-11-25) | 2025-11-25 | SPRINT_0157_0001_0001_taskrunner_i | Task Runner Guild · Evidence Locker Guild | src/TaskRunner/StellaOps.TaskRunner | Capture step transcripts, artifact manifests, environment digests, and policy approvals into evidence locker snapshots; ensure redaction + hash chain coverage. Blocked: waiting on timeline schema/evidence-pointer contract (OBS-52-001). | TASKRUN-OBS-52-001 | ORTR0102 | | TASKRUN-OBS-54-001 | TODO | | SPRINT_158_taskrunner_ii | Task Runner Guild · Provenance Guild | src/TaskRunner/StellaOps.TaskRunner | Generate DSSE attestations for pack runs (subjects = produced artifacts) and expose verification API/CLI integration. Store references in timeline events. Dependencies: TASKRUN-OBS-53-001. | TASKRUN-OBS-53-001 | ORTR0102 | | TASKRUN-OBS-55-001 | TODO | | SPRINT_158_taskrunner_ii | Task Runner Guild · DevOps Guild | src/TaskRunner/StellaOps.TaskRunner | Implement incident mode escalations (extra telemetry, debug artifact capture, retention bump) and align on automatic activation via SLO breach webhooks. Dependencies: TASKRUN-OBS-54-001. | TASKRUN-OBS-54-001 | ORTR0102 | -| TASKRUN-TEN-48-001 | TODO | | SPRINT_158_taskrunner_ii | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Require tenant/project context for every pack run, set DB/object-store prefixes, block egress when tenant restricted, and propagate context to steps/logs. | TASKRUN-AIRGAP-58-001 | ORTR0101 | +| TASKRUN-TEN-48-001 | BLOCKED (2025-11-30) | 2025-11-30 | SPRINT_0158_0001_0002_taskrunner_ii | Task Runner Guild | src/TaskRunner/StellaOps.TaskRunner | Require tenant/project context for every pack run, set DB/object-store prefixes, block egress when tenant restricted, and propagate context to steps/logs. 
| TASKRUN-OBS-53-001; Tenancy policy contract | ORTR0101 | | TELEMETRY-DOCS-0001 | TODO | | SPRINT_330_docs_modules_telemetry | Docs Guild | docs/modules/telemetry | Validate that telemetry module docs reflect the new storage stack and isolation rules. | Ops checklist from DVDO0103 | DOTL0101 | | TELEMETRY-DOCS-0001 | TODO | | SPRINT_330_docs_modules_telemetry | Docs Guild | docs/modules/telemetry | Validate that telemetry module docs reflect the new storage stack and isolation rules. | Ops checklist from DVDO0103 | DOTL0101 | | TELEMETRY-ENG-0001 | TODO | | SPRINT_330_docs_modules_telemetry | Module Team | docs/modules/telemetry | Ensure milestones stay in sync with telemetry sprints in `docs/implplan`. | TLTY0101 API review | DOTL0101 | diff --git a/docs/key-features.md b/docs/key-features.md index 19d0c0fa1..5dc3349ed 100644 --- a/docs/key-features.md +++ b/docs/key-features.md @@ -37,6 +37,11 @@ Each card is a fast read pairing the headline capability with the evidence that - **Evidence:** `docs/market/competitive-landscape.md` distils a 15-vendor comparison; `03_VISION.md` lists moats; `docs/reachability/lead.md` details the reachability proof moat. - **Why it matters:** Clear differentiation guides roadmap and sales; keeps us focused on replayable, sovereign, and explainable security. +## 8. Deterministic Task Packs (2025-11) +- **What it is:** TaskRunner executes declarative Task Packs with plan-hash binding, approvals, sealed-mode enforcement, and DSSE evidence bundles. +- **Evidence:** Product advisory `docs/product-advisories/29-Nov-2025 - Task Pack Orchestration and Automation.md`; architecture contract in `docs/modules/taskrunner/architecture.md`; runbook/spec in `docs/task-packs/*.md`. +- **Why it matters:** Security teams get auditable, air-gap-friendly automation with human approvals and provable provenance, reusing the same workflows online or offline. + ### Explore Further - Walk the first deployment in [quickstart.md](quickstart.md). 
- Dive into architectural flows in [high-level-architecture.md](high-level-architecture.md). diff --git a/docs/modules/attestor/README.md b/docs/modules/attestor/README.md index 1f99d5b44..953b31050 100644 --- a/docs/modules/attestor/README.md +++ b/docs/modules/attestor/README.md @@ -2,8 +2,10 @@ Attestor converts signed DSSE evidence from the Signer into transparency-log proofs and verifiable reports for every downstream surface (Policy Engine, Export Center, CLI, Console, Scheduler). It is the trust backbone that proves SBOM, scan, VEX, and policy artefacts were signed, witnessed, and preserved without tampering. -## Latest updates (2025-10-19) -- Platform Events refresh published canonical `attestor.logged@1` samples under `docs/events/samples/` and validated schemas (`docs/updates/2025-10-18-docs-guild.md`, `docs/updates/2025-10-19-docs-guild.md`). Consumers should align verification workflows and tests with those sample envelopes. +## Latest updates (2025-11-30) +- Sprint tracker `docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md` and module `TASKS.md` added to mirror status. +- Observability runbook stub + dashboard placeholder added under `operations/` (offline import) pending next demo outputs. +- Platform Events samples (2025-10-18/19) remain the current canonical `attestor.logged@1`; keep verification workflows aligned. ## Why it exists - **Evidence first:** organisations need portable, verifiable attestations that prove build provenance, SBOM availability, policy verdicts, and VEX statements. @@ -44,10 +46,11 @@ All predicates capture subjects, issuer metadata, policy context, materials, opt - Export Center packages attestation bundles (`stella export attestation-bundle`) for Offline Kit delivery. - Transparency logs can be mirrored; offline mode records gaps and provides compensating controls. 
-## Observability & performance -- Metrics: `attestor_submission_total`, `attestor_verify_seconds`, `attestor_cache_hit_ratio`, `attestor_rekor_latency_seconds`. -- Logs capture tenant, issuer, subject digests, Rekor UUID, proof status, and policy verdict. -- Performance target: ≥1 000 envelopes/minute per worker with cached verification, batched operations, and concurrency controls. +## Observability & performance +- Metrics: `attestor_submission_total`, `attestor_verify_seconds`, `attestor_cache_hit_ratio`, `attestor_rekor_latency_seconds`. +- Logs capture tenant, issuer, subject digests, Rekor UUID, proof status, and policy verdict. +- Performance target: ≥1 000 envelopes/minute per worker with cached verification, batched operations, and concurrency controls. +- Observability assets: `operations/observability.md` and `operations/dashboards/attestor-observability.json` (offline import). ## Key integrations - Signer (DSSE source), Authority (scopes & tenancy), Export Center (attestation bundles), Policy Engine (verification policies), Scanner/Excititor (subject evidence), Notify (key rotation & verification alerts), Observability stack (dashboards/alerts). diff --git a/docs/modules/attestor/TASKS.md b/docs/modules/attestor/TASKS.md new file mode 100644 index 000000000..d0d0b90f6 --- /dev/null +++ b/docs/modules/attestor/TASKS.md @@ -0,0 +1,9 @@ +# Attestor · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| ATTESTOR-DOCS-0001 | DONE (2025-11-05) | Docs Guild | README aligned with latest release notes and attestation samples. | +| ATTESTOR-OPS-0001 | BLOCKED (2025-11-30) | Ops Guild | Await next demo outputs; observability runbook stub added. | +| ATTESTOR-ENG-0001 | DONE (2025-11-27) | Module Team | Implementation plan readiness tracker added. | + +> Keep this table in lockstep with `docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md` (TODO/DOING/DONE/BLOCKED updates go to both files). 
diff --git a/docs/modules/attestor/operations/dashboards/attestor-observability.json b/docs/modules/attestor/operations/dashboards/attestor-observability.json new file mode 100644 index 000000000..9bf39b92a --- /dev/null +++ b/docs/modules/attestor/operations/dashboards/attestor-observability.json @@ -0,0 +1,6 @@ +{ + "_note": "Placeholder Grafana dashboard stub for Attestor. Replace panels when metrics endpoints are available; keep offline-import friendly.", + "schemaVersion": 39, + "title": "Attestor Observability (stub)", + "panels": [] +} diff --git a/docs/modules/attestor/operations/observability.md b/docs/modules/attestor/operations/observability.md new file mode 100644 index 000000000..b906c2ede --- /dev/null +++ b/docs/modules/attestor/operations/observability.md @@ -0,0 +1,39 @@ +# Attestor observability runbook (stub · 2025-11-29 demo) + +## Dashboards (offline import) +- Grafana JSON: `docs/modules/attestor/operations/dashboards/attestor-observability.json` (import locally; no external data sources assumed). +- Planned panels: signing latency p95/p99, verification failure rate, transparency log submission lag, key rotation age, DSSE verification errors, queue depth/backlog, and attestation bundle size histogram. + +## Key metrics +- `attestor_sign_latency_seconds_bucket` — signing latency per request. +- `attestor_verify_failures_total{reason}` — verification failures. +- `attestor_tlog_submission_latency_seconds_bucket` — transparency log latency. +- `attestor_key_rotation_age_seconds` — time since last rotation. +- `attestor_queue_backlog` — pending attestation jobs. +- `attestor_bundle_size_bytes_bucket` — bundle size distribution. + +## Logs & traces +- Correlate by `correlationId`, `attestationId`, `tenant`, `keyId`. Include `policyVersion`, `tlogIndex`, and `rekorUrl` where applicable. +- Traces disabled by default for air-gap; enable by pointing OTLP exporter to on-prem collector. 
+ +## Health/diagnostics +- `/health/liveness` and `/health/readiness` check signer availability, tlog reachability, and storage. +- `/status` exposes build version, commit, feature flags; verify against offline bundle manifest. +- Verification probe: `POST /api/attestations/verify` with sample bundle once demo outputs land. + +## Alert hints +- Signing latency p99 > 1s. +- Verification failure spikes. +- Transparency log submission lag > 10s. +- Key rotation age exceeding policy threshold. +- Queue backlog above configured threshold. + +## Offline verification steps +1) Import Grafana JSON locally; point to Prometheus scrape labeled `attestor`. +2) Run sample verification once demo bundle available and confirm metrics/logs emit locally. +3) Fetch `/status` and compare commit/version to offline bundle manifest. + +## Evidence locations +- Sprint tracker: `docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md`. +- Module docs: `README.md`, `architecture.md`, `implementation_plan.md`. +- Dashboard stub: `operations/dashboards/attestor-observability.json`. diff --git a/docs/modules/authority/AGENTS.md b/docs/modules/authority/AGENTS.md index 0b609001b..fadd29cf4 100644 --- a/docs/modules/authority/AGENTS.md +++ b/docs/modules/authority/AGENTS.md @@ -8,6 +8,7 @@ Authority is the platform OIDC/OAuth2 control plane that mints short-lived, send - [Architecture](./architecture.md) - [Implementation plan](./implementation_plan.md) - [Task board](./TASKS.md) +- [Observability runbook](./operations/monitoring.md) and Grafana JSON (offline import) ## How to get started 1. Open sprint file `/docs/implplan/SPRINT_*.md` and locate the stories referencing this module. 
diff --git a/docs/modules/authority/README.md b/docs/modules/authority/README.md index fdce7a65f..86a7beb06 100644 --- a/docs/modules/authority/README.md +++ b/docs/modules/authority/README.md @@ -2,7 +2,12 @@ Authority is the platform OIDC/OAuth2 control plane that mints short-lived, sender-constrained operational tokens (OpToks) for every StellaOps service and tool. -## Responsibilities +## Latest updates (2025-11-30) +- Sprint tracker `docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md` and module `TASKS.md` added to mirror status. +- Monitoring/observability references consolidated; Grafana JSON remains offline import (`operations/grafana-dashboard.json`). +- Prior content retained: OpTok/DPoP/mTLS responsibilities, backup/restore, key rotation. + +## Responsibilities - Expose device-code, auth-code, and client-credential flows with DPoP or mTLS binding. - Manage signing keys, JWKS rotation, and PoE integration for plan enforcement. - Emit structured audit events and enforce tenant-aware scope policies. @@ -18,16 +23,17 @@ Authority is the platform OIDC/OAuth2 control plane that mints short-lived, send - CLI/UI for login flows and token management. - Scheduler/Scanner for machine-to-machine scope enforcement. -## Operational notes -- MongoDB for tenant, client, and token state. -- Key material in KMS/HSM with rotation runbooks (see ./operations/key-rotation.md). -- Grafana/Prometheus dashboards for auth latency/issuance. +## Operational notes +- MongoDB for tenant, client, and token state. +- Key material in KMS/HSM with rotation runbooks (`operations/key-rotation.md`). +- Monitoring runbook (`operations/monitoring.md`) and offline-import Grafana JSON (`operations/grafana-dashboard.json`). 
-## Related resources -- ./operations/backup-restore.md -- ./operations/key-rotation.md -- ./operations/monitoring.md -- ./operations/grafana-dashboard.json +## Related resources +- ./operations/backup-restore.md +- ./operations/key-rotation.md +- ./operations/monitoring.md +- ./operations/grafana-dashboard.json +- Sprint/status mirrors: `docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md`, `docs/modules/authority/TASKS.md` ## Backlog references - DOCS-SEC-62-001 (scope hardening doc) in ../../TASKS.md. diff --git a/docs/modules/authority/TASKS.md b/docs/modules/authority/TASKS.md new file mode 100644 index 000000000..3de5fa990 --- /dev/null +++ b/docs/modules/authority/TASKS.md @@ -0,0 +1,9 @@ +# Authority · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| AUTHORITY-DOCS-0001 | DONE (2025-11-30) | Docs Guild | README/architecture refreshed; sprint + monitoring links added. | +| AUTHORITY-ENG-0001 | DONE (2025-11-27) | Module Team | Readiness tracker in implementation_plan mapped to epics/sprints. | +| AUTHORITY-OPS-0001 | DONE (2025-11-30) | Ops Guild | TASKS board created; monitoring/grafana references aligned; offline-friendly. | + +> Keep this table in lockstep with `docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md` (TODO/DOING/DONE/BLOCKED updates go to both files). diff --git a/docs/modules/authority/implementation_plan.md b/docs/modules/authority/implementation_plan.md index 5b1e0c2e9..f23ef8c37 100644 --- a/docs/modules/authority/implementation_plan.md +++ b/docs/modules/authority/implementation_plan.md @@ -16,16 +16,21 @@ - **Epic 14 – Identity & Tenancy:** implement tenant isolation, RBAC hierarchies, audit trails, and PoE integration. - Track additional work (DOCS-SEC-62-001, AUTH-POLICY-20-001/002) in ../../TASKS.md and src/Authority/**/TASKS.md. -## Coordination -- Review ./AGENTS.md before picking up new work. 
-- Sync with cross-cutting teams noted in `/docs/implplan/SPRINT_*.md`. -- Update this plan whenever scope, dependencies, or guardrails change. - ---- - -## Sprint readiness tracker - -> Last updated: 2025-11-27 (AUTHORITY-ENG-0001) +## Coordination +- Review ./AGENTS.md before picking up new work. +- Sync with cross-cutting teams noted in `/docs/implplan/SPRINT_*.md`. +- Update this plan whenever scope, dependencies, or guardrails change. + +## Sprint alignment (2025-11-30) +- Docs refresh tracked in `docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md`; statuses mirrored in `docs/modules/authority/TASKS.md`. +- Observability assets remain in `operations/monitoring.md` with Grafana JSON `operations/grafana-dashboard.json` (offline import). +- Authority readiness tracker (AUTHORITY-ENG-0001) delivered 2025-11-27; future updates should adjust both sprint and TASKS. + +--- + +## Sprint readiness tracker + +> Last updated: 2025-11-27 (AUTHORITY-ENG-0001) This section maps epic milestones to implementation sprints and tracks readiness checkpoints. diff --git a/docs/modules/excititor/README.md b/docs/modules/excititor/README.md index b250e5aeb..4c78d0ef9 100644 --- a/docs/modules/excititor/README.md +++ b/docs/modules/excititor/README.md @@ -2,7 +2,10 @@ Excititor converts heterogeneous VEX feeds into raw observations and linksets that honour the Aggregation-Only Contract. -## Latest updates (2025-11-05) +## Latest updates (2025-11-30) +- Sprint tracker `docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md` and module `TASKS.md` added to mirror status. +- Observability/runbook assets remain in `operations/observability.md` and `observability/` (timeline, locker manifests); dashboards stay offline-import friendly. +- Prior updates (2025-11-05): Link-Not-Merge readiness and consensus beta note (`../../updates/2025-11-05-excitor-consensus-beta.md`), observability guide additions, DSSE packaging guidance, and Policy/CLI follow-ups tracked in SPRINT_200. 
- Link-Not-Merge readiness: release note [Excitor consensus beta](../../updates/2025-11-05-excitor-consensus-beta.md) captures how Excititor feeds power the Excititor consensus beta (sample payload in [consensus JSON](../../vex/consensus-json.md)). - Added [observability guide](operations/observability.md) describing the evidence metrics emitted by `EXCITITOR-AIAI-31-003` (request counters, statement histogram, signature status, guard violations) so Ops/Lens can alert on misuse. - README now points policy/UI teams to the upcoming consensus integration work. diff --git a/docs/modules/excititor/TASKS.md b/docs/modules/excititor/TASKS.md new file mode 100644 index 000000000..54f4aaf46 --- /dev/null +++ b/docs/modules/excititor/TASKS.md @@ -0,0 +1,12 @@ +# Excititor · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| EXCITOR-DOCS-0001 | DONE (2025-11-07) | Docs Guild | README aligned to consensus beta release notes. | +| EXCITOR-OPS-0001 | DONE (2025-11-07) | Ops Guild | Runbooks/observability checklist added (`mirrors.md`). | +| EXCITOR-ENG-0001 | DONE (2025-11-07) | Module Team | Implementation plan alignment with SPRINT_200 updates. | +| EXCITITOR-DOCS-0001 | BLOCKED (2025-11-19) | Docs Guild | Await chunk API CI validation + OpenAPI freeze before finalizing docs. | +| EXCITITOR-ENG-0001 | TODO | Module Team | Update engineering notes once chunk API/OpenAPI unblock. | +| EXCITITOR-OPS-0001 | TODO | Ops Guild | Sync observability/runbook updates after OpenAPI freeze. | + +> Keep this table in lockstep with `docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md` (TODO/DOING/DONE/BLOCKED updates go to both files). 
diff --git a/docs/modules/excititor/implementation_plan.md b/docs/modules/excititor/implementation_plan.md index 01cc3e414..a57296692 100644 --- a/docs/modules/excititor/implementation_plan.md +++ b/docs/modules/excititor/implementation_plan.md @@ -20,12 +20,15 @@ - Sync with cross-cutting teams noted in `/docs/implplan/SPRINT_*.md`. - Update this plan whenever scope, dependencies, or guardrails change. -## Sprint alignment (2025-11-07) +## Sprint alignment (2025-11-30) -| Sprint task | State (SPRINT_200) | Notes | +| Sprint task | State (SPRINT_0333_0001_0001_docs_modules_excititor) | Notes | | --- | --- | --- | -| EXCITITOR-DOCS-0001 | DONE | README release alignment + consensus beta references refreshed (DSSE/export guidance). | -| EXCITITOR-ENG-0001 | DONE | Implementation plan now mirrors `SPRINT_200_documentation_process.md` through this table. | -| EXCITITOR-OPS-0001 | DONE | Runbook/observability checklist added to `docs/modules/excititor/mirrors.md`. | +| EXCITOR-DOCS-0001 | DONE | README release alignment + consensus beta references refreshed (DSSE/export guidance). | +| EXCITOR-ENG-0001 | DONE | Implementation plan mirrored to sprint status; TASKS board created. | +| EXCITOR-OPS-0001 | DONE | Runbook/observability checklist added to `mirrors.md`. | +| EXCITITOR-DOCS-0001 | BLOCKED | Waiting on chunk API CI validation + OpenAPI freeze. | +| EXCITITOR-ENG-0001 | TODO | Will update engineering notes once DOCS unblock. | +| EXCITITOR-OPS-0001 | TODO | Sync observability/runbook updates after OpenAPI freeze. | -See `/docs/implplan/SPRINT_200_documentation_process.md` for the canonical status table. +See `/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md` for the canonical status table. 
diff --git a/docs/modules/export-center/operations/kms-envelope-pattern.md b/docs/modules/export-center/operations/kms-envelope-pattern.md new file mode 100644 index 000000000..6315b3b06 --- /dev/null +++ b/docs/modules/export-center/operations/kms-envelope-pattern.md @@ -0,0 +1,47 @@ +# Export Center KMS Envelope Pattern (age + AES-GCM) + +Status: Adopted for Sprint 0164-0001-0001 (ExportCenter III) + +Scope: Defines deterministic envelope handling for mirror bundle encryption (`EXPORT-SVC-37-002`) and general export signing. Applies to worker path and verification docs. + +## Key hierarchy +- **Content key (DEK):** 32-byte random generated per export run. Used for AES-256-GCM over encrypted payloads (`/data` subtree for mirror; optional for others). +- **Nonce:** 12-byte random per file; stored alongside ciphertext; derive Additional Authenticated Data (AAD) as `{runId}:{relativePath}` to bind file path and run. +- **Wrapping keys:** + - **age recipients** (preferred for offline): each tenant can list one or more age public keys. DEK is wrapped once per recipient using age X25519. Store `recipient`, `wrappedKey` (base64), and optional `keyId` in provenance. + - **KMS envelope** (Authority/HSM): DEK wrapped with tenant-scoped KMS key alias `export/envelope`. Store `kmsKeyId` (authority URI or external ARN) and `wrappedKey` (base64) plus KMS-provided `algorithm`. + +## Write path (worker) +1) Generate DEK (32 bytes) per run; zeroize after use. +2) For each encrypted file, derive AAD = `{runId}:{relativePath}`; encrypt with AES-256-GCM (nonce per file). Store `nonce` and `ciphertext`. +3) Wrap DEK for all configured recipients: + - age: `age --encrypt --recipient <age-public-key>` over DEK bytes → base64. + - KMS: `Encrypt`/`WrapKey` with `KeyId=export/envelope` and `EncryptionContext={runId,tenant}` → base64.
+4) Record wrapping metadata in `provenance.json` under `environment.encryption.recipients[]` preserving deterministic order (age recipients lexicographically by `recipient`, then KMS entries by `kmsKeyId`). +5) Include `encryption.mode` (`age` or `aes-gcm+kms`), `aadFormat`, and `nonceFormat` in provenance for verification tooling. + +## Read/verification path +1) Select a recipient entry that matches available keys (age private key or KMS key). +2) Unwrap DEK: + - age: `age --decrypt` → DEK bytes. + - KMS: `Decrypt`/`UnwrapKey` with same encryption context. +3) For each encrypted file, recompute AAD from `{runId}:{relativePath}`, decrypt with AES-256-GCM using stored `nonce`, verify tag. +4) Recompute SHA-256 of decrypted payload and compare with `export.json` entries. + +## Determinism & offline posture +- Recipient lists and wrapped keys are ordered deterministically to keep `provenance.json` hashes stable across retries. +- age path works fully offline; KMS path requires Authority/HSM availability but stores all metadata to allow later decryption once KMS is reachable. +- Use fixed casing and field names: `mode`, `recipients[] {type, recipient|kmsKeyId, wrappedKey, keyId?}` and `aadFormat`. + +## Testing notes +- Add regression cases that encrypt/decrypt fixtures with both age and KMS paths, asserting identical manifest/provenance hashes across reruns. +- Ensure decryption fails when AAD does not match expected `{runId}:{relativePath}` (prevents path swapping). +- Keep tests air-gap friendly: mock KMS wrapper with deterministic stub keys. + +## Rollout guidance +- Default to age recipients for Offline Kit deployments; enable KMS wrapping where Authority/HSM is reachable. 
+- Configuration knobs: + - `ExportCenter:Encryption:Mode` = `age` | `kms` + - `ExportCenter:Encryption:Recipients` = list of age public keys + - `ExportCenter:Encryption:KmsKeyId` = tenant-specific key alias (when using KMS) +- Documented verification commands should reference this pattern (update CLI/Console guides when payloads change). diff --git a/docs/modules/export-center/operations/runbook.md b/docs/modules/export-center/operations/runbook.md index e016469ce..4f2e40097 100644 --- a/docs/modules/export-center/operations/runbook.md +++ b/docs/modules/export-center/operations/runbook.md @@ -11,15 +11,16 @@ The Export Center packages StellaOps evidence and policy overlays into reproduci - Runbook execution for recovery, retention, and compliance. - Coordination with DevOps validation (cosign + `trivy module db import` smoke tests). -Related documentation: - -- `docs/modules/export-center/overview.md` -- `docs/modules/export-center/architecture.md` -- `docs/modules/export-center/profiles.md` -- `docs/modules/export-center/trivy-adapter.md` -- `docs/modules/export-center/mirror-bundles.md` -- `docs/modules/export-center/api.md` -- `docs/modules/export-center/cli.md` +Related documentation: + +- `docs/modules/export-center/overview.md` +- `docs/modules/export-center/architecture.md` +- `docs/modules/export-center/profiles.md` +- `docs/modules/export-center/trivy-adapter.md` +- `docs/modules/export-center/mirror-bundles.md` +- `docs/modules/export-center/api.md` +- `docs/modules/export-center/cli.md` +- `docs/modules/export-center/operations/kms-envelope-pattern.md` ## 2. 
Contacts & tooling diff --git a/docs/modules/notify/AGENTS.md b/docs/modules/notify/AGENTS.md index e2d81edd3..d72b1b61e 100644 --- a/docs/modules/notify/AGENTS.md +++ b/docs/modules/notify/AGENTS.md @@ -8,6 +8,7 @@ Notify evaluates operator-defined rules against platform events and dispatches c - [Architecture](./architecture.md) - [Implementation plan](./implementation_plan.md) - [Task board](./TASKS.md) +- [Observability runbook](./operations/observability.md) (offline import friendly) ## How to get started 1. Open sprint file `/docs/implplan/SPRINT_*.md` and locate the stories referencing this module. diff --git a/docs/modules/notify/README.md b/docs/modules/notify/README.md index 02b246eeb..e951af974 100644 --- a/docs/modules/notify/README.md +++ b/docs/modules/notify/README.md @@ -2,6 +2,11 @@ Notify (Notifications Studio) converts platform events into tenant-scoped alerts with deterministic delivery, offline parity, and a full audit trail. The service is split between the reusable tooling in `src/Notify/*` and the runtime host in `src/Notifier/*` (decision recorded 2025-11-02) so downstream systems can embed the rules engine without inheriting the Studio UI. +## Latest updates (2025-11-30) +- Sprint tracker `docs/implplan/SPRINT_322_docs_modules_notify.md` and module `TASKS.md` added to mirror status. +- Observability runbook stub and Grafana placeholder added under `operations/` (offline import); finalize after next demo. +- NOTIFY-DOCS-0002 remains blocked pending NOTIFY-SVC-39-001..004 outputs (correlation/digests/simulation/quiet hours). + ## Scope & responsibilities - Apply tenant-scoped rules to events from Scanner, Scheduler, VEX Lens, Attestor, Task Runner, and Zastava. - Render channel-specific payloads (Slack, Teams, Email, webhook) using deterministic templates with localisation safeguards. 
@@ -40,6 +45,7 @@ Status for these items is tracked in `src/Notifier/StellaOps.Notifier/TASKS.md` - Schema fixtures live in `./resources/schemas`; event and delivery samples live in `./resources/samples` for contract tests and UI mocks. - Offline Kit bundles ship plug-ins, default templates, and seed rules; update manifests under `ops/offline-kit/` when connectors change. - Dashboards and alert references depend on `DEVOPS-NOTIFY-39-002`; coordinate before renaming metrics or labels. +- Observability assets: `operations/observability.md` and `operations/dashboards/notify-observability.json` (offline import). - When releasing new rule or connector features, mirror guidance into `docs/notifications/*.md` and checklists in `docs/updates/2025-10-29-notify-docs.md` until the follow-ups are closed. ## Epic alignment diff --git a/docs/modules/notify/TASKS.md b/docs/modules/notify/TASKS.md new file mode 100644 index 000000000..98d16231e --- /dev/null +++ b/docs/modules/notify/TASKS.md @@ -0,0 +1,10 @@ +# Notify · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| NOTIFY-DOCS-0001 | DONE (2025-11-05) | Docs Guild | README refreshed for Notifications Studio pivot + release notes. | +| NOTIFY-OPS-0001 | BLOCKED (2025-11-30) | Ops Guild | Await next demo outputs; observability runbook stub added. | +| NOTIFY-ENG-0001 | DONE (2025-11-27) | Module Team | Implementation plan readiness tracker aligned with SPRINT_171..173. | +| NOTIFY-DOCS-0002 | BLOCKED (2025-11-30) | Docs Guild | Pending NOTIFY-SVC-39-001..004 correlation/digests/simulation/quiet hours evidence. | + +> Keep this table in lockstep with `docs/implplan/SPRINT_322_docs_modules_notify.md` (TODO/DOING/DONE/BLOCKED updates go to both files). 
diff --git a/docs/modules/notify/architecture.md b/docs/modules/notify/architecture.md index a8c46f263..5ca6f61ad 100644 --- a/docs/modules/notify/architecture.md +++ b/docs/modules/notify/architecture.md @@ -384,6 +384,7 @@ Authority signs ack tokens using keys configured under `notifications.ackTokens` * `notify.sent_total{channel}` / `notify.failed_total{channel,code}` * `notify.delivery_latency_seconds{channel}` (end‑to‑end) * **Tracing**: spans `ingest`, `match`, `render`, `send`; correlation id = `eventId`. +- Runbook + dashboard stub (offline import): `operations/observability.md`, `operations/dashboards/notify-observability.json` (to be populated after next demo). **SLO targets** diff --git a/docs/modules/notify/implementation_plan.md b/docs/modules/notify/implementation_plan.md index 846692550..e558d9d19 100644 --- a/docs/modules/notify/implementation_plan.md +++ b/docs/modules/notify/implementation_plan.md @@ -56,13 +56,18 @@ - **Security:** RBAC tests, tenant isolation, secret reference validation, DSSE signature verification. - **Offline:** export/import round-trips, Offline Kit deployment, manual delivery replay. -## Definition of done -- Notify service, workers, connectors, Console/CLI, observability, and Offline Kit assets shipped with documentation and runbooks. -- Compliance checklist appended to docs; ./TASKS.md and ../../TASKS.md updated with progress. - ---- - -## Sprint readiness tracker +## Definition of done +- Notify service, workers, connectors, Console/CLI, observability, and Offline Kit assets shipped with documentation and runbooks. +- Compliance checklist appended to docs; ./TASKS.md and ../../TASKS.md updated with progress. + +## Sprint alignment (2025-11-30) +- Docs sprint: `docs/implplan/SPRINT_322_docs_modules_notify.md`; statuses mirrored in `docs/modules/notify/TASKS.md`. +- Observability evidence stub: `operations/observability.md` and `operations/dashboards/notify-observability.json` (to be populated after next demo outputs). 
+- NOTIFY-DOCS-0002 remains blocked pending NOTIFY-SVC-39-001..004 (correlation/digests/simulation/quiet hours); keep sprint/TASKS synced when those land. + +--- + +## Sprint readiness tracker > Last updated: 2025-11-27 (NOTIFY-ENG-0001) diff --git a/docs/modules/notify/operations/dashboards/notify-observability.json b/docs/modules/notify/operations/dashboards/notify-observability.json new file mode 100644 index 000000000..69571921a --- /dev/null +++ b/docs/modules/notify/operations/dashboards/notify-observability.json @@ -0,0 +1,6 @@ +{ + "_note": "Placeholder Grafana dashboard stub for Notify. Replace panels when metrics endpoints are available; keep offline-import friendly.", + "schemaVersion": 39, + "title": "Notify Observability (stub)", + "panels": [] +} diff --git a/docs/modules/notify/operations/observability.md b/docs/modules/notify/operations/observability.md new file mode 100644 index 000000000..b0fc9d04d --- /dev/null +++ b/docs/modules/notify/operations/observability.md @@ -0,0 +1,38 @@ +# Notify observability runbook (stub · 2025-11-29 demo) + +## Dashboards (offline import) +- Grafana JSON: `docs/modules/notify/operations/dashboards/notify-observability.json` (import locally; no external data sources assumed). +- Planned panels: enqueue/dequeue rate, delivery latency p95/p99, channel error rate, retry/dead-letter counts, rule evaluation latency, tenant isolation breaches (should stay 0), and notification simulation outcomes. + +## Key metrics +- `notify_enqueue_total{channel}` — notifications enqueued by channel. +- `notify_delivery_latency_seconds_bucket{channel}` — delivery latency per channel. +- `notify_delivery_failures_total{channel,reason}` — failed deliveries. +- `notify_retry_total{channel}` and `notify_deadletter_total{channel}` — retries and dead letters. +- `notify_rule_eval_duration_seconds_bucket` — rule evaluation latency. +- `notify_simulation_total{result}` — simulation outcomes when quiet hours/correlation rules applied. 
+ +## Logs & traces +- Correlate by `notificationId`, `ruleId`, `tenant`, `channel`. Include `quietHoursApplied`, `correlationKey`, `retries` fields. +- Traces disabled by default for air-gap; enable by pointing OTLP exporter to on-prem collector. + +## Health/diagnostics +- `/health/liveness` and `/health/readiness` check queue backend reachability and channel provider credentials. +- `/status` exposes build version, commit, feature flags; verify against offline bundle manifest. +- Simulation probe: `/api/notify/simulate` with sample rule set to validate correlation/digest wiring once NOTIFY-SVC-39-001..004 land. + +## Alert hints +- Delivery latency p99 > 1.5s for email/webhook channels. +- Dead-letter queue growth > threshold. +- Rule evaluation latency p99 > 500ms. +- Correlation/quiet-hours simulation failures once enabled. + +## Offline verification steps +1) Import Grafana JSON locally; point to Prometheus scrape labeled `notify`. +2) Run `stella notify simulate --rules samples/rules.yaml --dry-run` (once available) and ensure metrics/logs emit locally. +3) Fetch `/status` and compare commit/version to offline bundle manifest. + +## Evidence locations +- Sprint tracker: `docs/implplan/SPRINT_322_docs_modules_notify.md`. +- Module docs: `README.md`, `architecture.md`, `implementation_plan.md`. +- Dashboard stub: `operations/dashboards/notify-observability.json`. diff --git a/docs/modules/orchestrator/README.md b/docs/modules/orchestrator/README.md index 98ccb261c..093c4de89 100644 --- a/docs/modules/orchestrator/README.md +++ b/docs/modules/orchestrator/README.md @@ -2,8 +2,10 @@ The Orchestrator schedules, observes, and recovers ingestion and analysis jobs across the StellaOps platform. -## Latest updates (2025-11-18) -- Job leasing now flows through the Task Runner bridge: allocations carry idempotency keys, lease durations, and retry hints; workers acknowledge via claim/ack and emit heartbeats. 
+## Latest updates (2025-11-30) +- OpenAPI discovery published at `/.well-known/openapi` with `openapi/orchestrator.json`; includes pagination/idempotency/error-envelope examples and version headers. +- Legacy job detail/summary endpoints now emit `Deprecation` + `Link` headers pointing to the stable replacements. +- Job leasing flows through the Task Runner bridge: allocations carry idempotency keys, lease durations, and retry hints; workers acknowledge via claim/ack and emit heartbeats. - Event envelopes remain interim pending ORCH-SVC-37-101; include provenance (tenant/project, job type, correlationId, task runner id) in all notifier events. - Authority `orch:quota` / `orch:backfill` scopes require reason/ticket audit fields; include them in runbooks and dashboard overrides. diff --git a/docs/modules/orchestrator/TASKS.md b/docs/modules/orchestrator/TASKS.md index f2635e4de..63a65f3cf 100644 --- a/docs/modules/orchestrator/TASKS.md +++ b/docs/modules/orchestrator/TASKS.md @@ -5,5 +5,8 @@ | ORCH-DOCS-0001 | DONE | Docs Guild | README updated with leasing / task runner bridge notes and interim envelope guidance. | | ORCH-ENG-0001 | DONE | Module Team | Sprint references normalized; notes synced to doc sprint. | | ORCH-OPS-0001 | DONE | Ops Guild | Runbook impacts captured in README; follow-up to update ops docs. | +| ORCH-OAS-61-001 | DONE | Orchestrator Service Guild | OpenAPI spec drafted for orchestrator endpoints with pagination/idempotency/error envelopes (2025-11-30). | +| ORCH-OAS-61-002 | DONE | Orchestrator Service Guild | `/.well-known/openapi` discovery endpoint aligned to runtime build metadata (2025-11-30). | +| ORCH-OAS-63-001 | DONE | Orchestrator Service Guild | Legacy job endpoints emit `Deprecation` + `Link` headers; doc metadata updated (2025-11-30). | Status rules: mirror changes in `docs/implplan/SPRINT_0323_0001_0001_docs_modules_orchestrator.md`; use TODO → DOING → DONE/BLOCKED; add brief note if pausing. 
diff --git a/docs/modules/orchestrator/architecture.md b/docs/modules/orchestrator/architecture.md index 6b7299858..680285c4e 100644 --- a/docs/modules/orchestrator/architecture.md +++ b/docs/modules/orchestrator/architecture.md @@ -38,8 +38,9 @@ - `POST /api/limits/throttle` — apply throttle (requires elevated scope). - `GET /api/dashboard/metrics` — aggregated metrics for Console dashboards. - Event envelope draft (`docs/modules/orchestrator/event-envelope.md`) defines notifier/webhook/SSE payloads with idempotency keys, provenance, and task runner metadata for job/pack-run events. - -All responses include deterministic timestamps, job digests, and DSSE signature fields for offline reconciliation. +- OpenAPI discovery: `/.well-known/openapi` exposes `/openapi/orchestrator.json` (OAS 3.1) with pagination/idempotency/error-envelope examples; legacy job detail/summary endpoints now ship `Deprecation` + `Link` headers that point to their replacements. + +All responses include deterministic timestamps, job digests, and DSSE signature fields for offline reconciliation. ## 5) Observability diff --git a/docs/modules/taskrunner/architecture.md b/docs/modules/taskrunner/architecture.md new file mode 100644 index 000000000..506741dc5 --- /dev/null +++ b/docs/modules/taskrunner/architecture.md @@ -0,0 +1,87 @@ +# TaskRunner Architecture (v1) + +> Canonical contract for TaskRunner delivery scoped by SPRINT_0157_0001_0002 (TaskRunner Blockers) and SPRINT_0157_0001_0001 (TaskRunner I). Anchored in product advisory **"29-Nov-2025 - Task Pack Orchestration and Automation"** and the Task Pack runbook/spec (`docs/task-packs/*.md`). + +## 1. Purpose and Scope +- Execute Task Packs deterministically with approvals, sealed-mode enforcement, and evidence capture. +- Provide API/CLI surface for pack submission, status, logs, approvals, artifacts, and cancellation. +- Produce provenance: DSSE attestation + evidence bundle for every completed run. 
+- Operate offline/air-gapped with plan-hash binding and sealed-mode network allowlists.
+
+## 2. Components
+- **WebService** (`StellaOps.TaskRunner.WebService`) - HTTP API, plan hash validation, SSE log streaming, approval endpoints.
+- **Worker** (`StellaOps.TaskRunner.Worker`) - run orchestration, retries/backoff, artifact capture, attestation generation.
+- **Core** (`StellaOps.TaskRunner.Core`) - execution graph builder, simulation engine, step state machine, policy/approval gate abstractions.
+- **Infrastructure** (`StellaOps.TaskRunner.Infrastructure`) - storage adapters (Mongo, file), artifact/object store clients, evidence bundle writer.
+
+## 3. Execution Phases
+1. **Plan** - parse manifest, validate schema, resolve inputs/secrets, build execution graph, compute canonical `planHash` (SHA-256 over normalised graph).
+2. **Simulation (optional)** - dry-run graph; emit deterministic preview with approvals/policy gates highlighted.
+3. **Execution** - verify runtime graph matches `planHash`; execute steps honoring `maxParallel`, `continueOnError`, `map`/`parallel` semantics; stream logs/events.
+4. **Evidence** - capture artifacts + transcripts, emit DSSE attestation binding `planHash`, inputs/outputs, steps, and timestamps; expose artifact listings via API for post-run retrieval.
+
+## 4. API Surface (v1)
+- `POST /api/runs` (`packs.run`) - submit pack run; requires manifest/version, inputs, tenant context; returns `runId` + `planHash`.
+- `GET /api/runs/{runId}` (`packs.read`) - run status (graph, attempts, pending gates).
+- `GET /api/runs/{runId}/logs` (`packs.read`) - SSE stream of ordered log events.
+- `GET /api/runs/{runId}/artifacts` (`packs.read`) - list captured artifacts with digests/paths.
+- `POST /api/runs/{runId}/approve` (`packs.approve`) - record approval gate decision (requires Authority token claims `pack_run_id`, `pack_gate_id`, `pack_plan_hash`).
+- `POST /api/runs/{runId}/cancel` (`packs.run`) - cancel active run. 
+- TODO (Phase II): `GET /.well-known/openapi` (TASKRUN-OAS-61-002) after OAS publication. + +## 5. Data Model (Mongo, mirrors migration doc) +- **pack_runs**: `_id`, `planHash`, `plan`, `failurePolicy`, `requestedAt`, `createdAt`, `updatedAt`, `steps[]`, `tenantId`. +- **pack_run_logs**: `_id`, `runId`, `sequence` (monotonic), `timestamp` (UTC), `level`, `eventType`, `message`, `stepId?`, `metadata`. +- **pack_artifacts**: `_id`, `runId`, `name`, `type`, `sourcePath?`, `storedPath?`, `status`, `notes?`, `capturedAt`. +- Indexes as defined in `docs/modules/taskrunner/migrations/pack-run-collections.md`. + +## 6. Step Types and Semantics +- `run` - module invocation; declares `inputs`/`outputs`. +- `parallel` - executes nested `steps[]`; honors `maxParallel`. +- `map` - expands items into child steps (`stepId[index]::templateId`). +- `gate.approval` - human approval checkpoint; enforces TTL/required count; pauses run until satisfied or expired. +- `gate.policy` - Policy Engine evaluation; `failAction` decides halt vs. continue. + +## 7. Determinism, Air-Gap, and Security +- Plan hash binding: runtime graph must equal planned graph; mismatch aborts run. +- All timestamps UTC ISO-8601; ordered logs via `(runId, sequence)` unique index. +- Secrets never logged; evidence bundles store only redacted metadata. +- Sealed mode: reject non-allowlisted network calls; approvals can be processed offline via request/response bundles. +- RBAC scopes: `packs.read`, `packs.write`, `packs.run`, `packs.approve`. +- Approval enforcement: service rejects approval decisions when provided `planHash` does not match stored run state (protects against stale/forged tokens). + +## 8. Evidence & Attestation +- DSSE attestation payload (`payloadType`: `application/vnd.stellaops.pack-run+json`) includes `runId`, `packName/version`, `planHash`, input/output digests, step statuses, `completedAt`. 
+- Evidence bundle contents: signed manifest, inputs (redacted), outputs, transcripts, DSSE attestation; optional Rekor anchoring when online.
+
+## 9. Observability (Phase I delivered)
+- Metrics: step latency, retries, queue depth, resource usage (`TASKRUN-OBS-50/51-001` DONE).
+- Pending: timeline events (`TASKRUN-OBS-52-001`), evidence snapshots (`TASKRUN-OBS-53-001`), attestations (`TASKRUN-OBS-54-001`), incident mode (`TASKRUN-OBS-55-001`).
+
+## 10. Integration Points
+- **Authority** - approval tokens, scope validation, sealed-vault secrets.
+- **Policy Engine** - `gate.policy` decisions, policy context in evidence.
+- **Export Center** - evidence bundles and manifests for offline/air-gapped export.
+- **Orchestrator/CLI** - submission + resume flows; SSE log consumption.
+
+## 11. Configuration (Mongo example)
+```json
+{
+  "TaskRunner": {
+    "Storage": {
+      "Mode": "mongo",
+      "Mongo": {
+        "ConnectionString": "mongodb://127.0.0.1:27017/taskrunner",
+        "Database": "taskrunner",
+        "RunsCollection": "pack_runs",
+        "LogsCollection": "pack_run_logs",
+        "ArtifactsCollection": "pack_artifacts",
+        "ApprovalsCollection": "pack_run_approvals"
+      }
+    }
+  }
+}
+```
+
+## 12. References
+- Product advisory: `docs/product-advisories/29-Nov-2025 - Task Pack Orchestration and Automation.md`.
+- Task Pack spec + authoring + runbook: `docs/task-packs/spec.md`, `docs/task-packs/authoring-guide.md`, `docs/task-packs/runbook.md`.
+- Migration detail: `docs/modules/taskrunner/migrations/pack-run-collections.md`. 
diff --git a/docs/modules/telemetry/AGENTS.md b/docs/modules/telemetry/AGENTS.md index e881d2124..10b88e99b 100644 --- a/docs/modules/telemetry/AGENTS.md +++ b/docs/modules/telemetry/AGENTS.md @@ -15,6 +15,7 @@ Telemetry module captures deployment and operations guidance for the shared obse - [Architecture](./architecture.md) - [Implementation plan](./implementation_plan.md) - [Task board](./TASKS.md) +- [Observability runbook](./operations/observability.md) (offline import friendly) ## How to get started 1. Open sprint file `/docs/implplan/SPRINT_*.md` and locate the stories referencing this module. diff --git a/docs/modules/telemetry/README.md b/docs/modules/telemetry/README.md index 04adca8d3..b7e260160 100644 --- a/docs/modules/telemetry/README.md +++ b/docs/modules/telemetry/README.md @@ -2,7 +2,12 @@ Telemetry module captures deployment and operations guidance for the shared observability stack (collectors, storage, dashboards). -## Responsibilities +## Latest updates (2025-11-30) +- Sprint tracker `docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md` and module `TASKS.md` added to mirror status. +- Observability runbook stub + dashboard placeholder added under `operations/` (offline import). +- Storage/isolation posture references updated; align with platform docs. + +## Responsibilities - Deploy and operate OpenTelemetry collectors for StellaOps services. - Provide storage configuration for Prometheus/Tempo/Loki stacks. - Document smoke tests and offline bootstrapping steps. @@ -22,6 +27,7 @@ Telemetry module captures deployment and operations guidance for the shared obse - Smoke script references (../../ops/devops/telemetry). - Bundle packaging instructions in ops/devops/telemetry. - Sprint 23 console security sign-off (2025-10-27) added the `console-security.json` Grafana board and burn-rate alert pack—ensure environments import the updated dashboards/alerts referenced in `docs/updates/2025-10-27-console-security-signoff.md`. 
+- Observability assets for this sprint: `operations/observability.md` and `operations/dashboards/telemetry-observability.json` (offline import). ## Related resources - ./operations/collector.md diff --git a/docs/modules/telemetry/TASKS.md b/docs/modules/telemetry/TASKS.md new file mode 100644 index 000000000..014240dab --- /dev/null +++ b/docs/modules/telemetry/TASKS.md @@ -0,0 +1,9 @@ +# Telemetry · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| TELEMETRY-DOCS-0001 | DONE (2025-11-30) | Docs Guild | README/architecture refreshed for storage/isolation posture; sprint links added. | +| TELEMETRY-OPS-0001 | DONE (2025-11-30) | Ops Guild | Observability runbook stub + Grafana placeholder added under `operations/`. | +| TELEMETRY-ENG-0001 | DONE (2025-11-30) | Module Team | TASKS board created; statuses mirrored with `docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md`. | + +> Keep this table in lockstep with the sprint Delivery Tracker (TODO/DOING/DONE/BLOCKED updates go to both files). diff --git a/docs/modules/telemetry/implementation_plan.md b/docs/modules/telemetry/implementation_plan.md index 42b45b563..d37c56e60 100644 --- a/docs/modules/telemetry/implementation_plan.md +++ b/docs/modules/telemetry/implementation_plan.md @@ -58,7 +58,12 @@ - **Security:** redaction verification, RBAC/tenant scoping, sealed-mode tests, signed config verification. - **Offline:** capture bundles, transfer, replay, compliance attestation. -## Definition of done -- Collector profiles, storage backends, incident mode, dashboards, CLI, and offline kit delivered with telemetry and documentation. -- Runbooks and SOC handoff packages published; compliance checklists appended. -- ./TASKS.md and ../../TASKS.md updated; imposed rule statements confirmed in documentation. 
+## Definition of done +- Collector profiles, storage backends, incident mode, dashboards, CLI, and offline kit delivered with telemetry and documentation. +- Runbooks and SOC handoff packages published; compliance checklists appended. +- ./TASKS.md and ../../TASKS.md updated; imposed rule statements confirmed in documentation. + +## Sprint alignment (2025-11-30) +- Docs refresh tracked in `docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md`; statuses mirrored in `docs/modules/telemetry/TASKS.md`. +- Observability evidence lives in `operations/observability.md` with Grafana JSON stub under `operations/dashboards/`. +- Keep future doc/ops updates mirrored across sprint, TASKS, and module front doors to avoid drift. diff --git a/docs/modules/telemetry/operations/dashboards/telemetry-observability.json b/docs/modules/telemetry/operations/dashboards/telemetry-observability.json new file mode 100644 index 000000000..9822b1674 --- /dev/null +++ b/docs/modules/telemetry/operations/dashboards/telemetry-observability.json @@ -0,0 +1,6 @@ +{ + "_note": "Placeholder Grafana dashboard stub for Telemetry. Replace panels when metrics endpoints are available; keep offline-import friendly.", + "schemaVersion": 39, + "title": "Telemetry Observability (stub)", + "panels": [] +} diff --git a/docs/modules/telemetry/operations/observability.md b/docs/modules/telemetry/operations/observability.md new file mode 100644 index 000000000..2b5c4305d --- /dev/null +++ b/docs/modules/telemetry/operations/observability.md @@ -0,0 +1,38 @@ +# Telemetry observability runbook (stub · 2025-11-29 demo) + +## Dashboards (offline import) +- Grafana JSON: `docs/modules/telemetry/operations/dashboards/telemetry-observability.json` (import locally; no external data sources assumed). +- Planned panels: collector uptime, scrape errors, ingestion/backlog per tenant, storage retention headroom, query latency p95/p99, and OTLP export errors. 
+ +## Key metrics +- `telemetry_collector_uptime_seconds` — per-collector uptime. +- `telemetry_scrape_failures_total{job}` — scrape failures per job. +- `telemetry_ingest_backlog` — queued spans/logs/metrics awaiting storage. +- `telemetry_storage_retention_percent_used` — storage utilization against retention budget. +- `telemetry_query_latency_seconds_bucket{route}` — API/query latency. +- `telemetry_otlp_export_failures_total{signal}` — OTLP export failures by signal. + +## Logs & traces +- Correlate by `trace_id` and `tenant`; include `collector_id`, `pipeline`, `exporter` fields. +- Traces disabled by default for air-gap; enable by setting OTLP endpoints to on-prem collectors. + +## Health/diagnostics +- `/health/liveness` and `/health/readiness` (collector + storage gateway) check exporter reachability and disk headroom. +- `/status` exposes build version, commit, feature flags; verify against offline bundle manifest. +- Storage probe: `GET /api/storage/usage` (if available) to confirm retention headroom; otherwise rely on Prometheus metrics. + +## Alert hints +- OTLP export failures > 0 over 5m. +- Ingest backlog above threshold (configurable per tenant/workload). +- Query latency p99 > 1s for `/api/query` routes. +- Storage utilization > 85% of retention budget. + +## Offline verification steps +1) Import Grafana JSON locally; point to Prometheus scrape labeled `telemetry`. +2) Run collector smoke: push sample OTLP spans/logs/metrics to local collector and confirm metrics emit in Prometheus. +3) Fetch `/status` and compare commit/version to offline bundle manifest. + +## Evidence locations +- Sprint tracker: `docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md`. +- Module docs: `README.md`, `architecture.md`, `implementation_plan.md`. +- Dashboard stub: `operations/dashboards/telemetry-observability.json`. 
diff --git a/docs/modules/ui/README.md b/docs/modules/ui/README.md index 29a64fd2e..39d723fbd 100644 --- a/docs/modules/ui/README.md +++ b/docs/modules/ui/README.md @@ -2,7 +2,11 @@ The Console presents operator dashboards for scans, policies, VEX evidence, runtime posture, and admin workflows. -## Responsibilities +## Latest updates (2025-11-30) +- Docs refreshed per `docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md`; added observability runbook stub and TASKS mirror. +- Access-control guidance from 2025-11-03 remains valid; ensure Authority scopes are verified before enabling uploads. + +## Responsibilities - Render real-time status for ingestion, scanning, policy, and exports via SSE. - Provide policy editor, SBOM explorer, and advisory views with accessibility compliance. - Integrate with Authority for fresh-auth and scope enforcement. @@ -18,14 +22,16 @@ The Console presents operator dashboards for scans, policies, VEX evidence, runt - Authority for DPoP-protected calls. - Telemetry streams for observability dashboards. -## Operational notes -- Auth smoke tests in ./operations/auth-smoke.md. -- Console architecture doc for layout and SSE fan-out. -- Accessibility and security guides in ../../ui/ & ../../security/. +## Operational notes +- Auth smoke tests in `operations/auth-smoke.md`. +- Observability runbook + dashboard stub in `operations/observability.md` and `operations/dashboards/console-ui-observability.json` (offline import). +- Console architecture doc for layout and SSE fan-out. +- Accessibility and security guides in ../../ui/ & ../../security/. -## Related resources -- ./operations/auth-smoke.md -- ./console-architecture.md +## Related resources +- ./operations/auth-smoke.md +- ./operations/observability.md +- ./console-architecture.md ## Backlog references - DOCS-CONSOLE-23-001 … DOCS-CONSOLE-23-003 baseline (done). 
diff --git a/docs/modules/ui/TASKS.md b/docs/modules/ui/TASKS.md
new file mode 100644
index 000000000..9559eaa82
--- /dev/null
+++ b/docs/modules/ui/TASKS.md
@@ -0,0 +1,9 @@
+# Console UI · TASKS (status mirror)
+
+| Task ID | Status | Owner(s) | Notes / Evidence |
+| --- | --- | --- | --- |
+| CONSOLE-UI-DOCS-0001 | DONE (2025-11-30) | Docs Guild | README/architecture updated; sprint links and observability references added. |
+| CONSOLE-UI-ENG-0001 | DONE (2025-11-30) | Module Team | TASKS board created; statuses mirrored with `docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md`. |
+| CONSOLE-UI-OPS-0001 | DONE (2025-11-30) | Ops Guild | Observability runbook stub + Grafana JSON placeholder added under `operations/`. |
+
+> Keep this table in lockstep with the sprint Delivery Tracker (TODO/DOING/DONE/BLOCKED updates go to both places).
diff --git a/docs/modules/ui/architecture.md b/docs/modules/ui/architecture.md
index f0d1f319d..1e75456ba 100644
--- a/docs/modules/ui/architecture.md
+++ b/docs/modules/ui/architecture.md
@@ -28,8 +28,9 @@
 * **State**: Angular **Signals** + `@ngrx/signals` store for cross‑page slices.
 * **Transport**: `fetch` + RxJS interop; **SSE** (EventSource) for progress streams.
 * **Build**: Angular CLI + Vite builder.
-* **Testing**: Jest + Testing Library, Playwright for e2e.
-* **Packaging**: Containerized NGINX (immutable assets, ETag + content hashing).
+* **Testing**: Jest + Testing Library, Playwright for e2e.
+* **Packaging**: Containerized NGINX (immutable assets, ETag + content hashing).
+* **Observability docs**: runbook + Grafana JSON stub in `operations/observability.md` and `operations/dashboards/console-ui-observability.json` (offline import). 
--- diff --git a/docs/modules/ui/implementation_plan.md b/docs/modules/ui/implementation_plan.md index 5996f8177..ca15bf504 100644 --- a/docs/modules/ui/implementation_plan.md +++ b/docs/modules/ui/implementation_plan.md @@ -16,10 +16,15 @@ - **Epic 6 – Vulnerability Explorer:** surface triage dashboards, findings ledger, and audit exports. - **Epic 8 – Advisory AI:** integrate advisory summaries and remediation hints with strict provenance. - **Epic 9 – Orchestrator Dashboard:** expose job/source monitoring controls. -- **Epic 11 – Notifications Studio:** provide notifications workspace with previews and audit trails. -- Track supporting tasks (e.g., DOCS-CONSOLE-23-001, CONSOLE-OBS-52-001) in ../../TASKS.md. - -## Coordination -- Review ./AGENTS.md before picking up new work. -- Sync with cross-cutting teams noted in `/docs/implplan/SPRINT_*.md`. -- Update this plan whenever scope, dependencies, or guardrails change. +- **Epic 11 – Notifications Studio:** provide notifications workspace with previews and audit trails. +- Track supporting tasks (e.g., DOCS-CONSOLE-23-001, CONSOLE-OBS-52-001) in ../../TASKS.md. + +## Coordination +- Review ./AGENTS.md before picking up new work. +- Sync with cross-cutting teams noted in `/docs/implplan/SPRINT_*.md`. +- Update this plan whenever scope, dependencies, or guardrails change. + +## Sprint alignment (2025-11-30) +- Current doc refresh tracked in `docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md`; statuses mirrored in `docs/modules/ui/TASKS.md`. +- Observability evidence for latest demo lives in `operations/observability.md` with Grafana JSON stub under `operations/dashboards/`. +- Keep future doc/ops changes mirrored across sprint, TASKS, and module front doors to prevent drift. 
diff --git a/docs/modules/ui/operations/dashboards/console-ui-observability.json b/docs/modules/ui/operations/dashboards/console-ui-observability.json new file mode 100644 index 000000000..20ae9890d --- /dev/null +++ b/docs/modules/ui/operations/dashboards/console-ui-observability.json @@ -0,0 +1,6 @@ +{ + "_note": "Placeholder Grafana dashboard stub for Console UI. Replace panels when metrics endpoints are wired; keep offline-import friendly.", + "schemaVersion": 39, + "title": "Console UI Observability (stub)", + "panels": [] +} diff --git a/docs/modules/ui/operations/observability.md b/docs/modules/ui/operations/observability.md new file mode 100644 index 000000000..d07a89b5b --- /dev/null +++ b/docs/modules/ui/operations/observability.md @@ -0,0 +1,38 @@ +# Console UI observability runbook (stub · 2025-11-29 demo) + +## Dashboards (offline import) +- Grafana JSON: `docs/modules/ui/operations/dashboards/console-ui-observability.json` (import locally; no external data sources assumed). +- Panels to include: API latency (p95/p99), error rate, WebSocket/SSE connection count, asset load time, bundle size budget, Core Web Vitals (LCP/FID/CLS), and triage view render time. + +## Key metrics +- `console_ui_http_request_duration_seconds_bucket{route}` — API call latency. +- `console_ui_http_requests_total{status}` — error rate tracking. +- `console_ui_websocket_connections` — active live session count. +- `console_ui_bundle_bytes{chunk}` — bundle size by chunk (ensures offline kit budget). +- `console_ui_core_web_vitals{metric}` — LCP/FID/CLS gauges. +- `console_ui_export_duration_seconds_bucket` — export trigger to download completion. + +## Logs & traces +- Correlate by `correlationId` (propagated from API) and `tenant`. Include `feature` (triage, findings, policy) and `route` fields. +- Traces disabled by default for air-gap; enable by pointing OTLP endpoint to on-prem collector and setting `Telemetry:ExportEnabled=true`. 
+ +## Health/diagnostics +- `/health/liveness` and `/health/readiness` (UI backend) must return 200; readiness checks asset storage + API gateway reachability. +- `/status` exposes build version, commit, feature flags; ensure it matches the offline bundle manifest when shipping sealed kits. +- Frontend self-check: open `/health/ui` to verify core bundles are reachable and integrity hashes match manifest. + +## Alert hints +- p99 API latency > 1s for `/api/findings` or `/api/policy`. +- SSE/WS disconnect rate > 2% over 5m window. +- Bundle size > 3.5 MB for main chunk after gzip (offline kit budget breach). +- Core Web Vitals: LCP > 2.5s, CLS > 0.1 on internal demo dataset. + +## Offline verification steps +1) Import Grafana JSON locally; point to Prometheus scrape labeled `console-ui`. +2) Run `npm run build -- --configuration=production` (or offline kit build) and verify bundle hashes against manifest used by `/health/ui`. +3) Fetch `/status` and compare commit/version to the static asset manifest embedded in the offline kit. + +## Evidence locations +- Sprint tracker: `docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md`. +- Module front doors: `README.md`, `architecture.md`, `implementation_plan.md`. +- Dashboard stub: `operations/dashboards/console-ui-observability.json`. diff --git a/docs/modules/vex-lens/AGENTS.md b/docs/modules/vex-lens/AGENTS.md index 06dafcaae..d751e95b2 100644 --- a/docs/modules/vex-lens/AGENTS.md +++ b/docs/modules/vex-lens/AGENTS.md @@ -8,6 +8,7 @@ VEX Lens computes deterministic consensus across conflicting VEX statements whil - [Architecture](./architecture.md) - [Implementation plan](./implementation_plan.md) - [Task board](./TASKS.md) +- [Observability runbook](./runbooks/observability.md) (offline import friendly) ## How to get started 1. Review ./architecture.md for consensus algorithm, trust model, and export contracts. 
diff --git a/docs/modules/vex-lens/README.md b/docs/modules/vex-lens/README.md index 51dc7fffb..013b68df8 100644 --- a/docs/modules/vex-lens/README.md +++ b/docs/modules/vex-lens/README.md @@ -27,6 +27,10 @@ VEX Lens produces a deterministic, provenance-rich consensus view of VEX stateme - `VEXLENS-30-005..007` — expose consensus APIs and export flows, aligning docs with future `/docs/vex/consensus-*.md` deliverables. - `DOCS-VEX-30-001..004` — broader documentation set (overview, algorithm, API) tracked in `docs/TASKS.md`; update this README again once those artefacts merge. +## Latest updates (2025-11-30) +- Docs refresh per `docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md`; added observability runbook stub and TASKS mirror. +- Observability assets: `runbooks/observability.md` and `runbooks/dashboards/vex-lens-observability.json` (offline import). + ## Integrations & dependencies - **Excititor** supplies signature-verified VEX observations and issuer hints. - **Policy Engine** consumes consensus verdicts for suppression/waiver logic and exposes trust controls to operators. @@ -44,7 +48,7 @@ VEX Lens produces a deterministic, provenance-rich consensus view of VEX stateme - [`architecture.md`](architecture.md) — implementation-ready blueprint covering inputs, algorithm, APIs, storage, observability, and exports. - [`implementation_plan.md`](implementation_plan.md) — phased delivery roadmap and acceptance criteria. - [`../../vex/aggregation.md`](../../vex/aggregation.md) — Aggregation-Only Contract boundaries for VEX ingestion and downstream consumers. -- Sprint tracking in `docs/implplan/SPRINT_200_documentation_process.md` and module engineering tasks in `src/VexLens/StellaOps.VexLens/TASKS.md`. +- Sprint tracking in `docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md`; module engineering tasks in `src/VexLens/StellaOps.VexLens/TASKS.md`; doc TASKS mirror in `docs/modules/vex-lens/TASKS.md`. 
## Epic alignment - Epic 7 — VEX Consensus Lens: deterministic VEX adjudication, issuer directory, consensus exports. diff --git a/docs/modules/vex-lens/TASKS.md b/docs/modules/vex-lens/TASKS.md new file mode 100644 index 000000000..b1c8f0262 --- /dev/null +++ b/docs/modules/vex-lens/TASKS.md @@ -0,0 +1,9 @@ +# VEX Lens · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| VEX-CONSENSUS-LENS-DOCS-0001 | DONE (2025-11-30) | Docs Guild | README/architecture refresh with consensus workflow and release links; sprint references added. | +| VEX-LENS-OPS-0001 | DONE (2025-11-30) | Ops Guild | Observability runbook stub + Grafana JSON placeholder added under `runbooks/`. | +| VEX-LENS-ENG-0001 | DONE (2025-11-30) | Module Team | TASKS board created; statuses mirrored with `docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md`. | + +> Keep this table in lockstep with the sprint Delivery Tracker (TODO/DOING/DONE/BLOCKED updates go to both places). diff --git a/docs/modules/vex-lens/architecture.md b/docs/modules/vex-lens/architecture.md index db104969c..97ee49099 100644 --- a/docs/modules/vex-lens/architecture.md +++ b/docs/modules/vex-lens/architecture.md @@ -71,13 +71,14 @@ All responses include provenance fields (`consensus_digest`, `derived_from`, DSS - Recompute jobs run via Orchestrator; deterministic ordering ensures identical results for the same input set. - Jobs produce SRM-style manifests for recomputation verification. -## 7) Observability - -- Metrics: `vex_consensus_conflicts_total`, `vex_consensus_latency_seconds`, `vex_consensus_recompute_seconds{reason}`. -- Logs: include `artifactId`, `advisoryKey`, `issuer`, `status`, `trustTier`. -- Traces: `consensus.group`, `consensus.join`, `consensus.persist` spans. - -## 8) Offline & export - -- Bundle format: `consensus.jsonl`, `conflicts.jsonl`, `manifest.json`, `signatures/`. Each record references raw statement digests and trust metadata. 
-- Export Center uses the bundle for mirror profiles; CLI supports `stella vex consensus export` mirroring the API. +## 7) Observability + +- Metrics: `vex_consensus_conflicts_total`, `vex_consensus_latency_seconds`, `vex_consensus_recompute_seconds{reason}`. +- Logs: include `artifactId`, `advisoryKey`, `issuer`, `status`, `trustTier`. +- Traces: `consensus.group`, `consensus.join`, `consensus.persist` spans. +- Runbook + dashboard stub (offline import): `runbooks/observability.md`, `runbooks/dashboards/vex-lens-observability.json`. + +## 8) Offline & export + +- Bundle format: `consensus.jsonl`, `conflicts.jsonl`, `manifest.json`, `signatures/`. Each record references raw statement digests and trust metadata. +- Export Center uses the bundle for mirror profiles; CLI supports `stella vex consensus export` mirroring the API. diff --git a/docs/modules/vex-lens/implementation_plan.md b/docs/modules/vex-lens/implementation_plan.md index 5de9af176..27f2e1f7b 100644 --- a/docs/modules/vex-lens/implementation_plan.md +++ b/docs/modules/vex-lens/implementation_plan.md @@ -61,3 +61,8 @@ - Lens service, issuer directory, API/CLI/Console components deployed with telemetry and runbooks. - Documentation set (overview, algorithm, issuer directory, API, console, policy trust) updated with imposed rule statements. - ./TASKS.md and ../../TASKS.md reflect current status; Offline Kit parity confirmed. + +## Sprint alignment (2025-11-30) +- Docs refresh tracked in `docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md`; statuses mirrored in `docs/modules/vex-lens/TASKS.md`. +- Observability evidence lives in `runbooks/observability.md` with Grafana JSON stub under `runbooks/dashboards/`. +- Keep future doc/ops updates mirrored across sprint, TASKS, and module front doors to avoid drift. 
diff --git a/docs/modules/vex-lens/runbooks/dashboards/vex-lens-observability.json b/docs/modules/vex-lens/runbooks/dashboards/vex-lens-observability.json new file mode 100644 index 000000000..bef4096f8 --- /dev/null +++ b/docs/modules/vex-lens/runbooks/dashboards/vex-lens-observability.json @@ -0,0 +1,6 @@ +{ + "_note": "Placeholder Grafana dashboard stub for VEX Lens. Replace panels when metrics endpoints are available; keep offline-import friendly.", + "schemaVersion": 39, + "title": "VEX Lens Observability (stub)", + "panels": [] +} diff --git a/docs/modules/vex-lens/runbooks/observability.md b/docs/modules/vex-lens/runbooks/observability.md new file mode 100644 index 000000000..f464e260f --- /dev/null +++ b/docs/modules/vex-lens/runbooks/observability.md @@ -0,0 +1,38 @@ +# VEX Lens observability runbook (stub · 2025-11-29 demo) + +## Dashboards (offline import) +- Grafana JSON: `docs/modules/vex-lens/runbooks/dashboards/vex-lens-observability.json` (import locally; no external data sources assumed). +- Planned panels: consensus latency, conflict backlog, recompute duration, issuer trust changes, export job success rate, and DSSE verification failures. + +## Key metrics +- `vex_consensus_latency_seconds_bucket` — latency from observation intake to consensus write. +- `vex_conflict_queue_depth` — size of unresolved conflict queue. +- `vex_recompute_duration_seconds_bucket{reason}` — recompute times by trigger (issuer update, policy knob, ingestion delta). +- `vex_export_duration_seconds_bucket` — export job runtime. +- `vex_dsse_verification_failures_total` — failed attestations during export/ingest. +- `vex_consensus_conflicts_total{reason}` — conflict counts by reason (status disagreement, scope mismatch, missing provenance). + +## Logs & traces +- Correlate by `correlationId`, `artifactKey`, `advisoryKey`, and `issuer`. Include `trustTier`, `weightBefore`, `weightAfter`, and `justification` fields for audits. 
+- Traces disabled by default for air-gap; enable by setting `Telemetry:ExportEnabled=true` and pointing OTLP endpoint to on-prem collector. + +## Health/diagnostics +- `/health/liveness` and `/health/readiness` (service) must return 200; readiness checks Mongo + cache + event bus reachability. +- `/status` exposes build version, commit, feature flags; verify it matches offline bundle manifest. +- Export self-check: run `stella vex export --format json --manifest out/manifest.json` and validate hashes against manifest entries. + +## Alert hints +- Consensus latency p99 > 1.5s over 5m. +- Conflict queue depth > 500 for any tenant. +- DSSE verification failures > 0 in a 10m window. +- Export failure rate > 2% over 10m. + +## Offline verification steps +1) Import Grafana JSON locally; point to Prometheus scrape labeled `vex-lens`. +2) Run export CLI above and verify `manifest.json` hashes via `jq -r '.files[].sha256'`. +3) Fetch `/status` and confirm commit/version match the exported manifest and offline kit bundle metadata. + +## Evidence locations +- Sprint tracker: `docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md`. +- Module docs: `README.md`, `architecture.md`, `implementation_plan.md`. +- Dashboard stub: `runbooks/dashboards/vex-lens-observability.json`. diff --git a/docs/modules/vuln-explorer/README.md b/docs/modules/vuln-explorer/README.md index 79e59ba3a..f6bd2d02d 100644 --- a/docs/modules/vuln-explorer/README.md +++ b/docs/modules/vuln-explorer/README.md @@ -2,8 +2,10 @@ Vulnerability Explorer delivers policy-aware triage, investigation, and reporting surfaces for effective findings. -## Latest updates (2025-11-03) -- Access controls refresh introduced attachment signing tokens and updated scope guidance (`docs/updates/2025-11-03-vuln-explorer-access-controls.md`). Ensure operator runbooks reference the new Authority scopes (`authority-scopes.md`) and security checklist before enabling attachment uploads. 
+## Latest updates (2025-11-30) +- Documentation refresh aligned to sprint 0334: added observability/runbook snapshot and cross-links to OpenAPI draft (`./api.md`) and schemas in `architecture.md`. +- New offline-friendly observability runbook at `runbooks/observability.md` plus stub Grafana JSON in `runbooks/dashboards/`. +- Retained 2025-11-03 access-control changes; verify Authority scopes before enabling attachment uploads (`docs/updates/2025-11-03-vuln-explorer-access-controls.md`). ## Responsibilities - Present policy-evaluated findings with advisory, VEX, SBOM, and runtime context. @@ -22,10 +24,11 @@ Vulnerability Explorer delivers policy-aware triage, investigation, and reportin - Scheduler for remediation/verification jobs. - Notify for triage notifications. -## Operational notes -- Audit logging per Epic 6 requirements. -- Offline-ready CSV/PDF exports with deterministic hashes. -- Dashboards for MTTR and triage throughput. +## Operational notes +- Audit logging per Epic 6 requirements. +- Offline-ready CSV/PDF exports with deterministic hashes. +- Dashboards for MTTR and triage throughput. +- Observability runbook and dashboard stub: see `runbooks/observability.md` and `runbooks/dashboards/vuln-explorer-observability.json` (import locally). ## Epic alignment - Epic 6: Vulnerability Explorer. diff --git a/docs/modules/vuln-explorer/TASKS.md b/docs/modules/vuln-explorer/TASKS.md new file mode 100644 index 000000000..475f0d2a1 --- /dev/null +++ b/docs/modules/vuln-explorer/TASKS.md @@ -0,0 +1,9 @@ +# Vuln Explorer · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| VULNERABILITY-EXPLORER-DOCS-0001 | DONE (2025-11-30) | Docs Guild | README/architecture updated; OpenAPI/schema/sprint links added; runbook evidence captured. | +| VULNERABILITY-EXPLORER-OPS-0001 | DONE (2025-11-30) | Ops Guild | Observability runbook + dashboard stub added; health/alert guidance documented. 
| +| VULNERABILITY-EXPLORER-ENG-0001 | DONE (2025-11-30) | Module Team | Sprint alignment notes added to implementation_plan; task mirror created. | + +> Status must mirror `/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md` (DOING/DONE/BLOCKED updates go to both files). diff --git a/docs/modules/vuln-explorer/architecture.md b/docs/modules/vuln-explorer/architecture.md index 3fe25469e..2bee6f48a 100644 --- a/docs/modules/vuln-explorer/architecture.md +++ b/docs/modules/vuln-explorer/architecture.md @@ -55,6 +55,7 @@ CLI mirrors these endpoints (`stella findings list|view|update|export`). Console - Logs: structured with `findingId`, `artifactId`, `advisory`, `policyVersion`, `actor`, `actionType`. - Audit exports: `audit_log.jsonl` appended whenever state changes; offline bundles include signed audit log and manifest. - Compliance: accepted risk requires dual approval and stores justification plus expiry reminders (raised through Notify). +- Runbook and dashboard stub for demo snapshot: `runbooks/observability.md` and `runbooks/dashboards/vuln-explorer-observability.json` (offline import). ## 6) Identity & access integration diff --git a/docs/modules/vuln-explorer/implementation_plan.md b/docs/modules/vuln-explorer/implementation_plan.md index 30dbfa925..c3b8882f2 100644 --- a/docs/modules/vuln-explorer/implementation_plan.md +++ b/docs/modules/vuln-explorer/implementation_plan.md @@ -68,3 +68,8 @@ - Services, UI/CLI, integrations, exports, and observability deployed with runbooks and Offline Kit parity. - Documentation suite (overview, using-console, API, CLI, findings ledger, policy mapping, VEX/SBOM integration, telemetry, security, runbooks, install) updated with imposed rule statement. - ./TASKS.md and ../../TASKS.md reflect active progress; compliance checklists appended where required. + +## Sprint alignment (2025-11-30) +- Docs refresh tracked in `docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md` (Docs/OPS/ENG streams). 
+- Observability runbook evidence lives under `runbooks/observability.md` with stub dashboard JSON for offline demos. +- API/OpenAPI references: `api.md` and `openapi/vuln-explorer.v1.yaml` are the current contract drafts; keep Console/CLI generators synced to these. diff --git a/docs/modules/vuln-explorer/runbooks/dashboards/vuln-explorer-observability.json b/docs/modules/vuln-explorer/runbooks/dashboards/vuln-explorer-observability.json new file mode 100644 index 000000000..cf86c141a --- /dev/null +++ b/docs/modules/vuln-explorer/runbooks/dashboards/vuln-explorer-observability.json @@ -0,0 +1,6 @@ +{ + "_note": "Placeholder Grafana dashboard stub for offline import. Populate with panel definitions when metrics endpoints are available; see runbooks/observability.md for expected panels.", + "schemaVersion": 39, + "title": "Vuln Explorer Observability (stub)", + "panels": [] +} diff --git a/docs/modules/vuln-explorer/runbooks/observability.md b/docs/modules/vuln-explorer/runbooks/observability.md new file mode 100644 index 000000000..16672127f --- /dev/null +++ b/docs/modules/vuln-explorer/runbooks/observability.md @@ -0,0 +1,38 @@ +# Vuln Explorer observability runbook (demo snapshot · 2025-11-29) + +## Dashboards (offline-friendly) +- Grafana JSON: `docs/modules/vuln-explorer/runbooks/dashboards/vuln-explorer-observability.json` (import locally; no external data sources assumed). +- Panels: projection lag, open findings by severity/tenant, accepted-risk ageing, API 5xx rate, export duration p95, ledger replay backlog. + +## Key metrics +- `vuln_projection_lag_seconds{tenant}` – seconds between latest ledger event and projector head. +- `vuln_findings_open_total{severity,tenant}` – count of open findings by severity. +- `vuln_export_duration_seconds_bucket` – histogram for export job runtime. +- `vuln_projection_backlog_total` – queued events awaiting projection. 
+- `vuln_triage_actions_total{type}` – immutable triage actions (assign, comment, risk_accept, remediation_note). +- `vuln_api_request_duration_seconds_bucket{route}` – API latency for `GET /v1/findings*` and `POST /v1/reports`. + +## Logs & traces +- Correlate by `correlationId` and `findingId`. Structured fields: `tenant`, `advisoryKey`, `policyVersion`, `projectId`, `route`. +- Trace exemplar anchors: `traceparent` headers are copied into logs; exporters stay disabled by default for air-gap. Enable by setting `Telemetry:ExportEnabled=true` and pointing to on-prem Tempo/Jaeger. + +## Health/diagnostics +- `/health/liveness` and `/health/readiness` (HTTP 200 expected; readiness checks Mongo + cache reachability). +- `/status` returns build version, git commit, and enabled features; safe for anonymous fetch in sealed environments. +- Ledger replay check: `GET /v1/findings?projectionMode=verify` emits `X-Vuln-Projection-Head` for quick consistency probes. + +## Alert hints (wire to local Alertmanager or watchdog) +- Projection lag > 120s for any tenant. +- API p99 latency > 800ms for `GET /v1/findings` or `POST /v1/reports`. +- Export failure rate > 2% over 10m window. +- Accepted-risk approaching expiry within 7d (emit Notify event `vuln.accepted_risk.expiring`). + +## Offline verification steps +1) Import Grafana JSON locally and point to Prometheus scrape job `vuln-explorer`. +2) Run `stella vuln export --format json --manifest out/manifest.json` and validate hashes using `jq -r '.files[].sha256'` against generated bundle. +3) Use `curl -s "$BASEURL/status" | jq '{commit,version,features}'` to confirm expected build metadata matches the exported bundle manifest. + +## Evidence locations +- Sprint alignment: `docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md`. +- API contract draft: `docs/modules/vuln-explorer/api.md` and OpenAPI at `docs/modules/vuln-explorer/openapi/vuln-explorer.v1.yaml`. 
+- Schema references: `docs/modules/vuln-explorer/architecture.md` (ledger model, VEX decision schemas). diff --git a/docs/modules/zastava/AGENTS.md b/docs/modules/zastava/AGENTS.md index 4cff64825..0b39a15d7 100644 --- a/docs/modules/zastava/AGENTS.md +++ b/docs/modules/zastava/AGENTS.md @@ -15,6 +15,7 @@ Zastava monitors running workloads, verifies supply chain posture, and enforces - [Architecture](./architecture.md) - [Implementation plan](./implementation_plan.md) - [Task board](./TASKS.md) +- [Observability runbook](./operations/observability.md) (offline import friendly) ## How to get started 1. Open sprint file `/docs/implplan/SPRINT_*.md` and locate the stories referencing this module. diff --git a/docs/modules/zastava/README.md b/docs/modules/zastava/README.md index 7cf45c386..ccfaddb0e 100644 --- a/docs/modules/zastava/README.md +++ b/docs/modules/zastava/README.md @@ -2,7 +2,12 @@ Zastava monitors running workloads, verifies supply chain posture, and enforces runtime policy via Kubernetes admission webhooks. -## Responsibilities +## Latest updates (2025-11-30) +- Sprint tracker `docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md` and module `TASKS.md` added to mirror status. +- Observability runbook stub + dashboard placeholder added under `operations/` (offline import). +- Surface.Env/Surface.Secrets adoption remains pending platform contracts; align with platform docs before enabling sealed mode. + +## Responsibilities - Observe node/container activity and emit runtime events. - Validate signatures, SBOM presence, and backend verdicts before allowing containers. - Buffer and replay events during disconnections. @@ -18,10 +23,10 @@ Zastava monitors running workloads, verifies supply chain posture, and enforces - Scanner/Scheduler for remediation triggers. - Notify/UI for runtime alerts and dashboards. -## Operational notes -- Runbook ./operations/runtime.md with Grafana/Prometheus assets. -- Offline kit assets bundling webhook charts. 
-- DPoP/mTLS rotation guidance shared with Authority. +## Operational notes +- Runbook `./operations/observability.md` (stub) plus dashboard placeholder `./operations/dashboards/zastava-observability.json`. +- Legacy runtime runbook assets remain under ./operations if present; keep offline kit bundles deterministic. +- DPoP/mTLS rotation guidance shared with Authority. ## Related resources - ./operations/runtime.md diff --git a/docs/modules/zastava/TASKS.md b/docs/modules/zastava/TASKS.md new file mode 100644 index 000000000..4ba0e603b --- /dev/null +++ b/docs/modules/zastava/TASKS.md @@ -0,0 +1,9 @@ +# Zastava · TASKS (status mirror) + +| Task ID | Status | Owner(s) | Notes / Evidence | +| --- | --- | --- | --- | +| ZASTAVA-DOCS-0001 | DONE (2025-11-30) | Docs Guild | README/architecture refreshed; Surface Env/Secrets and sprint links added. | +| ZASTAVA-ENG-0001 | DONE (2025-11-30) | Module Team | TASKS board created; statuses mirrored with `docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md`. | +| ZASTAVA-OPS-0001 | DONE (2025-11-30) | Ops Guild | Observability runbook stub + Grafana JSON placeholder added under `operations/`. | + +> Keep this table in lockstep with the sprint Delivery Tracker (TODO/DOING/DONE/BLOCKED updates go to both places). diff --git a/docs/modules/zastava/architecture.md b/docs/modules/zastava/architecture.md index 4bee85e9b..730e24e29 100644 --- a/docs/modules/zastava/architecture.md +++ b/docs/modules/zastava/architecture.md @@ -486,11 +486,20 @@ webhooks: --- -## 15) Roadmap - -* **eBPF** option for syscall/library load tracing (kernel‑level, opt‑in). -* **Windows containers** support (ETW providers, loaded modules). -* **Network posture** checks: listening ports vs policy. -* **Live **used‑by‑entrypoint** synthesis**: send compact bitset diff to backend to tighten Usage view. -* **Admission dry‑run** dashboards (simulate block lists before enforcing). 
+## 15) Roadmap + +* **eBPF** option for syscall/library load tracing (kernel‑level, opt‑in). +* **Windows containers** support (ETW providers, loaded modules). +* **Network posture** checks: listening ports vs policy. +* **Live **used‑by‑entrypoint** synthesis**: send compact bitset diff to backend to tighten Usage view. +* **Admission dry‑run** dashboards (simulate block lists before enforcing). + +--- + +## 16) Observability (stub) + +- Runbook + dashboard placeholder for offline import: `operations/observability.md`, `operations/dashboards/zastava-observability.json`. +- Metrics to surface: admission latency p95/p99, allow/deny counts, Surface.Env miss rate, Surface.Secrets failures, Surface.FS cache freshness, drift events. +- Health endpoints: `/health/liveness`, `/health/readiness`, `/status`, `/surface/fs/cache/status` (see runbook). +- Alert hints: deny spikes, latency > 800ms p99, cache freshness lag > 10m, any secrets failure. diff --git a/docs/modules/zastava/implementation_plan.md b/docs/modules/zastava/implementation_plan.md index ae87c4c51..e854176ea 100644 --- a/docs/modules/zastava/implementation_plan.md +++ b/docs/modules/zastava/implementation_plan.md @@ -13,7 +13,12 @@ - ZASTAVA runtime tasks in ../../TASKS.md. - Webhook smoke tests tracked in src/Zastava/**/TASKS.md. -## Coordination -- Review ./AGENTS.md before picking up new work. -- Sync with cross-cutting teams noted in `/docs/implplan/SPRINT_*.md`. -- Update this plan whenever scope, dependencies, or guardrails change. +## Coordination +- Review ./AGENTS.md before picking up new work. +- Sync with cross-cutting teams noted in `/docs/implplan/SPRINT_*.md`. +- Update this plan whenever scope, dependencies, or guardrails change. + +## Sprint alignment (2025-11-30) +- Docs refresh tracked in `docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md`; statuses mirrored in `docs/modules/zastava/TASKS.md`. 
+- Observability evidence lives in `operations/observability.md` with Grafana JSON stub under `operations/dashboards/`. +- Surface.Env/Surface.Secrets contracts remain dependencies; align with platform docs before enabling sealed mode. diff --git a/docs/modules/zastava/operations/dashboards/zastava-observability.json b/docs/modules/zastava/operations/dashboards/zastava-observability.json new file mode 100644 index 000000000..dc46308e7 --- /dev/null +++ b/docs/modules/zastava/operations/dashboards/zastava-observability.json @@ -0,0 +1,6 @@ +{ + "_note": "Placeholder Grafana dashboard stub for Zastava. Replace panels when metrics endpoints are available; keep offline-import friendly.", + "schemaVersion": 39, + "title": "Zastava Observability (stub)", + "panels": [] +} diff --git a/docs/modules/zastava/operations/observability.md b/docs/modules/zastava/operations/observability.md new file mode 100644 index 000000000..f409f617d --- /dev/null +++ b/docs/modules/zastava/operations/observability.md @@ -0,0 +1,38 @@ +# Zastava observability runbook (stub · 2025-11-29 demo) + +## Dashboards (offline import) +- Grafana JSON: `docs/modules/zastava/operations/dashboards/zastava-observability.json` (import locally; no external data sources assumed). +- Planned panels: admission decision rate, webhook latency p95/p99, cache freshness (Surface.FS), Surface.Env key misses, Secrets fetch failures, policy violation counts, and drift events. + +## Key metrics +- `zastava_admission_latency_seconds_bucket{webhook}` — admission webhook latency. +- `zastava_admission_decisions_total{result}` — allow/deny counts. +- `zastava_surface_env_miss_total` — Surface.Env key misses. +- `zastava_surface_secrets_failures_total{reason}` — secret retrieval failures. +- `zastava_surface_fs_cache_freshness_seconds` — cache age vs Scanner surface metadata. +- `zastava_drift_events_total{type}` — drift detections by category. 
+ +## Logs & traces +- Correlate by `correlationId`, `tenant`, `cluster`, and `admissionId`. Include `policyVersion`, `surfaceEnvProfile`, and `secretsProvider` fields. +- Traces disabled by default for air-gap; enable via `Telemetry:ExportEnabled=true` pointing to on-prem collector. + +## Health/diagnostics +- `/health/liveness` and `/health/readiness` (webhook + observer) check cache reachability, Secrets provider connectivity, and policy fetch. +- `/status` exposes build version, commit, feature flags; verify against offline bundle manifest. +- Cache probe: `GET /surface/fs/cache/status` returns freshness and hash for cached surfaces. + +## Alert hints +- Admission latency p99 > 800ms. +- Deny rate spike > 5% over 10m without policy change. +- Surface.Env miss rate > 1% or Secrets failure > 0 over 10m. +- Cache freshness > 10m behind Scanner surface metadata. + +## Offline verification steps +1) Import Grafana JSON locally; point to Prometheus scrape labeled `zastava`. +2) Replay a sealed admission bundle and verify `/status` + cache probe hashes match the manifest in the offline kit. +3) Run webhook smoke (`kubectl apply --dry-run=server -f samples/admission-request.yaml`) and confirm metrics increment locally. + +## Evidence locations +- Sprint tracker: `docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md`. +- Module docs: `README.md`, `architecture.md`, `implementation_plan.md`. +- Dashboard stub: `operations/dashboards/zastava-observability.json`. diff --git a/ops/devops/airgap/README.md b/ops/devops/airgap/README.md new file mode 100644 index 000000000..e033754c8 --- /dev/null +++ b/ops/devops/airgap/README.md @@ -0,0 +1,13 @@ +# Air-gap Egress Guard Rails + +Artifacts supporting `DEVOPS-AIRGAP-56-001`: + +- `k8s-deny-egress.yaml` — NetworkPolicy template that denies all egress for pods labeled `sealed=true`, except optional in-cluster DNS when enabled. 
+- `compose-egress-guard.sh` — Idempotent iptables guard for Docker/compose using the `DOCKER-USER` chain to drop all outbound traffic from a compose project network while allowing loopback and RFC1918 intra-cluster ranges. +- `verify-egress-block.sh` — Verification harness that runs curl probes from Docker or Kubernetes and reports JSON results; exits non-zero if any target is reachable. +- `bundle_stage_import.py` — Deterministic bundle staging helper: validates sha256 manifest, copies bundles to staging dir as `-`, emits `staging-report.json` for evidence. +- `stage-bundle.sh` — Thin wrapper around `bundle_stage_import.py` with positional args. +- `build_bootstrap_pack.py` — Builds a Bootstrap Pack from images/charts/extras listed in a JSON config, writing `bootstrap-manifest.json` + `checksums.sha256` deterministically. +- `build_bootstrap_pack.sh` — Wrapper for the bootstrap pack builder. + +See also `ops/devops/sealed-mode-ci/` for the full sealed-mode compose harness and `egress_probe.py`, which this verification script wraps. diff --git a/ops/devops/airgap/build_bootstrap_pack.py b/ops/devops/airgap/build_bootstrap_pack.py new file mode 100644 index 000000000..5ec1a5c72 --- /dev/null +++ b/ops/devops/airgap/build_bootstrap_pack.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +"""Build a deterministic Bootstrap Pack bundle for sealed/offline transfer. + +- Reads a JSON config listing artefacts to include (images, Helm charts, extras). +- Copies artefacts into an output directory with preserved basenames. +- Generates `bootstrap-manifest.json` and `checksums.sha256` with sha256 hashes + and sizes for evidence/verification. +- Intended to satisfy DEVOPS-AIRGAP-56-003. 
+ +Config schema (JSON): +{ + "name": "bootstrap-pack", + "images": ["release/containers/taskrunner.tar", "release/containers/orchestrator.tar"], + "charts": ["deploy/helm/stella.tgz"], + "extras": ["docs/24_OFFLINE_KIT.md"] +} + +Usage: + build_bootstrap_pack.py --config bootstrap.json --output out/bootstrap-pack + build_bootstrap_pack.py --self-test +""" +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import shutil +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Tuple + +DEFAULT_NAME = "bootstrap-pack" + + +def sha256_file(path: Path) -> Tuple[str, int]: + h = hashlib.sha256() + size = 0 + with path.open("rb") as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + size += len(chunk) + return h.hexdigest(), size + + +def load_config(path: Path) -> Dict: + with path.open("r", encoding="utf-8") as handle: + cfg = json.load(handle) + if not isinstance(cfg, dict): + raise ValueError("config must be a JSON object") + return cfg + + +def ensure_list(cfg: Dict, key: str) -> List[str]: + value = cfg.get(key, []) + if value is None: + return [] + if not isinstance(value, list): + raise ValueError(f"config.{key} must be a list") + return [str(x) for x in value] + + +def copy_item(src: Path, dest_root: Path, rel_dir: str) -> Tuple[str, str, int]: + dest_dir = dest_root / rel_dir + dest_dir.mkdir(parents=True, exist_ok=True) + dest_path = dest_dir / src.name + shutil.copy2(src, dest_path) + digest, size = sha256_file(dest_path) + rel_path = dest_path.relative_to(dest_root).as_posix() + return rel_path, digest, size + + +def build_pack(config_path: Path, output_dir: Path) -> Dict: + cfg = load_config(config_path) + name = cfg.get("name", DEFAULT_NAME) + images = ensure_list(cfg, "images") + charts = ensure_list(cfg, "charts") + extras = ensure_list(cfg, "extras") + + output_dir.mkdir(parents=True, exist_ok=True) + items = [] + + def 
process_list(paths: List[str], kind: str, rel_dir: str): + for raw in sorted(paths): + src = Path(raw).expanduser().resolve() + if not src.exists(): + items.append({ + "type": kind, + "source": raw, + "status": "missing" + }) + continue + rel_path, digest, size = copy_item(src, output_dir, rel_dir) + items.append({ + "type": kind, + "source": raw, + "path": rel_path, + "sha256": digest, + "size": size, + "status": "ok", + }) + + process_list(images, "image", "images") + process_list(charts, "chart", "charts") + process_list(extras, "extra", "extras") + + manifest = { + "name": name, + "created": datetime.now(timezone.utc).isoformat(), + "items": items, + } + + # checksums file (only for ok items) + checksum_lines = [f"{item['sha256']} {item['path']}" for item in items if item.get("status") == "ok"] + (output_dir / "checksums.sha256").write_text("\n".join(checksum_lines) + ("\n" if checksum_lines else ""), encoding="utf-8") + (output_dir / "bootstrap-manifest.json").write_text(json.dumps(manifest, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + return manifest + + +def parse_args(argv: List[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--config", type=Path, help="Path to bootstrap pack config JSON") + parser.add_argument("--output", type=Path, help="Output directory for the pack") + parser.add_argument("--self-test", action="store_true", help="Run internal self-test and exit") + return parser.parse_args(argv) + + +def self_test() -> int: + import tempfile + + with tempfile.TemporaryDirectory() as tmp: + tmpdir = Path(tmp) + files = [] + for name, content in [("img1.tar", b"image-one"), ("chart1.tgz", b"chart-one"), ("readme.txt", b"hello")]: + p = tmpdir / name + p.write_bytes(content) + files.append(p) + cfg = { + "images": [str(files[0])], + "charts": [str(files[1])], + "extras": [str(files[2])], + } + cfg_path = tmpdir / "bootstrap.json" + cfg_path.write_text(json.dumps(cfg), 
encoding="utf-8") + outdir = tmpdir / "out" + manifest = build_pack(cfg_path, outdir) + assert all(item.get("status") == "ok" for item in manifest["items"]), manifest + for rel in ["images/img1.tar", "charts/chart1.tgz", "extras/readme.txt", "checksums.sha256", "bootstrap-manifest.json"]: + assert (outdir / rel).exists(), f"missing {rel}" + print("self-test passed") + return 0 + + +def main(argv: List[str]) -> int: + args = parse_args(argv) + if args.self_test: + return self_test() + if not (args.config and args.output): + print("--config and --output are required unless --self-test", file=sys.stderr) + return 2 + manifest = build_pack(args.config, args.output) + missing = [i for i in manifest["items"] if i.get("status") == "missing"] + if missing: + print("Pack built with missing items:") + for item in missing: + print(f" - {item['source']}") + return 1 + print(f"Bootstrap pack written to {args.output}") + return 0 + + +if __name__ == "__main__": # pragma: no cover + sys.exit(main(sys.argv[1:])) diff --git a/ops/devops/airgap/build_bootstrap_pack.sh b/ops/devops/airgap/build_bootstrap_pack.sh new file mode 100644 index 000000000..9e8ace6f8 --- /dev/null +++ b/ops/devops/airgap/build_bootstrap_pack.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +# Thin wrapper for build_bootstrap_pack.py +# Usage: ./build_bootstrap_pack.sh config.json out/bootstrap-pack +set -euo pipefail +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " >&2 + exit 2 +fi +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +python3 "$SCRIPT_DIR/build_bootstrap_pack.py" --config "$1" --output "$2" diff --git a/ops/devops/airgap/bundle_stage_import.py b/ops/devops/airgap/bundle_stage_import.py new file mode 100644 index 000000000..087b4e444 --- /dev/null +++ b/ops/devops/airgap/bundle_stage_import.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +"""Bundle staging helper for sealed-mode imports. + +Validates bundle files against a manifest and stages them into a target directory +with deterministic names (`-`). 
Emits a JSON report detailing +success/failure per file for evidence capture. + +Manifest format (JSON): +[ + {"file": "bundle1.tar.gz", "sha256": "..."}, + {"file": "bundle2.ndjson", "sha256": "..."} +] + +Usage: + bundle_stage_import.py --manifest bundles.json --root /path/to/files --out staging + bundle_stage_import.py --manifest bundles.json --root . --out staging --prefix mirror/ + bundle_stage_import.py --self-test +""" +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import shutil +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List + + +def sha256_file(path: Path) -> str: + h = hashlib.sha256() + with path.open('rb') as f: + for chunk in iter(lambda: f.read(1024 * 1024), b""): + h.update(chunk) + return h.hexdigest() + + +def load_manifest(path: Path) -> List[Dict[str, str]]: + with path.open('r', encoding='utf-8') as handle: + data = json.load(handle) + if not isinstance(data, list): + raise ValueError("Manifest must be a list of objects") + normalized = [] + for idx, entry in enumerate(data): + if not isinstance(entry, dict): + raise ValueError(f"Manifest entry {idx} is not an object") + file = entry.get("file") + digest = entry.get("sha256") + if not file or not digest: + raise ValueError(f"Manifest entry {idx} missing file or sha256") + normalized.append({"file": str(file), "sha256": str(digest).lower()}) + return normalized + + +def stage_file(src: Path, digest: str, out_dir: Path, prefix: str) -> Path: + dest_name = f"{digest}-{src.name}" + dest_rel = Path(prefix) / dest_name if prefix else Path(dest_name) + dest_path = out_dir / dest_rel + dest_path.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(src, dest_path) + return dest_rel + + +def process(manifest: Path, root: Path, out_dir: Path, prefix: str) -> Dict: + items = load_manifest(manifest) + results = [] + success = True + for entry in items: + rel = Path(entry["file"]) + src = (root / 
rel).resolve() + expected = entry["sha256"].lower() + status = "ok" + actual = None + staged = None + message = "" + if not src.exists(): + status = "missing" + message = "file not found" + success = False + else: + actual = sha256_file(src) + if actual != expected: + status = "checksum-mismatch" + message = "sha256 mismatch" + success = False + else: + staged = str(stage_file(src, expected, out_dir, prefix)) + results.append( + { + "file": str(rel), + "expectedSha256": expected, + "actualSha256": actual, + "status": status, + "stagedPath": staged, + "message": message, + } + ) + report = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "root": str(root), + "output": str(out_dir), + "prefix": prefix, + "summary": { + "total": len(results), + "success": success, + "ok": sum(1 for r in results if r["status"] == "ok"), + "missing": sum(1 for r in results if r["status"] == "missing"), + "checksumMismatch": sum(1 for r in results if r["status"] == "checksum-mismatch"), + }, + "items": results, + } + return report + + +def parse_args(argv: List[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--manifest", type=Path, help="Path to bundle manifest JSON") + parser.add_argument("--root", type=Path, help="Root directory containing bundle files") + parser.add_argument("--out", type=Path, help="Output directory for staged bundles and report") + parser.add_argument("--prefix", default="", help="Optional prefix within output dir (e.g., mirror/)") + parser.add_argument("--report", type=Path, help="Override report path (defaults to /staging-report.json)") + parser.add_argument("--self-test", action="store_true", help="Run internal self-test and exit") + return parser.parse_args(argv) + + +def write_report(report: Dict, report_path: Path) -> None: + report_path.parent.mkdir(parents=True, exist_ok=True) + with report_path.open('w', encoding='utf-8') as handle: + json.dump(report, handle, ensure_ascii=False, 
indent=2) + handle.write("\n") + + +def self_test() -> int: + import tempfile + + with tempfile.TemporaryDirectory() as tmp: + tmpdir = Path(tmp) + sample = tmpdir / "sample.bin" + sample.write_bytes(b"offline-bundle") + digest = sha256_file(sample) + manifest = tmpdir / "manifest.json" + manifest.write_text(json.dumps([{ "file": "sample.bin", "sha256": digest }]), encoding='utf-8') + out = tmpdir / "out" + report = process(manifest, tmpdir, out, prefix="mirror/") + assert report["summary"]["success"] is True, report + staged = out / report["items"][0]["stagedPath"] + assert staged.exists(), f"staged file missing: {staged}" + print("self-test passed") + return 0 + + +def main(argv: List[str]) -> int: + args = parse_args(argv) + if args.self_test: + return self_test() + if not (args.manifest and args.root and args.out): + print("--manifest, --root, and --out are required unless --self-test", file=sys.stderr) + return 2 + report = process(args.manifest, args.root, args.out, args.prefix) + report_path = args.report or args.out / "staging-report.json" + write_report(report, report_path) + print(f"Staged bundles → {args.out} (report {report_path})") + return 0 if report["summary"]["success"] else 1 + + +if __name__ == "__main__": # pragma: no cover + sys.exit(main(sys.argv[1:])) diff --git a/ops/devops/airgap/compose-egress-guard.sh b/ops/devops/airgap/compose-egress-guard.sh new file mode 100644 index 000000000..28266c160 --- /dev/null +++ b/ops/devops/airgap/compose-egress-guard.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Enforce deny-all egress for a Docker/Compose project using DOCKER-USER chain. +# Usage: COMPOSE_PROJECT=stella ./compose-egress-guard.sh +# Optional env: ALLOW_RFC1918=true to allow east-west traffic inside 10/172/192 ranges. 
+set -euo pipefail + +PROJECT=${COMPOSE_PROJECT:-stella} +ALLOW_RFC1918=${ALLOW_RFC1918:-true} +NETWORK=${COMPOSE_NETWORK:-${PROJECT}_default} + +chain=STELLAOPS_SEALED_${PROJECT^^} +ipset_name=${PROJECT}_cidrs + +insert_accept() { + local dest=$1 + iptables -C DOCKER-USER -d "$dest" -j ACCEPT 2>/dev/null || iptables -I DOCKER-USER -d "$dest" -j ACCEPT +} + +# 1) Ensure DOCKER-USER exists +iptables -nL DOCKER-USER >/dev/null 2>&1 || iptables -N DOCKER-USER + +# 2) Create dedicated chain per project for clarity +iptables -nL "$chain" >/dev/null 2>&1 || iptables -N "$chain" + +# 2b) Populate ipset with compose network CIDRs (if available) +if command -v ipset >/dev/null; then + ipset list "$ipset_name" >/dev/null 2>&1 || ipset create "$ipset_name" hash:net -exist + cidrs=$(docker network inspect "$NETWORK" -f '{{range .IPAM.Config}}{{.Subnet}} {{end}}') + for cidr in $cidrs; do + ipset add "$ipset_name" "$cidr" 2>/dev/null || true + done +fi + +# 3) Allow loopback and optional RFC1918 intra-cluster ranges, then drop everything else +insert_accept 127.0.0.0/8 +if [[ "$ALLOW_RFC1918" == "true" ]]; then + insert_accept 10.0.0.0/8 + insert_accept 172.16.0.0/12 + insert_accept 192.168.0.0/16 +fi +iptables -C "$chain" -j DROP 2>/dev/null || iptables -A "$chain" -j DROP + +# 4) Hook chain into DOCKER-USER for containers in this project network +iptables -C DOCKER-USER -m addrtype --src-type LOCAL -j RETURN 2>/dev/null || true +if command -v ipset >/dev/null && ipset list "$ipset_name" >/dev/null 2>&1; then + iptables -C DOCKER-USER -m set --match-set "$ipset_name" dst -j "$chain" 2>/dev/null || iptables -I DOCKER-USER -m set --match-set "$ipset_name" dst -j "$chain" +else + # Fallback: match by destination subnet from docker inspect (first subnet only) + first_cidr=$(docker network inspect "$NETWORK" -f '{{(index .IPAM.Config 0).Subnet}}') + iptables -C DOCKER-USER -d "$first_cidr" -j "$chain" 2>/dev/null || iptables -I DOCKER-USER -d "$first_cidr" -j "$chain" +fi + +echo 
"Applied compose egress guard via DOCKER-USER -> $chain" >&2 +iptables -vnL "$chain" diff --git a/ops/devops/airgap/k8s-deny-egress.yaml b/ops/devops/airgap/k8s-deny-egress.yaml new file mode 100644 index 000000000..44f55cc83 --- /dev/null +++ b/ops/devops/airgap/k8s-deny-egress.yaml @@ -0,0 +1,42 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: sealed-deny-all-egress + namespace: default + labels: + stellaops.dev/owner: devops + stellaops.dev/purpose: sealed-mode +spec: + podSelector: + matchLabels: + sealed: "true" + policyTypes: + - Egress + egress: [] +--- +# Optional patch to allow in-cluster DNS while still blocking external egress. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: sealed-allow-dns + namespace: default + labels: + stellaops.dev/owner: devops + stellaops.dev/purpose: sealed-mode +spec: + podSelector: + matchLabels: + sealed: "true" + policyTypes: + - Egress + egress: + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: kube-system + podSelector: + matchLabels: + k8s-app: kube-dns + ports: + - protocol: UDP + port: 53 diff --git a/ops/devops/airgap/stage-bundle.sh b/ops/devops/airgap/stage-bundle.sh new file mode 100644 index 000000000..a1299aa03 --- /dev/null +++ b/ops/devops/airgap/stage-bundle.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Wrapper for bundle_stage_import.py with sane defaults. 
+# Usage: ./stage-bundle.sh manifest.json /path/to/files out/staging [prefix] +set -euo pipefail +if [[ $# -lt 3 ]]; then + echo "Usage: $0 [prefix]" >&2 + exit 2 +fi +manifest=$1 +root=$2 +out=$3 +prefix=${4:-} +SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) +python3 "$SCRIPT_DIR/bundle_stage_import.py" --manifest "$manifest" --root "$root" --out "$out" --prefix "$prefix" diff --git a/ops/devops/airgap/verify-egress-block.sh b/ops/devops/airgap/verify-egress-block.sh new file mode 100644 index 000000000..6732c4ecc --- /dev/null +++ b/ops/devops/airgap/verify-egress-block.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# Verification harness for sealed-mode egress: Docker/Compose or Kubernetes. +# Examples: +# ./verify-egress-block.sh docker stella_default out/airgap-probe.json +# ./verify-egress-block.sh k8s default out/k8s-probe.json +set -euo pipefail + +mode=${1:-} +context=${2:-} +out=${3:-} + +if [[ -z "$mode" || -z "$context" || -z "$out" ]]; then + echo "Usage: $0 [target ...]" >&2 + exit 2 +fi +shift 3 +TARGETS=($@) + +ROOT=$(cd "$(dirname "$0")/../.." 
&& pwd) +PROBE_PY="$ROOT/ops/devops/sealed-mode-ci/egress_probe.py" + +case "$mode" in + docker) + network="$context" + python3 "$PROBE_PY" --network "$network" --output "$out" "${TARGETS[@]}" + ;; + k8s|kubernetes) + ns="$context" + targets=("${TARGETS[@]}") + if [[ ${#targets[@]} -eq 0 ]]; then + targets=("https://example.com" "https://www.cloudflare.com" "https://releases.stella-ops.org/healthz") + fi + image="curlimages/curl:8.6.0" + tmpfile=$(mktemp) + cat > "$tmpfile" < + set -euo pipefail; + rc=0; + for url in ${targets[@]}; do + echo "PROBE $url"; + if curl -fsS --max-time 8 "$url"; then + echo "UNEXPECTED_SUCCESS $url"; + rc=1; + else + echo "BLOCKED $url"; + fi; + done; + exit $rc; + securityContext: + runAsNonRoot: true + readOnlyRootFilesystem: true +MANIFEST + kubectl apply -f "$tmpfile" >/dev/null + kubectl wait --for=condition=Ready pod/sealed-egress-probe -n "$ns" --timeout=30s >/dev/null 2>&1 || true + set +e + kubectl logs -n "$ns" sealed-egress-probe > "$out.log" 2>&1 + kubectl wait --for=condition=Succeeded pod/sealed-egress-probe -n "$ns" --timeout=60s + pod_rc=$? + kubectl get pod/sealed-egress-probe -n "$ns" -o json > "$out" + kubectl delete pod/sealed-egress-probe -n "$ns" >/dev/null 2>&1 || true + set -e + if [[ $pod_rc -ne 0 ]]; then + echo "Egress check failed; see $out and $out.log" >&2 + exit 1 + fi + ;; + *) + echo "Unknown mode: $mode" >&2 + exit 2 + ;; +esac + +echo "Egress verification complete → $out" diff --git a/src/ExportCenter/AGENTS.md b/src/ExportCenter/AGENTS.md new file mode 100644 index 000000000..1d46e818a --- /dev/null +++ b/src/ExportCenter/AGENTS.md @@ -0,0 +1,53 @@ +# Export Center · AGENTS Charter (Sprint 0164-0001-0001) + +## Module Scope & Working Directory +- Working directory: `src/ExportCenter/**` (API/WebService, Worker, Core/Infrastructure libs, Trivy/Mirror/DevPortal adapters, RiskBundles pipeline, tests, seed/config). Cross-module edits require an explicit note in the sprint Decisions & Risks. 
+- Mission: produce deterministic evidence exports (JSON, Trivy DB, mirror/delta, devportal offline) with provenance, signing, and distribution (HTTP, OCI, object) that remain offline-friendly and tenant-safe. + +## Roles +- **Backend engineer (.NET 10 / ASP.NET Core):** API surface, planner/run lifecycle, RBAC/tenant guards, SSE events, download endpoints. +- **Adapter engineer:** Trivy DB/Java DB, mirror delta, OCI distribution, encryption/KMS wrapping, pack-run integration. +- **Worker/Concurrency engineer:** job leasing, retries/idempotency, retention pruning, scheduler hooks. +- **Crypto/Provenance steward:** signing, DSSE/in-toto, age/AES-GCM envelope handling, provenance schemas. +- **QA automation:** WebApplicationFactory + Mongo/Mongo2Go fixtures, adapter regression harnesses, determinism checks, offline-kit verification scripts. +- **Docs steward:** keep `docs/modules/export-center/*.md`, sprint Decisions & Risks, and CLI docs aligned with behavior. + +## Required Reading (treat as read before setting DOING) +- `docs/README.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/modules/export-center/architecture.md` +- `docs/modules/export-center/profiles.md` +- `docs/modules/export-center/trivy-adapter.md` (for 36-001/36-002) +- `docs/modules/export-center/mirror-bundles.md` (for 37-001/37-002) +- `docs/modules/export-center/provenance-and-signing.md` +- `docs/modules/export-center/operations/kms-envelope-pattern.md` (for 37-002 encryption/KMS) +- Sprint file `docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md` + +## Working Agreements +- Enforce tenant scoping and RBAC on every API, worker fetch, and distribution path; no cross-tenant exports unless explicitly whitelisted and logged. +- Maintain determinism: sorted outputs, canonical JSON, UTC RFC3339 timestamps, stable hashing; identical selectors yield identical manifests. 
+- Offline-first: avoid new external calls; OCI distribution must be feature-flagged/disable-able for air-gap; tests must not reach the network. +- Aggregation-Only Contract for evidence: no derived modifications; policy outputs stay separate and clearly labeled. +- Concurrency: default per-tenant run caps (4 active) and idempotent retries; cooperative cancellation must clean partial artefacts and audit outcome. +- Cross-module changes (Authority/Orchestrator/CLI) only when sprint explicitly covers them; log in Decisions & Risks. + +## Coding & Observability Standards +- Target **.NET 10** with curated `local-nugets/`; MongoDB driver ≥ 3.x; ORAS/OCI client where applicable. +- Metrics under `StellaOps.ExportCenter.*`; tag `tenant`, `profile`, `adapter`, `result`; document new counters/histograms. +- Logs structured, no PII; include `runId`, `tenant`, `profile`, `adapter`, `correlationId`; map phases (`plan`, `resolve`, `adapter`, `manifest`, `sign`, `distribute`). +- SSE/telemetry events must be deterministic and replay-safe; backpressure aware. +- Signing/encryption: default cosign-style KMS signing; age/AES-GCM envelopes with key wrapping; store references in provenance only (no raw keys). + +## Testing Rules +- API/worker tests: `StellaOps.ExportCenter.Tests` with WebApplicationFactory + in-memory/Mongo2Go fixtures; assert tenant guards, RBAC, quotas, SSE timelines. +- Adapter regression: deterministic fixtures for Trivy DB/Java DB, mirror delta/base comparison, OCI manifest generation; no network. +- Risk bundle pipeline: tests in `StellaOps.ExportCenter.RiskBundles.Tests` (or add) covering bundle layout, DSSE signatures, checksum publication. +- Determinism checks: stable ordering/hashes in manifests, provenance, and distribution descriptors; retry paths must not duplicate outputs. +- Keep tests air-gap friendly; seeded data under `seed-data/` or inline fixtures. 
+ +## Delivery Discipline +- Update sprint tracker statuses (`TODO → DOING → DONE/BLOCKED`) in `docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md` when starting/finishing/blocking work; mirror design decisions in Decisions & Risks and Execution Log. +- If a decision is needed (API contract, KMS envelope pattern), mark the task `BLOCKED`, describe the decision in sprint Decisions & Risks, and continue with other unblocked tasks. +- When contracts or schemas change (API, manifest, provenance, adapter outputs), update module docs and link them from the sprint. +- Retain deterministic retention/pruning behavior; document feature flags and defaults in `docs/modules/export-center/operations/*.md` when modified. diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/Postgres/PostgresPackRunLogRepository.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/Postgres/PostgresPackRunLogRepository.cs new file mode 100644 index 000000000..55e9bb95c --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/Postgres/PostgresPackRunLogRepository.cs @@ -0,0 +1,218 @@ +using Microsoft.Extensions.Logging; +using Npgsql; +using NpgsqlTypes; +using StellaOps.Orchestrator.Core.Domain; +using PackLogLevel = StellaOps.Orchestrator.Core.Domain.LogLevel; +using StellaOps.Orchestrator.Infrastructure.Repositories; + +namespace StellaOps.Orchestrator.Infrastructure.Postgres; + +/// +/// PostgreSQL implementation for pack run logs. 
+/// +public sealed class PostgresPackRunLogRepository : IPackRunLogRepository +{ + private const string Columns = "log_id, pack_run_id, tenant_id, sequence, log_level, source, message, data, created_at"; + + private const string InsertSql = """ + INSERT INTO pack_run_logs (log_id, tenant_id, pack_run_id, sequence, log_level, source, message, data, created_at) + VALUES (@log_id, @tenant_id, @pack_run_id, @sequence, @log_level, @source, @message, @data, @created_at) + """; + + private const string SelectLogsSql = $""" + SELECT {Columns} + FROM pack_run_logs + WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id AND sequence > @after + ORDER BY sequence + LIMIT @limit + """; + + private const string SelectLogsByLevelSql = $""" + SELECT {Columns} + FROM pack_run_logs + WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id AND sequence > @after AND log_level >= @min_level + ORDER BY sequence + LIMIT @limit + """; + + private const string SearchLogsSql = $""" + SELECT {Columns} + FROM pack_run_logs + WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id AND sequence > @after AND message ILIKE @pattern + ORDER BY sequence + LIMIT @limit + """; + + private const string StatsSql = """ + SELECT COUNT(*)::BIGINT, COALESCE(MAX(sequence), -1) + FROM pack_run_logs + WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id + """; + + private const string DeleteSql = """ + DELETE FROM pack_run_logs + WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id + """; + + private readonly OrchestratorDataSource _dataSource; + private readonly ILogger _logger; + + public PostgresPackRunLogRepository(OrchestratorDataSource dataSource, ILogger logger) + { + _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource)); + _logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); + } + + public async Task AppendAsync(PackRunLog log, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(log.TenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(InsertSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + + AddParameters(command.Parameters, log); + + await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } + + public async Task AppendBatchAsync(IReadOnlyList logs, CancellationToken cancellationToken) + { + if (logs.Count == 0) + { + return; + } + + var tenantId = logs[0].TenantId; + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var batch = new NpgsqlBatch(connection); + foreach (var log in logs) + { + var cmd = new NpgsqlBatchCommand(InsertSql); + AddParameters(cmd.Parameters, log); + batch.BatchCommands.Add(cmd); + } + + await batch.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } + + public async Task GetLogsAsync(string tenantId, Guid packRunId, long afterSequence, int limit, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(SelectLogsSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + command.Parameters.AddWithValue("after", afterSequence); + command.Parameters.AddWithValue("limit", limit); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + return await ReadBatchAsync(reader, tenantId, packRunId, cancellationToken).ConfigureAwait(false); + } + + public 
async Task<(long Count, long LatestSequence)> GetLogStatsAsync(string tenantId, Guid packRunId, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(StatsSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + { + return (0, -1); + } + + var count = reader.GetInt64(0); + var latest = reader.GetInt64(1); + return (count, latest); + } + + public async Task GetLogsByLevelAsync(string tenantId, Guid packRunId, PackLogLevel minLevel, long afterSequence, int limit, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(SelectLogsByLevelSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + command.Parameters.AddWithValue("after", afterSequence); + command.Parameters.AddWithValue("limit", limit); + command.Parameters.AddWithValue("min_level", (int)minLevel); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + return await ReadBatchAsync(reader, tenantId, packRunId, cancellationToken).ConfigureAwait(false); + } + + public async Task SearchLogsAsync(string tenantId, Guid packRunId, string pattern, long afterSequence, int limit, CancellationToken cancellationToken) + { + await using var connection = await 
_dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(SearchLogsSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + command.Parameters.AddWithValue("after", afterSequence); + command.Parameters.AddWithValue("limit", limit); + command.Parameters.AddWithValue("pattern", $"%{pattern}%"); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + return await ReadBatchAsync(reader, tenantId, packRunId, cancellationToken).ConfigureAwait(false); + } + + public async Task DeleteLogsAsync(string tenantId, Guid packRunId, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(DeleteSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + + var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + return rows; + } + + private static void AddParameters(NpgsqlParameterCollection parameters, PackRunLog log) + { + parameters.AddWithValue("log_id", log.LogId); + parameters.AddWithValue("tenant_id", log.TenantId); + parameters.AddWithValue("pack_run_id", log.PackRunId); + parameters.AddWithValue("sequence", log.Sequence); + parameters.AddWithValue("log_level", (int)log.Level); + parameters.AddWithValue("source", (object?)log.Source ?? DBNull.Value); + parameters.AddWithValue("message", log.Message); + parameters.Add(new NpgsqlParameter("data", NpgsqlDbType.Jsonb) { Value = (object?)log.Data ?? 
DBNull.Value }); + parameters.AddWithValue("created_at", log.Timestamp); + } + + private static async Task ReadBatchAsync(NpgsqlDataReader reader, string tenantId, Guid packRunId, CancellationToken cancellationToken) + { + var logs = new List(); + long startSequence = -1; + + while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + { + var log = new PackRunLog( + LogId: reader.GetGuid(0), + TenantId: reader.GetString(2), + PackRunId: reader.GetGuid(1), + Sequence: reader.GetInt64(3), + Level: (PackLogLevel)reader.GetInt32(4), + Source: reader.IsDBNull(5) ? "unknown" : reader.GetString(5), + Message: reader.GetString(6), + Timestamp: reader.GetFieldValue(8), + Data: reader.IsDBNull(7) ? null : reader.GetString(7)); + + if (startSequence < 0) + { + startSequence = log.Sequence; + } + + logs.Add(log); + } + + if (startSequence < 0) + { + startSequence = 0; + } + + return new PackRunLogBatch(packRunId, tenantId, startSequence, logs); + } +} diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/Postgres/PostgresPackRunRepository.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/Postgres/PostgresPackRunRepository.cs new file mode 100644 index 000000000..6d69b3d26 --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/Postgres/PostgresPackRunRepository.cs @@ -0,0 +1,525 @@ +using System.Globalization; +using Microsoft.Extensions.Logging; +using Npgsql; +using NpgsqlTypes; +using StellaOps.Orchestrator.Core.Domain; +using StellaOps.Orchestrator.Infrastructure.Repositories; + +namespace StellaOps.Orchestrator.Infrastructure.Postgres; + +/// +/// PostgreSQL implementation for pack run persistence. 
+/// +public sealed class PostgresPackRunRepository : IPackRunRepository +{ + private const string Columns = """ + pack_run_id, tenant_id, project_id, pack_id, pack_version, status, priority, attempt, max_attempts, + parameters, parameters_digest, idempotency_key, correlation_id, lease_id, task_runner_id, lease_until, + created_at, scheduled_at, leased_at, started_at, completed_at, not_before, reason, exit_code, duration_ms, + created_by, metadata + """; + + private const string SelectByIdSql = $"SELECT {Columns} FROM pack_runs WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id"; + private const string SelectByIdempotencySql = $"SELECT {Columns} FROM pack_runs WHERE tenant_id = @tenant_id AND idempotency_key = @idempotency_key"; + + private const string InsertSql = """ + INSERT INTO pack_runs ( + pack_run_id, tenant_id, project_id, pack_id, pack_version, status, priority, attempt, max_attempts, + parameters, parameters_digest, idempotency_key, correlation_id, lease_id, task_runner_id, lease_until, + created_at, scheduled_at, leased_at, started_at, completed_at, not_before, reason, exit_code, duration_ms, + created_by, metadata) + VALUES ( + @pack_run_id, @tenant_id, @project_id, @pack_id, @pack_version, @status::pack_run_status, @priority, + @attempt, @max_attempts, @parameters, @parameters_digest, @idempotency_key, @correlation_id, @lease_id, + @task_runner_id, @lease_until, @created_at, @scheduled_at, @leased_at, @started_at, @completed_at, + @not_before, @reason, @exit_code, @duration_ms, @created_by, @metadata) + """; + + private const string UpdateStatusSql = """ + UPDATE pack_runs + SET status = @status::pack_run_status, + attempt = @attempt, + lease_id = @lease_id, + task_runner_id = @task_runner_id, + lease_until = @lease_until, + scheduled_at = @scheduled_at, + leased_at = @leased_at, + started_at = @started_at, + completed_at = @completed_at, + not_before = @not_before, + reason = @reason, + exit_code = @exit_code, + duration_ms = @duration_ms + 
WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id + """; + + private const string LeaseNextSqlTemplate = """ + UPDATE pack_runs + SET status = 'leased'::pack_run_status, + lease_id = @lease_id, + task_runner_id = @task_runner_id, + lease_until = @lease_until, + leased_at = @leased_at + WHERE tenant_id = @tenant_id + AND pack_run_id = ( + SELECT pack_run_id + FROM pack_runs + WHERE tenant_id = @tenant_id + AND status = 'scheduled'::pack_run_status + AND (not_before IS NULL OR not_before <= @now) + {0} + ORDER BY priority DESC, created_at + LIMIT 1 + FOR UPDATE SKIP LOCKED) + RETURNING {1}; + """; + + private const string ExtendLeaseSql = """ + UPDATE pack_runs + SET lease_until = @new_lease_until + WHERE tenant_id = @tenant_id + AND pack_run_id = @pack_run_id + AND lease_id = @lease_id + AND status = 'leased'::pack_run_status + AND lease_until > @now + """; + + private const string ReleaseLeaseSql = """ + UPDATE pack_runs + SET status = @status::pack_run_status, + lease_id = NULL, + task_runner_id = NULL, + lease_until = NULL, + completed_at = CASE WHEN @completed_at IS NULL THEN completed_at ELSE @completed_at END, + reason = @reason + WHERE tenant_id = @tenant_id AND pack_run_id = @pack_run_id AND lease_id = @lease_id + """; + + private const string ListSqlTemplate = "SELECT " + Columns + @" + FROM pack_runs + WHERE tenant_id = @tenant_id + {0} + ORDER BY created_at DESC + LIMIT @limit OFFSET @offset"; + + private const string CountSqlTemplate = @"SELECT COUNT(*) + FROM pack_runs + WHERE tenant_id = @tenant_id + {0}"; + + private const string ExpiredLeaseSql = $""" + SELECT {Columns} + FROM pack_runs + WHERE status = 'leased'::pack_run_status + AND lease_until < @cutoff + ORDER BY lease_until + LIMIT @limit + """; + + private const string CancelPendingSql = """ + UPDATE pack_runs + SET status = 'canceled'::pack_run_status, + reason = @reason, + completed_at = NOW() + WHERE tenant_id = @tenant_id + AND status = 'pending'::pack_run_status + {0} + """; + + 
private readonly OrchestratorDataSource _dataSource; + private readonly ILogger _logger; + + public PostgresPackRunRepository(OrchestratorDataSource dataSource, ILogger logger) + { + _dataSource = dataSource ?? throw new ArgumentNullException(nameof(dataSource)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + public async Task GetByIdAsync(string tenantId, Guid packRunId, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(SelectByIdSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + { + return null; + } + + return Map(reader); + } + + public async Task GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(SelectByIdempotencySql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("idempotency_key", idempotencyKey); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + { + return null; + } + + return Map(reader); + } + + public async Task CreateAsync(PackRun packRun, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(packRun.TenantId, 
"writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(InsertSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + + AddParameters(command, packRun); + + try + { + await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + OrchestratorMetrics.PackRunCreated(packRun.TenantId, packRun.PackId); + } + catch (PostgresException ex) when (string.Equals(ex.SqlState, PostgresErrorCodes.UniqueViolation, StringComparison.Ordinal)) + { + _logger.LogWarning(ex, "Duplicate pack run idempotency key {Key} for tenant {Tenant}", packRun.IdempotencyKey, packRun.TenantId); + throw; + } + } + + public async Task UpdateStatusAsync( + string tenantId, + Guid packRunId, + PackRunStatus status, + int attempt, + Guid? leaseId, + string? taskRunnerId, + DateTimeOffset? leaseUntil, + DateTimeOffset? scheduledAt, + DateTimeOffset? leasedAt, + DateTimeOffset? startedAt, + DateTimeOffset? completedAt, + DateTimeOffset? notBefore, + string? reason, + int? exitCode, + long? durationMs, + CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(UpdateStatusSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + command.Parameters.AddWithValue("status", StatusToString(status)); + command.Parameters.AddWithValue("attempt", attempt); + command.Parameters.AddWithValue("lease_id", (object?)leaseId ?? DBNull.Value); + command.Parameters.AddWithValue("task_runner_id", (object?)taskRunnerId ?? DBNull.Value); + command.Parameters.AddWithValue("lease_until", (object?)leaseUntil ?? DBNull.Value); + command.Parameters.AddWithValue("scheduled_at", (object?)scheduledAt ?? 
DBNull.Value); + command.Parameters.AddWithValue("leased_at", (object?)leasedAt ?? DBNull.Value); + command.Parameters.AddWithValue("started_at", (object?)startedAt ?? DBNull.Value); + command.Parameters.AddWithValue("completed_at", (object?)completedAt ?? DBNull.Value); + command.Parameters.AddWithValue("not_before", (object?)notBefore ?? DBNull.Value); + command.Parameters.AddWithValue("reason", (object?)reason ?? DBNull.Value); + command.Parameters.AddWithValue("exit_code", (object?)exitCode ?? DBNull.Value); + command.Parameters.AddWithValue("duration_ms", (object?)durationMs ?? DBNull.Value); + + await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } + + public async Task LeaseNextAsync( + string tenantId, + string? packId, + Guid leaseId, + string taskRunnerId, + DateTimeOffset leaseUntil, + CancellationToken cancellationToken) + { + var packFilter = string.IsNullOrWhiteSpace(packId) ? string.Empty : "AND pack_id = @pack_id"; + var sql = string.Format(LeaseNextSqlTemplate, packFilter, Columns); + + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(sql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + var now = DateTimeOffset.UtcNow; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("lease_id", leaseId); + command.Parameters.AddWithValue("task_runner_id", taskRunnerId); + command.Parameters.AddWithValue("lease_until", leaseUntil); + command.Parameters.AddWithValue("leased_at", now); + command.Parameters.AddWithValue("now", now); + if (!string.IsNullOrWhiteSpace(packId)) + { + command.Parameters.AddWithValue("pack_id", packId!); + } + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + if (!await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + { + return null; + } + + return 
Map(reader); + } + + public async Task ExtendLeaseAsync(string tenantId, Guid packRunId, Guid leaseId, DateTimeOffset newLeaseUntil, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(ExtendLeaseSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + command.Parameters.AddWithValue("lease_id", leaseId); + command.Parameters.AddWithValue("new_lease_until", newLeaseUntil); + command.Parameters.AddWithValue("now", DateTimeOffset.UtcNow); + + var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task ReleaseLeaseAsync(string tenantId, Guid packRunId, Guid leaseId, PackRunStatus newStatus, string? reason, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(ReleaseLeaseSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("pack_run_id", packRunId); + command.Parameters.AddWithValue("lease_id", leaseId); + command.Parameters.AddWithValue("status", StatusToString(newStatus)); + command.Parameters.AddWithValue("reason", (object?)reason ?? DBNull.Value); + command.Parameters.AddWithValue("completed_at", DateTimeOffset.UtcNow); + + await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } + + public async Task> ListAsync( + string tenantId, + string? packId, + PackRunStatus? status, + string? projectId, + DateTimeOffset? createdAfter, + DateTimeOffset? 
createdBefore, + int limit, + int offset, + CancellationToken cancellationToken) + { + var filters = BuildFilters(packId, status, projectId, createdAfter, createdBefore, out var parameters); + var sql = string.Format(ListSqlTemplate, filters); + + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(sql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("limit", limit); + command.Parameters.AddWithValue("offset", offset); + foreach (var param in parameters) + { + command.Parameters.Add(param); + } + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + var results = new List(); + while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + { + results.Add(Map(reader)); + } + + return results; + } + + public async Task CountAsync(string tenantId, string? packId, PackRunStatus? status, string? 
projectId, CancellationToken cancellationToken) + { + var filters = BuildFilters(packId, status, projectId, null, null, out var parameters); + var sql = string.Format(CountSqlTemplate, filters); + + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(sql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + foreach (var param in parameters) + { + command.Parameters.Add(param); + } + + var countObj = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false); + return Convert.ToInt32(countObj, CultureInfo.InvariantCulture); + } + + public async Task> GetExpiredLeasesAsync(DateTimeOffset cutoff, int limit, CancellationToken cancellationToken) + { + await using var connection = await _dataSource.OpenConnectionAsync("", "reader", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(ExpiredLeaseSql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("cutoff", cutoff); + command.Parameters.AddWithValue("limit", limit); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + var results = new List(); + while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + { + results.Add(Map(reader)); + } + + return results; + } + + public async Task CancelPendingAsync(string tenantId, string? packId, string reason, CancellationToken cancellationToken) + { + var filter = string.IsNullOrWhiteSpace(packId) ? 
string.Empty : "AND pack_id = @pack_id"; + var sql = string.Format(CancelPendingSql, filter); + + await using var connection = await _dataSource.OpenConnectionAsync(tenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = new NpgsqlCommand(sql, connection); + command.CommandTimeout = _dataSource.CommandTimeoutSeconds; + command.Parameters.AddWithValue("tenant_id", tenantId); + command.Parameters.AddWithValue("reason", reason); + if (!string.IsNullOrWhiteSpace(packId)) + { + command.Parameters.AddWithValue("pack_id", packId!); + } + + var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + return rows; + } + + private static string BuildFilters( + string? packId, + PackRunStatus? status, + string? projectId, + DateTimeOffset? createdAfter, + DateTimeOffset? createdBefore, + out List parameters) + { + var filters = new List(); + parameters = new List(); + + if (!string.IsNullOrWhiteSpace(packId)) + { + filters.Add("pack_id = @pack_id"); + parameters.Add(new NpgsqlParameter("pack_id", packId!)); + } + + if (status.HasValue) + { + filters.Add("status = @status::pack_run_status"); + parameters.Add(new NpgsqlParameter("status", StatusToString(status.Value))); + } + + if (!string.IsNullOrWhiteSpace(projectId)) + { + filters.Add("project_id = @project_id"); + parameters.Add(new NpgsqlParameter("project_id", projectId!)); + } + + if (createdAfter.HasValue) + { + filters.Add("created_at >= @created_after"); + parameters.Add(new NpgsqlParameter("created_after", createdAfter.Value)); + } + + if (createdBefore.HasValue) + { + filters.Add("created_at <= @created_before"); + parameters.Add(new NpgsqlParameter("created_before", createdBefore.Value)); + } + + return filters.Count == 0 ? 
string.Empty : " AND " + string.Join(" AND ", filters); + } + + private static void AddParameters(NpgsqlCommand command, PackRun packRun) + { + command.Parameters.AddWithValue("pack_run_id", packRun.PackRunId); + command.Parameters.AddWithValue("tenant_id", packRun.TenantId); + command.Parameters.AddWithValue("project_id", (object?)packRun.ProjectId ?? DBNull.Value); + command.Parameters.AddWithValue("pack_id", packRun.PackId); + command.Parameters.AddWithValue("pack_version", packRun.PackVersion); + command.Parameters.AddWithValue("status", StatusToString(packRun.Status)); + command.Parameters.AddWithValue("priority", packRun.Priority); + command.Parameters.AddWithValue("attempt", packRun.Attempt); + command.Parameters.AddWithValue("max_attempts", packRun.MaxAttempts); + command.Parameters.AddWithValue("parameters", packRun.Parameters); + command.Parameters.AddWithValue("parameters_digest", packRun.ParametersDigest); + command.Parameters.AddWithValue("idempotency_key", packRun.IdempotencyKey); + command.Parameters.AddWithValue("correlation_id", (object?)packRun.CorrelationId ?? DBNull.Value); + command.Parameters.AddWithValue("lease_id", (object?)packRun.LeaseId ?? DBNull.Value); + command.Parameters.AddWithValue("task_runner_id", (object?)packRun.TaskRunnerId ?? DBNull.Value); + command.Parameters.AddWithValue("lease_until", (object?)packRun.LeaseUntil ?? DBNull.Value); + command.Parameters.AddWithValue("created_at", packRun.CreatedAt); + command.Parameters.AddWithValue("scheduled_at", (object?)packRun.ScheduledAt ?? DBNull.Value); + command.Parameters.AddWithValue("leased_at", (object?)packRun.LeasedAt ?? DBNull.Value); + command.Parameters.AddWithValue("started_at", (object?)packRun.StartedAt ?? DBNull.Value); + command.Parameters.AddWithValue("completed_at", (object?)packRun.CompletedAt ?? DBNull.Value); + command.Parameters.AddWithValue("not_before", (object?)packRun.NotBefore ?? 
DBNull.Value); + command.Parameters.AddWithValue("reason", (object?)packRun.Reason ?? DBNull.Value); + command.Parameters.AddWithValue("exit_code", (object?)packRun.ExitCode ?? DBNull.Value); + command.Parameters.AddWithValue("duration_ms", (object?)packRun.DurationMs ?? DBNull.Value); + command.Parameters.AddWithValue("created_by", packRun.CreatedBy); + command.Parameters.Add(new NpgsqlParameter("metadata", NpgsqlDbType.Jsonb) + { + Value = (object?)packRun.Metadata ?? DBNull.Value + }); + } + + private static string StatusToString(PackRunStatus status) => status switch + { + PackRunStatus.Pending => "pending", + PackRunStatus.Scheduled => "scheduled", + PackRunStatus.Leased => "leased", + PackRunStatus.Running => "running", + PackRunStatus.Succeeded => "succeeded", + PackRunStatus.Failed => "failed", + PackRunStatus.Canceled => "canceled", + PackRunStatus.TimedOut => "timed_out", + _ => throw new ArgumentOutOfRangeException(nameof(status), status, null) + }; + + private static PackRun Map(NpgsqlDataReader reader) + { + return new PackRun( + PackRunId: reader.GetGuid(0), + TenantId: reader.GetString(1), + ProjectId: reader.IsDBNull(2) ? null : reader.GetString(2), + PackId: reader.GetString(3), + PackVersion: reader.GetString(4), + Status: ParseStatus(reader.GetString(5)), + Priority: reader.GetInt32(6), + Attempt: reader.GetInt32(7), + MaxAttempts: reader.GetInt32(8), + Parameters: reader.GetString(9), + ParametersDigest: reader.GetString(10), + IdempotencyKey: reader.GetString(11), + CorrelationId: reader.IsDBNull(12) ? null : reader.GetString(12), + LeaseId: reader.IsDBNull(13) ? null : reader.GetGuid(13), + TaskRunnerId: reader.IsDBNull(14) ? null : reader.GetString(14), + LeaseUntil: reader.IsDBNull(15) ? null : reader.GetFieldValue(15), + CreatedAt: reader.GetFieldValue(16), + ScheduledAt: reader.IsDBNull(17) ? null : reader.GetFieldValue(17), + LeasedAt: reader.IsDBNull(18) ? null : reader.GetFieldValue(18), + StartedAt: reader.IsDBNull(19) ? 
null : reader.GetFieldValue(19), + CompletedAt: reader.IsDBNull(20) ? null : reader.GetFieldValue(20), + NotBefore: reader.IsDBNull(21) ? null : reader.GetFieldValue(21), + Reason: reader.IsDBNull(22) ? null : reader.GetString(22), + ExitCode: reader.IsDBNull(23) ? null : reader.GetInt32(23), + DurationMs: reader.IsDBNull(24) ? null : reader.GetInt64(24), + CreatedBy: reader.GetString(25), + Metadata: reader.IsDBNull(26) ? null : reader.GetString(26)); + } + + private static PackRunStatus ParseStatus(string value) => value switch + { + "pending" => PackRunStatus.Pending, + "scheduled" => PackRunStatus.Scheduled, + "leased" => PackRunStatus.Leased, + "running" => PackRunStatus.Running, + "succeeded" => PackRunStatus.Succeeded, + "failed" => PackRunStatus.Failed, + "canceled" => PackRunStatus.Canceled, + "timed_out" => PackRunStatus.TimedOut, + _ => throw new ArgumentOutOfRangeException(nameof(value), value, "Unknown pack_run_status") + }; +} diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/ServiceCollectionExtensions.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/ServiceCollectionExtensions.cs index 33e03fab1..3b6a833e4 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/ServiceCollectionExtensions.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/ServiceCollectionExtensions.cs @@ -39,6 +39,8 @@ public static class ServiceCollectionExtensions services.AddScoped(); services.AddScoped(); services.AddScoped(); + services.AddScoped(); + services.AddScoped(); // Register audit and ledger repositories services.AddScoped(); diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/migrations/006_pack_runs.sql b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/migrations/006_pack_runs.sql new file mode 100644 index 000000000..27c63823b --- /dev/null +++ 
b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/migrations/006_pack_runs.sql @@ -0,0 +1,81 @@ +-- 006_pack_runs.sql +-- Pack run persistence and log streaming schema (ORCH-SVC-41/42-101) + +BEGIN; + +-- Enum for pack run lifecycle +CREATE TYPE pack_run_status AS ENUM ( + 'pending', + 'scheduled', + 'leased', + 'running', + 'succeeded', + 'failed', + 'canceled', + 'timed_out' +); + +-- Pack runs +CREATE TABLE pack_runs ( + pack_run_id UUID NOT NULL, + tenant_id TEXT NOT NULL, + project_id TEXT, + pack_id TEXT NOT NULL, + pack_version TEXT NOT NULL, + status pack_run_status NOT NULL DEFAULT 'pending', + priority INTEGER NOT NULL DEFAULT 0, + attempt INTEGER NOT NULL DEFAULT 1, + max_attempts INTEGER NOT NULL DEFAULT 3, + parameters TEXT NOT NULL, + parameters_digest CHAR(64) NOT NULL, + idempotency_key TEXT NOT NULL, + correlation_id TEXT, + lease_id UUID, + task_runner_id TEXT, + lease_until TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + scheduled_at TIMESTAMPTZ, + leased_at TIMESTAMPTZ, + started_at TIMESTAMPTZ, + completed_at TIMESTAMPTZ, + not_before TIMESTAMPTZ, + reason TEXT, + exit_code INTEGER, + duration_ms BIGINT, + created_by TEXT NOT NULL, + metadata JSONB, + CONSTRAINT pk_pack_runs PRIMARY KEY (tenant_id, pack_run_id), + CONSTRAINT uq_pack_runs_idempotency UNIQUE (tenant_id, idempotency_key), + CONSTRAINT ck_pack_runs_attempt_positive CHECK (attempt >= 1), + CONSTRAINT ck_pack_runs_max_attempts_positive CHECK (max_attempts >= 1), + CONSTRAINT ck_pack_runs_parameters_digest_hex CHECK (parameters_digest ~ '^[0-9a-f]{64}$') +) PARTITION BY LIST (tenant_id); + +CREATE TABLE pack_runs_default PARTITION OF pack_runs DEFAULT; + +CREATE INDEX ix_pack_runs_status ON pack_runs (tenant_id, status, priority DESC, created_at); +CREATE INDEX ix_pack_runs_pack ON pack_runs (tenant_id, pack_id, status, created_at DESC); +CREATE INDEX ix_pack_runs_not_before ON pack_runs (tenant_id, not_before) WHERE not_before IS NOT 
NULL; +CREATE INDEX ix_pack_runs_lease_until ON pack_runs (tenant_id, lease_until) WHERE status = 'leased' AND lease_until IS NOT NULL; + +-- Pack run logs +CREATE TABLE pack_run_logs ( + log_id UUID NOT NULL, + tenant_id TEXT NOT NULL, + pack_run_id UUID NOT NULL, + sequence BIGINT NOT NULL, + log_level SMALLINT NOT NULL, + source TEXT, + message TEXT NOT NULL, + data JSONB, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT pk_pack_run_logs PRIMARY KEY (tenant_id, pack_run_id, sequence), + CONSTRAINT uq_pack_run_logs_log_id UNIQUE (log_id), + CONSTRAINT fk_pack_run_logs_run FOREIGN KEY (tenant_id, pack_run_id) REFERENCES pack_runs (tenant_id, pack_run_id) +) PARTITION BY LIST (tenant_id); + +CREATE TABLE pack_run_logs_default PARTITION OF pack_run_logs DEFAULT; + +CREATE INDEX ix_pack_run_logs_level ON pack_run_logs (tenant_id, pack_run_id, log_level, sequence); +CREATE INDEX ix_pack_run_logs_created ON pack_run_logs (tenant_id, pack_run_id, created_at); +COMMIT; diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/ControlPlane/OpenApiDocumentsTests.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/ControlPlane/OpenApiDocumentsTests.cs new file mode 100644 index 000000000..a41cf57d7 --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/ControlPlane/OpenApiDocumentsTests.cs @@ -0,0 +1,76 @@ +using System.Text.Json; +using Microsoft.AspNetCore.Http; +using StellaOps.Orchestrator.WebService.Contracts; +using StellaOps.Orchestrator.WebService.Services; + +namespace StellaOps.Orchestrator.Tests.ControlPlane; + +/// +/// Unit coverage for OpenAPI discovery documents and deprecation headers (ORCH-OAS-61/63). 
+/// +public sealed class OpenApiDocumentsTests +{ + [Fact] + public void DiscoveryDocument_ContainsServiceMetadata() + { + var doc = OpenApiDocuments.CreateDiscoveryDocument("1.2.3"); + + Assert.Equal("orchestrator", doc.Service); + Assert.Equal("3.1.0", doc.SpecVersion); + Assert.Equal("1.2.3", doc.Version); + Assert.Equal("/openapi/orchestrator.json", doc.Url); + Assert.Equal("application/json", doc.Format); + Assert.Equal("#/components/schemas/Error", doc.ErrorEnvelopeSchema); + Assert.True(doc.Notifications.ContainsKey("topic")); + } + + [Fact] + public void Specification_IncludesKeyPathsAndIdempotencyHeaders() + { + var spec = OpenApiDocuments.CreateSpecification("1.2.3"); + var json = JsonSerializer.Serialize(spec, OpenApiDocuments.SerializerOptions); + + Assert.Contains("/api/v1/orchestrator/jobs", json); + Assert.DoesNotContain("/.well-known/openapi", json); // spec is per-service + Assert.Contains("Idempotency-Key", json); + Assert.Contains("deprecated", json); + Assert.Contains("error", json); + } + + [Fact] + public void Specification_ExposesPaginationForJobs() + { + var spec = OpenApiDocuments.CreateSpecification("1.2.3"); + var json = JsonSerializer.Serialize(spec, OpenApiDocuments.SerializerOptions); + + Assert.Contains("/api/v1/orchestrator/jobs", json); + Assert.Contains("nextCursor", json); + Assert.Contains("cursor=", json); // RFC 8288 Link header example for SDK paginators + } + + [Fact] + public void Specification_IncludesPackRunScheduleAndRetry() + { + var spec = OpenApiDocuments.CreateSpecification("1.2.3"); + var json = JsonSerializer.Serialize(spec, OpenApiDocuments.SerializerOptions); + + Assert.Contains("/api/v1/orchestrator/pack-runs", json); + Assert.Contains("SchedulePackRunRequest", json); + Assert.Contains("/api/v1/orchestrator/pack-runs/{packRunId}/retry", json); + Assert.Contains("RetryPackRunResponse", json); + } + + [Fact] + public void DeprecationHeaders_AddsStandardMetadata() + { + var context = new DefaultHttpContext(); + + 
DeprecationHeaders.Apply(context.Response, "/api/v1/orchestrator/jobs"); + + var headers = context.Response.Headers; + Assert.Equal("true", headers["Deprecation"].ToString()); + Assert.Contains("alternate", headers["Link"].ToString()); + Assert.False(string.IsNullOrWhiteSpace(headers["Sunset"])); + Assert.Equal("orchestrator:legacy-endpoint", headers["X-StellaOps-Deprecated"].ToString()); + } +} diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/Export/ExportScheduleTests.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/Export/ExportScheduleTests.cs index 147cb4b15..f47eabdbb 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/Export/ExportScheduleTests.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/Export/ExportScheduleTests.cs @@ -571,7 +571,9 @@ public sealed class ExportAlertTests var after = DateTimeOffset.UtcNow; Assert.NotNull(resolved.ResolvedAt); - Assert.InRange(resolved.ResolvedAt.Value, before, after); + var windowStart = before <= after ? before : after; + var windowEnd = before >= after ? 
before : after; + Assert.InRange(resolved.ResolvedAt.Value, windowStart, windowEnd); Assert.Equal("Fixed database connection issue", resolved.ResolutionNotes); Assert.False(resolved.IsActive); } diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/PackRun/PackRunStreamCoordinatorTests.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/PackRun/PackRunStreamCoordinatorTests.cs new file mode 100644 index 000000000..6185ca784 --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Tests/PackRun/PackRunStreamCoordinatorTests.cs @@ -0,0 +1,120 @@ +using System.Text; +using Microsoft.AspNetCore.Http; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Options; +using StellaOps.Orchestrator.Core.Domain; +using StellaOps.Orchestrator.Infrastructure.Repositories; +using StellaOps.Orchestrator.WebService.Streaming; +using PackRunDomain = StellaOps.Orchestrator.Core.Domain.PackRun; + +namespace StellaOps.Orchestrator.Tests.PackRuns; + +public sealed class PackRunStreamCoordinatorTests +{ + [Fact] + public async Task StreamAsync_TerminalRun_WritesInitialHeartbeatAndCompleted() + { + var now = DateTimeOffset.UtcNow; + var packRun = new PackRunDomain( + PackRunId: Guid.NewGuid(), + TenantId: "tenantA", + ProjectId: null, + PackId: "pack.demo", + PackVersion: "1.0.0", + Status: PackRunStatus.Succeeded, + Priority: 0, + Attempt: 1, + MaxAttempts: 3, + Parameters: "{}", + ParametersDigest: new string('a', 64), + IdempotencyKey: "idem-1", + CorrelationId: null, + LeaseId: null, + TaskRunnerId: "runner-1", + LeaseUntil: null, + CreatedAt: now.AddMinutes(-2), + ScheduledAt: now.AddMinutes(-2), + LeasedAt: now.AddMinutes(-1), + StartedAt: now.AddMinutes(-1), + CompletedAt: now, + NotBefore: null, + Reason: null, + ExitCode: 0, + DurationMs: 120_000, + CreatedBy: "tester", + Metadata: null); + + var logRepo = new StubPackRunLogRepository((2, 5)); + var streamOptions = 
Options.Create(new StreamOptions + { + PollInterval = TimeSpan.FromMilliseconds(150), + HeartbeatInterval = TimeSpan.FromMilliseconds(150), + MaxStreamDuration = TimeSpan.FromMinutes(1) + }); + var coordinator = new PackRunStreamCoordinator( + new StubPackRunRepository(packRun), + logRepo, + streamOptions, + TimeProvider.System, + NullLogger.Instance); + + var context = new DefaultHttpContext(); + await using var body = new MemoryStream(); + context.Response.Body = body; + + await coordinator.StreamAsync(context, packRun.TenantId, packRun, CancellationToken.None); + + body.Position = 0; + var payload = Encoding.UTF8.GetString(body.ToArray()); + + Assert.Contains("event: initial", payload); + Assert.Contains("event: heartbeat", payload); + Assert.Contains("event: completed", payload); + } + + private sealed class StubPackRunRepository : IPackRunRepository + { + private readonly PackRunDomain _packRun; + + public StubPackRunRepository(PackRunDomain packRun) + { + _packRun = packRun; + } + + public Task GetByIdAsync(string tenantId, Guid packRunId, CancellationToken cancellationToken) + => Task.FromResult(_packRun); + + public Task GetByIdempotencyKeyAsync(string tenantId, string idempotencyKey, CancellationToken cancellationToken) => Task.FromResult(_packRun); + public Task CreateAsync(PackRunDomain packRun, CancellationToken cancellationToken) => Task.CompletedTask; + public Task UpdateStatusAsync(string tenantId, Guid packRunId, PackRunStatus status, int attempt, Guid? leaseId, string? taskRunnerId, DateTimeOffset? leaseUntil, DateTimeOffset? scheduledAt, DateTimeOffset? leasedAt, DateTimeOffset? startedAt, DateTimeOffset? completedAt, DateTimeOffset? notBefore, string? reason, int? exitCode, long? durationMs, CancellationToken cancellationToken) => Task.CompletedTask; + public Task LeaseNextAsync(string tenantId, string? 
packId, Guid leaseId, string taskRunnerId, DateTimeOffset leaseUntil, CancellationToken cancellationToken) => Task.FromResult(_packRun); + public Task ExtendLeaseAsync(string tenantId, Guid packRunId, Guid leaseId, DateTimeOffset newLeaseUntil, CancellationToken cancellationToken) => Task.FromResult(true); + public Task ReleaseLeaseAsync(string tenantId, Guid packRunId, Guid leaseId, PackRunStatus newStatus, string? reason, CancellationToken cancellationToken) => Task.CompletedTask; + public Task> ListAsync(string tenantId, string? packId, PackRunStatus? status, string? projectId, DateTimeOffset? createdAfter, DateTimeOffset? createdBefore, int limit, int offset, CancellationToken cancellationToken) => Task.FromResult>(new[] { _packRun }); + public Task CountAsync(string tenantId, string? packId, PackRunStatus? status, string? projectId, CancellationToken cancellationToken) => Task.FromResult(1); + public Task> GetExpiredLeasesAsync(DateTimeOffset cutoff, int limit, CancellationToken cancellationToken) => Task.FromResult>(Array.Empty()); + public Task CancelPendingAsync(string tenantId, string? 
packId, string reason, CancellationToken cancellationToken) => Task.FromResult(0); + } + + private sealed class StubPackRunLogRepository : IPackRunLogRepository + { + private readonly (long Count, long Latest) _stats; + + public StubPackRunLogRepository((long Count, long Latest) stats) + { + _stats = stats; + } + + public Task AppendAsync(PackRunLog log, CancellationToken cancellationToken) => Task.CompletedTask; + public Task AppendBatchAsync(IReadOnlyList logs, CancellationToken cancellationToken) => Task.CompletedTask; + public Task GetLogsAsync(string tenantId, Guid packRunId, long afterSequence, int limit, CancellationToken cancellationToken) + => Task.FromResult(new PackRunLogBatch(packRunId, tenantId, afterSequence, new List())); + public Task<(long Count, long LatestSequence)> GetLogStatsAsync(string tenantId, Guid packRunId, CancellationToken cancellationToken) + => Task.FromResult(_stats); + public Task GetLogsByLevelAsync(string tenantId, Guid packRunId, LogLevel minLevel, long afterSequence, int limit, CancellationToken cancellationToken) + => Task.FromResult(new PackRunLogBatch(packRunId, tenantId, afterSequence, new List())); + public Task SearchLogsAsync(string tenantId, Guid packRunId, string pattern, long afterSequence, int limit, CancellationToken cancellationToken) + => Task.FromResult(new PackRunLogBatch(packRunId, tenantId, afterSequence, new List())); + public Task DeleteLogsAsync(string tenantId, Guid packRunId, CancellationToken cancellationToken) => Task.FromResult(0L); + } +} diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/JobContracts.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/JobContracts.cs index 4db7e8a6c..4796c0fee 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/JobContracts.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/JobContracts.cs @@ -6,6 
+6,8 @@ namespace StellaOps.Orchestrator.WebService.Contracts; /// Response representing a job. /// public sealed record JobResponse( + string TenantId, + string? ProjectId, Guid JobId, Guid? RunId, string JobType, @@ -26,6 +28,8 @@ public sealed record JobResponse( string CreatedBy) { public static JobResponse FromDomain(Job job) => new( + job.TenantId, + job.ProjectId, job.JobId, job.RunId, job.JobType, @@ -50,6 +54,8 @@ public sealed record JobResponse( /// Response representing a job with its full payload. /// public sealed record JobDetailResponse( + string TenantId, + string? ProjectId, Guid JobId, Guid? RunId, string JobType, @@ -75,6 +81,8 @@ public sealed record JobDetailResponse( string CreatedBy) { public static JobDetailResponse FromDomain(Job job) => new( + job.TenantId, + job.ProjectId, job.JobId, job.RunId, job.JobType, diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/OpenApiDocuments.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/OpenApiDocuments.cs new file mode 100644 index 000000000..6eb116ec7 --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/OpenApiDocuments.cs @@ -0,0 +1,760 @@ +using System.Reflection; +using System.Text.Json; +using System.Text.Json.Serialization; + +namespace StellaOps.Orchestrator.WebService.Contracts; + +/// +/// Factory for per-service OpenAPI discovery and specification documents. +/// +public static class OpenApiDocuments +{ + public static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web) + { + WriteIndented = true + }; + + /// + /// Return the service build/version string based on the executing assembly. + /// + public static string GetServiceVersion() + => Assembly.GetExecutingAssembly().GetName().Version?.ToString() ?? 
"0.0.0"; + + public static OpenApiDiscoveryDocument CreateDiscoveryDocument(string version) + { + return new OpenApiDiscoveryDocument( + Service: "orchestrator", + SpecVersion: "3.1.0", + Version: version, + Format: "application/json", + Url: "/openapi/orchestrator.json", + ErrorEnvelopeSchema: "#/components/schemas/Error", + Notifications: new Dictionary + { + ["topic"] = "orchestrator.contracts", + ["event"] = "orchestrator.openapi.updated" + }); + } + + public static OpenApiSpecDocument CreateSpecification(string version) + { + var exampleJob = ExampleJob(); + var exampleJobDetail = ExampleJobDetail(); + var exampleClaimRequest = new + { + workerId = "worker-7f9", + jobType = "sbom.build", + idempotencyKey = "claim-12af", + leaseSeconds = 300, + taskRunnerId = "runner-01" + }; + + var exampleClaimResponse = new + { + jobId = Guid.Parse("11111111-2222-3333-4444-555555555555"), + leaseId = Guid.Parse("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"), + leaseUntil = "2025-11-30T12:05:00Z", + job = exampleJobDetail + }; + + var examplePackRunRequest = new + { + packId = "pack.advisory.sbom", + packVersion = "1.2.3", + parameters = @"{""image"":""registry.example/app:1.0.0""}", + projectId = "proj-17", + idempotencyKey = "packrun-123", + priority = 5, + maxAttempts = 3 + }; + + var examplePackRunResponse = new + { + packRunId = Guid.Parse("99999999-0000-1111-2222-333333333333"), + packId = "pack.advisory.sbom", + packVersion = "1.2.3", + status = "scheduled", + idempotencyKey = "packrun-123", + createdAt = "2025-11-30T12:00:00Z", + wasAlreadyScheduled = false + }; + + var exampleRetryRequest = new + { + parameters = @"{""image"":""registry.example/app:1.0.1""}", + idempotencyKey = "retry-123" + }; + + var exampleRetryResponse = new + { + originalPackRunId = Guid.Parse("99999999-0000-1111-2222-333333333333"), + newPackRunId = Guid.Parse("aaaaaaaa-0000-1111-2222-bbbbbbbbbbbb"), + status = "scheduled", + createdAt = "2025-11-30T12:10:00Z" + }; + + var paths = new Dictionary + { + 
["/api/v1/orchestrator/jobs"] = new + { + get = new + { + summary = "List jobs", + description = "Paginated job listing with deterministic cursor ordering and idempotent retries.", + parameters = new object[] + { + QueryParameter("status", "query", "Job status filter (pending|scheduled|leased|succeeded|failed)", "string", "scheduled"), + QueryParameter("jobType", "query", "Filter by job type", "string", "sbom.build"), + QueryParameter("projectId", "query", "Filter by project identifier", "string", "proj-17"), + QueryParameter("createdAfter", "query", "RFC3339 timestamp for start of window", "string", "2025-11-01T00:00:00Z"), + QueryParameter("createdBefore", "query", "RFC3339 timestamp for end of window", "string", "2025-11-30T00:00:00Z"), + QueryParameter("limit", "query", "Results per page (max 200)", "integer", 50), + QueryParameter("cursor", "query", "Opaque pagination cursor", "string", "c3RhcnQ6NTA=") + }, + responses = new Dictionary + { + ["200"] = new + { + description = "Jobs page", + headers = new Dictionary + { + ["Link"] = new + { + description = "RFC 8288 pagination cursor links", + schema = new { type = "string" }, + example = "; rel=\"next\"" + }, + ["X-StellaOps-Api-Version"] = new + { + description = "Service build version", + schema = new { type = "string" }, + example = version + } + }, + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/JobList" }, + examples = new Dictionary + { + ["default"] = new + { + value = new + { + jobs = new[] { exampleJob }, + nextCursor = "c3RhcnQ6NTA=" + } + } + } + } + } + }, + ["400"] = ErrorResponse("Invalid filter") + } + } + }, + ["/api/v1/orchestrator/jobs/{jobId}"] = new + { + get = new + { + summary = "Get job", + description = "Fetch job metadata by identifier.", + parameters = new object[] + { + RouteParameter("jobId", "Job identifier", "string") + }, + responses = new Dictionary + { + ["200"] = new + { + description = "Job metadata", + content = 
new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/Job" }, + examples = new Dictionary + { + ["default"] = new { value = exampleJob } + } + } + } + }, + ["404"] = ErrorResponse("Not found") + } + } + }, + ["/api/v1/orchestrator/jobs/{jobId}/detail"] = new + { + get = new + { + summary = "Legacy job detail (deprecated)", + description = "Legacy payload-inclusive job detail; prefer GET /api/v1/orchestrator/jobs/{jobId} plus artifact lookup.", + deprecated = true, + parameters = new object[] + { + RouteParameter("jobId", "Job identifier", "string") + }, + responses = new Dictionary + { + ["200"] = new + { + description = "Job detail including payload (deprecated)", + headers = StandardDeprecationHeaders("/api/v1/orchestrator/jobs/{jobId}"), + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/JobDetail" }, + examples = new Dictionary + { + ["legacy"] = new { value = exampleJobDetail } + } + } + } + }, + ["404"] = ErrorResponse("Not found") + } + } + }, + ["/api/v1/orchestrator/jobs/summary"] = new + { + get = new + { + summary = "Legacy job summary (deprecated)", + description = "Legacy summary endpoint; use pagination + counts or analytics feed.", + deprecated = true, + responses = new Dictionary + { + ["200"] = new + { + description = "Summary counts", + headers = StandardDeprecationHeaders("/api/v1/orchestrator/jobs"), + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/JobSummary" }, + examples = new Dictionary + { + ["summary"] = new + { + value = new { totalJobs = 120, pendingJobs = 12, scheduledJobs = 30, leasedJobs = 20, succeededJobs = 45, failedJobs = 8, canceledJobs = 3, timedOutJobs = 2 } + } + } + } + } + } + } + } + }, + ["/api/v1/orchestrator/pack-runs"] = new + { + post = new + { + summary = "Schedule pack run", + description = "Schedule an orchestrated pack run with idempotency and quota 
enforcement.", + requestBody = new + { + required = true, + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/SchedulePackRunRequest" }, + examples = new Dictionary { ["default"] = new { value = examplePackRunRequest } } + } + } + }, + responses = new Dictionary + { + ["201"] = new + { + description = "Pack run scheduled", + headers = new Dictionary + { + ["Location"] = new { description = "Pack run resource URL", schema = new { type = "string" }, example = "/api/v1/orchestrator/pack-runs/99999999-0000-1111-2222-333333333333" } + }, + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/SchedulePackRunResponse" }, + examples = new Dictionary { ["default"] = new { value = examplePackRunResponse } } + } + } + }, + ["429"] = new + { + description = "Quota exceeded", + headers = new Dictionary { ["Retry-After"] = new { description = "Seconds until retry", schema = new { type = "integer" }, example = 60 } }, + content = new Dictionary { ["application/json"] = new { schema = new { @ref = "#/components/schemas/PackRunError" } } } + } + } + } + }, + ["/api/v1/orchestrator/pack-runs/{packRunId}/retry"] = new + { + post = new + { + summary = "Retry failed pack run", + description = "Create a new pack run based on a failed one with optional parameter override.", + parameters = new object[] { RouteParameter("packRunId", "Pack run identifier", "string") }, + requestBody = new + { + required = true, + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/RetryPackRunRequest" }, + examples = new Dictionary { ["default"] = new { value = exampleRetryRequest } } + } + } + }, + responses = new Dictionary + { + ["201"] = new + { + description = "Retry scheduled", + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/RetryPackRunResponse" }, + examples = new 
Dictionary { ["default"] = new { value = exampleRetryResponse } } + } + } + }, + ["404"] = ErrorResponse("Pack run not found"), + ["409"] = new + { + description = "Retry not allowed", + content = new Dictionary { ["application/json"] = new { schema = new { @ref = "#/components/schemas/PackRunError" } } } + } + } + } + }, + ["/api/v1/orchestrator/worker/claim"] = new + { + post = new + { + summary = "Claim next job", + description = "Idempotent worker claim endpoint with optional idempotency key and task runner context.", + parameters = new object[] + { + HeaderParameter("Idempotency-Key", "Optional idempotency key for claim replay safety", "string", "claim-12af") + }, + requestBody = new + { + required = true, + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/WorkerClaimRequest" }, + examples = new Dictionary + { + ["default"] = new { value = exampleClaimRequest } + } + } + } + }, + responses = new Dictionary + { + ["200"] = new + { + description = "Job claim response", + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/WorkerClaimResponse" }, + examples = new Dictionary + { + ["default"] = new { value = exampleClaimResponse } + } + } + } + }, + ["204"] = new { description = "No jobs available" }, + ["400"] = ErrorResponse("Invalid claim request") + } + } + }, + ["/healthz"] = new + { + get = new + { + summary = "Health check", + description = "Basic service health probe.", + responses = new Dictionary + { + ["200"] = new + { + description = "Healthy", + content = new Dictionary + { + ["application/json"] = new + { + examples = new Dictionary + { + ["example"] = new + { + value = new { status = "ok", timestamp = "2025-11-30T00:00:00Z" } + } + } + } + } + } + } + } + } + }; + + var components = new OpenApiComponents( + Schemas: new Dictionary + { + ["Error"] = new + { + type = "object", + properties = new + { + error = new { type = "string" }, + 
detail = new { type = "string" } + }, + required = new[] { "error" } + }, + ["Job"] = new + { + type = "object", + properties = new + { + jobId = new { type = "string", format = "uuid" }, + runId = new { type = "string", format = "uuid", nullable = true }, + jobType = new { type = "string" }, + status = new { type = "string" }, + priority = new { type = "integer" }, + attempt = new { type = "integer" }, + maxAttempts = new { type = "integer" }, + correlationId = new { type = "string", nullable = true }, + workerId = new { type = "string", nullable = true }, + taskRunnerId = new { type = "string", nullable = true }, + createdAt = new { type = "string", format = "date-time" }, + scheduledAt = new { type = "string", format = "date-time", nullable = true }, + leasedAt = new { type = "string", format = "date-time", nullable = true }, + completedAt = new { type = "string", format = "date-time", nullable = true }, + notBefore = new { type = "string", format = "date-time", nullable = true }, + reason = new { type = "string", nullable = true }, + replayOf = new { type = "string", format = "uuid", nullable = true }, + createdBy = new { type = "string" } + }, + required = new[] { "jobId", "jobType", "status", "priority", "attempt", "maxAttempts", "createdAt", "createdBy" } + }, + ["JobDetail"] = new + { + allOf = new object[] + { + new { @ref = "#/components/schemas/Job" }, + new + { + type = "object", + properties = new + { + payloadDigest = new { type = "string" }, + payload = new { type = "string" }, + idempotencyKey = new { type = "string" }, + leaseId = new { type = "string", format = "uuid", nullable = true }, + leaseUntil = new { type = "string", format = "date-time", nullable = true } + } + } + } + }, + ["JobList"] = new + { + type = "object", + properties = new + { + jobs = new + { + type = "array", + items = new { @ref = "#/components/schemas/Job" } + }, + nextCursor = new { type = "string", nullable = true } + }, + required = new[] { "jobs" } + }, + ["JobSummary"] 
= new + { + type = "object", + properties = new + { + totalJobs = new { type = "integer" }, + pendingJobs = new { type = "integer" }, + scheduledJobs = new { type = "integer" }, + leasedJobs = new { type = "integer" }, + succeededJobs = new { type = "integer" }, + failedJobs = new { type = "integer" }, + canceledJobs = new { type = "integer" }, + timedOutJobs = new { type = "integer" } + } + }, + ["WorkerClaimRequest"] = new + { + type = "object", + properties = new + { + workerId = new { type = "string" }, + jobType = new { type = "string" }, + idempotencyKey = new { type = "string", nullable = true }, + leaseSeconds = new { type = "integer", nullable = true }, + taskRunnerId = new { type = "string", nullable = true } + }, + required = new[] { "workerId" } + }, + ["WorkerClaimResponse"] = new + { + type = "object", + properties = new + { + jobId = new { type = "string", format = "uuid" }, + leaseId = new { type = "string", format = "uuid" }, + leaseUntil = new { type = "string", format = "date-time" }, + job = new { @ref = "#/components/schemas/JobDetail" } + }, + required = new[] { "jobId", "leaseId", "leaseUntil", "job" } + }, + ["SchedulePackRunRequest"] = new + { + type = "object", + properties = new + { + packId = new { type = "string" }, + packVersion = new { type = "string" }, + parameters = new { type = "string", nullable = true }, + projectId = new { type = "string" }, + idempotencyKey = new { type = "string", nullable = true }, + correlationId = new { type = "string", nullable = true }, + priority = new { type = "integer", nullable = true }, + maxAttempts = new { type = "integer", nullable = true }, + metadata = new { type = "string", nullable = true } + }, + required = new[] { "packId", "packVersion", "projectId" } + }, + ["SchedulePackRunResponse"] = new + { + type = "object", + properties = new + { + packRunId = new { type = "string", format = "uuid" }, + packId = new { type = "string" }, + packVersion = new { type = "string" }, + status = new { 
type = "string" }, + idempotencyKey = new { type = "string" }, + createdAt = new { type = "string", format = "date-time" }, + wasAlreadyScheduled = new { type = "boolean" } + }, + required = new[] { "packRunId", "packId", "packVersion", "status", "createdAt", "wasAlreadyScheduled" } + }, + ["RetryPackRunRequest"] = new + { + type = "object", + properties = new + { + parameters = new { type = "string", nullable = true }, + idempotencyKey = new { type = "string", nullable = true } + } + }, + ["RetryPackRunResponse"] = new + { + type = "object", + properties = new + { + originalPackRunId = new { type = "string", format = "uuid" }, + newPackRunId = new { type = "string", format = "uuid" }, + status = new { type = "string" }, + createdAt = new { type = "string", format = "date-time" } + }, + required = new[] { "originalPackRunId", "newPackRunId", "status", "createdAt" } + }, + ["PackRunError"] = new + { + type = "object", + properties = new + { + code = new { type = "string" }, + message = new { type = "string" }, + packRunId = new { type = "string", format = "uuid", nullable = true }, + retryAfterSeconds = new { type = "integer", nullable = true } + }, + required = new[] { "code", "message" } + } + }, + Headers: new Dictionary + { + ["Deprecation"] = new { description = "RFC 8594 deprecation marker", schema = new { type = "string" }, example = "true" }, + ["Sunset"] = new { description = "Target removal date", schema = new { type = "string" }, example = "Tue, 31 Mar 2026 00:00:00 GMT" }, + ["Link"] = new { description = "Alternate endpoint for deprecated operation", schema = new { type = "string" } } + }); + + return new OpenApiSpecDocument( + OpenApi: "3.1.0", + Info: new OpenApiInfo("StellaOps Orchestrator API", version, "Scheduling and automation control plane APIs with pagination, idempotency, and error envelopes."), + Paths: paths, + Components: components, + Servers: new List + { + new { url = "https://api.stella-ops.local" }, + new { url = 
"http://localhost:5201" } + }); + + // Local helper functions keep the anonymous object creation terse. + static object QueryParameter(string name, string @in, string description, string type, object? example = null) + { + return new Dictionary + { + ["name"] = name, + ["in"] = @in, + ["description"] = description, + ["required"] = false, + ["schema"] = new { type }, + ["example"] = example + }; + } + + static object RouteParameter(string name, string description, string type) + { + return new Dictionary + { + ["name"] = name, + ["in"] = "path", + ["description"] = description, + ["required"] = true, + ["schema"] = new { type } + }; + } + + static object HeaderParameter(string name, string description, string type, object? example = null) + { + return new Dictionary + { + ["name"] = name, + ["in"] = "header", + ["description"] = description, + ["required"] = false, + ["schema"] = new { type }, + ["example"] = example + }; + } + + static object ErrorResponse(string description) + { + return new + { + description, + content = new Dictionary + { + ["application/json"] = new + { + schema = new { @ref = "#/components/schemas/Error" }, + examples = new Dictionary + { + ["error"] = new { value = new { error = "invalid_request", detail = description } } + } + } + } + }; + } + + static Dictionary StandardDeprecationHeaders(string alternate) + { + return new Dictionary + { + ["Deprecation"] = new { description = "This endpoint is deprecated", schema = new { type = "string" }, example = "true" }, + ["Link"] = new { description = "Alternate endpoint", schema = new { type = "string" }, example = $"<{alternate}>; rel=\"alternate\"" }, + ["Sunset"] = new { description = "Planned removal", schema = new { type = "string" }, example = "Tue, 31 Mar 2026 00:00:00 GMT" } + }; + } + } + + private static object ExampleJob() + { + return new + { + jobId = Guid.Parse("aaaaaaaa-1111-2222-3333-bbbbbbbbbbbb"), + runId = Guid.Parse("cccccccc-1111-2222-3333-dddddddddddd"), + jobType = 
"scan.image", + status = "scheduled", + priority = 5, + attempt = 0, + maxAttempts = 3, + correlationId = "corr-abc", + workerId = (string?)null, + taskRunnerId = "runner-01", + createdAt = "2025-11-30T12:00:00Z", + scheduledAt = "2025-11-30T12:05:00Z", + leasedAt = (string?)null, + completedAt = (string?)null, + notBefore = "2025-11-30T12:04:00Z", + reason = (string?)null, + replayOf = (string?)null, + createdBy = "scheduler" + }; + } + + private static object ExampleJobDetail() + { + return new + { + jobId = Guid.Parse("aaaaaaaa-1111-2222-3333-bbbbbbbbbbbb"), + runId = Guid.Parse("cccccccc-1111-2222-3333-dddddddddddd"), + jobType = "scan.image", + status = "leased", + priority = 5, + attempt = 1, + maxAttempts = 3, + payloadDigest = "sha256:abc123", + payload = "{\"image\":\"alpine:3.18\"}", + idempotencyKey = "claim-12af", + correlationId = "corr-abc", + leaseId = Guid.Parse("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"), + leaseUntil = "2025-11-30T12:05:00Z", + workerId = "worker-7f9", + taskRunnerId = "runner-01", + createdAt = "2025-11-30T12:00:00Z", + scheduledAt = "2025-11-30T12:05:00Z", + leasedAt = "2025-11-30T12:00:15Z", + completedAt = (string?)null, + notBefore = "2025-11-30T12:04:00Z", + reason = (string?)null, + replayOf = (string?)null, + createdBy = "scheduler" + }; + } +} + +public sealed record OpenApiDiscoveryDocument( + [property: JsonPropertyName("service")] string Service, + [property: JsonPropertyName("specVersion")] string SpecVersion, + [property: JsonPropertyName("version")] string Version, + [property: JsonPropertyName("format")] string Format, + [property: JsonPropertyName("url")] string Url, + [property: JsonPropertyName("errorEnvelopeSchema")] string ErrorEnvelopeSchema, + [property: JsonPropertyName("notifications")] IReadOnlyDictionary Notifications); + +public sealed record OpenApiSpecDocument( + [property: JsonPropertyName("openapi")] string OpenApi, + [property: JsonPropertyName("info")] OpenApiInfo Info, + [property: 
JsonPropertyName("paths")] IReadOnlyDictionary Paths, + [property: JsonPropertyName("components")] OpenApiComponents Components, + [property: JsonPropertyName("servers")] IReadOnlyList? Servers = null); + +public sealed record OpenApiInfo( + [property: JsonPropertyName("title")] string Title, + [property: JsonPropertyName("version")] string Version, + [property: JsonPropertyName("description")] string Description); + +public sealed record OpenApiComponents( + [property: JsonPropertyName("schemas")] IReadOnlyDictionary Schemas, + [property: JsonPropertyName("headers")] IReadOnlyDictionary? Headers = null); diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/PackRunContracts.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/PackRunContracts.cs index d97a99b07..530a92752 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/PackRunContracts.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Contracts/PackRunContracts.cs @@ -97,6 +97,24 @@ public sealed record PackRunListResponse( int TotalCount, string? NextCursor); +/// +/// Manifest response summarizing pack run state and log statistics. +/// +public sealed record PackRunManifestResponse( + Guid PackRunId, + string PackId, + string PackVersion, + string Status, + int Attempt, + int MaxAttempts, + DateTimeOffset CreatedAt, + DateTimeOffset? ScheduledAt, + DateTimeOffset? StartedAt, + DateTimeOffset? CompletedAt, + string? 
Reason, + long LogCount, + long LatestSequence); + // ========== Task Runner (Worker) Requests/Responses ========== /// diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/JobEndpoints.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/JobEndpoints.cs index 6d31ee9b0..5219e8446 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/JobEndpoints.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/JobEndpoints.cs @@ -120,6 +120,7 @@ public static class JobEndpoints try { var tenantId = tenantResolver.Resolve(context); + DeprecationHeaders.Apply(context.Response, "/api/v1/orchestrator/jobs/{jobId}"); var job = await repository.GetByIdAsync(tenantId, jobId, cancellationToken).ConfigureAwait(false); if (job is null) @@ -146,6 +147,7 @@ public static class JobEndpoints try { var tenantId = tenantResolver.Resolve(context); + DeprecationHeaders.Apply(context.Response, "/api/v1/orchestrator/jobs"); // Get counts for each status var pending = await repository.CountAsync(tenantId, Core.Domain.JobStatus.Pending, jobType, projectId, cancellationToken).ConfigureAwait(false); diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/OpenApiEndpoints.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/OpenApiEndpoints.cs new file mode 100644 index 000000000..58a07cfaf --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/OpenApiEndpoints.cs @@ -0,0 +1,41 @@ +using StellaOps.Orchestrator.WebService.Contracts; + +namespace StellaOps.Orchestrator.WebService.Endpoints; + +/// +/// OpenAPI discovery and specification endpoints. +/// +public static class OpenApiEndpoints +{ + /// + /// Maps OpenAPI discovery endpoints. 
+ /// + public static IEndpointRouteBuilder MapOpenApiEndpoints(this IEndpointRouteBuilder app) + { + app.MapGet("/.well-known/openapi", (HttpContext context) => + { + var version = OpenApiDocuments.GetServiceVersion(); + var discovery = OpenApiDocuments.CreateDiscoveryDocument(version); + + context.Response.Headers.CacheControl = "private, max-age=300"; + context.Response.Headers.ETag = $"W/\"oas-{version}\""; + context.Response.Headers["X-StellaOps-Service"] = "orchestrator"; + context.Response.Headers["X-StellaOps-Api-Version"] = version; + + return Results.Json(discovery, OpenApiDocuments.SerializerOptions); + }) + .WithName("Orchestrator_OpenApiDiscovery") + .WithTags("OpenAPI"); + + app.MapGet("/openapi/orchestrator.json", () => + { + var version = OpenApiDocuments.GetServiceVersion(); + var spec = OpenApiDocuments.CreateSpecification(version); + return Results.Json(spec, OpenApiDocuments.SerializerOptions); + }) + .WithName("Orchestrator_OpenApiSpec") + .WithTags("OpenAPI"); + + return app; + } +} diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/PackRunEndpoints.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/PackRunEndpoints.cs index 70236a423..bb5343e77 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/PackRunEndpoints.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/PackRunEndpoints.cs @@ -1,3 +1,4 @@ +using System.Globalization; using System.Security.Cryptography; using System.Text; using System.Text.Json; @@ -23,6 +24,11 @@ public static class PackRunEndpoints private const int MaxExtendSeconds = 1800; // 30 minutes private const int DefaultLogLimit = 100; private const int MaxLogLimit = 1000; + private const string PackRunJobType = "pack-run"; + private const int PackRunQuotaMaxActive = 10; + private const int PackRunQuotaMaxPerHour = 200; + private const int 
PackRunQuotaBurst = 20; + private const double PackRunQuotaRefillPerSecond = 1.0; /// /// Maps pack run endpoints to the route builder. @@ -45,6 +51,10 @@ public static class PackRunEndpoints .WithName("Orchestrator_ListPackRuns") .WithDescription("List pack runs with filters"); + group.MapGet("{packRunId:guid}/manifest", GetPackRunManifest) + .WithName("Orchestrator_GetPackRunManifest") + .WithDescription("Get pack run manifest including log stats and status"); + // Task runner (worker) endpoints group.MapPost("claim", ClaimPackRun) .WithName("Orchestrator_ClaimPackRun") @@ -90,6 +100,7 @@ public static class PackRunEndpoints [FromBody] SchedulePackRunRequest request, [FromServices] TenantResolver tenantResolver, [FromServices] IPackRunRepository packRunRepository, + [FromServices] IQuotaRepository quotaRepository, [FromServices] IEventPublisher eventPublisher, [FromServices] TimeProvider timeProvider, CancellationToken cancellationToken) @@ -107,6 +118,12 @@ public static class PackRunEndpoints "invalid_request", "PackVersion is required", null, null)); } + if (string.IsNullOrWhiteSpace(request.ProjectId)) + { + return Results.BadRequest(new PackRunErrorResponse( + "invalid_request", "ProjectId is required", null, null)); + } + var tenantId = tenantResolver.Resolve(context); var now = timeProvider.GetUtcNow(); var parameters = request.Parameters ?? "{}"; @@ -132,7 +149,7 @@ public static class PackRunEndpoints var packRun = PackRun.Create( packRunId: packRunId, tenantId: tenantId, - projectId: request.ProjectId, + projectId: request.ProjectId!.Trim(), packId: request.PackId, packVersion: request.PackVersion, parameters: parameters, @@ -145,9 +162,49 @@ public static class PackRunEndpoints metadata: request.Metadata, createdAt: now); + // Enforce pack-run quota + var quotaResult = await TryConsumePackRunQuotaAsync(quotaRepository, tenantId, context.User?.Identity?.Name ?? 
"system", now, cancellationToken); + if (!quotaResult.Allowed) + { + if (quotaResult.RetryAfter.HasValue) + { + context.Response.Headers.RetryAfter = ((int)Math.Ceiling(quotaResult.RetryAfter.Value.TotalSeconds)).ToString(CultureInfo.InvariantCulture); + } + + return Results.Json( + new PackRunErrorResponse( + "quota_exhausted", + "Pack run quota exceeded", + null, + quotaResult.RetryAfter.HasValue + ? (int?)Math.Ceiling(quotaResult.RetryAfter.Value.TotalSeconds) + : null), + statusCode: StatusCodes.Status429TooManyRequests); + } + await packRunRepository.CreateAsync(packRun, cancellationToken); + // Mark as scheduled immediately + await packRunRepository.UpdateStatusAsync( + tenantId, + packRunId, + PackRunStatus.Scheduled, + packRun.Attempt, + null, + null, + null, + now, + null, + null, + null, + null, + null, + null, + null, + cancellationToken); + OrchestratorMetrics.PackRunCreated(tenantId, request.PackId); + OrchestratorMetrics.PackRunScheduled(tenantId, request.PackId); // Publish event var envelope = EventEnvelope.Create( @@ -163,7 +220,7 @@ public static class PackRunEndpoints packRunId, request.PackId, request.PackVersion, - packRun.Status.ToString().ToLowerInvariant(), + PackRunStatus.Scheduled.ToString().ToLowerInvariant(), idempotencyKey, now, WasAlreadyScheduled: false)); @@ -188,6 +245,42 @@ public static class PackRunEndpoints return Results.Ok(PackRunResponse.FromDomain(packRun)); } + private static async Task GetPackRunManifest( + HttpContext context, + [FromRoute] Guid packRunId, + [FromServices] TenantResolver tenantResolver, + [FromServices] IPackRunRepository packRunRepository, + [FromServices] IPackRunLogRepository logRepository, + CancellationToken cancellationToken) + { + var tenantId = tenantResolver.Resolve(context); + var packRun = await packRunRepository.GetByIdAsync(tenantId, packRunId, cancellationToken); + if (packRun is null) + { + return Results.NotFound(new PackRunErrorResponse( + "not_found", $"Pack run {packRunId} not found", 
packRunId, null)); + } + + var (logCount, latestSeq) = await logRepository.GetLogStatsAsync(tenantId, packRunId, cancellationToken); + + var response = new PackRunManifestResponse( + PackRunId: packRun.PackRunId, + PackId: packRun.PackId, + PackVersion: packRun.PackVersion, + Status: packRun.Status.ToString().ToLowerInvariant(), + Attempt: packRun.Attempt, + MaxAttempts: packRun.MaxAttempts, + CreatedAt: packRun.CreatedAt, + ScheduledAt: packRun.ScheduledAt, + StartedAt: packRun.StartedAt, + CompletedAt: packRun.CompletedAt, + Reason: packRun.Reason, + LogCount: logCount, + LatestSequence: latestSeq); + + return Results.Ok(response); + } + private static async Task ListPackRuns( HttpContext context, [FromQuery] string? packId, @@ -403,6 +496,7 @@ public static class PackRunEndpoints [FromServices] TenantResolver tenantResolver, [FromServices] IPackRunRepository packRunRepository, [FromServices] IPackRunLogRepository logRepository, + [FromServices] IQuotaRepository quotaRepository, [FromServices] IArtifactRepository artifactRepository, [FromServices] IEventPublisher eventPublisher, [FromServices] TimeProvider timeProvider, @@ -503,6 +597,8 @@ public static class PackRunEndpoints OrchestratorMetrics.RecordPackRunDuration(tenantId, packRun.PackId, durationSeconds); OrchestratorMetrics.RecordPackRunLogCount(tenantId, packRun.PackId, logCount + 1); + await ReleasePackRunQuotaAsync(quotaRepository, tenantId, cancellationToken); + // Publish event var eventType = request.Success ? 
OrchestratorEventType.PackRunCompleted @@ -664,6 +760,7 @@ public static class PackRunEndpoints [FromServices] TenantResolver tenantResolver, [FromServices] IPackRunRepository packRunRepository, [FromServices] IPackRunLogRepository logRepository, + [FromServices] IQuotaRepository quotaRepository, [FromServices] IEventPublisher eventPublisher, [FromServices] TimeProvider timeProvider, CancellationToken cancellationToken) @@ -709,6 +806,8 @@ public static class PackRunEndpoints OrchestratorMetrics.PackRunCanceled(tenantId, packRun.PackId); + await ReleasePackRunQuotaAsync(quotaRepository, tenantId, cancellationToken); + // Publish event var envelope = EventEnvelope.Create( eventType: OrchestratorEventType.PackRunFailed, // Use Failed for canceled @@ -818,6 +917,102 @@ public static class PackRunEndpoints packRun.Metadata); } + private static async Task<(bool Allowed, TimeSpan? RetryAfter)> TryConsumePackRunQuotaAsync( + IQuotaRepository quotaRepository, + string tenantId, + string actor, + DateTimeOffset now, + CancellationToken cancellationToken) + { + var quota = await quotaRepository.GetByTenantAndJobTypeAsync(tenantId, PackRunJobType, cancellationToken).ConfigureAwait(false) + ?? 
await CreateDefaultPackRunQuotaAsync(quotaRepository, tenantId, actor, now, cancellationToken).ConfigureAwait(false); + + var tokens = Math.Min(quota.BurstCapacity, quota.CurrentTokens + (now - quota.LastRefillAt).TotalSeconds * quota.RefillRate); + + var hourStart = quota.CurrentHourStart; + var hourCount = quota.CurrentHourCount; + if (now - hourStart >= TimeSpan.FromHours(1)) + { + hourStart = now; + hourCount = 0; + } + + if (tokens < 1) + { + var deficitSeconds = (1 - tokens) / quota.RefillRate; + return (false, TimeSpan.FromSeconds(Math.Ceiling(deficitSeconds))); + } + + if (quota.CurrentActive >= quota.MaxActive) + { + return (false, TimeSpan.FromSeconds(5)); + } + + if (hourCount >= quota.MaxPerHour) + { + return (false, TimeSpan.FromMinutes(5)); + } + + tokens -= 1; + hourCount += 1; + + await quotaRepository.UpdateStateAsync( + tenantId, + quota.QuotaId, + currentTokens: Math.Max(0, tokens), + lastRefillAt: now, + currentActive: quota.CurrentActive + 1, + currentHourCount: hourCount, + currentHourStart: hourStart, + updatedBy: actor, + cancellationToken: cancellationToken).ConfigureAwait(false); + + return (true, null); + } + + private static async Task ReleasePackRunQuotaAsync(IQuotaRepository quotaRepository, string tenantId, CancellationToken cancellationToken) + { + var quota = await quotaRepository.GetByTenantAndJobTypeAsync(tenantId, PackRunJobType, cancellationToken).ConfigureAwait(false); + if (quota is null || quota.CurrentActive <= 0) + { + return; + } + + await quotaRepository.DecrementActiveAsync(tenantId, quota.QuotaId, cancellationToken).ConfigureAwait(false); + } + + private static async Task CreateDefaultPackRunQuotaAsync( + IQuotaRepository quotaRepository, + string tenantId, + string actor, + DateTimeOffset now, + CancellationToken cancellationToken) + { + var quota = new Quota( + QuotaId: Guid.NewGuid(), + TenantId: tenantId, + JobType: PackRunJobType, + MaxActive: PackRunQuotaMaxActive, + MaxPerHour: PackRunQuotaMaxPerHour, + 
BurstCapacity: PackRunQuotaBurst, + RefillRate: PackRunQuotaRefillPerSecond, + CurrentTokens: PackRunQuotaBurst, + LastRefillAt: now, + CurrentActive: 0, + CurrentHourCount: 0, + CurrentHourStart: now, + Paused: false, + PauseReason: null, + QuotaTicket: null, + CreatedAt: now, + UpdatedAt: now, + UpdatedBy: actor); + + await quotaRepository.CreateAsync(quota, cancellationToken).ConfigureAwait(false); + OrchestratorMetrics.QuotaCreated(tenantId, PackRunJobType); + return quota; + } + private static string ComputeDigest(string content) { var bytes = Encoding.UTF8.GetBytes(content); diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/StreamEndpoints.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/StreamEndpoints.cs index 71ccf20bf..5ff0232bf 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/StreamEndpoints.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Endpoints/StreamEndpoints.cs @@ -26,6 +26,10 @@ public static class StreamEndpoints .WithName("Orchestrator_StreamRun") .WithDescription("Stream real-time run progress updates via SSE"); + group.MapGet("pack-runs/{packRunId:guid}", StreamPackRun) + .WithName("Orchestrator_StreamPackRun") + .WithDescription("Stream real-time pack run log and status updates via SSE"); + return group; } @@ -100,4 +104,38 @@ public static class StreamEndpoints } } } + + private static async Task StreamPackRun( + HttpContext context, + [FromRoute] Guid packRunId, + [FromServices] TenantResolver tenantResolver, + [FromServices] IPackRunRepository packRunRepository, + [FromServices] IPackRunStreamCoordinator streamCoordinator, + CancellationToken cancellationToken) + { + try + { + var tenantId = tenantResolver.Resolve(context); + var packRun = await packRunRepository.GetByIdAsync(tenantId, packRunId, cancellationToken).ConfigureAwait(false); + if (packRun is null) + { + 
context.Response.StatusCode = StatusCodes.Status404NotFound; + await context.Response.WriteAsJsonAsync(new { error = "Pack run not found" }, cancellationToken).ConfigureAwait(false); + return; + } + + await streamCoordinator.StreamAsync(context, tenantId, packRun, cancellationToken).ConfigureAwait(false); + } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + } + catch (InvalidOperationException ex) + { + if (!context.Response.HasStarted) + { + context.Response.StatusCode = StatusCodes.Status400BadRequest; + await context.Response.WriteAsJsonAsync(new { error = ex.Message }, cancellationToken).ConfigureAwait(false); + } + } + } } diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Program.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Program.cs index a058527aa..f9a453046 100644 --- a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Program.cs +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Program.cs @@ -21,6 +21,7 @@ builder.Services.AddSingleton(TimeProvider.System); builder.Services.Configure(builder.Configuration.GetSection(StreamOptions.SectionName)); builder.Services.AddSingleton(); builder.Services.AddSingleton(); +builder.Services.AddSingleton(); // Register scale metrics and load shedding services builder.Services.AddSingleton(); @@ -34,6 +35,9 @@ if (app.Environment.IsDevelopment()) app.MapOpenApi(); } +// OpenAPI discovery endpoints (available in all environments) +app.MapOpenApiEndpoints(); + // Register health endpoints (replaces simple /healthz and /readyz) app.MapHealthEndpoints(); @@ -45,6 +49,7 @@ app.MapSourceEndpoints(); app.MapRunEndpoints(); app.MapJobEndpoints(); app.MapDagEndpoints(); +app.MapPackRunEndpoints(); // Register streaming endpoints app.MapStreamEndpoints(); diff --git 
a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Services/DeprecationHeaders.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Services/DeprecationHeaders.cs new file mode 100644 index 000000000..32a4f58cb --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Services/DeprecationHeaders.cs @@ -0,0 +1,36 @@ +using System.Globalization; +using Microsoft.AspNetCore.Http; + +namespace StellaOps.Orchestrator.WebService.Services; + +/// +/// Helper for applying HTTP deprecation metadata to legacy endpoints. +/// +public static class DeprecationHeaders +{ + /// + /// Apply standard deprecation headers and alternate link hint to the response. + /// + /// HTTP response to annotate. + /// Alternate endpoint that supersedes the deprecated one. + /// Optional sunset date (UTC). + public static void Apply(HttpResponse response, string alternate, DateTimeOffset? sunset = null) + { + // RFC 8594 recommends HTTP-date for Sunset; default to a near-term horizon to prompt migrations. + var sunsetValue = (sunset ?? 
new DateTimeOffset(2026, 03, 31, 0, 0, 0, TimeSpan.Zero)) + .UtcDateTime + .ToString("r", CultureInfo.InvariantCulture); + + if (!response.Headers.ContainsKey("Deprecation")) + { + response.Headers.Append("Deprecation", "true"); + } + + // Link: <...>; rel="alternate"; title="Replacement" + var linkValue = $"<{alternate}>; rel=\"alternate\"; title=\"Replacement endpoint\""; + response.Headers["Link"] = linkValue; + + response.Headers["Sunset"] = sunsetValue; + response.Headers["X-StellaOps-Deprecated"] = "orchestrator:legacy-endpoint"; + } +} diff --git a/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Streaming/PackRunStreamCoordinator.cs b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Streaming/PackRunStreamCoordinator.cs new file mode 100644 index 000000000..99f5007d5 --- /dev/null +++ b/src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/Streaming/PackRunStreamCoordinator.cs @@ -0,0 +1,200 @@ +using System.Text.Json; +using Microsoft.Extensions.Options; +using StellaOps.Orchestrator.Core.Domain; +using StellaOps.Orchestrator.Infrastructure.Repositories; + +namespace StellaOps.Orchestrator.WebService.Streaming; + +public interface IPackRunStreamCoordinator +{ + Task StreamAsync(HttpContext context, string tenantId, PackRun packRun, CancellationToken cancellationToken); +} + +/// +/// Streams pack run status/log updates over SSE. 
using System.Text.Json;
using Microsoft.Extensions.Options;
using StellaOps.Orchestrator.Core.Domain;
using StellaOps.Orchestrator.Infrastructure.Repositories;

namespace StellaOps.Orchestrator.WebService.Streaming;

public interface IPackRunStreamCoordinator
{
    Task StreamAsync(HttpContext context, string tenantId, PackRun packRun, CancellationToken cancellationToken);
}

/// <summary>
/// Streams pack run status/log updates over SSE: an initial snapshot, periodic heartbeats,
/// incremental log batches, status transitions, and a terminal completed/timeout/notFound event.
/// </summary>
public sealed class PackRunStreamCoordinator : IPackRunStreamCoordinator
{
    private const int DefaultBatchSize = 200;
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web);

    private readonly IPackRunRepository _packRunRepository;
    private readonly IPackRunLogRepository _logRepository;
    private readonly TimeProvider _timeProvider;
    private readonly StreamOptions _options;
    private readonly ILogger<PackRunStreamCoordinator> _logger;

    public PackRunStreamCoordinator(
        IPackRunRepository packRunRepository,
        IPackRunLogRepository logRepository,
        IOptions<StreamOptions> options,
        TimeProvider? timeProvider,
        ILogger<PackRunStreamCoordinator> logger)
    {
        _packRunRepository = packRunRepository ?? throw new ArgumentNullException(nameof(packRunRepository));
        _logRepository = logRepository ?? throw new ArgumentNullException(nameof(logRepository));
        _options = (options ?? throw new ArgumentNullException(nameof(options))).Value.Validate();
        _timeProvider = timeProvider ?? TimeProvider.System;
        _logger = logger ?? throw new ArgumentNullException(nameof(logger));
    }

    public async Task StreamAsync(HttpContext context, string tenantId, PackRun packRun, CancellationToken cancellationToken)
    {
        var response = context.Response;
        SseWriter.ConfigureSseHeaders(response);
        await SseWriter.WriteRetryAsync(response, _options.ReconnectDelay, cancellationToken).ConfigureAwait(false);

        var (logCount, latestSeq) = await _logRepository.GetLogStatsAsync(tenantId, packRun.PackRunId, cancellationToken).ConfigureAwait(false);
        await SseWriter.WriteEventAsync(response, "initial", PackRunSnapshotPayload.From(packRun, logCount, latestSeq), SerializerOptions, cancellationToken).ConfigureAwait(false);
        await SseWriter.WriteEventAsync(response, "heartbeat", HeartbeatPayload.Create(_timeProvider.GetUtcNow()), SerializerOptions, cancellationToken).ConfigureAwait(false);

        if (IsTerminal(packRun.Status))
        {
            await EmitCompletedAsync(response, packRun, logCount, latestSeq, cancellationToken).ConfigureAwait(false);
            return;
        }

        var last = packRun;
        var lastSeq = latestSeq;
        // Cumulative number of logs observed for this run (initial stats + every streamed batch).
        var totalLogs = logCount;
        var start = _timeProvider.GetUtcNow();
        using var poll = new PeriodicTimer(_options.PollInterval);
        using var heartbeat = new PeriodicTimer(_options.HeartbeatInterval);

        // FIX: PeriodicTimer.WaitForNextTickAsync allows only one outstanding wait per timer.
        // Keep the pending wait across loop iterations and re-arm only the timer that fired;
        // recreating both tasks every pass (as before) throws InvalidOperationException on
        // the timer whose previous wait is still in flight.
        Task<bool>? pollTask = null;
        Task<bool>? hbTask = null;

        try
        {
            while (!cancellationToken.IsCancellationRequested)
            {
                if (_timeProvider.GetUtcNow() - start > _options.MaxStreamDuration)
                {
                    await SseWriter.WriteEventAsync(response, "timeout", new { packRunId = last.PackRunId, reason = "Max stream duration reached" }, SerializerOptions, cancellationToken).ConfigureAwait(false);
                    break;
                }

                pollTask ??= poll.WaitForNextTickAsync(cancellationToken).AsTask();
                hbTask ??= heartbeat.WaitForNextTickAsync(cancellationToken).AsTask();
                var completed = await Task.WhenAny(pollTask, hbTask).ConfigureAwait(false);

                if (completed == hbTask)
                {
                    var ticked = await hbTask.ConfigureAwait(false);
                    hbTask = null;
                    if (ticked)
                    {
                        await SseWriter.WriteEventAsync(response, "heartbeat", HeartbeatPayload.Create(_timeProvider.GetUtcNow()), SerializerOptions, cancellationToken).ConfigureAwait(false);
                    }

                    continue;
                }

                var polled = await pollTask.ConfigureAwait(false);
                pollTask = null;
                if (!polled)
                {
                    continue;
                }

                var current = await _packRunRepository.GetByIdAsync(tenantId, last.PackRunId, cancellationToken).ConfigureAwait(false);
                if (current is null)
                {
                    await SseWriter.WriteEventAsync(response, "notFound", new NotFoundPayload(last.PackRunId.ToString(), "pack-run"), SerializerOptions, cancellationToken).ConfigureAwait(false);
                    break;
                }

                // Send any logs appended since the last observed sequence number.
                var batch = await _logRepository.GetLogsAsync(tenantId, current.PackRunId, lastSeq, DefaultBatchSize, cancellationToken).ConfigureAwait(false);
                if (batch.Logs.Count > 0)
                {
                    lastSeq = batch.Logs[^1].Sequence;
                    totalLogs += batch.Logs.Count;
                    await SseWriter.WriteEventAsync(response, "logs", batch.Logs.Select(PackRunLogPayload.FromDomain), SerializerOptions, cancellationToken).ConfigureAwait(false);
                }

                if (HasStatusChanged(last, current))
                {
                    // FIX: report the cumulative log count; the previous code sent only the size
                    // of the most recent batch, understating LogCount in snapshots.
                    await SseWriter.WriteEventAsync(response, "statusChanged", PackRunSnapshotPayload.From(current, totalLogs, lastSeq), SerializerOptions, cancellationToken).ConfigureAwait(false);
                    last = current;

                    if (IsTerminal(current.Status))
                    {
                        await EmitCompletedAsync(response, current, totalLogs, lastSeq, cancellationToken).ConfigureAwait(false);
                        break;
                    }
                }
            }
        }
        catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
        {
            // Client disconnect or server shutdown: normal termination for a long-lived stream.
            _logger.LogDebug("Pack run stream cancelled for {PackRunId}.", last.PackRunId);
        }
    }

    // A "status change" is any transition of status, attempt counter, or lease ownership.
    private static bool HasStatusChanged(PackRun previous, PackRun current)
    {
        return previous.Status != current.Status || previous.Attempt != current.Attempt || previous.LeaseId != current.LeaseId;
    }

    private async Task EmitCompletedAsync(HttpResponse response, PackRun packRun, long logCount, long latestSequence, CancellationToken cancellationToken)
    {
        // Prefer started→completed duration; fall back to created→completed; 0 when never completed.
        var durationSeconds = packRun.CompletedAt.HasValue && packRun.StartedAt.HasValue
            ? (packRun.CompletedAt.Value - packRun.StartedAt.Value).TotalSeconds
            : packRun.CompletedAt.HasValue ? (packRun.CompletedAt.Value - packRun.CreatedAt).TotalSeconds : 0;

        var payload = new PackRunCompletedPayload(
            PackRunId: packRun.PackRunId,
            Status: packRun.Status.ToString().ToLowerInvariant(),
            CompletedAt: packRun.CompletedAt ?? _timeProvider.GetUtcNow(),
            DurationSeconds: durationSeconds,
            LogCount: logCount,
            LatestSequence: latestSequence);

        await SseWriter.WriteEventAsync(response, "completed", payload, SerializerOptions, cancellationToken).ConfigureAwait(false);
    }

    private static bool IsTerminal(PackRunStatus status) =>
        status is PackRunStatus.Succeeded or PackRunStatus.Failed or PackRunStatus.Canceled or PackRunStatus.TimedOut;
}

internal sealed record PackRunSnapshotPayload(
    Guid PackRunId,
    string Status,
    string PackId,
    string PackVersion,
    int Attempt,
    int MaxAttempts,
    string? TaskRunnerId,
    Guid? LeaseId,
    DateTimeOffset CreatedAt,
    DateTimeOffset? StartedAt,
    DateTimeOffset? CompletedAt,
    long LogCount,
    long LatestSequence)
{
    public static PackRunSnapshotPayload From(PackRun packRun, long logCount, long latestSequence) => new(
        packRun.PackRunId,
        packRun.Status.ToString().ToLowerInvariant(),
        packRun.PackId,
        packRun.PackVersion,
        packRun.Attempt,
        packRun.MaxAttempts,
        packRun.TaskRunnerId,
        packRun.LeaseId,
        packRun.CreatedAt,
        packRun.StartedAt,
        packRun.CompletedAt,
        logCount,
        latestSequence);
}

internal sealed record PackRunLogPayload(
    long Sequence,
    string Level,
    string Source,
    string Message,
    DateTimeOffset Timestamp,
    string? Data)
{
    public static PackRunLogPayload FromDomain(PackRunLog log) => new(
        log.Sequence,
        log.Level.ToString().ToLowerInvariant(),
        log.Source,
        log.Message,
        log.Timestamp,
        log.Data);
}

internal sealed record PackRunCompletedPayload(
    Guid PackRunId,
    string Status,
    DateTimeOffset CompletedAt,
    double DurationSeconds,
    long LogCount,
    long LatestSequence);
| +| 13 | ORCH-SVC-35-101 | DONE | Export job class registration + quotas and telemetry. | +| 14 | ORCH-SVC-36-101 | DONE | Export distribution + retention lifecycle metadata. | +| 15 | ORCH-SVC-37-101 | DONE | Scheduled exports, pruning, failure alerting. | + +Last synced: 2025-11-30 (UTC). diff --git a/src/Orchestrator/TASKS.md b/src/Orchestrator/TASKS.md new file mode 100644 index 000000000..757351e0d --- /dev/null +++ b/src/Orchestrator/TASKS.md @@ -0,0 +1,26 @@ +# Orchestrator · Sprint Mirrors (0151 / 0152) + +Local status mirror for orchestration sprints to keep doc and code views aligned. Update this alongside the canonical sprint files: +- `docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md` +- `docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md` + +| Sprint | Task ID | Status | Notes | +| --- | --- | --- | --- | +| 0151 | ORCH-OAS-61-001 | DONE | Per-service OpenAPI doc with pagination/idempotency/error envelopes. | +| 0151 | ORCH-OAS-61-002 | DONE | `/.well-known/openapi` discovery and version metadata. | +| 0151 | ORCH-OAS-62-001 | DONE | OpenAPI + SDK smoke tests for pagination and pack-run schedule/retry endpoints. | +| 0151 | ORCH-OAS-63-001 | DONE | Deprecation headers/metadata for legacy job endpoints. | +| 0151 | ORCH-OBS-50-001 | BLOCKED | Waiting on Telemetry Core (Sprint 0174). | +| 0151 | ORCH-OBS-51-001 | BLOCKED | Depends on 50-001 and telemetry schema. | +| 0151 | ORCH-OBS-52-001 | BLOCKED | Needs event schema from Sprint 0150.A. | +| 0151 | ORCH-OBS-53-001 | BLOCKED | Evidence Locker capsule inputs not frozen. | +| 0151 | ORCH-OBS-54-001 | BLOCKED | Provenance attestations depend on 53-001. | +| 0151 | ORCH-OBS-55-001 | BLOCKED | Incident-mode hooks depend on 54-001. | +| 0151 | ORCH-AIRGAP-56-001 | BLOCKED | Await AirGap staleness contracts (Sprint 0120.A). | +| 0151 | ORCH-AIRGAP-56-002 | BLOCKED | Await upstream 56-001. | +| 0151 | ORCH-AIRGAP-57-001 | BLOCKED | Await upstream 56-002. 
| +| 0151 | ORCH-AIRGAP-58-001 | BLOCKED | Await upstream 57-001. | +| 0151 | ORCH-SVC-32-001 | DONE | Service bootstrap + initial schema/migrations. | +| 0152 | ORCH-SVC-32-002…37-101 | DONE | See `src/Orchestrator/StellaOps.Orchestrator/TASKS.md` for per-task detail. | + +Last synced: 2025-11-30 (UTC). diff --git a/src/TaskRunner/StellaOps.TaskRunner/AGENTS.md b/src/TaskRunner/StellaOps.TaskRunner/AGENTS.md index 30cdd8a08..9ec428151 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/AGENTS.md +++ b/src/TaskRunner/StellaOps.TaskRunner/AGENTS.md @@ -19,10 +19,13 @@ Execute Task Packs safely and deterministically. Provide remote pack execution, ## Required Reading - `docs/modules/platform/architecture.md` - `docs/modules/platform/architecture-overview.md` +- `docs/modules/taskrunner/architecture.md` +- `docs/product-advisories/29-Nov-2025 - Task Pack Orchestration and Automation.md` +- `docs/task-packs/spec.md`, `docs/task-packs/authoring-guide.md`, `docs/task-packs/runbook.md` ## Working Agreement - 1. Update task status to `DOING`/`DONE` in both correspoding sprint file `/docs/implplan/SPRINT_*.md` and the local `TASKS.md` when you start or finish work. - 2. Review this charter and the Required Reading documents before coding; confirm prerequisites are met. -- 3. Keep changes deterministic (stable ordering, timestamps, hashes) and align with offline/air-gap expectations. -- 4. Coordinate doc updates, tests, and cross-guild communication whenever contracts or workflows change. +- 3. Keep changes deterministic (stable ordering, timestamps, hashes) and align with offline/air-gap expectations; enforce plan-hash binding for every run. +- 4. Coordinate doc updates, tests, and cross-guild communication whenever contracts or workflows change; sync sprint Decisions/Risks when advisory-driven changes land. - 5. Revert to `TODO` if you pause the task without shipping changes; leave notes in commit/PR descriptions for context. 
namespace StellaOps.TaskRunner.Core.Execution;

/// <summary>
/// Read-side companion to the artifact uploader: lists the artifacts captured for a pack run.
/// Implementations return records ordered by artifact name (ordinal).
/// </summary>
public interface IPackRunArtifactReader
{
    /// <summary>Lists artifact records for <paramref name="runId"/>; empty when none were captured.</summary>
    Task<IReadOnlyList<PackRunArtifactRecord>> ListAsync(string runId, CancellationToken cancellationToken);
}

/// <summary>Snapshot of a single captured pack-run artifact.</summary>
public sealed record PackRunArtifactRecord(
    string Name,
    string Type,
    string? SourcePath,
    string? StoredPath,
    string Status,
    string? Notes,
    DateTimeOffset CapturedAt,
    string? ExpressionJson = null);

/// <summary>
/// Persists a provenance manifest describing a finished pack run.
/// </summary>
public interface IPackRunProvenanceWriter
{
    Task WriteAsync(PackRunExecutionContext context, PackRunState state, CancellationToken cancellationToken);
}
using StellaOps.TaskRunner.Core.Planning;

namespace StellaOps.TaskRunner.Core.Execution;

/// <summary>
/// Immutable execution context for a pack run: the run identifier, the sealed plan,
/// the request timestamp, and (optionally) the owning tenant.
/// </summary>
public sealed class PackRunExecutionContext
{
    /// <param name="runId">Non-blank run identifier.</param>
    /// <param name="plan">Resolved task pack plan; must not be null.</param>
    /// <param name="requestedAt">When the run was requested.</param>
    /// <param name="tenantId">Optional tenant scope; blank/whitespace collapses to null.</param>
    public PackRunExecutionContext(string runId, TaskPackPlan plan, DateTimeOffset requestedAt, string? tenantId = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(runId);
        ArgumentNullException.ThrowIfNull(plan);

        RunId = runId;
        Plan = plan;
        RequestedAt = requestedAt;
        // Normalize so "no tenant" has exactly one representation (null) and values are trimmed.
        TenantId = string.IsNullOrWhiteSpace(tenantId) ? null : tenantId.Trim();
    }

    public string RunId { get; }

    public TaskPackPlan Plan { get; }

    public DateTimeOffset RequestedAt { get; }

    // Owning tenant, or null when the run is not tenant-scoped.
    // NOTE(review): presumably matches the orchestrator's tenant id format — confirm with callers.
    public string? TenantId { get; }
}
using StellaOps.TaskRunner.Core.Planning;

namespace StellaOps.TaskRunner.Core.Execution;

/// <summary>
/// Builds the immutable provenance manifest emitted when a pack run finishes.
/// </summary>
public static class ProvenanceManifestFactory
{
    /// <summary>
    /// Creates a manifest from the execution context and final run state.
    /// Steps are ordered by step id (ordinal) so the manifest is deterministic.
    /// </summary>
    /// <param name="context">Run context (id, tenant, plan).</param>
    /// <param name="state">Final pack run state.</param>
    /// <param name="completedAt">Completion timestamp stamped into the manifest.</param>
    public static ProvenanceManifest Create(PackRunExecutionContext context, PackRunState state, DateTimeOffset completedAt)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(state);

        var steps = state.Steps.Values
            .OrderBy(step => step.StepId, StringComparer.Ordinal)
            .Select(step => new ProvenanceStep(
                step.StepId,
                step.Kind.ToString(),
                step.Status.ToString(),
                step.Attempts,
                step.LastTransitionAt,
                step.StatusReason))
            .ToList();

        // Outputs keep the plan's declared order (not re-sorted here).
        var outputs = context.Plan.Outputs
            .Select(output => new ProvenanceOutput(output.Name, output.Type))
            .ToList();

        return new ProvenanceManifest(
            context.RunId,
            context.TenantId,
            context.Plan.Hash,
            context.Plan.Metadata.Name,
            context.Plan.Metadata.Version,
            context.Plan.Metadata.Description,
            context.Plan.Metadata.Tags,
            context.RequestedAt,
            state.CreatedAt,
            completedAt,
            steps,
            outputs);
    }
}

/// <summary>Deterministic record of a completed pack run: identity, plan binding, timing, steps, outputs.</summary>
public sealed record ProvenanceManifest(
    string RunId,
    string? TenantId,
    string PlanHash,
    string PackName,
    string PackVersion,
    string? PackDescription,
    IReadOnlyList<string> PackTags,
    DateTimeOffset RequestedAt,
    DateTimeOffset CreatedAt,
    DateTimeOffset CompletedAt,
    IReadOnlyList<ProvenanceStep> Steps,
    IReadOnlyList<ProvenanceOutput> Outputs);

public sealed record ProvenanceStep(
    string Id,
    string Kind,
    string Status,
    int Attempts,
    DateTimeOffset? LastTransitionAt,
    string? StatusReason);

public sealed record ProvenanceOutput(string Name, string Type);
using System.Text.Json;
using StellaOps.TaskRunner.Core.Execution;

namespace StellaOps.TaskRunner.Infrastructure.Execution;

/// <summary>
/// Reads pack-run artifact manifests ("artifact-manifest.json") from the local
/// filesystem layout under <c>{root}/{runId}/</c>.
/// </summary>
public sealed class FilesystemPackRunArtifactReader : IPackRunArtifactReader
{
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web);

    private readonly string rootPath;

    public FilesystemPackRunArtifactReader(string rootPath)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);
        this.rootPath = Path.GetFullPath(rootPath);
    }

    /// <summary>
    /// Lists artifacts recorded in the run's manifest, ordered by name (ordinal);
    /// empty when no manifest exists or it has no outputs.
    /// </summary>
    public async Task<IReadOnlyList<PackRunArtifactRecord>> ListAsync(string runId, CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(runId);

        var manifestPath = Path.Combine(rootPath, Sanitize(runId), "artifact-manifest.json");
        if (!File.Exists(manifestPath))
        {
            return Array.Empty<PackRunArtifactRecord>();
        }

        await using var stream = File.OpenRead(manifestPath);
        var manifest = await JsonSerializer.DeserializeAsync<ArtifactManifest>(stream, SerializerOptions, cancellationToken)
            .ConfigureAwait(false);

        if (manifest is null || manifest.Outputs is null)
        {
            return Array.Empty<PackRunArtifactRecord>();
        }

        return manifest.Outputs
            .OrderBy(output => output.Name, StringComparer.Ordinal)
            .Select(output => new PackRunArtifactRecord(
                output.Name,
                output.Type,
                output.SourcePath,
                output.StoredPath,
                output.Status,
                output.Notes,
                manifest.UploadedAt,
                output.ExpressionJson))
            .ToList();
    }

    // Replaces filename-invalid characters and refuses "", "." and ".." so a hostile
    // run id cannot resolve outside rootPath via Path.Combine.
    private static string Sanitize(string value)
    {
        var safe = value.Trim();
        foreach (var invalid in Path.GetInvalidFileNameChars())
        {
            safe = safe.Replace(invalid, '_');
        }

        // FIX: "." and ".." contain no invalid filename characters, but would escape or
        // alias the run directory; map them to the same fallback as blank input.
        return string.IsNullOrWhiteSpace(safe) || safe is "." or ".." ? "run" : safe;
    }

    private sealed record ArtifactManifest(
        string RunId,
        DateTimeOffset UploadedAt,
        List<ArtifactRecord> Outputs);

    private sealed record ArtifactRecord(
        string Name,
        string Type,
        string? SourcePath,
        string? StoredPath,
        string Status,
        string? Notes,
        string? ExpressionJson);
}
using System.Text.Json;
using StellaOps.TaskRunner.Core.Execution;

namespace StellaOps.TaskRunner.Infrastructure.Execution;

/// <summary>
/// Writes the pack-run provenance manifest as JSON to <c>{root}/provenance/{runId}.json</c>.
/// </summary>
public sealed class FilesystemPackRunProvenanceWriter : IPackRunProvenanceWriter
{
    private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web)
    {
        WriteIndented = false
    };

    private readonly string rootPath;
    private readonly TimeProvider timeProvider;

    public FilesystemPackRunProvenanceWriter(string rootPath, TimeProvider? timeProvider = null)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);

        this.rootPath = Path.GetFullPath(rootPath);
        this.timeProvider = timeProvider ?? TimeProvider.System;
    }

    /// <summary>
    /// Builds the provenance manifest (completion time = "now" from the injected
    /// TimeProvider) and writes it; FileMode.Create truncates any prior manifest.
    /// </summary>
    public async Task WriteAsync(PackRunExecutionContext context, PackRunState state, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(context);
        ArgumentNullException.ThrowIfNull(state);

        var completedAt = timeProvider.GetUtcNow();
        var manifest = ProvenanceManifestFactory.Create(context, state, completedAt);
        var manifestPath = GetPath(context.RunId);

        Directory.CreateDirectory(Path.GetDirectoryName(manifestPath)!);

        await using var stream = File.Open(manifestPath, FileMode.Create, FileAccess.Write, FileShare.None);
        await JsonSerializer.SerializeAsync(stream, manifest, SerializerOptions, cancellationToken).ConfigureAwait(false);
        await stream.FlushAsync(cancellationToken).ConfigureAwait(false);
    }

    private string GetPath(string runId)
    {
        var safe = Sanitize(runId);
        return Path.Combine(rootPath, "provenance", $"{safe}.json");
    }

    // FIX: mirror FilesystemPackRunArtifactReader.Sanitize — strip filename-invalid
    // characters and refuse "", "." and "..". The previous version had no fallback, so
    // a blank run id produced the bare file ".json" and "." / ".." escaped the directory.
    private static string Sanitize(string value)
    {
        var result = value.Trim();
        foreach (var invalid in Path.GetInvalidFileNameChars())
        {
            result = result.Replace(invalid, '_');
        }

        return string.IsNullOrWhiteSpace(result) || result is "." or ".." ? "run" : result;
    }
}
IEnumerable> GetIndexModels() { - var models = new[] - { - new CreateIndexModel( - Builders.IndexKeys - .Ascending(document => document.RunId) - .Ascending(document => document.ApprovalId), - new CreateIndexOptions { Unique = true }), - new CreateIndexModel( - Builders.IndexKeys - .Ascending(document => document.RunId) - .Ascending(document => document.Status)) - }; + yield return new CreateIndexModel( + Builders.IndexKeys + .Ascending(document => document.RunId) + .Ascending(document => document.ApprovalId), + new CreateIndexOptions { Unique = true, Name = "pack_run_approvals_run_approval" }); - target.Indexes.CreateMany(models); + yield return new CreateIndexModel( + Builders.IndexKeys + .Ascending(document => document.RunId) + .Ascending(document => document.Status), + new CreateIndexOptions { Name = "pack_run_approvals_run_status" }); } - private sealed class PackRunApprovalDocument + private static void EnsureIndexes(IMongoCollection target) + => target.Indexes.CreateMany(GetIndexModels()); + + public sealed class PackRunApprovalDocument { [BsonId] public ObjectId Id { get; init; } diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunArtifactReader.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunArtifactReader.cs new file mode 100644 index 000000000..3e9cadc40 --- /dev/null +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunArtifactReader.cs @@ -0,0 +1,42 @@ +using MongoDB.Driver; +using StellaOps.TaskRunner.Core.Configuration; +using StellaOps.TaskRunner.Core.Execution; + +namespace StellaOps.TaskRunner.Infrastructure.Execution; + +public sealed class MongoPackRunArtifactReader : IPackRunArtifactReader +{ + private readonly IMongoCollection collection; + + public MongoPackRunArtifactReader(IMongoDatabase database, TaskRunnerMongoOptions options) + { + ArgumentNullException.ThrowIfNull(database); + 
using MongoDB.Driver;
using StellaOps.TaskRunner.Core.Configuration;
using StellaOps.TaskRunner.Core.Execution;

namespace StellaOps.TaskRunner.Infrastructure.Execution;

/// <summary>
/// Lists pack-run artifacts from the Mongo artifacts collection, ordered by name.
/// Uses the document type published by <see cref="MongoPackRunArtifactUploader"/>.
/// </summary>
public sealed class MongoPackRunArtifactReader : IPackRunArtifactReader
{
    private readonly IMongoCollection<MongoPackRunArtifactUploader.PackRunArtifactDocument> collection;

    public MongoPackRunArtifactReader(IMongoDatabase database, TaskRunnerMongoOptions options)
    {
        ArgumentNullException.ThrowIfNull(database);
        ArgumentNullException.ThrowIfNull(options);

        collection = database.GetCollection<MongoPackRunArtifactUploader.PackRunArtifactDocument>(options.ArtifactsCollection);
    }

    /// <summary>
    /// Returns all artifact documents for <paramref name="runId"/>, sorted by name.
    /// NOTE(review): MongoPackRunProvenanceWriter upserts a "provenance-manifest" document
    /// into the same collection, so it may appear in this listing — confirm intended.
    /// </summary>
    public async Task<IReadOnlyList<PackRunArtifactRecord>> ListAsync(string runId, CancellationToken cancellationToken)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(runId);

        var filter = Builders<MongoPackRunArtifactUploader.PackRunArtifactDocument>.Filter.Eq(doc => doc.RunId, runId);
        var documents = await collection
            .Find(filter)
            .SortBy(doc => doc.Name)
            .ToListAsync(cancellationToken)
            .ConfigureAwait(false);

        return documents
            .Select(doc => new PackRunArtifactRecord(
                doc.Name,
                doc.Type,
                doc.SourcePath,
                doc.StoredPath,
                doc.Status,
                doc.Notes,
                // Mongo stores BSON dates as UTC DateTime; surface as a zero-offset DateTimeOffset.
                new DateTimeOffset(doc.CapturedAt, TimeSpan.Zero),
                doc.Expression?.ToJson()))
            .ToList();
    }
}
"pack_artifacts_run_name" }); - target.Indexes.CreateMany(models); + yield return new CreateIndexModel( + Builders.IndexKeys + .Ascending(document => document.RunId), + new CreateIndexOptions { Name = "pack_artifacts_run" }); } + private static void EnsureIndexes(IMongoCollection target) + => target.Indexes.CreateMany(GetIndexModels()); + public sealed class PackRunArtifactDocument { [BsonId] diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunLogStore.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunLogStore.cs index 2c34b2b0d..445c90b88 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunLogStore.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunLogStore.cs @@ -89,24 +89,24 @@ public sealed class MongoPackRunLogStore : IPackRunLogStore .ConfigureAwait(false); } - private static void EnsureIndexes(IMongoCollection target) + public static IEnumerable> GetIndexModels() { - var models = new[] - { - new CreateIndexModel( - Builders.IndexKeys - .Ascending(document => document.RunId) - .Ascending(document => document.Sequence), - new CreateIndexOptions { Unique = true }), - new CreateIndexModel( - Builders.IndexKeys - .Ascending(document => document.RunId) - .Ascending(document => document.Timestamp)) - }; + yield return new CreateIndexModel( + Builders.IndexKeys + .Ascending(document => document.RunId) + .Ascending(document => document.Sequence), + new CreateIndexOptions { Unique = true, Name = "pack_run_logs_run_sequence" }); - target.Indexes.CreateMany(models); + yield return new CreateIndexModel( + Builders.IndexKeys + .Ascending(document => document.RunId) + .Ascending(document => document.Timestamp), + new CreateIndexOptions { Name = "pack_run_logs_run_timestamp" }); } + private static void EnsureIndexes(IMongoCollection target) + => 
target.Indexes.CreateMany(GetIndexModels()); + public sealed class PackRunLogDocument { [BsonId] diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunProvenanceWriter.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunProvenanceWriter.cs new file mode 100644 index 000000000..0212766a8 --- /dev/null +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunProvenanceWriter.cs @@ -0,0 +1,67 @@ +using System.Text.Json; +using MongoDB.Bson; +using MongoDB.Driver; +using StellaOps.TaskRunner.Core.Configuration; +using StellaOps.TaskRunner.Core.Execution; + +namespace StellaOps.TaskRunner.Infrastructure.Execution; + +public sealed class MongoPackRunProvenanceWriter : IPackRunProvenanceWriter +{ + private static readonly JsonSerializerOptions SerializerOptions = new(JsonSerializerDefaults.Web); + + private readonly IMongoCollection collection; + private readonly TimeProvider timeProvider; + + public MongoPackRunProvenanceWriter(IMongoDatabase database, TaskRunnerMongoOptions options, TimeProvider? timeProvider = null) + { + ArgumentNullException.ThrowIfNull(database); + ArgumentNullException.ThrowIfNull(options); + + collection = database.GetCollection(options.ArtifactsCollection); + this.timeProvider = timeProvider ?? 
TimeProvider.System; + } + + public async Task WriteAsync(PackRunExecutionContext context, PackRunState state, CancellationToken cancellationToken) + { + ArgumentNullException.ThrowIfNull(context); + ArgumentNullException.ThrowIfNull(state); + + var completedAt = timeProvider.GetUtcNow(); + var manifest = ProvenanceManifestFactory.Create(context, state, completedAt); + var manifestJson = JsonSerializer.Serialize(manifest, SerializerOptions); + var manifestDocument = BsonDocument.Parse(manifestJson); + + var document = new ProvenanceDocument + { + RunId = context.RunId, + Name = "provenance-manifest", + Type = "object", + Status = "materialized", + CapturedAt = completedAt.UtcDateTime, + Expression = manifestDocument + }; + + var filter = Builders.Filter.And( + Builders.Filter.Eq(doc => doc.RunId, context.RunId), + Builders.Filter.Eq(doc => doc.Name, document.Name)); + + var options = new ReplaceOptions { IsUpsert = true }; + await collection.ReplaceOneAsync(filter, document, options, cancellationToken).ConfigureAwait(false); + } + + private sealed class ProvenanceDocument + { + public string RunId { get; init; } = default!; + + public string Name { get; init; } = default!; + + public string Type { get; init; } = default!; + + public string Status { get; init; } = default!; + + public DateTime CapturedAt { get; init; } + + public BsonDocument Expression { get; init; } = default!; + } +} diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunStateStore.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunStateStore.cs index 6be5e27e8..5f66e2bc8 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunStateStore.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/MongoPackRunStateStore.cs @@ -62,20 +62,23 @@ public sealed class MongoPackRunStateStore : IPackRunStateStore .ToList(); } 
- private static void EnsureIndexes(IMongoCollection target) + public static IEnumerable> GetIndexModels() { - var models = new[] - { - new CreateIndexModel( - Builders.IndexKeys.Descending(document => document.UpdatedAt)), - new CreateIndexModel( - Builders.IndexKeys.Ascending(document => document.PlanHash)) - }; + yield return new CreateIndexModel( + Builders.IndexKeys.Descending(document => document.UpdatedAt), + new CreateIndexOptions { Name = "pack_runs_updatedAt_desc" }); - target.Indexes.CreateMany(models); + yield return new CreateIndexModel( + Builders.IndexKeys + .Ascending(document => document.TenantId) + .Descending(document => document.UpdatedAt), + new CreateIndexOptions { Name = "pack_runs_tenant_updatedAt_desc", Sparse = true }); } - private sealed class PackRunStateDocument + private static void EnsureIndexes(IMongoCollection target) + => target.Indexes.CreateMany(GetIndexModels()); + + public sealed class PackRunStateDocument { [BsonId] public string RunId { get; init; } = default!; @@ -94,6 +97,8 @@ public sealed class MongoPackRunStateStore : IPackRunStateStore public List Steps { get; init; } = new(); + public string? 
TenantId { get; init; } + public static PackRunStateDocument FromDomain(PackRunState state) { var planDocument = BsonDocument.Parse(JsonSerializer.Serialize(state.Plan, SerializerOptions)); @@ -113,7 +118,8 @@ public sealed class MongoPackRunStateStore : IPackRunStateStore RequestedAt = state.RequestedAt.UtcDateTime, CreatedAt = state.CreatedAt.UtcDateTime, UpdatedAt = state.UpdatedAt.UtcDateTime, - Steps = steps + Steps = steps, + TenantId = state.TenantId }; } @@ -139,11 +145,12 @@ public sealed class MongoPackRunStateStore : IPackRunStateStore new DateTimeOffset(RequestedAt, TimeSpan.Zero), new DateTimeOffset(CreatedAt, TimeSpan.Zero), new DateTimeOffset(UpdatedAt, TimeSpan.Zero), - new ReadOnlyDictionary(stepRecords)); + new ReadOnlyDictionary(stepRecords), + TenantId); } } - private sealed class PackRunStepDocument + public sealed class PackRunStepDocument { public string StepId { get; init; } = default!; diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/PackRunApprovalDecisionService.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/PackRunApprovalDecisionService.cs index 41a0bfbee..c924f9a09 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/PackRunApprovalDecisionService.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Infrastructure/Execution/PackRunApprovalDecisionService.cs @@ -48,6 +48,16 @@ public sealed class PackRunApprovalDecisionService return PackRunApprovalDecisionResult.NotFound; } + if (!string.Equals(state.PlanHash, request.PlanHash, StringComparison.Ordinal)) + { + _logger.LogWarning( + "Approval decision for run {RunId} rejected – plan hash mismatch (expected {Expected}, got {Actual}).", + runId, + state.PlanHash, + request.PlanHash); + return PackRunApprovalDecisionResult.PlanHashMismatch; + } + var requestedAt = state.RequestedAt != default ? 
state.RequestedAt : state.CreatedAt; var coordinator = PackRunApprovalCoordinator.Restore(state.Plan, approvals, requestedAt); @@ -96,6 +106,7 @@ public sealed class PackRunApprovalDecisionService public sealed record PackRunApprovalDecisionRequest( string RunId, string ApprovalId, + string PlanHash, PackRunApprovalDecisionType Decision, string? ActorId, string? Summary); @@ -110,6 +121,7 @@ public enum PackRunApprovalDecisionType public sealed record PackRunApprovalDecisionResult(string Status) { public static PackRunApprovalDecisionResult NotFound { get; } = new("not_found"); + public static PackRunApprovalDecisionResult PlanHashMismatch { get; } = new("plan_hash_mismatch"); public static PackRunApprovalDecisionResult Applied { get; } = new("applied"); public static PackRunApprovalDecisionResult Resumed { get; } = new("resumed"); diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunApprovalDecisionServiceTests.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunApprovalDecisionServiceTests.cs index ab2e196a9..8ddc4b230 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunApprovalDecisionServiceTests.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunApprovalDecisionServiceTests.cs @@ -39,7 +39,7 @@ public sealed class PackRunApprovalDecisionServiceTests NullLogger.Instance); var result = await service.ApplyAsync( - new PackRunApprovalDecisionRequest("run-1", "security-review", PackRunApprovalDecisionType.Approved, "approver@example.com", "LGTM"), + new PackRunApprovalDecisionRequest("run-1", "security-review", plan.Hash, PackRunApprovalDecisionType.Approved, "approver@example.com", "LGTM"), CancellationToken.None); Assert.Equal("resumed", result.Status); @@ -62,13 +62,51 @@ public sealed class PackRunApprovalDecisionServiceTests NullLogger.Instance); var result = await service.ApplyAsync( - new PackRunApprovalDecisionRequest("missing", "approval", 
PackRunApprovalDecisionType.Approved, "actor", null), + new PackRunApprovalDecisionRequest("missing", "approval", "hash", PackRunApprovalDecisionType.Approved, "actor", null), CancellationToken.None); Assert.Equal("not_found", result.Status); Assert.False(scheduler.ScheduledContexts.Any()); } + [Fact] + public async Task ApplyAsync_ReturnsPlanHashMismatchWhenIncorrect() + { + var plan = TestPlanFactory.CreatePlan(); + var state = TestPlanFactory.CreateState("run-1", plan); + var approval = new PackRunApprovalState( + "security-review", + new[] { "Packs.Approve" }, + new[] { "step-a" }, + Array.Empty(), + null, + DateTimeOffset.UtcNow.AddMinutes(-5), + PackRunApprovalStatus.Pending); + + var approvalStore = new InMemoryApprovalStore(new Dictionary> + { + ["run-1"] = new List { approval } + }); + var stateStore = new InMemoryStateStore(new Dictionary + { + ["run-1"] = state + }); + var scheduler = new RecordingScheduler(); + + var service = new PackRunApprovalDecisionService( + approvalStore, + stateStore, + scheduler, + NullLogger.Instance); + + var result = await service.ApplyAsync( + new PackRunApprovalDecisionRequest("run-1", "security-review", "wrong-hash", PackRunApprovalDecisionType.Approved, "actor", null), + CancellationToken.None); + + Assert.Equal("plan_hash_mismatch", result.Status); + Assert.False(scheduler.ScheduledContexts.Any()); + } + private sealed class InMemoryApprovalStore : IPackRunApprovalStore { private readonly Dictionary> _approvals; diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunProvenanceWriterTests.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunProvenanceWriterTests.cs new file mode 100644 index 000000000..b9ffa18d2 --- /dev/null +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/PackRunProvenanceWriterTests.cs @@ -0,0 +1,95 @@ +using System.Text.Json; +using System.Text.Json.Nodes; +using MongoDB.Driver; +using StellaOps.TaskRunner.Core.Execution; +using 
StellaOps.TaskRunner.Core.Execution.Simulation; +using StellaOps.TaskRunner.Core.Planning; +using StellaOps.TaskRunner.Core.TaskPacks; +using StellaOps.TaskRunner.Infrastructure.Execution; +using Xunit; + +namespace StellaOps.TaskRunner.Tests; + +public sealed class PackRunProvenanceWriterTests +{ + [Fact] + public async Task Filesystem_writer_emits_manifest() + { + var (context, state) = CreateRunState(); + var completedAt = new DateTimeOffset(2025, 11, 30, 12, 30, 0, TimeSpan.Zero); + var temp = Directory.CreateTempSubdirectory(); + try + { + var ct = TestContext.Current.CancellationToken; + var writer = new FilesystemPackRunProvenanceWriter(temp.FullName, new FixedTimeProvider(completedAt)); + await writer.WriteAsync(context, state, ct); + + var path = Path.Combine(temp.FullName, "provenance", "run-test.json"); + Assert.True(File.Exists(path)); + + using var document = JsonDocument.Parse(await File.ReadAllTextAsync(path, ct)); + var root = document.RootElement; + Assert.Equal("run-test", root.GetProperty("runId").GetString()); + Assert.Equal("tenant-alpha", root.GetProperty("tenantId").GetString()); + Assert.Equal(context.Plan.Hash, root.GetProperty("planHash").GetString()); + Assert.Equal(completedAt, root.GetProperty("completedAt").GetDateTimeOffset()); + } + finally + { + temp.Delete(recursive: true); + } + } + + [Fact] + public async Task Mongo_writer_upserts_manifest() + { + await using var mongo = MongoTaskRunnerTestContext.Create(); + var (context, state) = CreateRunState(); + var completedAt = new DateTimeOffset(2025, 11, 30, 12, 0, 0, TimeSpan.Zero); + var ct = TestContext.Current.CancellationToken; + + var options = mongo.CreateMongoOptions(); + var writer = new MongoPackRunProvenanceWriter(mongo.Database, options, new FixedTimeProvider(completedAt)); + await writer.WriteAsync(context, state, ct); + + var collection = mongo.Database.GetCollection(options.ArtifactsCollection); + var saved = await collection + .Find(Builders.Filter.Eq("RunId", 
context.RunId)) + .FirstOrDefaultAsync(ct); + + Assert.NotNull(saved); + var manifest = saved!["Expression"].AsBsonDocument; + Assert.Equal("run-test", manifest["runId"].AsString); + Assert.Equal("tenant-alpha", manifest["tenantId"].AsString); + Assert.Equal(context.Plan.Hash, manifest["planHash"].AsString); + } + + private static (PackRunExecutionContext Context, PackRunState State) CreateRunState() + { + var loader = new TaskPackManifestLoader(); + var planner = new TaskPackPlanner(); + var manifest = loader.Deserialize(TestManifests.Sample); + var plan = planner.Plan(manifest, new Dictionary()).Plan ?? throw new InvalidOperationException("Plan generation failed."); + + var graphBuilder = new PackRunExecutionGraphBuilder(); + var simulationEngine = new PackRunSimulationEngine(); + var graph = graphBuilder.Build(plan); + + var requestedAt = new DateTimeOffset(2025, 11, 30, 10, 0, 0, TimeSpan.Zero); + var context = new PackRunExecutionContext("run-test", plan, requestedAt, "tenant-alpha"); + var state = PackRunStateFactory.CreateInitialState(context, graph, simulationEngine, requestedAt); + return (context, state); + } + + private sealed class FixedTimeProvider : TimeProvider + { + private readonly DateTimeOffset now; + + public FixedTimeProvider(DateTimeOffset now) + { + this.now = now; + } + + public override DateTimeOffset GetUtcNow() => now; + } +} diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/StellaOps.TaskRunner.Tests.csproj b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/StellaOps.TaskRunner.Tests.csproj index 9618701fb..7d6d5dac4 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/StellaOps.TaskRunner.Tests.csproj +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Tests/StellaOps.TaskRunner.Tests.csproj @@ -24,6 +24,10 @@ + + + + diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/Program.cs 
b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/Program.cs index 2c2b84770..e01405f7f 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/Program.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/Program.cs @@ -1,12 +1,15 @@ +using System.Collections.ObjectModel; using System.Globalization; using System.IO; using System.Linq; using System.Text; using System.Text.Json; using System.Text.Json.Nodes; +using MongoDB.Driver; using Microsoft.AspNetCore.Http; using Microsoft.AspNetCore.Mvc; using Microsoft.Extensions.Options; +using StellaOps.TaskRunner.Core.Configuration; using StellaOps.TaskRunner.Core.Execution; using StellaOps.TaskRunner.Core.Execution.Simulation; using StellaOps.TaskRunner.Core.Planning; @@ -22,6 +25,7 @@ builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); +builder.Services.AddEndpointsApiExplorer(); builder.Services.AddStellaOpsTelemetry( builder.Configuration, serviceName: "StellaOps.TaskRunner.WebService", @@ -52,6 +56,7 @@ if (string.Equals(storageOptions.Mode, TaskRunnerStorageModes.Mongo, StringCompa builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); + builder.Services.AddSingleton(); } else { @@ -70,6 +75,11 @@ else var options = sp.GetRequiredService>().Value; return new FilePackRunLogStore(options.LogsPath); }); + builder.Services.AddSingleton(sp => + { + var options = sp.GetRequiredService>().Value; + return new FilesystemPackRunArtifactReader(options.ArtifactsPath); + }); } builder.Services.AddSingleton(sp => @@ -83,10 +93,7 @@ builder.Services.AddOpenApi(); var app = builder.Build(); -if (app.Environment.IsDevelopment()) -{ - app.MapOpenApi(); -} +app.MapOpenApi("/openapi"); app.MapPost("/v1/task-runner/simulations", async ( [FromBody] SimulationRequest request, @@ -126,7 +133,35 @@ app.MapPost("/v1/task-runner/simulations", async ( 
return Results.Ok(response); }).WithName("SimulateTaskPack"); -app.MapPost("/v1/task-runner/runs", async ( +app.MapPost("/v1/task-runner/runs", HandleCreateRun).WithName("CreatePackRun"); +app.MapPost("/api/runs", HandleCreateRun).WithName("CreatePackRunApi"); + +app.MapGet("/v1/task-runner/runs/{runId}", HandleGetRunState).WithName("GetRunState"); +app.MapGet("/api/runs/{runId}", HandleGetRunState).WithName("GetRunStateApi"); + +app.MapGet("/v1/task-runner/runs/{runId}/logs", HandleStreamRunLogs).WithName("StreamRunLogs"); +app.MapGet("/api/runs/{runId}/logs", HandleStreamRunLogs).WithName("StreamRunLogsApi"); + +app.MapGet("/v1/task-runner/runs/{runId}/artifacts", HandleListArtifacts).WithName("ListRunArtifacts"); +app.MapGet("/api/runs/{runId}/artifacts", HandleListArtifacts).WithName("ListRunArtifactsApi"); + +app.MapPost("/v1/task-runner/runs/{runId}/approvals/{approvalId}", HandleApplyApprovalDecision).WithName("ApplyApprovalDecision"); +app.MapPost("/api/runs/{runId}/approvals/{approvalId}", HandleApplyApprovalDecision).WithName("ApplyApprovalDecisionApi"); + +app.MapPost("/v1/task-runner/runs/{runId}/cancel", HandleCancelRun).WithName("CancelRun"); +app.MapPost("/api/runs/{runId}/cancel", HandleCancelRun).WithName("CancelRunApi"); + +app.MapGet("/.well-known/openapi", (HttpResponse response) => +{ + var metadata = OpenApiMetadataFactory.Create("/openapi"); + response.Headers.ETag = metadata.ETag; + response.Headers.Append("X-Signature", metadata.Signature); + return Results.Ok(metadata); +}).WithName("GetOpenApiMetadata"); + +app.MapGet("/", () => Results.Redirect("/openapi")); + +async Task HandleCreateRun( [FromBody] CreateRunRequest request, TaskPackManifestLoader loader, TaskPackPlanner planner, @@ -135,7 +170,7 @@ app.MapPost("/v1/task-runner/runs", async ( IPackRunStateStore stateStore, IPackRunLogStore logStore, IPackRunJobScheduler scheduler, - CancellationToken cancellationToken) => + CancellationToken cancellationToken) { if (request is null || 
string.IsNullOrWhiteSpace(request.Manifest)) { @@ -174,7 +209,7 @@ app.MapPost("/v1/task-runner/runs", async ( } var requestedAt = DateTimeOffset.UtcNow; - var context = new PackRunExecutionContext(runId, plan, requestedAt); + var context = new PackRunExecutionContext(runId, plan, requestedAt, request.TenantId); var graph = executionGraphBuilder.Build(plan); var state = PackRunStateFactory.CreateInitialState(context, graph, simulationEngine, requestedAt); @@ -194,9 +229,15 @@ app.MapPost("/v1/task-runner/runs", async ( return Results.StatusCode(StatusCodes.Status500InternalServerError); } - var metadata = new Dictionary(StringComparer.Ordinal); - metadata["planHash"] = plan.Hash; - metadata["requestedAt"] = requestedAt.ToUniversalTime().ToString("O", CultureInfo.InvariantCulture); + var metadata = new Dictionary(StringComparer.Ordinal) + { + ["planHash"] = plan.Hash, + ["requestedAt"] = requestedAt.ToUniversalTime().ToString("O", CultureInfo.InvariantCulture) + }; + if (!string.IsNullOrWhiteSpace(context.TenantId)) + { + metadata["tenantId"] = context.TenantId!; + } await logStore.AppendAsync( runId, @@ -205,31 +246,31 @@ app.MapPost("/v1/task-runner/runs", async ( var response = RunStateMapper.ToResponse(state); return Results.Created($"/v1/task-runner/runs/{runId}", response); -}).WithName("CreatePackRun"); +} -app.MapGet("/v1/task-runner/runs/{runId}", async ( +async Task HandleGetRunState( string runId, IPackRunStateStore stateStore, - CancellationToken cancellationToken) => + CancellationToken cancellationToken) { - if (string.IsNullOrWhiteSpace(runId)) - { - return Results.BadRequest(new { error = "runId is required." }); - } - - var state = await stateStore.GetAsync(runId, cancellationToken).ConfigureAwait(false); - if (state is null) - { - return Results.NotFound(); - } + if (string.IsNullOrWhiteSpace(runId)) + { + return Results.BadRequest(new { error = "runId is required." 
}); + } + + var state = await stateStore.GetAsync(runId, cancellationToken).ConfigureAwait(false); + if (state is null) + { + return Results.NotFound(); + } return Results.Ok(RunStateMapper.ToResponse(state)); -}).WithName("GetRunState"); +} -app.MapGet("/v1/task-runner/runs/{runId}/logs", async ( +async Task HandleStreamRunLogs( string runId, IPackRunLogStore logStore, - CancellationToken cancellationToken) => + CancellationToken cancellationToken) { if (string.IsNullOrWhiteSpace(runId)) { @@ -248,14 +289,14 @@ app.MapGet("/v1/task-runner/runs/{runId}/logs", async ( await RunLogMapper.WriteAsync(stream, entry, ct).ConfigureAwait(false); } }, "application/x-ndjson"); -}).WithName("StreamRunLogs"); +} -app.MapPost("/v1/task-runner/runs/{runId}/approvals/{approvalId}", async ( +async Task HandleApplyApprovalDecision( string runId, string approvalId, [FromBody] ApprovalDecisionDto request, PackRunApprovalDecisionService decisionService, - CancellationToken cancellationToken) => + CancellationToken cancellationToken) { if (request is null) { @@ -267,8 +308,13 @@ app.MapPost("/v1/task-runner/runs/{runId}/approvals/{approvalId}", async ( return Results.BadRequest(new { error = "Invalid decision. Expected approved, rejected, or expired." }); } + if (string.IsNullOrWhiteSpace(request.PlanHash)) + { + return Results.BadRequest(new { error = "planHash is required." 
}); + } + var result = await decisionService.ApplyAsync( - new PackRunApprovalDecisionRequest(runId, approvalId, decisionType, request.ActorId, request.Summary), + new PackRunApprovalDecisionRequest(runId, approvalId, request.PlanHash, decisionType, request.ActorId, request.Summary), cancellationToken).ConfigureAwait(false); if (ReferenceEquals(result, PackRunApprovalDecisionResult.NotFound)) @@ -276,18 +322,105 @@ app.MapPost("/v1/task-runner/runs/{runId}/approvals/{approvalId}", async ( return Results.NotFound(); } + if (ReferenceEquals(result, PackRunApprovalDecisionResult.PlanHashMismatch)) + { + return Results.Conflict(new { error = "Plan hash mismatch." }); + } + return Results.Ok(new { status = result.Status, resumed = result.ShouldResume }); -}).WithName("ApplyApprovalDecision"); +} -app.MapGet("/", () => Results.Redirect("/openapi")); - -app.Run(); - -static IDictionary? ConvertInputs(JsonObject? node) +async Task HandleListArtifacts( + string runId, + IPackRunStateStore stateStore, + IPackRunArtifactReader artifactReader, + CancellationToken cancellationToken) +{ + if (string.IsNullOrWhiteSpace(runId)) + { + return Results.BadRequest(new { error = "runId is required." }); + } + + var state = await stateStore.GetAsync(runId, cancellationToken).ConfigureAwait(false); + if (state is null) + { + return Results.NotFound(); + } + + var artifacts = await artifactReader.ListAsync(runId, cancellationToken).ConfigureAwait(false); + var response = artifacts + .Select(artifact => new + { + artifact.Name, + artifact.Type, + artifact.SourcePath, + artifact.StoredPath, + artifact.Status, + artifact.Notes, + artifact.CapturedAt, + artifact.ExpressionJson + }) + .ToList(); + + return Results.Ok(response); +} + +async Task HandleCancelRun( + string runId, + IPackRunStateStore stateStore, + IPackRunLogStore logStore, + CancellationToken cancellationToken) +{ + if (string.IsNullOrWhiteSpace(runId)) + { + return Results.BadRequest(new { error = "runId is required." 
}); + } + + var state = await stateStore.GetAsync(runId, cancellationToken).ConfigureAwait(false); + if (state is null) + { + return Results.NotFound(); + } + + var now = DateTimeOffset.UtcNow; + var updatedSteps = state.Steps.Values + .Select(step => step.Status is PackRunStepExecutionStatus.Succeeded or PackRunStepExecutionStatus.Skipped + ? step + : step with + { + Status = PackRunStepExecutionStatus.Skipped, + StatusReason = "cancelled", + LastTransitionAt = now, + NextAttemptAt = null + }) + .ToDictionary(step => step.StepId, step => step, StringComparer.Ordinal); + + var updatedState = state with + { + UpdatedAt = now, + Steps = new ReadOnlyDictionary(updatedSteps) + }; + + await stateStore.SaveAsync(updatedState, cancellationToken).ConfigureAwait(false); + + var metadata = new Dictionary(StringComparer.Ordinal) + { + ["planHash"] = state.PlanHash + }; + + await logStore.AppendAsync(runId, new PackRunLogEntry(now, "warn", "run.cancel-requested", "Run cancellation requested.", null, metadata), cancellationToken).ConfigureAwait(false); + await logStore.AppendAsync(runId, new PackRunLogEntry(DateTimeOffset.UtcNow, "info", "run.cancelled", "Run cancelled; remaining steps marked as skipped.", null, metadata), cancellationToken).ConfigureAwait(false); + + return Results.Accepted($"/v1/task-runner/runs/{runId}", new { status = "cancelled" }); +} + +app.Run(); + +static IDictionary? ConvertInputs(JsonObject? node) { if (node is null) { @@ -303,7 +436,7 @@ static IDictionary? ConvertInputs(JsonObject? node) return dictionary; } -internal sealed record CreateRunRequest(string? RunId, string Manifest, JsonObject? Inputs); +internal sealed record CreateRunRequest(string? RunId, string Manifest, JsonObject? Inputs, string? TenantId); internal sealed record SimulationRequest(string Manifest, JsonObject? Inputs); @@ -359,7 +492,7 @@ internal sealed record RunStateStepResponse( DateTimeOffset? NextAttemptAt, string? 
StatusReason); -internal sealed record ApprovalDecisionDto(string Decision, string? ActorId, string? Summary); +internal sealed record ApprovalDecisionDto(string Decision, string PlanHash, string? ActorId, string? Summary); internal sealed record RunLogEntryResponse( DateTimeOffset Timestamp, diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/StellaOps.TaskRunner.WebService.csproj b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/StellaOps.TaskRunner.WebService.csproj index eb905ae6b..15a8e3018 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/StellaOps.TaskRunner.WebService.csproj +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/StellaOps.TaskRunner.WebService.csproj @@ -33,7 +33,7 @@ - + diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/TaskRunnerServiceOptions.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/TaskRunnerServiceOptions.cs index a59ce3f48..09913bf79 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/TaskRunnerServiceOptions.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.WebService/TaskRunnerServiceOptions.cs @@ -9,6 +9,7 @@ public sealed class TaskRunnerServiceOptions public string QueuePath { get; set; } = Path.Combine(AppContext.BaseDirectory, "queue"); public string ArchivePath { get; set; } = Path.Combine(AppContext.BaseDirectory, "queue", "archive"); public string LogsPath { get; set; } = Path.Combine(AppContext.BaseDirectory, "logs", "runs"); + public string ArtifactsPath { get; set; } = Path.Combine(AppContext.BaseDirectory, "artifacts"); public TaskRunnerStorageOptions Storage { get; set; } = new(); } diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Program.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Program.cs index 7d4bbb55b..9693cb55e 100644 --- 
a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Program.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Program.cs @@ -73,6 +73,13 @@ if (string.Equals(workerStorageOptions.Mode, TaskRunnerStorageModes.Mongo, Strin builder.Services.AddSingleton(); builder.Services.AddSingleton(); builder.Services.AddSingleton(); + builder.Services.AddSingleton(sp => + { + var db = sp.GetRequiredService(); + var options = sp.GetRequiredService(); + var timeProvider = sp.GetRequiredService(); + return new MongoPackRunProvenanceWriter(db, options, timeProvider); + }); } else { @@ -98,6 +105,12 @@ else var logger = sp.GetRequiredService>(); return new FilesystemPackRunArtifactUploader(options.ArtifactsPath, timeProvider, logger); }); + builder.Services.AddSingleton(sp => + { + var options = sp.GetRequiredService>().Value; + var timeProvider = sp.GetRequiredService(); + return new FilesystemPackRunProvenanceWriter(options.ArtifactsPath, timeProvider); + }); } builder.Services.AddHostedService(); diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Services/PackRunWorkerService.cs b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Services/PackRunWorkerService.cs index 699ad4a84..e7fe2bf72 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Services/PackRunWorkerService.cs +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/Services/PackRunWorkerService.cs @@ -24,6 +24,7 @@ public sealed class PackRunWorkerService : BackgroundService private readonly PackRunSimulationEngine simulationEngine; private readonly IPackRunStepExecutor executor; private readonly IPackRunArtifactUploader artifactUploader; + private readonly IPackRunProvenanceWriter provenanceWriter; private readonly IPackRunLogStore logStore; private readonly ILogger logger; private readonly UpDownCounter runningSteps; @@ -36,17 +37,19 @@ public sealed class PackRunWorkerService : BackgroundService 
PackRunSimulationEngine simulationEngine, IPackRunStepExecutor executor, IPackRunArtifactUploader artifactUploader, + IPackRunProvenanceWriter provenanceWriter, IPackRunLogStore logStore, IOptions options, ILogger logger) { this.dispatcher = dispatcher ?? throw new ArgumentNullException(nameof(dispatcher)); this.processor = processor ?? throw new ArgumentNullException(nameof(processor)); - this.stateStore = stateStore ?? throw new ArgumentNullException(nameof(stateStore)); + this.stateStore = stateStore ?? throw new ArgumentNullException(nameof(stateStore)); this.graphBuilder = graphBuilder ?? throw new ArgumentNullException(nameof(graphBuilder)); this.simulationEngine = simulationEngine ?? throw new ArgumentNullException(nameof(simulationEngine)); this.executor = executor ?? throw new ArgumentNullException(nameof(executor)); this.artifactUploader = artifactUploader ?? throw new ArgumentNullException(nameof(artifactUploader)); + this.provenanceWriter = provenanceWriter ?? throw new ArgumentNullException(nameof(provenanceWriter)); this.logStore = logStore ?? throw new ArgumentNullException(nameof(logStore)); this.options = options?.Value ?? throw new ArgumentNullException(nameof(options)); this.logger = logger ?? 
throw new ArgumentNullException(nameof(logger)); @@ -165,6 +168,7 @@ public sealed class PackRunWorkerService : BackgroundService "Run finished successfully.", cancellationToken).ConfigureAwait(false); await artifactUploader.UploadAsync(context, updatedState, context.Plan.Outputs, cancellationToken).ConfigureAwait(false); + await provenanceWriter.WriteAsync(context, updatedState, cancellationToken).ConfigureAwait(false); } else { diff --git a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/StellaOps.TaskRunner.Worker.csproj b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/StellaOps.TaskRunner.Worker.csproj index dad95744f..4d8340547 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/StellaOps.TaskRunner.Worker.csproj +++ b/src/TaskRunner/StellaOps.TaskRunner/StellaOps.TaskRunner.Worker/StellaOps.TaskRunner.Worker.csproj @@ -36,7 +36,7 @@ - + diff --git a/src/TaskRunner/StellaOps.TaskRunner/TASKS.md b/src/TaskRunner/StellaOps.TaskRunner/TASKS.md index 243adda82..206078061 100644 --- a/src/TaskRunner/StellaOps.TaskRunner/TASKS.md +++ b/src/TaskRunner/StellaOps.TaskRunner/TASKS.md @@ -2,16 +2,16 @@ | Task ID | Status | Sprint | Dependency | Notes | | --- | --- | --- | --- | --- | -| TASKRUN-41-001 | BLOCKED | SPRINT_0157_0001_0001_taskrunner_i | — | Blocked: TaskRunner architecture/API contracts and Sprint 120/130/140 inputs not published. | -| TASKRUN-AIRGAP-56-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-41-001 | Sealed-mode plan validation; depends on 41-001. | -| TASKRUN-AIRGAP-56-002 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-56-001 | Bundle ingestion helpers; depends on 56-001. | -| TASKRUN-AIRGAP-57-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-56-002 | Sealed install enforcement; depends on 56-002. | -| TASKRUN-AIRGAP-58-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-57-001 | Evidence bundles for imports; depends on 57-001. 
| +| TASKRUN-41-001 | DONE (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | — | Implemented run API, Mongo/file stores, approvals, provenance manifest per architecture contract. | +| TASKRUN-AIRGAP-56-001 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-41-001 | Sealed-mode plan validation; depends on 41-001. | +| TASKRUN-AIRGAP-56-002 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-56-001 | Bundle ingestion helpers; depends on 56-001. | +| TASKRUN-AIRGAP-57-001 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-56-002 | Sealed install enforcement; depends on 56-002. | +| TASKRUN-AIRGAP-58-001 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-AIRGAP-57-001 | Evidence bundles for imports; depends on 57-001. | | TASKRUN-42-001 | BLOCKED (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | — | Execution engine enhancements (loops/conditionals/maxParallel), simulation mode, policy gate integration. Blocked: loop/conditional semantics and policy-gate evaluation contract not published. | -| TASKRUN-OAS-61-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-41-001 | Document APIs; depends on 41-001. | -| TASKRUN-OAS-61-002 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-61-001 | Well-known OpenAPI endpoint; depends on 61-001. | -| TASKRUN-OAS-62-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-61-002 | SDK examples; depends on 61-002. | -| TASKRUN-OAS-63-001 | TODO | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-62-001 | Deprecation headers/notifications; depends on 62-001. | +| TASKRUN-OAS-61-001 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-41-001 | Document APIs; depends on 41-001. | +| TASKRUN-OAS-61-002 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-61-001 | Well-known OpenAPI endpoint; depends on 61-001. 
| +| TASKRUN-OAS-62-001 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-61-002 | SDK examples; depends on 61-002. | +| TASKRUN-OAS-63-001 | BLOCKED (2025-11-30) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OAS-62-001 | Deprecation headers/notifications; depends on 62-001. | | TASKRUN-OBS-50-001 | DONE (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | — | Telemetry core adoption. | | TASKRUN-OBS-51-001 | DONE (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-50-001 | Metrics/SLOs; depends on 50-001. | | TASKRUN-OBS-52-001 | BLOCKED (2025-11-25) | SPRINT_0157_0001_0001_taskrunner_i | TASKRUN-OBS-51-001 | Timeline events; blocked: schema/evidence-pointer contract not published. | diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineEventStore.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineEventStore.cs new file mode 100644 index 000000000..7d6b7b690 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineEventStore.cs @@ -0,0 +1,16 @@ +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Core.Abstractions; + +/// +/// Persistence contract for timeline event ingestion. +/// Implementations must enforce tenant isolation and idempotency on (tenant_id, event_id). +/// +public interface ITimelineEventStore +{ + /// + /// Inserts the event atomically (headers, payloads, digests). + /// Must be idempotent on (tenant_id, event_id) and return whether a new row was created. 
+ /// + Task InsertAsync(TimelineEventEnvelope envelope, CancellationToken cancellationToken = default); +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineEventSubscriber.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineEventSubscriber.cs new file mode 100644 index 000000000..f5edaf2dd --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineEventSubscriber.cs @@ -0,0 +1,12 @@ +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Core.Abstractions; + +/// +/// Abstraction over transport-specific event subscriptions (NATS/Redis/etc.). +/// Implementations yield tenant-scoped timeline event envelopes in publish order. +/// +public interface ITimelineEventSubscriber +{ + IAsyncEnumerable SubscribeAsync(CancellationToken cancellationToken = default); +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineIngestionService.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineIngestionService.cs new file mode 100644 index 000000000..363810cfe --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineIngestionService.cs @@ -0,0 +1,12 @@ +using StellaOps.TimelineIndexer.Core.Models; +using StellaOps.TimelineIndexer.Core.Models.Results; + +namespace StellaOps.TimelineIndexer.Core.Abstractions; + +/// +/// High-level ingestion service that validates, hashes, and persists timeline events. 
+/// +public interface ITimelineIngestionService +{ + Task IngestAsync(TimelineEventEnvelope envelope, CancellationToken cancellationToken = default); +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineQueryService.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineQueryService.cs new file mode 100644 index 000000000..be07cb20c --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineQueryService.cs @@ -0,0 +1,9 @@ +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Core.Abstractions; + +public interface ITimelineQueryService +{ + Task> QueryAsync(string tenantId, TimelineQueryOptions options, CancellationToken cancellationToken = default); + Task GetAsync(string tenantId, string eventId, CancellationToken cancellationToken = default); +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineQueryStore.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineQueryStore.cs new file mode 100644 index 000000000..37141f005 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Abstractions/ITimelineQueryStore.cs @@ -0,0 +1,9 @@ +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Core.Abstractions; + +public interface ITimelineQueryStore +{ + Task> QueryAsync(string tenantId, TimelineQueryOptions options, CancellationToken cancellationToken); + Task GetAsync(string tenantId, string eventId, CancellationToken cancellationToken); +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Class1.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Class1.cs deleted file mode 100644 index 3d154bf03..000000000 --- 
a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Class1.cs +++ /dev/null @@ -1,6 +0,0 @@ -namespace StellaOps.TimelineIndexer.Core; - -public class Class1 -{ - -} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/Results/TimelineIngestResult.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/Results/TimelineIngestResult.cs new file mode 100644 index 000000000..557651d7e --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/Results/TimelineIngestResult.cs @@ -0,0 +1,3 @@ +namespace StellaOps.TimelineIndexer.Core.Models.Results; + +public sealed record TimelineIngestResult(bool Inserted); diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineEventEnvelope.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineEventEnvelope.cs new file mode 100644 index 000000000..ada7a488e --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineEventEnvelope.cs @@ -0,0 +1,29 @@ +namespace StellaOps.TimelineIndexer.Core.Models; + +/// +/// Canonical ingestion envelope for timeline events. +/// Maps closely to orchestrator/notify envelopes while remaining transport-agnostic. +/// +public sealed class TimelineEventEnvelope +{ + public required string EventId { get; init; } + public required string TenantId { get; init; } + public required string EventType { get; init; } + public required string Source { get; init; } + public required DateTimeOffset OccurredAt { get; init; } + + public string? CorrelationId { get; init; } + public string? TraceId { get; init; } + public string? Actor { get; init; } + public string Severity { get; init; } = "info"; + public string? PayloadHash { get; set; } + public string RawPayloadJson { get; init; } = "{}"; + public string? 
NormalizedPayloadJson { get; init; } + public IDictionary? Attributes { get; init; } + + public string? BundleDigest { get; init; } + public Guid? BundleId { get; init; } + public string? AttestationSubject { get; init; } + public string? AttestationDigest { get; init; } + public string? ManifestUri { get; init; } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineEventView.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineEventView.cs new file mode 100644 index 000000000..13c23b22b --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineEventView.cs @@ -0,0 +1,20 @@ +namespace StellaOps.TimelineIndexer.Core.Models; + +/// +/// Projected timeline event for query responses. +/// +public sealed class TimelineEventView +{ + public required long EventSeq { get; init; } + public required string EventId { get; init; } + public required string TenantId { get; init; } + public required string EventType { get; init; } + public required string Source { get; init; } + public required DateTimeOffset OccurredAt { get; init; } + public required DateTimeOffset ReceivedAt { get; init; } + public string? CorrelationId { get; init; } + public string? TraceId { get; init; } + public string? Actor { get; init; } + public string Severity { get; init; } = "info"; + public string? PayloadHash { get; init; } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineQueryOptions.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineQueryOptions.cs new file mode 100644 index 000000000..8b07a18e3 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Models/TimelineQueryOptions.cs @@ -0,0 +1,15 @@ +namespace StellaOps.TimelineIndexer.Core.Models; + +/// +/// Query filters for timeline listing. 
+/// +public sealed class TimelineQueryOptions +{ + public string? EventType { get; init; } + public string? CorrelationId { get; init; } + public string? TraceId { get; init; } + public string? Severity { get; init; } + public DateTimeOffset? Since { get; init; } + public long? AfterEventSeq { get; init; } + public int Limit { get; init; } = 100; +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Services/TimelineIngestionService.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Services/TimelineIngestionService.cs new file mode 100644 index 000000000..31821ab88 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Services/TimelineIngestionService.cs @@ -0,0 +1,46 @@ +using System.Security.Cryptography; +using System.Text; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; +using StellaOps.TimelineIndexer.Core.Models.Results; + +namespace StellaOps.TimelineIndexer.Core.Services; + +/// +/// Validates and persists timeline events with deterministic hashing. 
+/// +public sealed class TimelineIngestionService(ITimelineEventStore store) : ITimelineIngestionService +{ + public async Task IngestAsync(TimelineEventEnvelope envelope, CancellationToken cancellationToken = default) + { + ArgumentNullException.ThrowIfNull(envelope); + Validate(envelope); + + if (string.IsNullOrWhiteSpace(envelope.PayloadHash)) + { + envelope.PayloadHash = ComputePayloadHash(envelope.RawPayloadJson); + } + + var inserted = await store.InsertAsync(envelope, cancellationToken).ConfigureAwait(false); + return new TimelineIngestResult(inserted); + } + + private static void Validate(TimelineEventEnvelope envelope) + { + if (string.IsNullOrWhiteSpace(envelope.EventId)) + throw new ArgumentException("event_id is required", nameof(envelope)); + if (string.IsNullOrWhiteSpace(envelope.TenantId)) + throw new ArgumentException("tenant_id is required", nameof(envelope)); + if (string.IsNullOrWhiteSpace(envelope.EventType)) + throw new ArgumentException("event_type is required", nameof(envelope)); + if (string.IsNullOrWhiteSpace(envelope.Source)) + throw new ArgumentException("source is required", nameof(envelope)); + } + + internal static string ComputePayloadHash(string payloadJson) + { + var bytes = Encoding.UTF8.GetBytes(payloadJson ?? 
string.Empty); + var hash = SHA256.HashData(bytes); + return $"sha256:{Convert.ToHexString(hash).ToLowerInvariant()}"; + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Services/TimelineQueryService.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Services/TimelineQueryService.cs new file mode 100644 index 000000000..ca0691b93 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Core/Services/TimelineQueryService.cs @@ -0,0 +1,29 @@ +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Core.Services; + +public sealed class TimelineQueryService(ITimelineQueryStore store) : ITimelineQueryService +{ + public Task> QueryAsync(string tenantId, TimelineQueryOptions options, CancellationToken cancellationToken = default) + { + ArgumentException.ThrowIfNullOrWhiteSpace(tenantId); + ArgumentNullException.ThrowIfNull(options); + return store.QueryAsync(tenantId, Normalize(options), cancellationToken); + } + + public Task GetAsync(string tenantId, string eventId, CancellationToken cancellationToken = default) + { + ArgumentException.ThrowIfNullOrWhiteSpace(tenantId); + ArgumentException.ThrowIfNullOrWhiteSpace(eventId); + return store.GetAsync(tenantId, eventId, cancellationToken); + } + + private static TimelineQueryOptions Normalize(TimelineQueryOptions options) + { + var limit = options.Limit; + if (limit <= 0) limit = 100; + if (limit > 500) limit = 500; + return options with { Limit = limit }; + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Class1.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Class1.cs deleted file mode 100644 index c5228dc50..000000000 --- a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Class1.cs +++ /dev/null 
@@ -1,6 +0,0 @@ -namespace StellaOps.TimelineIndexer.Infrastructure; - -public class Class1 -{ - -} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/Migrations/001_initial_schema.sql b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/Migrations/001_initial_schema.sql new file mode 100644 index 000000000..2a89c355d --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/Migrations/001_initial_schema.sql @@ -0,0 +1,111 @@ +-- 001_initial_schema.sql +-- Establishes Timeline Indexer schema, RLS scaffolding, and evidence linkage tables. + +CREATE EXTENSION IF NOT EXISTS pgcrypto; + +CREATE SCHEMA IF NOT EXISTS timeline; +CREATE SCHEMA IF NOT EXISTS timeline_app; + +-- Enforce tenant context for all RLS policies +CREATE OR REPLACE FUNCTION timeline_app.require_current_tenant() +RETURNS text +LANGUAGE plpgsql +AS $$ +DECLARE + tenant_text text; +BEGIN + tenant_text := current_setting('app.current_tenant', true); + IF tenant_text IS NULL OR length(tenant_text) = 0 THEN + RAISE EXCEPTION 'app.current_tenant is not set for the current session'; + END IF; + RETURN tenant_text; +END; +$$; + +-- Severity enum keeps ordering deterministic and compact +DO $$ +BEGIN + CREATE TYPE timeline.event_severity AS ENUM ('info', 'notice', 'warn', 'error', 'critical'); +EXCEPTION + WHEN duplicate_object THEN NULL; +END +$$; + +-- Core event header table (dedupe + ordering) +CREATE TABLE IF NOT EXISTS timeline.timeline_events +( + event_seq bigserial PRIMARY KEY, + event_id text NOT NULL, + tenant_id text NOT NULL, + source text NOT NULL, + event_type text NOT NULL, + occurred_at timestamptz NOT NULL, + received_at timestamptz NOT NULL DEFAULT (NOW() AT TIME ZONE 'UTC'), + correlation_id text, + trace_id text, + actor text, + severity timeline.event_severity NOT NULL DEFAULT 'info', + payload_hash text CHECK (payload_hash IS NULL OR payload_hash 
~ '^sha256:[0-9a-f]{64}$'), + attributes jsonb NOT NULL DEFAULT '{}'::jsonb, + UNIQUE (tenant_id, event_id) +); + +CREATE INDEX IF NOT EXISTS ix_timeline_events_tenant_occurred + ON timeline.timeline_events (tenant_id, occurred_at DESC, event_seq DESC); +CREATE INDEX IF NOT EXISTS ix_timeline_events_type + ON timeline.timeline_events (tenant_id, event_type, occurred_at DESC); + +ALTER TABLE timeline.timeline_events ENABLE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS timeline_events_isolation ON timeline.timeline_events; +CREATE POLICY timeline_events_isolation + ON timeline.timeline_events + USING (tenant_id = timeline_app.require_current_tenant()) + WITH CHECK (tenant_id = timeline_app.require_current_tenant()); + +-- Raw and normalized payloads per event +CREATE TABLE IF NOT EXISTS timeline.timeline_event_details +( + event_id text NOT NULL, + tenant_id text NOT NULL, + envelope_version text NOT NULL, + raw_payload jsonb NOT NULL, + normalized_payload jsonb, + created_at timestamptz NOT NULL DEFAULT (NOW() AT TIME ZONE 'UTC'), + CONSTRAINT fk_event_details FOREIGN KEY (event_id, tenant_id) + REFERENCES timeline.timeline_events (event_id, tenant_id) ON DELETE CASCADE, + PRIMARY KEY (event_id, tenant_id) +); + +ALTER TABLE timeline.timeline_event_details ENABLE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS timeline_event_details_isolation ON timeline.timeline_event_details; +CREATE POLICY timeline_event_details_isolation + ON timeline.timeline_event_details + USING (tenant_id = timeline_app.require_current_tenant()) + WITH CHECK (tenant_id = timeline_app.require_current_tenant()); + +-- Evidence linkage (bundle/attestation manifests) +CREATE TABLE IF NOT EXISTS timeline.timeline_event_digests +( + digest_id uuid PRIMARY KEY DEFAULT gen_random_uuid(), + tenant_id text NOT NULL, + event_id text NOT NULL, + bundle_id uuid, + bundle_digest text, + attestation_subject text, + attestation_digest text, + manifest_uri text, + created_at timestamptz NOT NULL DEFAULT (NOW() AT TIME ZONE 'UTC'), + CONSTRAINT fk_event_digest_event FOREIGN KEY (event_id, tenant_id) + REFERENCES timeline.timeline_events (event_id, tenant_id) ON
DELETE CASCADE, + CONSTRAINT ck_bundle_digest_sha CHECK (bundle_digest IS NULL OR bundle_digest ~ '^sha256:[0-9a-f]{64}$'), + CONSTRAINT ck_attestation_digest_sha CHECK (attestation_digest IS NULL OR attestation_digest ~ '^sha256:[0-9a-f]{64}$'), + CONSTRAINT uq_event_digest UNIQUE (event_id, tenant_id) +); + +CREATE INDEX IF NOT EXISTS ix_timeline_digests_event + ON timeline.timeline_event_digests (tenant_id, event_id); +CREATE INDEX IF NOT EXISTS ix_timeline_digests_bundle + ON timeline.timeline_event_digests (tenant_id, bundle_digest); + +ALTER TABLE timeline.timeline_event_digests ENABLE ROW LEVEL SECURITY; +DROP POLICY IF EXISTS timeline_event_digests_isolation ON timeline.timeline_event_digests; +CREATE POLICY timeline_event_digests_isolation + ON timeline.timeline_event_digests + USING (tenant_id = timeline_app.require_current_tenant()) + WITH CHECK (tenant_id = timeline_app.require_current_tenant()); diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineEventStore.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineEventStore.cs new file mode 100644 index 000000000..398cb42cf --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineEventStore.cs @@ -0,0 +1,112 @@ +using Microsoft.Extensions.Logging; +using Npgsql; +using System.Text.Json; +using StellaOps.Infrastructure.Postgres.Repositories; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Infrastructure.Db; + +/// +/// Postgres-backed implementation of ITimelineEventStore.
+/// +public sealed class TimelineEventStore(TimelineIndexerDataSource dataSource, ILogger logger) + : RepositoryBase(dataSource, logger), ITimelineEventStore +{ + private const string InsertEventSql = """ + INSERT INTO timeline.timeline_events + (event_id, tenant_id, source, event_type, occurred_at, correlation_id, trace_id, actor, severity, payload_hash, attributes) + VALUES + (@event_id, @tenant_id, @source, @event_type, @occurred_at, @correlation_id, @trace_id, @actor, @severity, @payload_hash, @attributes::jsonb) + ON CONFLICT (tenant_id, event_id) DO NOTHING + RETURNING event_seq; + """; + + private const string InsertDetailSql = """ + INSERT INTO timeline.timeline_event_details + (event_id, tenant_id, envelope_version, raw_payload, normalized_payload) + VALUES + (@event_id, @tenant_id, @envelope_version, @raw_payload::jsonb, @normalized_payload::jsonb) + ON CONFLICT (event_id, tenant_id) DO NOTHING; + """; + + private const string InsertDigestSql = """ + INSERT INTO timeline.timeline_event_digests + (tenant_id, event_id, bundle_id, bundle_digest, attestation_subject, attestation_digest, manifest_uri) + VALUES + (@tenant_id, @event_id, @bundle_id, @bundle_digest, @attestation_subject, @attestation_digest, @manifest_uri) + ON CONFLICT (event_id, tenant_id) DO NOTHING; + """; + + public async Task InsertAsync(TimelineEventEnvelope envelope, CancellationToken cancellationToken = default) + { + await using var connection = await DataSource.OpenConnectionAsync(envelope.TenantId, "writer", cancellationToken) + .ConfigureAwait(false); + await using var transaction = await connection.BeginTransactionAsync(cancellationToken).ConfigureAwait(false); + + var inserted = await InsertEventAsync(connection, envelope, cancellationToken).ConfigureAwait(false); + if (!inserted) + { + await transaction.RollbackAsync(cancellationToken).ConfigureAwait(false); + return false; + } + + await InsertDetailAsync(connection, envelope, cancellationToken).ConfigureAwait(false); + await 
InsertDigestAsync(connection, envelope, cancellationToken).ConfigureAwait(false); + await transaction.CommitAsync(cancellationToken).ConfigureAwait(false); + + return true; + } + + private async Task InsertEventAsync(NpgsqlConnection connection, TimelineEventEnvelope envelope, CancellationToken cancellationToken) + { + await using var command = CreateCommand(InsertEventSql, connection); + AddParameter(command, "event_id", envelope.EventId); + AddParameter(command, "tenant_id", envelope.TenantId); + AddParameter(command, "source", envelope.Source); + AddParameter(command, "event_type", envelope.EventType); + AddParameter(command, "occurred_at", envelope.OccurredAt); + AddParameter(command, "correlation_id", envelope.CorrelationId); + AddParameter(command, "trace_id", envelope.TraceId); + AddParameter(command, "actor", envelope.Actor); + AddParameter(command, "severity", envelope.Severity); + AddParameter(command, "payload_hash", envelope.PayloadHash); + AddJsonbParameter(command, "attributes", envelope.Attributes is null + ? 
"{}" + : JsonSerializer.Serialize(envelope.Attributes)); + + var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false); + return result is not null; + } + + private async Task InsertDetailAsync(NpgsqlConnection connection, TimelineEventEnvelope envelope, CancellationToken cancellationToken) + { + await using var command = CreateCommand(InsertDetailSql, connection); + AddParameter(command, "event_id", envelope.EventId); + AddParameter(command, "tenant_id", envelope.TenantId); + AddParameter(command, "envelope_version", "orch.event.v1"); + AddJsonbParameter(command, "raw_payload", envelope.RawPayloadJson); + AddJsonbParameter(command, "normalized_payload", envelope.NormalizedPayloadJson); + + await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } + + private async Task InsertDigestAsync(NpgsqlConnection connection, TimelineEventEnvelope envelope, CancellationToken cancellationToken) + { + if (envelope.BundleDigest is null && envelope.AttestationDigest is null && envelope.ManifestUri is null && envelope.BundleId is null) + { + return; + } + + await using var command = CreateCommand(InsertDigestSql, connection); + AddParameter(command, "tenant_id", envelope.TenantId); + AddParameter(command, "event_id", envelope.EventId); + AddParameter(command, "bundle_id", envelope.BundleId); + AddParameter(command, "bundle_digest", envelope.BundleDigest); + AddParameter(command, "attestation_subject", envelope.AttestationSubject); + AddParameter(command, "attestation_digest", envelope.AttestationDigest); + AddParameter(command, "manifest_uri", envelope.ManifestUri); + + await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineIndexerMigrationRunner.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineIndexerMigrationRunner.cs new file mode 
100644 index 000000000..1b3fb7349 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineIndexerMigrationRunner.cs @@ -0,0 +1,47 @@ +using System.Reflection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using StellaOps.Infrastructure.Postgres.Migrations; +using StellaOps.Infrastructure.Postgres.Options; + +namespace StellaOps.TimelineIndexer.Infrastructure.Db; + +/// +/// Runs embedded SQL migrations for the Timeline Indexer schema. +/// +public sealed class TimelineIndexerMigrationRunner +{ + private readonly PostgresOptions _options; + private readonly ILogger _logger; + + private const string ResourcePrefix = "StellaOps.TimelineIndexer.Infrastructure.Db.Migrations"; + + public TimelineIndexerMigrationRunner( + IOptions options, + ILogger logger) + { + _options = options.Value ?? throw new ArgumentNullException(nameof(options)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + } + + /// + /// Apply all pending migrations from embedded resources. + /// + public Task RunAsync(CancellationToken cancellationToken = default) + { + var schema = string.IsNullOrWhiteSpace(_options.SchemaName) + ? 
TimelineIndexerDataSource.DefaultSchemaName + : _options.SchemaName!; + + var runner = new MigrationRunner( + _options.ConnectionString, + schema, + moduleName: "TimelineIndexer", + _logger); + + return runner.RunFromAssemblyAsync( + assembly: Assembly.GetExecutingAssembly(), + resourcePrefix: ResourcePrefix, + cancellationToken); + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineQueryStore.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineQueryStore.cs new file mode 100644 index 000000000..48fba100d --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Db/TimelineQueryStore.cs @@ -0,0 +1,85 @@ +using Npgsql; +using StellaOps.Infrastructure.Postgres.Repositories; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Infrastructure.Db; + +public sealed class TimelineQueryStore(TimelineIndexerDataSource dataSource, ILogger logger) + : RepositoryBase(dataSource, logger), ITimelineQueryStore +{ + private const string BaseSelect = """ + SELECT event_seq, event_id, tenant_id, event_type, source, occurred_at, received_at, correlation_id, trace_id, actor, severity, payload_hash + FROM timeline.timeline_events + WHERE tenant_id = @tenant_id + """; + + public async Task> QueryAsync(string tenantId, TimelineQueryOptions options, CancellationToken cancellationToken) + { + ArgumentNullException.ThrowIfNull(options); + + var sql = new System.Text.StringBuilder(BaseSelect); + + if (!string.IsNullOrWhiteSpace(options.EventType)) sql.Append(" AND event_type = @event_type"); + if (!string.IsNullOrWhiteSpace(options.CorrelationId)) sql.Append(" AND correlation_id = @correlation_id"); + if (!string.IsNullOrWhiteSpace(options.TraceId)) sql.Append(" AND trace_id = @trace_id"); + if (!string.IsNullOrWhiteSpace(options.Severity)) 
sql.Append(" AND severity = @severity"); + if (options.Since is not null) sql.Append(" AND occurred_at >= @since"); + if (options.AfterEventSeq is not null) sql.Append(" AND event_seq < @after_seq"); + + sql.Append(" ORDER BY occurred_at DESC, event_seq DESC LIMIT @limit"); + + return await QueryAsync( + tenantId, + sql.ToString(), + cmd => + { + AddParameter(cmd, "tenant_id", tenantId); + AddParameter(cmd, "event_type", options.EventType); + AddParameter(cmd, "correlation_id", options.CorrelationId); + AddParameter(cmd, "trace_id", options.TraceId); + AddParameter(cmd, "severity", options.Severity); + AddParameter(cmd, "since", options.Since); + AddParameter(cmd, "after_seq", options.AfterEventSeq); + AddParameter(cmd, "limit", Math.Clamp(options.Limit, 1, 500)); + }, + MapEvent, + cancellationToken).ConfigureAwait(false); + } + + public async Task GetAsync(string tenantId, string eventId, CancellationToken cancellationToken) + { + const string sql = """ + SELECT event_seq, event_id, tenant_id, event_type, source, occurred_at, received_at, correlation_id, trace_id, actor, severity, payload_hash + FROM timeline.timeline_events + WHERE tenant_id = @tenant_id AND event_id = @event_id + """; + + return await QuerySingleOrDefaultAsync( + tenantId, + sql, + cmd => + { + AddParameter(cmd, "tenant_id", tenantId); + AddParameter(cmd, "event_id", eventId); + }, + MapEvent, + cancellationToken).ConfigureAwait(false); + } + + private static TimelineEventView MapEvent(NpgsqlDataReader reader) => new() + { + EventSeq = reader.GetInt64(0), + EventId = reader.GetString(1), + TenantId = reader.GetString(2), + EventType = reader.GetString(3), + Source = reader.GetString(4), + OccurredAt = reader.GetFieldValue(5), + ReceivedAt = reader.GetFieldValue(6), + CorrelationId = GetNullableString(reader, 7), + TraceId = GetNullableString(reader, 8), + Actor = GetNullableString(reader, 9), + Severity = reader.GetString(10), + PayloadHash = GetNullableString(reader, 11) + }; +} diff --git 
a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/DependencyInjection/ServiceCollectionExtensions.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/DependencyInjection/ServiceCollectionExtensions.cs new file mode 100644 index 000000000..540e532e6 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/DependencyInjection/ServiceCollectionExtensions.cs @@ -0,0 +1,30 @@ +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using StellaOps.Infrastructure.Postgres.Options; +using StellaOps.TimelineIndexer.Infrastructure.Db; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Services; + +namespace StellaOps.TimelineIndexer.Infrastructure.DependencyInjection; + +/// +/// Timeline Indexer PostgreSQL service registration helpers. +/// +public static class ServiceCollectionExtensions +{ + private const string DefaultSection = "Postgres:Timeline"; + + /// + /// Registers Postgres options, data source, and migration runner for the Timeline Indexer. 
+ /// + public static IServiceCollection AddTimelineIndexerPostgres( + this IServiceCollection services, + IConfiguration configuration, + string sectionName = DefaultSection) + { + services.Configure(configuration.GetSection(sectionName)); + services.AddSingleton(); + services.AddSingleton(); + services.AddScoped(); + services.AddScoped(); + services.AddScoped - - - - - - - - - - - + + + + + + + + + + + + + net10.0 enable enable preview - true - - - - - + true + + + + + + + + + + + + + + + + diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Subscriptions/NullTimelineEventSubscriber.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Subscriptions/NullTimelineEventSubscriber.cs new file mode 100644 index 000000000..ef6d3bf16 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/Subscriptions/NullTimelineEventSubscriber.cs @@ -0,0 +1,16 @@ +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Infrastructure.Subscriptions; + +/// +/// Default no-op subscriber used until transport bindings are configured. +/// Keeps the ingestion worker running without requiring live brokers. 
+/// +public sealed class NullTimelineEventSubscriber : ITimelineEventSubscriber +{ + public IAsyncEnumerable SubscribeAsync(CancellationToken cancellationToken = default) + { + return AsyncEnumerable.Empty(); + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/TimelineIndexerDataSource.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/TimelineIndexerDataSource.cs new file mode 100644 index 000000000..207def6f7 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Infrastructure/TimelineIndexerDataSource.cs @@ -0,0 +1,32 @@ +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using StellaOps.Infrastructure.Postgres.Connections; +using StellaOps.Infrastructure.Postgres.Options; + +namespace StellaOps.TimelineIndexer.Infrastructure; + +/// +/// PostgreSQL data source for the Timeline Indexer module. +/// Sets the default schema and carries tenant context via app.current_tenant. 
+/// +public sealed class TimelineIndexerDataSource : DataSourceBase +{ + public const string DefaultSchemaName = "timeline"; + + public TimelineIndexerDataSource(IOptions options, ILogger logger) + : base(EnsureSchema(options.Value), logger) + { + } + + protected override string ModuleName => "TimelineIndexer"; + + private static PostgresOptions EnsureSchema(PostgresOptions baseOptions) + { + if (string.IsNullOrWhiteSpace(baseOptions.SchemaName)) + { + baseOptions.SchemaName = DefaultSchemaName; + } + + return baseOptions; + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/InMemoryTimelineEventSubscriber.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/InMemoryTimelineEventSubscriber.cs new file mode 100644 index 000000000..72d39b172 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/InMemoryTimelineEventSubscriber.cs @@ -0,0 +1,41 @@ +using System.Collections.Concurrent; +using System.Threading.Channels; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Tests; + +public sealed class InMemoryTimelineEventSubscriber : ITimelineEventSubscriber +{ + private readonly Channel _channel; + + public InMemoryTimelineEventSubscriber(IEnumerable? 
seed = null) + { + _channel = Channel.CreateUnbounded(new UnboundedChannelOptions + { + SingleReader = false, + SingleWriter = false + }); + + if (seed is not null) + { + foreach (var envelope in seed) + { + _channel.Writer.TryWrite(envelope); + } + _channel.Writer.TryComplete(); + } + } + + public void Enqueue(TimelineEventEnvelope envelope) + { + _channel.Writer.TryWrite(envelope); + } + + public void Complete() => _channel.Writer.TryComplete(); + + public IAsyncEnumerable SubscribeAsync(CancellationToken cancellationToken = default) + { + return _channel.Reader.ReadAllAsync(cancellationToken); + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/StellaOps.TimelineIndexer.Tests.csproj b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/StellaOps.TimelineIndexer.Tests.csproj index bbd3dc83d..28afbc060 100644 --- a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/StellaOps.TimelineIndexer.Tests.csproj +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/StellaOps.TimelineIndexer.Tests.csproj @@ -53,11 +53,11 @@ - - - - - + + + + + @@ -111,25 +111,15 @@ - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineIngestionServiceTests.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineIngestionServiceTests.cs new file mode 100644 index 000000000..71471be79 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineIngestionServiceTests.cs @@ -0,0 +1,65 @@ +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; +using StellaOps.TimelineIndexer.Core.Services; + +namespace StellaOps.TimelineIndexer.Tests; + +public class TimelineIngestionServiceTests +{ + [Fact] + public async Task 
Ingest_ComputesHash_WhenMissing() + { + var store = new FakeStore(); + var service = new TimelineIngestionService(store); + var envelope = new TimelineEventEnvelope + { + EventId = "evt-1", + TenantId = "tenant-a", + EventType = "job.completed", + Source = "orchestrator", + OccurredAt = DateTimeOffset.UtcNow, + RawPayloadJson = """{"ok":true}""" + }; + + var result = await service.IngestAsync(envelope); + + Assert.True(result.Inserted); + Assert.Equal("sha256:8ceeb2a3cfdd5c6c0257df04e3d6b7c29c6a54f9b89e0ee1d3f3f94a639a6a39", store.LastEnvelope?.PayloadHash); + } + + [Fact] + public async Task Ingest_IsIdempotent_OnSameEventId() + { + var store = new FakeStore(); + var service = new TimelineIngestionService(store); + var envelope = new TimelineEventEnvelope + { + EventId = "evt-dup", + TenantId = "tenant-a", + EventType = "job.completed", + Source = "orchestrator", + OccurredAt = DateTimeOffset.UtcNow, + RawPayloadJson = "{}" + }; + + var first = await service.IngestAsync(envelope); + var second = await service.IngestAsync(envelope); + + Assert.True(first.Inserted); + Assert.False(second.Inserted); + } + + private sealed class FakeStore : ITimelineEventStore + { + private readonly HashSet<(string tenant, string id)> _seen = new(); + public TimelineEventEnvelope? 
LastEnvelope { get; private set; } + + public Task InsertAsync(TimelineEventEnvelope envelope, CancellationToken cancellationToken = default) + { + LastEnvelope = envelope; + var key = (envelope.TenantId, envelope.EventId); + var inserted = _seen.Add(key); + return Task.FromResult(inserted); + } + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineIngestionWorkerTests.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineIngestionWorkerTests.cs new file mode 100644 index 000000000..164741fe7 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineIngestionWorkerTests.cs @@ -0,0 +1,65 @@ +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging.Abstractions; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; +using StellaOps.TimelineIndexer.Core.Models.Results; +using StellaOps.TimelineIndexer.Core.Services; +using StellaOps.TimelineIndexer.Worker; + +namespace StellaOps.TimelineIndexer.Tests; + +public sealed class TimelineIngestionWorkerTests +{ + [Fact] + public async Task Worker_Ingests_And_Dedupes() + { + var subscriber = new InMemoryTimelineEventSubscriber(); + var store = new RecordingStore(); + var serviceCollection = new ServiceCollection(); + serviceCollection.AddSingleton(subscriber); + serviceCollection.AddSingleton(store); + serviceCollection.AddSingleton(); + serviceCollection.AddSingleton(); + serviceCollection.AddLogging(builder => builder.AddProvider(NullLoggerProvider.Instance)); + + using var host = serviceCollection.BuildServiceProvider(); + var hosted = host.GetRequiredService(); + var cts = new CancellationTokenSource(TimeSpan.FromSeconds(2)); + await hosted.StartAsync(cts.Token); + + var evt = new TimelineEventEnvelope + { + EventId = "evt-1", + TenantId = "tenant-a", + EventType = "test", + 
Source = "unit", + OccurredAt = DateTimeOffset.UtcNow, + RawPayloadJson = "{}" + }; + + subscriber.Enqueue(evt); + subscriber.Enqueue(evt); // duplicate + subscriber.Complete(); + + await Task.Delay(200, cts.Token); + await hosted.StopAsync(cts.Token); + + Assert.Equal(1, store.InsertCalls); // duplicate dropped + Assert.Equal("sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a", store.LastHash); // hash of "{}" + } + + private sealed class RecordingStore : ITimelineEventStore + { + private readonly HashSet<(string tenant, string id)> _seen = new(); + public int InsertCalls { get; private set; } + public string? LastHash { get; private set; } + + public Task InsertAsync(TimelineEventEnvelope envelope, CancellationToken cancellationToken = default) + { + InsertCalls++; + LastHash = envelope.PayloadHash; + return Task.FromResult(_seen.Add((envelope.TenantId, envelope.EventId))); + } + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineSchemaTests.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineSchemaTests.cs new file mode 100644 index 000000000..dfa3df7f4 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Tests/TimelineSchemaTests.cs @@ -0,0 +1,82 @@ +using Xunit; + +namespace StellaOps.TimelineIndexer.Tests; + +public sealed class TimelineSchemaTests +{ + private static string FindRepoRoot() + { + var dir = AppContext.BaseDirectory; + for (var i = 0; i < 10 && dir is not null; i++) + { + if (File.Exists(Path.Combine(dir, "StellaOps.sln")) || + File.Exists(Path.Combine(dir, "Directory.Build.props"))) + { + return dir; + } + + dir = Directory.GetParent(dir)?.FullName; + } + + throw new InvalidOperationException("Could not locate repository root from test base directory."); + } + + private static string ReadMigrationSql() + { + var root = FindRepoRoot(); + var path = Path.Combine( + root, + "src", + 
"TimelineIndexer", + "StellaOps.TimelineIndexer", + "StellaOps.TimelineIndexer.Infrastructure", + "Db", + "Migrations", + "001_initial_schema.sql"); + + if (!File.Exists(path)) + { + throw new FileNotFoundException("Expected migration file was not found.", path); + } + + return File.ReadAllText(path); + } + + [Fact] + public void MigrationFile_Exists() + { + var root = FindRepoRoot(); + var path = Path.Combine( + root, + "src", + "TimelineIndexer", + "StellaOps.TimelineIndexer", + "StellaOps.TimelineIndexer.Infrastructure", + "Db", + "Migrations", + "001_initial_schema.sql"); + + Assert.True(File.Exists(path), $"Migration script missing at {path}"); + } + + [Fact] + public void Migration_EnablesRlsPolicies() + { + var sql = ReadMigrationSql(); + + Assert.Contains("timeline_app.require_current_tenant", sql, StringComparison.Ordinal); + Assert.Contains("timeline_events_isolation", sql, StringComparison.Ordinal); + Assert.Contains("timeline_event_details_isolation", sql, StringComparison.Ordinal); + Assert.Contains("timeline_event_digests_isolation", sql, StringComparison.Ordinal); + Assert.Contains("ENABLE ROW LEVEL SECURITY", sql, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public void Migration_DefinesUniqueEventConstraint() + { + var sql = ReadMigrationSql(); + + Assert.Contains("UNIQUE (tenant_id, event_id)", sql, StringComparison.Ordinal); + Assert.Contains("event_seq bigserial", sql, StringComparison.Ordinal); + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/Program.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/Program.cs index f7b067d1a..ba67dcd75 100644 --- a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/Program.cs +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/Program.cs @@ -1,7 +1,18 @@ -using StellaOps.TimelineIndexer.Worker; - -var builder = Host.CreateApplicationBuilder(args); 
-builder.Services.AddHostedService(); - -var host = builder.Build(); -host.Run(); +using Microsoft.Extensions.Configuration; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Infrastructure.DependencyInjection; +using StellaOps.TimelineIndexer.Infrastructure.Subscriptions; +using StellaOps.TimelineIndexer.Worker; + +var builder = Host.CreateApplicationBuilder(args); + +builder.Configuration.AddJsonFile("appsettings.json", optional: true, reloadOnChange: true); +builder.Configuration.AddJsonFile("appsettings.Development.json", optional: true, reloadOnChange: true); +builder.Configuration.AddEnvironmentVariables(prefix: "TIMELINE_"); + +builder.Services.AddTimelineIndexerPostgres(builder.Configuration); +builder.Services.AddSingleton(); +builder.Services.AddHostedService(); + +var host = builder.Build(); +host.Run(); diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/TimelineIngestionWorker.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/TimelineIngestionWorker.cs new file mode 100644 index 000000000..dd7a800f6 --- /dev/null +++ b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/TimelineIngestionWorker.cs @@ -0,0 +1,71 @@ +using System.Collections.Concurrent; +using System.Diagnostics.Metrics; +using System.Linq; +using StellaOps.TimelineIndexer.Core.Abstractions; +using StellaOps.TimelineIndexer.Core.Models; + +namespace StellaOps.TimelineIndexer.Worker; + +/// +/// Background consumer that reads timeline events from configured subscribers and persists them via the ingestion service. 
+/// +public sealed class TimelineIngestionWorker( + IEnumerable subscribers, + ITimelineIngestionService ingestionService, + ILogger logger) : BackgroundService +{ + private static readonly Meter Meter = new("StellaOps.TimelineIndexer", "1.0.0"); + private static readonly Counter IngestedCounter = Meter.CreateCounter("timeline.ingested"); + private static readonly Counter DuplicateCounter = Meter.CreateCounter("timeline.duplicates"); + private static readonly Counter FailedCounter = Meter.CreateCounter("timeline.failed"); + + private readonly IEnumerable _subscribers = subscribers; + private readonly ITimelineIngestionService _ingestion = ingestionService; + private readonly ILogger _logger = logger; + private readonly ConcurrentDictionary<(string tenant, string eventId), byte> _sessionSeen = new(); + + protected override Task ExecuteAsync(CancellationToken stoppingToken) + { + var tasks = _subscribers.Select(subscriber => ConsumeAsync(subscriber, stoppingToken)).ToArray(); + return Task.WhenAll(tasks); + } + + private async Task ConsumeAsync(ITimelineEventSubscriber subscriber, CancellationToken cancellationToken) + { + await foreach (var envelope in subscriber.SubscribeAsync(cancellationToken)) + { + var key = (envelope.TenantId, envelope.EventId); + if (!_sessionSeen.TryAdd(key, 0)) + { + DuplicateCounter.Add(1); + _logger.LogDebug("Skipped duplicate timeline event {EventId} for tenant {Tenant}", envelope.EventId, envelope.TenantId); + continue; + } + + try + { + var result = await _ingestion.IngestAsync(envelope, cancellationToken).ConfigureAwait(false); + if (result.Inserted) + { + IngestedCounter.Add(1); + _logger.LogInformation("Ingested timeline event {EventId} from {Source} (tenant {Tenant})", envelope.EventId, envelope.Source, envelope.TenantId); + } + else + { + DuplicateCounter.Add(1); + _logger.LogDebug("Store reported duplicate for event {EventId} tenant {Tenant}", envelope.EventId, envelope.TenantId); + } + } + catch (OperationCanceledException) 
when (cancellationToken.IsCancellationRequested) + { + // Respect shutdown. + break; + } + catch (Exception ex) + { + FailedCounter.Add(1); + _logger.LogError(ex, "Failed to ingest timeline event {EventId} for tenant {Tenant}", envelope.EventId, envelope.TenantId); + } + } + } +} diff --git a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/Worker.cs b/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/Worker.cs deleted file mode 100644 index e21f75bc4..000000000 --- a/src/TimelineIndexer/StellaOps.TimelineIndexer/StellaOps.TimelineIndexer.Worker/Worker.cs +++ /dev/null @@ -1,16 +0,0 @@ -namespace StellaOps.TimelineIndexer.Worker; - -public class Worker(ILogger logger) : BackgroundService -{ - protected override async Task ExecuteAsync(CancellationToken stoppingToken) - { - while (!stoppingToken.IsCancellationRequested) - { - if (logger.IsEnabled(LogLevel.Information)) - { - logger.LogInformation("Worker running at: {time}", DateTimeOffset.Now); - } - await Task.Delay(1000, stoppingToken); - } - } -}