From 75f694276902c08e7dbe39c0c222b8cad586bf55 Mon Sep 17 00:00:00 2001 From: master <> Date: Thu, 4 Dec 2025 19:10:54 +0200 Subject: [PATCH] Add integration tests for migration categories and execution - Implemented MigrationCategoryTests to validate migration categorization for startup, release, seed, and data migrations. - Added tests for edge cases, including null, empty, and whitespace migration names. - Created StartupMigrationHostTests to verify the behavior of the migration host with real PostgreSQL instances using Testcontainers. - Included tests for migration execution, schema creation, and handling of pending release migrations. - Added SQL migration files for testing: creating a test table, adding a column, a release migration, and seeding data. --- .claude/settings.local.json | 18 +- NuGet.config | 10 +- docs/07_HIGH_LEVEL_ARCHITECTURE.md | 139 +- docs/adr/0001-postgresql-for-control-plane.md | 207 ++ docs/adr/index.md | 7 + docs/db/MIGRATION_STRATEGY.md | 498 +++++ docs/db/README.md | 2 + docs/implplan/BLOCKED_DEPENDENCY_TREE.md | 502 +++++ docs/implplan/CLI_AUTH_MIGRATION_PLAN.md | 143 ++ .../SPRINT_0111_0001_0001_advisoryai.md | 2 + .../SPRINT_0113_0001_0002_concelier_ii.md | 2 + .../SPRINT_0114_0001_0003_concelier_iii.md | 2 + .../SPRINT_0115_0001_0004_concelier_iv.md | 2 + .../SPRINT_0116_0001_0005_concelier_v.md | 2 + .../SPRINT_0117_0001_0006_concelier_vi.md | 2 + .../SPRINT_0119_0001_0004_excititor_iv.md | 2 + .../SPRINT_0119_0001_0005_excititor_v.md | 2 + .../SPRINT_0119_0001_0006_excititor_vi.md | 2 + .../SPRINT_0120_0000_0001_policy_reasoning.md | 2 + .../SPRINT_0121_0001_0001_policy_reasoning.md | 2 + ...121_0001_0002_policy_reasoning_blockers.md | 2 + .../SPRINT_0122_0001_0001_policy_reasoning.md | 2 + .../SPRINT_0123_0001_0001_policy_reasoning.md | 2 + .../SPRINT_0124_0001_0001_policy_reasoning.md | 2 + docs/implplan/SPRINT_0125_0001_0001_mirror.md | 2 + .../SPRINT_0125_0001_0001_policy_reasoning.md | 2 + .../SPRINT_0126_0001_0001_policy_reasoning.md | 2 + .../SPRINT_0127_0001_0001_policy_reasoning.md | 144 +- .../SPRINT_0128_0001_0001_policy_reasoning.md | 112 +- .../SPRINT_0129_0001_0001_policy_reasoning.md | 2 + .../SPRINT_0131_0001_0001_scanner_surface.md | 2 + .../SPRINT_0132_0001_0001_scanner_surface.md | 2 + .../SPRINT_0133_0001_0001_scanner_surface.md | 60 +- .../SPRINT_0134_0001_0001_scanner_surface.md | 2 + .../SPRINT_0135_0001_0001_scanner_surface.md | 2 + .../SPRINT_0136_0001_0001_scanner_surface.md | 2 + ...RINT_0138_0000_0001_scanner_ruby_parity.md | 2 + .../SPRINT_0140_0001_0001_runtime_signals.md | 2 + .../SPRINT_0141_0001_0001_graph_indexer.md | 2 + .../SPRINT_0142_0001_0001_sbomservice.md | 2 + .../implplan/SPRINT_0143_0000_0001_signals.md | 2 + ..._0144_0001_0001_zastava_runtime_signals.md | 2 + .../SPRINT_0150_0001_0001_mirror_dsse.md | 2 + ...NT_0150_0001_0001_scheduling_automation.md | 2 + .../SPRINT_0150_0001_0002_mirror_time.md | 2 + .../SPRINT_0150_0001_0003_mirror_orch.md | 2 + .../SPRINT_0151_0001_0001_orchestrator_i.md | 2 + .../SPRINT_0152_0001_0002_orchestrator_ii.md | 2 + .../SPRINT_0153_0001_0003_orchestrator_iii.md | 2 + .../SPRINT_0154_0001_0001_packsregistry.md | 2 + .../SPRINT_0155_0001_0001_scheduler_i.md | 2 + .../SPRINT_0156_0001_0002_scheduler_ii.md | 2 + .../SPRINT_0157_0001_0001_taskrunner_i.md | 2 + ...RINT_0157_0001_0002_taskrunner_blockers.md | 2 + .../SPRINT_0158_0001_0002_taskrunner_ii.md | 2 + .../SPRINT_0160_0001_0001_export_evidence.md | 246 +-- .../SPRINT_0161_0001_0001_evidencelocker.md | 2 + .../SPRINT_0162_0001_0001_exportcenter_i.md | 2 + .../SPRINT_0163_0001_0001_exportcenter_ii.md | 2 + .../SPRINT_0164_0001_0001_exportcenter_iii.md | 2 + .../SPRINT_0165_0001_0001_timelineindexer.md | 2 + ..._0170_0001_0001_notifications_telemetry.md | 2 + .../SPRINT_0171_0001_0001_notifier_i.md | 2 + .../SPRINT_0172_0001_0002_notifier_ii.md | 156 +- .../SPRINT_0173_0001_0003_notifier_iii.md | 86 +- .../SPRINT_0174_0001_0001_telemetry.md | 142 +- .../SPRINT_0180_0001_0001_telemetry_core.md | 2 + ...0185_0001_0001_shared_replay_primitives.md | 2 + ...001_0001_record_deterministic_execution.md | 92 +- ...01_0001_evidence_locker_cli_integration.md | 2 + .../SPRINT_0190_0001_0001_cvss_v4_receipts.md | 2 + .../SPRINT_0200_0001_0001_experience_sdks.md | 2 + docs/implplan/SPRINT_0201_0001_0001_cli_i.md | 2 + docs/implplan/SPRINT_0202_0001_0001_cli_ii.md | 2 + .../SPRINT_0206_0001_0001_devportal.md | 2 + docs/implplan/SPRINT_0207_0001_0001_graph.md | 2 + docs/implplan/SPRINT_0208_0001_0001_sdk.md | 140 +- docs/implplan/SPRINT_0209_0001_0001_ui_i.md | 2 + docs/implplan/SPRINT_0210_0001_0002_ui_ii.md | 2 + docs/implplan/SPRINT_0211_0001_0003_ui_iii.md | 2 + docs/implplan/SPRINT_0212_0001_0001_web_i.md | 2 + docs/implplan/SPRINT_0213_0001_0002_web_ii.md | 2 + .../implplan/SPRINT_0214_0001_0001_web_iii.md | 2 + .../SPRINT_0215_0001_0001_vuln_triage_ux.md | 2 + docs/implplan/SPRINT_0215_0001_0001_web_iv.md | 2 + docs/implplan/SPRINT_0216_0001_0001_web_v.md | 2 + .../SPRINT_0301_0001_0001_docs_md_i.md | 2 + ...NT_0313_0001_0001_docs_modules_attestor.md | 2 + ...T_0314_0001_0001_docs_modules_authority.md | 2 + .../SPRINT_0315_0001_0001_docs_modules_ci.md | 2 + .../SPRINT_0316_0001_0001_docs_modules_cli.md | 2 + ...T_0317_0001_0001_docs_modules_concelier.md | 2 + ...20_0001_0001_docs_modules_export_center.md | 2 + ...PRINT_0321_0001_0001_docs_modules_graph.md | 2 + ...323_0001_0001_docs_modules_orchestrator.md | 2 + ...NT_0324_0001_0001_docs_modules_platform.md | 2 + ...INT_0327_0001_0001_docs_modules_scanner.md | 2 + ...T_0328_0001_0001_docs_modules_scheduler.md | 2 + ...T_0330_0001_0001_docs_modules_telemetry.md | 2 + .../SPRINT_0331_0001_0001_docs_modules_ui.md | 2 + ...NT_0332_0001_0001_docs_modules_vex_lens.md | 2 + ...T_0333_0001_0001_docs_modules_excititor.md | 2 + ...34_0001_0001_docs_modules_vuln_explorer.md | 2 + ...INT_0335_0001_0001_docs_modules_zastava.md | 2 + ..._0001_reachability_runtime_static_union.md | 2 + ...1_0001_0001_reachability_evidence_chain.md | 2 + .../SPRINT_0504_0001_0001_ops_devops_ii.md | 2 + .../implplan/SPRINT_0509_0001_0001_samples.md | 2 + docs/implplan/SPRINT_0510_0001_0001_airgap.md | 2 + docs/implplan/SPRINT_0512_0001_0001_bench.md | 8 +- .../SPRINT_0513_0001_0001_provenance.md | 2 + ...0001_0001_public_reachability_benchmark.md | 2 + ...4_0001_0001_sovereign_crypto_enablement.md | 28 +- docs/implplan/SPRINT_120_excititor_ii.md | 2 + docs/implplan/SPRINT_121_excititor_iii.md | 2 + docs/implplan/SPRINT_122_excititor_iv.md | 2 + docs/implplan/SPRINT_123_excititor_v.md | 2 + docs/implplan/SPRINT_123_policy_reasoning.md | 2 + docs/implplan/SPRINT_124_excititor_vi.md | 2 + docs/implplan/SPRINT_125_policy_reasoning.md | 2 + docs/implplan/SPRINT_126_policy_reasoning.md | 2 + docs/implplan/SPRINT_127_policy_reasoning.md | 2 + docs/implplan/SPRINT_128_policy_reasoning.md | 2 + docs/implplan/SPRINT_132_scanner_surface.md | 2 + docs/implplan/SPRINT_133_scanner_surface.md | 2 + docs/implplan/SPRINT_134_scanner_surface.md | 2 + docs/implplan/SPRINT_135_scanner_surface.md | 2 + docs/implplan/SPRINT_136_scanner_surface.md | 2 + docs/implplan/SPRINT_144_zastava.md | 2 + .../SPRINT_150_scheduling_automation.md | 2 + docs/implplan/SPRINT_152_orchestrator_ii.md | 2 + docs/implplan/SPRINT_154_packsregistry.md | 2 + docs/implplan/SPRINT_157_taskrunner_i.md | 2 + docs/implplan/SPRINT_158_taskrunner_ii.md | 2 + docs/implplan/SPRINT_164_exportcenter_iii.md | 2 + docs/implplan/SPRINT_165_timelineindexer.md | 2 + .../SPRINT_170_notifications_telemetry.md | 2 + docs/implplan/SPRINT_171_notifier_i.md | 2 + docs/implplan/SPRINT_172_notifier_ii.md | 2 + docs/implplan/SPRINT_173_notifier_iii.md | 2 + docs/implplan/SPRINT_174_telemetry.md | 2 + .../SPRINT_185_shared_replay_primitives.md | 2 + ...RINT_186_record_deterministic_execution.md | 2 + ...INT_187_evidence_locker_cli_integration.md | 2 + docs/implplan/SPRINT_200_experience_sdks.md | 2 + docs/implplan/SPRINT_202_cli_ii.md | 2 + docs/implplan/SPRINT_203_cli_iii.md | 2 + docs/implplan/SPRINT_204_cli_iv.md | 2 + docs/implplan/SPRINT_205_cli_v.md | 2 + docs/implplan/SPRINT_215_web_iv.md | 2 + .../SPRINT_300_documentation_process.md | 2 + docs/implplan/SPRINT_303_docs_tasks_md_iii.md | 2 + docs/implplan/SPRINT_304_docs_tasks_md_iv.md | 2 + docs/implplan/SPRINT_305_docs_tasks_md_v.md | 2 + docs/implplan/SPRINT_306_docs_tasks_md_vi.md | 2 + docs/implplan/SPRINT_307_docs_tasks_md_vii.md | 2 + .../implplan/SPRINT_308_docs_tasks_md_viii.md | 2 + docs/implplan/SPRINT_309_docs_tasks_md_ix.md | 2 + docs/implplan/SPRINT_310_docs_tasks_md_x.md | 2 + docs/implplan/SPRINT_311_docs_tasks_md_xi.md | 2 + .../SPRINT_312_docs_modules_advisory_ai.md | 2 + .../SPRINT_313_docs_modules_attestor.md | 2 + .../SPRINT_314_docs_modules_authority.md | 2 + docs/implplan/SPRINT_315_docs_modules_ci.md | 2 + .../SPRINT_318_docs_modules_devops.md | 2 + .../SPRINT_319_docs_modules_excititor.md | 2 + .../SPRINT_320_docs_modules_export_center.md | 2 + .../SPRINT_322_docs_modules_notify.md | 2 + .../SPRINT_324_docs_modules_platform.md | 2 + .../SPRINT_325_docs_modules_policy.md | 2 + .../SPRINT_326_docs_modules_registry.md | 2 + .../SPRINT_327_docs_modules_scanner.md | 2 + .../SPRINT_329_docs_modules_signer.md | 28 +- .../SPRINT_330_docs_modules_telemetry.md | 2 + docs/implplan/SPRINT_331_docs_modules_ui.md | 2 + .../SPRINT_332_docs_modules_vex_lens.md | 2 + .../SPRINT_333_docs_modules_excititor.md | 2 + .../SPRINT_334_docs_modules_vuln_explorer.md | 2 + .../SPRINT_335_docs_modules_zastava.md | 2 + ..._0000_0000_postgres_conversion_overview.md | 2 + ...INT_3400_0001_0001_postgres_foundations.md | 2 + ...PRINT_3401_0001_0001_postgres_authority.md | 2 + ...PRINT_3402_0001_0001_postgres_scheduler.md | 2 + .../SPRINT_3403_0001_0001_postgres_notify.md | 2 + .../SPRINT_3404_0001_0001_postgres_policy.md | 2 + ...3405_0001_0001_postgres_vulnerabilities.md | 2 + ...PRINT_3406_0001_0001_postgres_vex_graph.md | 2 + .../SPRINT_3407_0001_0001_postgres_cleanup.md | 2 + ..._0001_0001_postgres_migration_lifecycle.md | 272 +++ docs/implplan/SPRINT_500_ops_offline.md | 2 + docs/implplan/SPRINT_501_ops_deployment_i.md | 2 + docs/implplan/SPRINT_502_ops_deployment_ii.md | 2 + docs/implplan/SPRINT_503_ops_devops_i.md | 2 + docs/implplan/SPRINT_504_ops_devops_ii.log.md | 2 + docs/implplan/SPRINT_505_ops_devops_iii.md | 2 + docs/implplan/SPRINT_506_ops_devops_iv.md | 2 + docs/implplan/SPRINT_507_ops_devops_v.md | 2 + docs/implplan/SPRINT_508_ops_offline_kit.md | 2 + docs/implplan/SPRINT_511_api.md | 2 + docs/router/13-Step.md | 946 +++++++++ docs/router/14-Step.md | 1054 +++++++++++ docs/router/15-Step.md | 1156 +++++++++++ docs/router/16-Step.md | 994 ++++++++++ docs/router/17-Step.md | 903 +++++++++ docs/router/18-Step.md | 890 +++++++++ docs/router/19-Step.md | 714 +++++++ docs/router/20-Step.md | 696 +++++++ docs/router/21-Step.md | 793 ++++++++ docs/router/22-Step.md | 698 +++++++ docs/router/23-Step.md | 769 ++++++++ docs/router/24-Step.md | 856 +++++++++ docs/router/25-Step.md | 754 ++++++++ docs/router/26-Step.md | 683 +++++++ docs/router/27-Step.md | 1524 +++++++++++++++ docs/router/28-Step.md | 755 ++++++++ docs/router/29-Step.md | 1684 +++++++++++++++++ .../SPRINT_7000_0001_0001_router_skeleton.md | 134 +- .../SPRINT_7000_0001_0002_router_common.md | 157 ++ ...PRINT_7000_0002_0001_inmemory_transport.md | 121 ++ ...NT_7000_0003_0001_microservice_sdk_core.md | 135 ++ ...000_0003_0002_microservice_sdk_handlers.md | 173 ++ .../SPRINT_7000_0004_0001_gateway_core.md | 135 ++ ...PRINT_7000_0004_0002_gateway_middleware.md | 172 ++ ...RINT_7000_0004_0003_gateway_connections.md | 218 +++ .../SPRINT_7000_0005_0001_heartbeat_health.md | 205 ++ ...SPRINT_7000_0005_0002_routing_algorithm.md | 217 +++ .../SPRINT_7000_0005_0003_cancellation.md | 230 +++ .../router/SPRINT_7000_0005_0004_streaming.md | 215 +++ .../SPRINT_7000_0005_0005_payload_limits.md | 231 +++ .../SPRINT_7000_0006_0001_transport_tcp.md | 231 +++ .../SPRINT_7000_0006_0002_transport_tls.md | 227 +++ .../SPRINT_7000_0006_0003_transport_udp.md | 221 +++ ...PRINT_7000_0006_0004_transport_rabbitmq.md | 218 +++ .../SPRINT_7000_0007_0001_router_config.md | 220 +++ ...SPRINT_7000_0007_0002_microservice_yaml.md | 213 +++ ...NT_7000_0008_0001_authority_integration.md | 204 ++ .../SPRINT_7000_0008_0002_source_generator.md | 231 +++ ...SPRINT_7000_0009_0001_reference_example.md | 260 +++ .../router/SPRINT_7000_0010_0001_migration.md | 267 +++ docs/router/SPRINT_INDEX.md | 200 ++ scripts/add_blocked_reference.py | 110 ++ .../StellaOps.Cli/Commands/CommandFactory.cs | 53 +- .../StellaOps.Cli/Commands/CommandHandlers.cs | 240 +-- .../StellaOps.Cli/Configuration/CliProfile.cs | 2 +- .../Configuration/GlobalOptions.cs | 108 +- .../Configuration/StellaOpsCliOptions.cs | 65 +- .../Extensions/CommandLineExtensions.cs | 32 + .../StellaOpsTokenClientExtensions.cs | 115 ++ src/Cli/StellaOps.Cli/Output/CliError.cs | 17 +- .../Services/BackendOperationsClient.cs | 42 +- .../StellaOps.Cli/Services/ExceptionClient.cs | 17 +- .../Services/MigrationModuleRegistry.cs | 60 + .../Services/Models/AttestationModels.cs | 4 +- .../Services/Models/PolicyWorkspaceModels.cs | 13 +- .../Services/Models/ReachabilityModels.cs | 2 +- .../StellaOps.Cli/Services/NotifyClient.cs | 17 +- .../Services/ObservabilityClient.cs | 17 +- .../Services/OfflineModeGuard.cs | 41 + .../Services/OrchestratorClient.cs | 5 +- src/Cli/StellaOps.Cli/Services/PackClient.cs | 17 +- .../Services/PromotionAssembler.cs | 1 + src/Cli/StellaOps.Cli/Services/SbomClient.cs | 17 +- .../StellaOps.Cli/Services/SbomerClient.cs | 16 +- .../Services/VexObservationsClient.cs | 30 +- .../TraceparentHttpMessageHandler.cs | 1 + .../DualWrite/DualWriteAdvisoryStore.cs | 72 + .../Advisories/IPostgresAdvisoryStore.cs | 51 + .../Advisories/PostgresAdvisoryStore.cs | 301 +++ .../Conversion/AdvisoryConversionResult.cs | 56 + .../Conversion/AdvisoryConverter.cs | 659 +++++++ .../AdvisoryRepositoryTests.cs | 444 +++++ .../KevFlagRepositoryTests.cs | 274 +++ .../MergeEventRepositoryTests.cs | 288 +++ .../Parity/AdvisoryStoreParityTests.cs | 315 +++ .../Parity/DualBackendFixture.cs | 167 ++ .../Parity/PurlMatchingParityTests.cs | 349 ++++ .../Performance/AdvisoryPerformanceTests.cs | 412 ++++ .../SourceRepositoryTests.cs | 201 ++ .../SourceStateRepositoryTests.cs | 192 ++ .../DigestAggregationTests.cs | 544 ++++++ .../EscalationHandlingTests.cs | 469 +++++ .../NotificationDeliveryFlowTests.cs | 405 ++++ .../StellaOps.Policy.RiskProfile.csproj | 1 - .../Migration/MongoDocumentConverter.cs | 335 ++++ .../Migration/PolicyMigrator.cs | 467 +++++ .../PackVersioningWorkflowTests.cs | 281 +++ .../RiskProfileVersionHistoryTests.cs | 473 +++++ .../Internal/NodeEnvironmentScanner.cs | 17 +- .../NodeLanguageAnalyzer.cs | 49 +- .../Runtime/EntryTraceRuntimeReconciler.cs | 2 +- src/StellaOps.sln | 64 - .../Migrations/IMigrationRunner.cs | 181 ++ .../Migrations/MigrationCategory.cs | 88 + .../Migrations/MigrationServiceExtensions.cs | 383 ++++ .../Migrations/StartupMigrationHost.cs | 479 +++++ .../Migrations/MigrationCategoryTests.cs | 205 ++ .../Migrations/StartupMigrationHostTests.cs | 548 ++++++ .../TestMigrations/001_create_test_table.sql | 9 + .../TestMigrations/002_add_column.sql | 5 + .../TestMigrations/100_release_migration.sql | 5 + .../TestMigrations/S001_seed_data.sql | 7 + 301 files changed, 32810 insertions(+), 1128 deletions(-) create mode 100644 docs/adr/0001-postgresql-for-control-plane.md create mode 100644 docs/db/MIGRATION_STRATEGY.md create mode 100644 docs/implplan/BLOCKED_DEPENDENCY_TREE.md create mode 100644 docs/implplan/CLI_AUTH_MIGRATION_PLAN.md create mode 100644 docs/implplan/SPRINT_3408_0001_0001_postgres_migration_lifecycle.md create mode 100644 docs/router/13-Step.md create mode 100644 docs/router/14-Step.md create mode 100644 docs/router/15-Step.md create mode 100644 docs/router/16-Step.md create mode 100644 docs/router/17-Step.md create mode 100644 docs/router/18-Step.md create mode 100644 docs/router/19-Step.md create mode 100644 docs/router/20-Step.md create mode 100644 docs/router/21-Step.md create mode 100644 docs/router/22-Step.md create mode 100644 docs/router/23-Step.md create mode 100644 docs/router/24-Step.md create mode 100644 docs/router/25-Step.md create mode 100644 docs/router/26-Step.md create mode 100644 docs/router/27-Step.md create mode 100644 docs/router/28-Step.md create mode 100644 docs/router/29-Step.md create mode 100644 docs/router/SPRINT_7000_0001_0002_router_common.md create mode 100644 docs/router/SPRINT_7000_0002_0001_inmemory_transport.md create mode 100644 docs/router/SPRINT_7000_0003_0001_microservice_sdk_core.md create mode 100644 docs/router/SPRINT_7000_0003_0002_microservice_sdk_handlers.md create mode 100644 docs/router/SPRINT_7000_0004_0001_gateway_core.md create mode 100644 docs/router/SPRINT_7000_0004_0002_gateway_middleware.md create mode 100644 docs/router/SPRINT_7000_0004_0003_gateway_connections.md create mode 100644 docs/router/SPRINT_7000_0005_0001_heartbeat_health.md create mode 100644 docs/router/SPRINT_7000_0005_0002_routing_algorithm.md create mode 100644 docs/router/SPRINT_7000_0005_0003_cancellation.md create mode 100644 docs/router/SPRINT_7000_0005_0004_streaming.md create mode 100644 docs/router/SPRINT_7000_0005_0005_payload_limits.md create mode 100644 docs/router/SPRINT_7000_0006_0001_transport_tcp.md create mode 100644 docs/router/SPRINT_7000_0006_0002_transport_tls.md create mode 100644 docs/router/SPRINT_7000_0006_0003_transport_udp.md create mode 100644 docs/router/SPRINT_7000_0006_0004_transport_rabbitmq.md create mode 100644 docs/router/SPRINT_7000_0007_0001_router_config.md create mode 100644 docs/router/SPRINT_7000_0007_0002_microservice_yaml.md create mode 100644 docs/router/SPRINT_7000_0008_0001_authority_integration.md create mode 100644 docs/router/SPRINT_7000_0008_0002_source_generator.md create mode 100644 docs/router/SPRINT_7000_0009_0001_reference_example.md create mode 100644 docs/router/SPRINT_7000_0010_0001_migration.md create mode 100644 docs/router/SPRINT_INDEX.md create mode 100644 scripts/add_blocked_reference.py create mode 100644 src/Cli/StellaOps.Cli/Extensions/CommandLineExtensions.cs create mode 100644 src/Cli/StellaOps.Cli/Extensions/StellaOpsTokenClientExtensions.cs create mode 100644 src/Cli/StellaOps.Cli/Services/MigrationModuleRegistry.cs create mode 100644 src/Cli/StellaOps.Cli/Services/OfflineModeGuard.cs create mode 100644 src/Concelier/StellaOps.Concelier.WebService/DualWrite/DualWriteAdvisoryStore.cs create mode 100644 src/Concelier/__Libraries/StellaOps.Concelier.Storage.Postgres/Advisories/IPostgresAdvisoryStore.cs create mode 100644 src/Concelier/__Libraries/StellaOps.Concelier.Storage.Postgres/Advisories/PostgresAdvisoryStore.cs create mode 100644 src/Concelier/__Libraries/StellaOps.Concelier.Storage.Postgres/Conversion/AdvisoryConversionResult.cs create mode 100644 src/Concelier/__Libraries/StellaOps.Concelier.Storage.Postgres/Conversion/AdvisoryConverter.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/AdvisoryRepositoryTests.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/KevFlagRepositoryTests.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/MergeEventRepositoryTests.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/Parity/AdvisoryStoreParityTests.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/Parity/DualBackendFixture.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/Parity/PurlMatchingParityTests.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/Performance/AdvisoryPerformanceTests.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/SourceRepositoryTests.cs create mode 100644 src/Concelier/__Tests/StellaOps.Concelier.Storage.Postgres.Tests/SourceStateRepositoryTests.cs create mode 100644 src/Notify/__Tests/StellaOps.Notify.Storage.Postgres.Tests/DigestAggregationTests.cs create mode 100644 src/Notify/__Tests/StellaOps.Notify.Storage.Postgres.Tests/EscalationHandlingTests.cs create mode 100644 src/Notify/__Tests/StellaOps.Notify.Storage.Postgres.Tests/NotificationDeliveryFlowTests.cs create mode 100644 src/Policy/__Libraries/StellaOps.Policy.Storage.Postgres/Migration/MongoDocumentConverter.cs create mode 100644 src/Policy/__Libraries/StellaOps.Policy.Storage.Postgres/Migration/PolicyMigrator.cs create mode 100644 src/Policy/__Tests/StellaOps.Policy.Storage.Postgres.Tests/PackVersioningWorkflowTests.cs create mode 100644 src/Policy/__Tests/StellaOps.Policy.Storage.Postgres.Tests/RiskProfileVersionHistoryTests.cs create mode 100644 src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/IMigrationRunner.cs create mode 100644 src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/MigrationCategory.cs create mode 100644 src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/MigrationServiceExtensions.cs create mode 100644 src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/StartupMigrationHost.cs create mode 100644 src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/MigrationCategoryTests.cs create mode 100644 src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/StartupMigrationHostTests.cs create mode 100644 src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/TestMigrations/001_create_test_table.sql create mode 100644 src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/TestMigrations/002_add_column.sql create mode 100644 src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/TestMigrations/100_release_migration.sql create mode 100644 src/__Libraries/__Tests/StellaOps.Infrastructure.Postgres.Tests/Migrations/TestMigrations/S001_seed_data.sql diff --git a/.claude/settings.local.json b/.claude/settings.local.json index b3b53188f..d816abfd3 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -4,7 +4,23 @@ "Bash(dotnet build:*)", "Bash(dotnet restore:*)", "Bash(chmod:*)", - "Bash(cat:*)" + "Bash(cat:*)", + "Bash(dotnet nuget:*)", + "Bash(cd /mnt/c/dev/New\\ folder/git.stella-ops.org && python3:*)", + "Bash(cd:*)", + "Bash(grep:*)", + "Bash(src/Cli/StellaOps.Cli/Commands/CommandHandlers.cs )", + "Bash(src/Cli/StellaOps.Cli/Configuration/CliProfile.cs )", + "Bash(src/Cli/StellaOps.Cli/Configuration/GlobalOptions.cs )", + "Bash(src/Cli/StellaOps.Cli/Output/CliError.cs )", + "Bash(src/Cli/StellaOps.Cli/Services/BackendOperationsClient.cs )", + "Bash(src/Cli/StellaOps.Cli/Services/OrchestratorClient.cs )", + "Bash(src/Cli/StellaOps.Cli/Services/PromotionAssembler.cs )", + "Bash(src/Cli/StellaOps.Cli/Services/VexObservationsClient.cs )", + "Bash(src/Cli/StellaOps.Cli/Telemetry/TraceparentHttpMessageHandler.cs)", + "Bash(python3:*)", + "Bash(dotnet list:*)", + "WebSearch" ], "deny": [], "ask": [] diff --git a/NuGet.config b/NuGet.config index 69f6da972..c524de5f6 100644 --- a/NuGet.config +++ b/NuGet.config @@ -3,9 +3,17 @@ - + + + + + + + + + diff --git a/docs/07_HIGH_LEVEL_ARCHITECTURE.md b/docs/07_HIGH_LEVEL_ARCHITECTURE.md index e8d6f5b74..b0499a396 100755 --- a/docs/07_HIGH_LEVEL_ARCHITECTURE.md +++ b/docs/07_HIGH_LEVEL_ARCHITECTURE.md @@ -1,6 +1,6 @@ -# High‑Level Architecture — **Stella Ops** (Consolidated • 2025Q4) - -> **Want the 10-minute tour?** See [`high-level-architecture.md`](high-level-architecture.md); this file retains the exhaustive reference. +# High‑Level Architecture — **Stella Ops** (Consolidated • 2025Q4) + +> **Want the 10-minute tour?** See [`high-level-architecture.md`](high-level-architecture.md); this file retains the exhaustive reference. > **Purpose.** A complete, implementation‑ready map of Stella Ops: product vision, all runtime components, trust boundaries, tokens/licensing, control/data flows, storage, APIs, security, scale, DevOps, and verification logic. > **Scope.** This file **replaces** the separate `components.md`; all component details now live here. @@ -14,14 +14,14 @@ **Operating principles.** * **Scanner‑owned SBOMs.** We generate our own BOMs; we do not warehouse third‑party SBOM content (we can **link** to attested SBOMs). -* **Deterministic evidence.** Facts come from package DBs, installed metadata, linkers, and verified attestations; no fuzzy guessing in the core. -* **Per-layer caching.** Cache fragments by **layer digest** and compose image SBOMs via **CycloneDX BOM-Link** / **SPDX ExternalRef**. -* **Inventory vs Usage.** Always record the full **inventory** of what exists; separately present **usage** (entrypoint closure + loaded libs). -* **Backend decides.** PASS/FAIL is produced by **Policy** + **VEX** + **Advisories**. The scanner reports facts. -* **Attest or it didn’t happen.** Every export is signed as **in-toto/DSSE** and logged in **Rekor v2**. -* **Hybrid reachability attestations.** Every reachability graph ships with a graph-level DSSE (mandatory) plus optional edge-bundle DSSEs for runtime/init/contested edges; Policy/Signals consume graph DSSE as baseline and edge bundles for quarantine/disputes. -* **Sovereign-ready.** Cloud is used only for licensing and optional endorsement; everything else is first-party and self-hostable. -* **Competitive clarity.** Moats: deterministic replay, hybrid reachability proofs, lattice VEX, sovereign crypto, proof graph; see `docs/market/competitive-landscape.md`. +* **Deterministic evidence.** Facts come from package DBs, installed metadata, linkers, and verified attestations; no fuzzy guessing in the core. +* **Per-layer caching.** Cache fragments by **layer digest** and compose image SBOMs via **CycloneDX BOM-Link** / **SPDX ExternalRef**. +* **Inventory vs Usage.** Always record the full **inventory** of what exists; separately present **usage** (entrypoint closure + loaded libs). +* **Backend decides.** PASS/FAIL is produced by **Policy** + **VEX** + **Advisories**. The scanner reports facts. +* **Attest or it didn’t happen.** Every export is signed as **in-toto/DSSE** and logged in **Rekor v2**. +* **Hybrid reachability attestations.** Every reachability graph ships with a graph-level DSSE (mandatory) plus optional edge-bundle DSSEs for runtime/init/contested edges; Policy/Signals consume graph DSSE as baseline and edge bundles for quarantine/disputes. +* **Sovereign-ready.** Cloud is used only for licensing and optional endorsement; everything else is first-party and self-hostable. +* **Competitive clarity.** Moats: deterministic replay, hybrid reachability proofs, lattice VEX, sovereign crypto, proof graph; see `docs/market/competitive-landscape.md`. --- @@ -53,8 +53,9 @@ * **Fulcio** (Sigstore CA) — issues short‑lived signing certs (keyless). * **Rekor v2** (tile‑backed transparency log). -* **RustFS** — offline-first object store with deterministic REST API (S3/MinIO fallback available for legacy installs). -* **MongoDB** — catalog, advisories, VEX, scheduler, notify. +* **RustFS** — offline-first object store with deterministic REST API (S3/MinIO fallback available for legacy installs). +* **PostgreSQL** (≥15) — control-plane storage with per-module schema isolation (auth, vuln, vex, scheduler, notify, policy). See [Database Architecture](#database-architecture-postgresql). +* **MongoDB** (≥7) — legacy catalog support; being phased out in favor of PostgreSQL for control-plane domains. * **Queue** — Redis Streams / NATS / RabbitMQ (pluggable). * **OCI Registry** — must support **Referrers API** (discover SBOMs/signatures). @@ -85,7 +86,7 @@ flowchart LR ATT[Attestor\n(Rekor v2 submit/verify)] UI[Web UI (Angular)] Z[Zastava\n(Runtime Inspector/Enforcer)] - RFS[(RustFS object store)] + RFS[(RustFS object store)] MGO[(MongoDB)] QUE[(Queue/Streams)] end @@ -98,7 +99,7 @@ flowchart LR CLI -->|scan/build| SW SW -->|jobs| QUE QUE --> WK - WK --> RFS + WK --> RFS SW --> MGO CONC --> MGO EXC --> MGO @@ -229,13 +230,13 @@ LS --> IA: PoE (mTLS client cert or JWT with cnf=K_inst), CRL/OCSP/introspect --- -## 6) Storage & catalogs (RustFS/Mongo) - -**RustFS layout (default)** +## 6) Storage & catalogs (RustFS/PostgreSQL) -``` -rustfs://stellaops/ - layers//sbom.cdx.json.zst +**RustFS layout (default)** + +``` +rustfs://stellaops/ + layers//sbom.cdx.json.zst layers//sbom.spdx.json.zst images//inventory.cdx.pb images//usage.cdx.pb @@ -243,16 +244,62 @@ rustfs://stellaops/ attest/.dsse.json ``` -**Catalog (Mongo)** +### Database Architecture (PostgreSQL) -* `artifacts` (type/format/sha/size/rekor/ttl/immutable/refCount/createdAt) -* `images`, `layers`, `links`, `lifecycleRules` -* **Scheduler:** `schedules`, `runs`, `locks`, `impact_cursors` -* **Notify:** `rules`, `deliveries`, `channels`, `templates` +StellaOps uses PostgreSQL for all control-plane data with **per-module schema isolation**. Each module owns and manages only its own schema, ensuring clear ownership and independent migration lifecycles. + +**Schema topology:** + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ PostgreSQL Cluster │ +│ ┌─────────────────────────────────────────────────────────────┐│ +│ │ stellaops (database) ││ +│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ││ +│ │ │ auth │ │ vuln │ │ vex │ │scheduler│ ││ +│ │ └─────────┘ └─────────┘ └─────────┘ └─────────┘ ││ +│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ││ +│ │ │ notify │ │ policy │ │ audit │ ││ +│ │ └─────────┘ └─────────┘ └─────────┘ ││ +│ └─────────────────────────────────────────────────────────────┘│ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Schema ownership:** + +| Schema | Owner Module | Purpose | +|--------|--------------|---------| +| `auth` | Authority | Identity, authentication, authorization, licensing, sessions | +| `vuln` | Concelier | Vulnerability advisories, CVSS, affected packages, sources | +| `vex` | Excititor | VEX statements, graphs, observations, evidence, consensus | +| `scheduler` | Scheduler | Jobs, triggers, workers, locks, execution history | +| `notify` | Notify | Channels, templates, rules, deliveries, escalations | +| `policy` | Policy | Policy packs, rules, risk profiles, evaluations | +| `audit` | Shared | Cross-cutting audit log (optional) | + +**Key design principles:** + +1. **Module isolation** — Each module controls only its own schema. Cross-schema queries are rare and explicitly documented. +2. **Multi-tenancy** — Single database, single schema set, `tenant_id` column on all tenant-scoped tables with row-level security. +3. **Forward-only migrations** — No down migrations; fixes are applied as new forward migrations. +4. **Advisory lock coordination** — Startup migrations use `pg_try_advisory_lock(hashtext('schema_name'))` to prevent concurrent execution. +5. **Air-gap compatible** — All migrations embedded in assemblies, no external network dependencies. + +**Migration categories:** + +| Category | Prefix | Execution | Description | +|----------|--------|-----------|-------------| +| Startup (A) | `001-099` | Automatic at boot | Non-breaking DDL (CREATE IF NOT EXISTS, ADD COLUMN nullable) | +| Release (B) | `100-199` | Manual via CLI | Breaking changes (DROP, ALTER TYPE), require maintenance window | +| Seed | `S001-S999` | After schema | Reference data with ON CONFLICT DO NOTHING | +| Data (C) | `DM001-DM999` | Background job | Batched data transformations, resumable | + +**Detailed documentation:** See [`docs/db/`](db/README.md) for full specification, coding rules, and phase-by-phase conversion tasks. **Retention** -* RustFS applies retention via `X-RustFS-Retain-Seconds`; Scanner.WebService GC decrements `refCount` and deletes unreferenced metadata; S3/MinIO fallback retains native Object Lock when enabled. +* RustFS applies retention via `X-RustFS-Retain-Seconds`; Scanner.WebService GC decrements `refCount` and deletes unreferenced metadata; S3/MinIO fallback retains native Object Lock when enabled. +* PostgreSQL retention managed via time-based partitioning for high-volume tables (runs, execution_logs) with monthly partition drops. --- @@ -376,36 +423,36 @@ Binary header + purl table + roaring bitmaps; optional `usedByEntrypoint` flags * **Community** (public registry): throttled, non‑attesting. * **Authorized** (private registry): full speed, DSSE enabled. -* **Client update flow:** containers self‑verify signatures at boot; report version; **Signer** enforces `valid_release_year` / `max_version` from PoE before signing. -* **Compose skeleton:** +* **Client update flow:** containers self‑verify signatures at boot; report version; **Signer** enforces `valid_release_year` / `max_version` from PoE before signing. +* **Compose skeleton:** ```yaml services: - authority: { image: stellaops/authority } + authority: { image: stellaops/authority, depends_on: [postgres] } fulcio: { image: sigstore/fulcio } rekor: { image: sigstore/rekor-v2 } minio: { image: minio/minio, command: server /data --console-address ":9001" } - mongo: { image: mongo:7 } + postgres: { image: postgres:15-alpine, environment: { POSTGRES_DB: stellaops, POSTGRES_USER: stellaops } } signer: { image: stellaops/signer, depends_on: [authority, fulcio] } attestor: { image: stellaops/attestor, depends_on: [rekor, signer] } - scanner-web: { image: stellaops/scanner-web, depends_on: [mongo, minio, signer, attestor] } + scanner-web: { image: stellaops/scanner-web, depends_on: [postgres, minio, signer, attestor] } scanner-worker: { image: stellaops/scanner-worker, deploy: { replicas: 4 }, depends_on: [scanner-web] } - concelier: { image: stellaops/concelier-web, depends_on: [mongo] } - excititor: { image: stellaops/excititor-web, depends_on: [mongo] } - scheduler-web: { image: stellaops/scheduler-web, depends_on: [mongo] } + concelier: { image: stellaops/concelier-web, depends_on: [postgres] } + excititor: { image: stellaops/excititor-web, depends_on: [postgres] } + scheduler-web: { image: stellaops/scheduler-web, depends_on: [postgres] } scheduler-worker:{ image: stellaops/scheduler-worker, deploy: { replicas: 2 }, depends_on: [scheduler-web] } - notify-web: { image: stellaops/notify-web, depends_on: [mongo] } + notify-web: { image: stellaops/notify-web, depends_on: [postgres] } notify-worker: { image: stellaops/notify-worker, deploy: { replicas: 2 }, depends_on: [notify-web] } - ui: { image: stellaops/ui, depends_on: [scanner-web, concelier, excititor, scheduler-web, notify-web] } -``` - -* **Binary prerequisites (offline-first):** - - * Single curated NuGet location: `local-nugets/` holds the `.nupkg` feed (hashed in `manifest.json`) and the restore output (`local-nugets/packages`, configured via `NuGet.config`). - * Non-NuGet binaries (plugins/CLIs/tools) are catalogued with SHA-256 in `vendor/manifest.json`; air-gap bundles are registered in `offline/feeds/manifest.json`. - * CI guard: `scripts/verify-binaries.sh` blocks binaries outside approved roots; offline restores use `dotnet restore --source local-nugets` with `OFFLINE=1` (override via `ALLOW_REMOTE=1`). + ui: { image: stellaops/ui, depends_on: [scanner-web, concelier, excititor, scheduler-web, notify-web] } +``` -* **Backups:** Mongo dumps; RustFS snapshots (or S3 versioning when fallback driver is used); Rekor v2 DB snapshots; JWKS/Fulcio/KMS key rotation. +* **Binary prerequisites (offline-first):** + + * Single curated NuGet location: `local-nugets/` holds the `.nupkg` feed (hashed in `manifest.json`) and the restore output (`local-nugets/packages`, configured via `NuGet.config`). + * Non-NuGet binaries (plugins/CLIs/tools) are catalogued with SHA-256 in `vendor/manifest.json`; air-gap bundles are registered in `offline/feeds/manifest.json`. + * CI guard: `scripts/verify-binaries.sh` blocks binaries outside approved roots; offline restores use `dotnet restore --source local-nugets` with `OFFLINE=1` (override via `ALLOW_REMOTE=1`). + +* **Backups:** Mongo dumps; RustFS snapshots (or S3 versioning when fallback driver is used); Rekor v2 DB snapshots; JWKS/Fulcio/KMS key rotation. * **Ops runbooks:** Scheduler catch‑up after Concelier/Excititor recovery; connector key rotation (Slack/Teams/SMTP). * **SLOs & alerts:** lag between Concelier/Excititor export and first rescan verdict; delivery failure rates by channel. @@ -418,7 +465,7 @@ services: * **Notify metrics:** `notify.sent_total{channel}`, `notify.dropped_total{reason}`, `notify.digest_coalesced_total`, `notify.latency_ms`. * **Tracing:** per‑stage spans; correlation IDs across Scanner→Signer→Attestor and Concelier/Excititor→Scheduler→Scanner→Notify. * **Audit logs:** every signing records `license_id`, `image_digest`, `policy_digest`, and Rekor UUID; Scheduler records who scheduled what; Notify records where, when, and why messages were sent or deduped. -* **Compliance:** RustFS retention headers (or MinIO Object Lock when operating in S3 mode) keep immutable artifacts tamper‑resistant; reproducible outputs via policy digest + SBOM digest in predicate. +* **Compliance:** RustFS retention headers (or MinIO Object Lock when operating in S3 mode) keep immutable artifacts tamper‑resistant; reproducible outputs via policy digest + SBOM digest in predicate. --- diff --git a/docs/adr/0001-postgresql-for-control-plane.md b/docs/adr/0001-postgresql-for-control-plane.md new file mode 100644 index 000000000..3b5b34fab --- /dev/null +++ b/docs/adr/0001-postgresql-for-control-plane.md @@ -0,0 +1,207 @@ +# ADR-0001: PostgreSQL for Control-Plane Storage + +## Status +Accepted + +## Date +2025-12-04 + +## Authors +- Platform Team + +## Deciders +- Architecture Guild +- Platform Team + +## Context + +StellaOps control-plane services (Authority, Scheduler, Notify, Concelier/Excititor, Policy) require persistent storage for: + +- Identity and authorization data (users, roles, tokens, sessions) +- Job scheduling and execution state +- Notification rules, templates, and delivery tracking +- Vulnerability advisories and VEX statements +- Policy packs, rules, and evaluation history + +**Triggers for this decision:** + +1. **Licensing trust & ecosystem stability** — PostgreSQL is licensed under the permissive PostgreSQL License (similar to MIT/BSD), OSI-approved, with no vendor lock-in concerns. MongoDB's SSPL license (2018) is not OSI-approved and creates uncertainty for self-hosted/sovereign deployments. For a platform emphasizing sovereignty and auditability, database licensing must be beyond reproach. +2. **Schema complexity** — Control-plane domains have well-defined, relational schemas with referential integrity requirements (foreign keys, cascading deletes, constraints). +3. **Query patterns** — Complex joins, aggregations, and window functions are common (e.g., finding all images affected by a newly published CVE). +4. **ACID requirements** — Job scheduling, token issuance, and notification delivery require strong transactional guarantees. +5. **Multi-tenancy** — Row-level security (RLS) needed for tenant isolation without schema-per-tenant overhead. +6. **Migration tooling** — Need deterministic, forward-only migrations with advisory lock coordination for multi-instance deployments. +7. **Air-gap operation** — All schema and data must be embeddable in assemblies without external network dependencies. +8. **Auditability** — PostgreSQL's mature ecosystem includes proven audit logging, compliance tooling, and forensic capabilities trusted by regulated industries. + +## Decision + +**Adopt PostgreSQL (≥15) as the primary database for all StellaOps control-plane domains.** + +Key architectural choices: + +### 1. Per-Module Schema Isolation + +Each module owns exactly one PostgreSQL schema: + +| Schema | Owner | Description | +|--------|-------|-------------| +| `auth` | Authority | Identity, authentication, authorization, licensing | +| `vuln` | Concelier | Vulnerability advisories, sources, affected packages | +| `vex` | Excititor | VEX statements, graphs, observations, consensus | +| `scheduler` | Scheduler | Jobs, triggers, workers, execution history | +| `notify` | Notify | Channels, templates, rules, deliveries | +| `policy` | Policy | Policy packs, rules, risk profiles | +| `audit` | Shared | Cross-cutting audit log (optional) | + +**Rationale:** +- Clear ownership boundaries +- Independent migration lifecycles +- Schema-level access control +- Simplified testing and development + +### 2. Multi-Tenancy via tenant_id Column + +Single database, single schema set, `tenant_id` column on all tenant-scoped tables. + +```sql +-- Session-level tenant context +SET app.tenant_id = ''; + +-- Row-level security (defense in depth) +CREATE POLICY tenant_isolation ON + USING (tenant_id = current_setting('app.tenant_id')::uuid); +``` + +**Rationale:** +- Simplest operational model +- Shared connection pooling +- Easy cross-tenant queries for admin operations +- Composite indexes on `(tenant_id, ...)` for query performance + +### 3. Forward-Only Migrations with Advisory Locks + +Migrations are embedded in assemblies and executed at startup with PostgreSQL advisory locks: + +```sql +SELECT pg_try_advisory_lock(hashtext('auth')); -- Per-schema lock +``` + +**Migration categories:** +- **Startup (001-099)**: Automatic, non-breaking DDL +- **Release (100-199)**: Manual CLI, breaking changes +- **Seed (S001-S999)**: Reference data +- **Data (DM001-DM999)**: Batched background jobs + +**Rationale:** +- No down migrations needed (forward-only with fix-forward) +- Advisory locks prevent concurrent migrations across instances +- Checksum validation catches unauthorized modifications +- Air-gap compatible (no external migration service needed) + +### 4. RustFS for Binary Artifacts + +PostgreSQL stores metadata and indexes; RustFS stores binary artifacts (SBOMs, attestations, reports): + +``` +PostgreSQL: Schema definitions, relationships, indexes, audit trails +RustFS: sbom.cdx.json.zst, inventory.cdx.pb, bom-index.bin, *.dsse.json +``` + +**Rationale:** +- Right tool for each job +- PostgreSQL excellent for structured queries +- Object storage better for large binary blobs +- Clear separation of concerns + +## Consequences + +### Positive + +1. **Licensing trust** — PostgreSQL License is permissive, OSI-approved, and universally accepted. No vendor lock-in, no license ambiguity for sovereign deployments. Trusted by governments, regulated industries, and security-conscious organizations. +2. **Ecosystem stability** — 30+ years of development, included in all major distributions, no license rug-pulls. Community governance ensures long-term trust. +3. **Relational integrity** — Foreign keys, constraints, and transactions ensure data consistency. +4. **Query flexibility** — Complex joins, CTEs, window functions, and full-text search available natively. +5. **Operational maturity** — Well-understood backup, replication, and monitoring ecosystem. +6. **Row-level security** — Built-in multi-tenancy support without application-layer hacks. +7. **Schema evolution** — Mature migration tooling with online DDL capabilities. +8. **Performance** — Excellent query planning, connection pooling (PgBouncer), and indexing options. +9. **Auditability** — Proven audit logging extensions (pgAudit), compliance certifications, forensic tooling. + +### Negative + +1. **Schema rigidity** — Changes require migrations; less flexible than document stores for rapidly evolving schemas. +2. **Operational overhead** — Requires PostgreSQL expertise for tuning, vacuuming, and monitoring. +3. **Connection limits** — Need PgBouncer for high-concurrency workloads. + +### Follow-up Actions + +- [x] Create `docs/db/` documentation directory with specification, rules, and conversion plan +- [x] Define migration infrastructure in `StellaOps.Infrastructure.Postgres` +- [ ] Complete phased conversion from MongoDB per `docs/db/tasks/PHASE_*.md` +- [ ] Update deployment guides for PostgreSQL requirements +- [ ] Add PostgreSQL health checks to all control-plane services + +### Rollback Criteria + +Revert to MongoDB (or hybrid) if: +- Migration performance unacceptable (> 60s startup time) +- Query complexity exceeds PostgreSQL capabilities +- Operational burden exceeds team capacity + +## Alternatives Considered + +### Option A: Continue with MongoDB + +**Pros:** +- Already in use for some components +- Flexible schema +- Good for document-centric workloads + +**Cons:** +- **Licensing uncertainty** — MongoDB's SSPL (Server Side Public License, 2018) is not OSI-approved. Creates legal ambiguity for sovereign/self-hosted deployments, especially in regulated industries and government contexts where license provenance matters. +- **Ecosystem trust erosion** — SSPL switch caused major distributions (Debian, Fedora, RHEL) to drop MongoDB packages. Sovereign customers may have policies against non-OSI licenses. +- No referential integrity (app-enforced) +- Limited join capabilities +- Multi-tenancy requires additional logic +- No row-level security +- Less mature migration tooling + +**Rejected because:** Licensing uncertainty is incompatible with StellaOps' sovereign-first positioning. Control-plane domains are also fundamentally relational with strong consistency requirements. + +### Option B: Hybrid (PostgreSQL + MongoDB) + +**Pros:** +- Use each database for appropriate workloads +- Gradual migration possible + +**Cons:** +- Two databases to operate and monitor +- Complex deployment +- Cross-database consistency challenges +- Higher operational burden + +**Rejected because:** Unified PostgreSQL approach is simpler and sufficient for all control-plane needs. + +### Option C: CockroachDB / YugabyteDB + +**Pros:** +- PostgreSQL-compatible +- Built-in horizontal scaling +- Multi-region capabilities + +**Cons:** +- Additional operational complexity +- Less mature than PostgreSQL +- Overkill for current scale +- Air-gap deployment challenges + +**Rejected because:** PostgreSQL provides sufficient scale and simpler operations for current requirements. Can revisit if horizontal scaling becomes necessary. + +## References + +- [`docs/db/README.md`](../db/README.md) — Database documentation index +- [`docs/db/SPECIFICATION.md`](../db/SPECIFICATION.md) — Schema design specification +- [`docs/db/MIGRATION_STRATEGY.md`](../db/MIGRATION_STRATEGY.md) — Migration execution strategy +- [`docs/db/RULES.md`](../db/RULES.md) — Database coding rules +- [`docs/07_HIGH_LEVEL_ARCHITECTURE.md`](../07_HIGH_LEVEL_ARCHITECTURE.md) — High-level architecture overview diff --git a/docs/adr/index.md b/docs/adr/index.md index bbe4c9b6e..08007d4ec 100644 --- a/docs/adr/index.md +++ b/docs/adr/index.md @@ -34,8 +34,15 @@ Small, module-local refactors that do not modify public behaviour can live in co - [ ] Consequences call out migration or rollback steps. - [ ] Announcement posted to Docs Guild updates (or sprint log). +## ADR Index + +| ADR | Title | Status | Date | +|-----|-------|--------|------| +| [0001](./0001-postgresql-for-control-plane.md) | PostgreSQL for Control-Plane Storage | Accepted | 2025-12-04 | + ## Related resources - [Docs Guild Task Board](../TASKS.md) - [High-Level Architecture Overview](../07_HIGH_LEVEL_ARCHITECTURE.md) +- [Database Documentation](../db/README.md) - [Coding Standards](../18_CODING_STANDARDS.md) - [Release Engineering Playbook](../13_RELEASE_ENGINEERING_PLAYBOOK.md) diff --git a/docs/db/MIGRATION_STRATEGY.md b/docs/db/MIGRATION_STRATEGY.md new file mode 100644 index 000000000..17354100d --- /dev/null +++ b/docs/db/MIGRATION_STRATEGY.md @@ -0,0 +1,498 @@ +# PostgreSQL Migration Strategy + +**Version:** 1.0 +**Last Updated:** 2025-12-03 +**Status:** Active + +## Overview + +This document defines the migration strategy for StellaOps PostgreSQL databases. It covers initial setup, per-release migrations, multi-instance coordination, and air-gapped operation. + +## Principles + +1. **Forward-Only**: No down migrations. Fixes are applied as new forward migrations. +2. **Idempotent**: All migrations must be safe to re-run (use `IF NOT EXISTS`, `ON CONFLICT DO NOTHING`). +3. **Deterministic**: Same input produces identical schema state across environments. +4. **Air-Gap Compatible**: All migrations embedded in assemblies, no external dependencies. +5. **Zero-Downtime**: Non-breaking migrations run at startup; breaking changes require coordination. + +## Migration Categories + +### Category A: Startup Migrations (Automatic) + +Run automatically when application starts. Must complete within 60 seconds. + +**Allowed Operations:** +- `CREATE SCHEMA IF NOT EXISTS` +- `CREATE TABLE IF NOT EXISTS` +- `CREATE INDEX IF NOT EXISTS` +- `CREATE INDEX CONCURRENTLY` (non-blocking) +- `ALTER TABLE ADD COLUMN` (nullable or with default) +- `CREATE TYPE ... IF NOT EXISTS` (enums) +- Adding new enum values (`ALTER TYPE ... ADD VALUE IF NOT EXISTS`) +- Insert seed data with `ON CONFLICT DO NOTHING` + +**Forbidden Operations:** +- `DROP TABLE/COLUMN/INDEX` +- `ALTER TABLE DROP COLUMN` +- `ALTER TABLE ALTER COLUMN TYPE` +- `TRUNCATE` +- Large data migrations (> 10,000 rows affected) +- Any operation requiring `ACCESS EXCLUSIVE` lock for extended periods + +### Category B: Release Migrations (Manual/CLI) + +Require explicit execution via CLI before deployment. Used for breaking changes. + +**Typical Operations:** +- Dropping deprecated columns/tables +- Column type changes +- Large data backfills +- Index rebuilds +- Table renames +- Constraint modifications + +### Category C: Data Migrations (Batched) + +Long-running data transformations that run as background jobs. + +**Characteristics:** +- Batched processing (1000-10000 rows per batch) +- Resumable after interruption +- Progress tracking +- Can run alongside application + +## Migration File Structure + +``` +src//__Libraries/StellaOps..Storage.Postgres/ +├── Migrations/ +│ ├── 001_initial_schema.sql # Category A +│ ├── 002_add_audit_columns.sql # Category A +│ ├── 003_add_search_index.sql # Category A +│ └── 100_drop_legacy_columns.sql # Category B (100+ = manual) +├── Seeds/ +│ ├── 001_default_roles.sql # Seed data +│ └── 002_builtin_policies.sql # Seed data +└── DataMigrations/ + └── DM001_BackfillTenantIds.cs # Category C (code-based) +``` + +### Naming Convention + +| Prefix | Category | Description | +|--------|----------|-------------| +| `001-099` | A (Startup) | Automatic, non-breaking | +| `100-199` | B (Release) | Manual, breaking changes | +| `200-299` | B (Release) | Major version migrations | +| `S001-S999` | Seed | Reference data | +| `DM001-DM999` | C (Data) | Batched data migrations | + +## Execution Flow + +### Application Startup + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Application Startup │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 1. Acquire Advisory Lock (pg_try_advisory_lock) │ +│ Key: hash of schema name │ +│ If lock fails: wait up to 120s, then fail startup │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 2. Create schema_migrations table if not exists │ +│ Columns: migration_name, applied_at, checksum, category │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 3. Load embedded migrations (001-099 only) │ +│ - Sort by name │ +│ - Compute checksums │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 4. Compare with applied migrations │ +│ - Detect checksum mismatches (FATAL ERROR) │ +│ - Identify pending migrations │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 5. Check for pending Category B migrations │ +│ - If any 100+ migrations are pending: FAIL STARTUP │ +│ - Log: "Run 'stellaops migrate' before deployment" │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 6. Execute pending Category A migrations │ +│ - Each in transaction │ +│ - Record in schema_migrations │ +│ - Log timing │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 7. Execute seed data (if not already applied) │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 8. Release Advisory Lock │ +└─────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ 9. Continue Application Startup │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Release Migration (CLI) + +```bash +# Before deployment - run breaking migrations +stellaops system migrations-run --module Authority --category release + +# Verify migration state +stellaops system migrations-status --module Authority + +# Dry run (show what would be executed) +stellaops system migrations-run --module Authority --dry-run +``` + +## Multi-Instance Coordination + +### Advisory Locks + +Each module uses a unique advisory lock key derived from its schema name: + +```sql +-- Lock key calculation +SELECT pg_try_advisory_lock(hashtext('auth')); -- Authority +SELECT pg_try_advisory_lock(hashtext('scheduler')); -- Scheduler +SELECT pg_try_advisory_lock(hashtext('vuln')); -- Concelier +SELECT pg_try_advisory_lock(hashtext('policy')); -- Policy +SELECT pg_try_advisory_lock(hashtext('notify')); -- Notify +``` + +### Race Condition Handling + +``` +Instance A Instance B + │ │ + ├─ Acquire lock (success) ──► │ + │ ├─ Acquire lock (BLOCKED) + ├─ Run migrations │ Wait up to 120s + │ │ + ├─ Release lock ────────────► │ + │ ├─ Acquire lock (success) + │ ├─ Check migrations (none pending) + │ ├─ Release lock + │ │ + ▼ ▼ + Running Running +``` + +## Schema Migrations Table + +Each schema maintains its own migration history: + +```sql +CREATE TABLE IF NOT EXISTS {schema}.schema_migrations ( + migration_name TEXT PRIMARY KEY, + category TEXT NOT NULL DEFAULT 'startup', + checksum TEXT NOT NULL, + applied_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + applied_by TEXT, + duration_ms INT, + + CONSTRAINT valid_category CHECK (category IN ('startup', 'release', 'seed', 'data')) +); + +CREATE INDEX IF NOT EXISTS idx_schema_migrations_applied_at + ON {schema}.schema_migrations(applied_at DESC); +``` + +## Module-Specific Schemas + +| Module | Schema | Lock Key | Tables | +|--------|--------|----------|--------| +| Authority | `auth` | `hashtext('auth')` | tenants, users, roles, tokens, sessions | +| Scheduler | `scheduler` | `hashtext('scheduler')` | jobs, triggers, workers, locks | +| Concelier | `vuln` | `hashtext('vuln')` | advisories, affected, aliases, sources | +| Policy | `policy` | `hashtext('policy')` | packs, versions, rules, evaluations | +| Notify | `notify` | `hashtext('notify')` | templates, channels, deliveries | +| Excititor | `vex` | `hashtext('vex')` | statements, documents, products | + +## Release Workflow + +### Pre-Deployment + +```bash +# 1. Review pending migrations +stellaops system migrations-status --module all + +# 2. Backup database (if required) +pg_dump -Fc stellaops > backup_$(date +%Y%m%d).dump + +# 3. Run release migrations in maintenance window +stellaops system migrations-run --category release --module all + +# 4. Verify schema state +stellaops system migrations-verify --module all +``` + +### Deployment + +1. Deploy new application version +2. Application startup runs Category A migrations automatically +3. Health checks pass after migrations complete + +### Post-Deployment + +```bash +# Check migration status +stellaops system migrations-status --module all + +# Run any data migrations (background) +stellaops system migrations-run --category data --module all +``` + +## Rollback Strategy + +Since we use forward-only migrations, rollback is achieved through: + +1. **Fix-Forward**: Deploy a new migration that reverses the problematic change +2. **Blue/Green Deployment**: Switch back to previous version (requires backward-compatible migrations) +3. **Point-in-Time Recovery**: Restore from backup (last resort) + +### Backward Compatibility Window + +For zero-downtime deployments, migrations must be backward compatible for N-1 version: + +``` +Version N: Adds new nullable column 'status_v2' +Version N+1: Application uses 'status_v2', keeps 'status' populated +Version N+2: Migration removes 'status' column (Category B) +``` + +## Air-Gapped Operation + +All migrations are embedded as assembly resources: + +```xml + + + + + +``` + +No network access required during migration execution. + +## Monitoring & Observability + +### Metrics + +| Metric | Type | Description | +|--------|------|-------------| +| `stellaops_migration_duration_seconds` | Histogram | Time to run migration | +| `stellaops_migration_pending_count` | Gauge | Number of pending migrations | +| `stellaops_migration_applied_total` | Counter | Total migrations applied | +| `stellaops_migration_failed_total` | Counter | Total migration failures | + +### Logging + +``` +[INF] Migration: Acquiring lock for schema 'auth' +[INF] Migration: Lock acquired, checking pending migrations +[INF] Migration: 2 pending migrations found +[INF] Migration: Applying 003_add_audit_columns.sql (checksum: a1b2c3...) +[INF] Migration: 003_add_audit_columns.sql completed in 245ms +[INF] Migration: Applying 004_add_search_index.sql (checksum: d4e5f6...) +[INF] Migration: 004_add_search_index.sql completed in 1823ms +[INF] Migration: All migrations applied, releasing lock +``` + +### Alerts + +- Migration lock held > 5 minutes +- Migration failure +- Checksum mismatch detected +- Pending Category B migrations blocking startup + +## Development Workflow + +### Creating a New Migration + +```bash +# 1. Create migration file +touch src/Authority/__Libraries/StellaOps.Authority.Storage.Postgres/Migrations/005_add_mfa_columns.sql + +# 2. Write idempotent SQL +cat > 005_add_mfa_columns.sql << 'EOF' +-- Migration: 005_add_mfa_columns +-- Category: startup +-- Description: Add MFA support columns to users table + +ALTER TABLE auth.users ADD COLUMN IF NOT EXISTS mfa_enabled BOOLEAN NOT NULL DEFAULT FALSE; +ALTER TABLE auth.users ADD COLUMN IF NOT EXISTS mfa_secret TEXT; +ALTER TABLE auth.users ADD COLUMN IF NOT EXISTS mfa_backup_codes TEXT[]; + +CREATE INDEX IF NOT EXISTS idx_users_mfa_enabled ON auth.users(mfa_enabled) WHERE mfa_enabled = TRUE; +EOF + +# 3. Test locally +dotnet run --project src/Authority/StellaOps.Authority.WebService + +# 4. Verify migration applied +stellaops system migrations-status --module Authority +``` + +### Testing Migrations + +```bash +# Run integration tests with migrations +dotnet test --filter "Category=Migration" + +# Test idempotency (run twice) +stellaops system migrations-run --module Authority +stellaops system migrations-run --module Authority # Should be no-op +``` + +## Troubleshooting + +### Lock Timeout + +``` +ERROR: Could not acquire migration lock within 120 seconds +``` + +**Cause**: Another instance is running migrations or crashed while holding lock. + +**Resolution**: +```sql +-- Check active locks +SELECT * FROM pg_locks WHERE locktype = 'advisory'; + +-- Force release (use with caution) +SELECT pg_advisory_unlock_all(); +``` + +### Checksum Mismatch + +``` +ERROR: Migration checksum mismatch for '003_add_audit_columns.sql' + Expected: a1b2c3d4e5f6... + Found: x9y8z7w6v5u4... +``` + +**Cause**: Migration file was modified after being applied. + +**Resolution**: +1. Never modify applied migrations +2. If intentional, update checksum manually in `schema_migrations` +3. Create new migration with fix instead + +### Pending Release Migrations + +``` +ERROR: Cannot start application - pending release migrations require manual execution + Pending: 100_drop_legacy_columns.sql + Run: stellaops system migrations-run --module Authority --category release +``` + +**Resolution**: Run CLI migration command before deployment. + +## Integration Guide + +### Adding Startup Migrations to a Module + +```csharp +// In Program.cs or Startup.cs +using StellaOps.Infrastructure.Postgres.Migrations; + +// Option 1: Using PostgresOptions +services.AddStartupMigrations( + schemaName: "auth", + moduleName: "Authority", + migrationsAssembly: typeof(AuthorityDataSource).Assembly, + configureOptions: options => + { + options.LockTimeoutSeconds = 120; + options.FailOnPendingReleaseMigrations = true; + }); + +// Option 2: Using custom options type +services.AddStartupMigrations( + schemaName: "auth", + moduleName: "Authority", + migrationsAssembly: typeof(AuthorityDataSource).Assembly, + connectionStringSelector: opts => opts.Storage.ConnectionString); + +// Add migration status service for health checks +services.AddMigrationStatus( + schemaName: "auth", + moduleName: "Authority", + migrationsAssembly: typeof(AuthorityDataSource).Assembly, + connectionStringSelector: opts => opts.ConnectionString); +``` + +### Embedding Migrations in Assembly + +```xml + + + + + +``` + +### Health Check Integration + +```csharp +// Add migration status to health checks +services.AddHealthChecks() + .AddCheck("migrations", async (cancellationToken) => + { + var status = await migrationStatusService.GetStatusAsync(cancellationToken); + + if (status.HasBlockingIssues) + { + return HealthCheckResult.Unhealthy( + $"Pending release migrations: {status.PendingReleaseCount}, " + + $"Checksum errors: {status.ChecksumErrors.Count}"); + } + + if (status.PendingStartupCount > 0) + { + return HealthCheckResult.Degraded( + $"Pending startup migrations: {status.PendingStartupCount}"); + } + + return HealthCheckResult.Healthy($"Applied: {status.AppliedCount}"); + }); +``` + +## Implementation Files + +| File | Description | +|------|-------------| +| `src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/MigrationRunner.cs` | Core migration execution logic | +| `src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/MigrationCategory.cs` | Migration category enum and helpers | +| `src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/StartupMigrationHost.cs` | IHostedService for automatic migrations | +| `src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations/MigrationServiceExtensions.cs` | DI registration extensions | + +## Reference + +- [PostgreSQL Advisory Locks](https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS) +- [Zero-Downtime Migrations](https://docs.stellaops.org/operations/migrations) +- [StellaOps CLI Reference](../09_API_CLI_REFERENCE.md) diff --git a/docs/db/README.md b/docs/db/README.md index 127e22270..47d70e129 100644 --- a/docs/db/README.md +++ b/docs/db/README.md @@ -2,6 +2,8 @@ This directory contains all documentation related to the StellaOps database architecture, including the MongoDB to PostgreSQL conversion project. +> **ADR Reference:** See [ADR-0001: PostgreSQL for Control-Plane Storage](../adr/0001-postgresql-for-control-plane.md) for the architectural decision rationale. + ## Document Index | Document | Purpose | diff --git a/docs/implplan/BLOCKED_DEPENDENCY_TREE.md b/docs/implplan/BLOCKED_DEPENDENCY_TREE.md new file mode 100644 index 000000000..48a4b3fa5 --- /dev/null +++ b/docs/implplan/BLOCKED_DEPENDENCY_TREE.md @@ -0,0 +1,502 @@ +# BLOCKED Tasks Dependency Tree + +> **Last Updated:** 2025-12-04 +> **Purpose:** This document maps all BLOCKED tasks and their root causes to help teams prioritize unblocking work. + +## How to Use This Document + +Before starting work on any BLOCKED task, check this tree to understand: +1. What is the **root blocker** (external dependency, missing spec, staffing, etc.) +2. What **chain of tasks** depends on it +3. Which team/guild owns the root blocker + +--- + +## Legend + +- **Root Blocker** — External/system cause (missing spec, staffing, disk space, etc.) +- **Chained Blocked** — Blocked by another BLOCKED task +- **Module** — Module/guild name + +--- + +## 1. SIGNALS & RUNTIME FACTS (SGSI0101) — Critical Path + +**Root Blocker:** `PREP-SIGNALS-24-002` (CAS promotion pending) + +``` +PREP-SIGNALS-24-002 (CAS promotion pending) + +-- 24-002: Surface cache availability + +-- 24-003: Runtime facts ingestion + provenance enrichment + +-- 24-004: Authority scopes + 24-003 + +-- 24-005: 24-004 scoring outputs +``` + +**Root Blocker:** `SGSI0101 provenance feed/contract pending` + +``` +SGSI0101 provenance feed/contract pending + +-- 56-001: Telemetry provenance + +-- 401-004: Replay Core (awaiting runtime facts + GAP-REP-004) +``` + +**Impact:** 6+ tasks in Signals, Telemetry, Replay Core guilds + +**To Unblock:** Deliver CAS promotion and SGSI0101 provenance contract + +--- + +## 2. API GOVERNANCE (APIG0101) — DevPortal & SDK Chain + +**Root Blocker:** `APIG0101 outputs` (API baseline missing) + +``` +APIG0101 outputs (API baseline) + +-- 62-001: DevPortal API baseline + | +-- 62-002: Blocked until 62-001 + | +-- 63-001: Platform integration + | +-- 63-002: SDK Generator integration + | + +-- 63-003: SDK Generator (APIG0101 outputs) + +-- 63-004: SDK Generator outstanding +``` + +**Impact:** 6 tasks in DevPortal + SDK Generator guilds + +**To Unblock:** Deliver APIG0101 API baseline outputs + +--- + +## 3. VEX LENS CHAIN (30-00x Series) + +**Root Blocker:** `VEX normalization + issuer directory + API governance specs` + +``` +VEX normalization + issuer directory + API governance specs + +-- 30-001: VEX Lens base + +-- 30-002 + +-- 30-003 (Issuer Directory) + +-- 30-004 (Policy) + +-- 30-005 + +-- 30-006 (Findings Ledger) + +-- 30-007 + +-- 30-008 (Policy) + +-- 30-009 (Observability) + +-- 30-010 (QA) + +-- 30-011 (DevOps) +``` + +**Impact:** 11 tasks — full VEX Lens series + +**To Unblock:** Publish VEX normalization spec, issuer directory contract, and API governance specs + +--- + +## 4. DEPLOYMENT CHAIN (44-xxx to 45-xxx) + +**Root Blocker:** `Upstream module releases` (service list/version pins) + +``` +Upstream module releases (service list/version pins) + +-- 44-001: Compose deployment base + | +-- 44-002 + | +-- 44-003 + | +-- 45-001 + | +-- 45-002 (Security) + | +-- 45-003 (Observability) + | + +-- COMPOSE-44-001 (parallel blocker) +``` + +**Impact:** 7 tasks in Deployment Guild + +**To Unblock:** Publish consolidated service list and version pins from upstream modules + +--- + +## 5. AIRGAP ECOSYSTEM + +### 5.1 Controller Chain + +**Root Blocker:** `Disk full` (workspace cleanup needed) + +``` +Disk full (workspace cleanup needed) + +-- AIRGAP-CTL-57-001: Startup diagnostics + +-- AIRGAP-CTL-57-002: Seal/unseal telemetry + +-- AIRGAP-CTL-58-001: Time anchor persistence +``` + +### 5.2 Importer Chain + +**Root Blocker:** `Disk space + controller telemetry` + +``` +Disk space + controller telemetry + +-- AIRGAP-IMP-57-002: Object-store loader + +-- AIRGAP-IMP-58-001: Import API + CLI + +-- AIRGAP-IMP-58-002: Timeline events +``` + +### 5.3 Time Chain + +**Root Blocker:** `Controller telemetry + disk space` + +``` +Controller telemetry + disk space + +-- AIRGAP-TIME-57-002: Time anchor telemetry + +-- AIRGAP-TIME-58-001: Drift baseline + +-- AIRGAP-TIME-58-002: Staleness notifications +``` + +### 5.4 CLI AirGap Chain + +**Root Blocker:** `Mirror bundle contract/spec` not available + +``` +Mirror bundle contract/spec not available + +-- CLI-AIRGAP-56-001: stella mirror create + +-- CLI-AIRGAP-56-002: Telemetry sealed mode + +-- CLI-AIRGAP-57-001: stella airgap import + +-- CLI-AIRGAP-57-002: stella airgap seal + +-- CLI-AIRGAP-58-001: stella airgap export evidence +``` + +### 5.5 Docs AirGap + +**Root Blocker:** `CLI airgap contract` (CLI-AIRGAP-56/57) + +``` +CLI airgap contract (CLI-AIRGAP-56/57) + +-- AIRGAP-57-003: CLI & ops inputs + +-- AIRGAP-57-004: Ops Guild +``` + +**Impact:** 17+ tasks in AirGap ecosystem + +**To Unblock:** +1. Clean up disk space +2. Publish mirror bundle contract/spec +3. Complete CLI-AIRGAP-56-001 + +--- + +## 6. CLI ATTESTOR CHAIN + +**Root Blocker:** `Scanner analyzer compile failures + attestor SDK transport contract` + +``` +Scanner analyzer compile failures + attestor SDK transport contract + +-- CLI-ATTEST-73-001: stella attest sign + +-- CLI-ATTEST-73-002: stella attest verify + +-- CLI-ATTEST-74-001: stella attest list + +-- CLI-ATTEST-74-002: stella attest fetch +``` + +**Impact:** 4 tasks in CLI Attestor Guild + +**To Unblock:** Fix scanner analyzer compile issues; publish attestor SDK transport contract + +--- + +## 7. TASK RUNNER CHAINS + +### 7.1 AirGap + +**Root Blocker:** `TASKRUN-AIRGAP-56-002` + +``` +TASKRUN-AIRGAP-56-002 + +-- TASKRUN-AIRGAP-57-001: Sealed environment check + +-- TASKRUN-AIRGAP-58-001: Evidence bundles +``` + +### 7.2 OAS Chain + +**Root Blocker:** `TASKRUN-41-001` (DONE - chain should unblock) + +``` +TASKRUN-41-001 (DONE) + +-- TASKRUN-OAS-61-001: Task Runner OAS docs + +-- TASKRUN-OAS-61-002: OpenAPI well-known + +-- TASKRUN-OAS-62-001: SDK examples + +-- TASKRUN-OAS-63-001: Deprecation handling +``` + +### 7.3 Observability Chain + +**Root Blocker:** `Timeline event schema + evidence-pointer contract` + +``` +Timeline event schema + evidence-pointer contract + +-- TASKRUN-OBS-52-001: Timeline events + +-- TASKRUN-OBS-53-001: Evidence locker snapshots + +-- TASKRUN-OBS-54-001: DSSE attestations + | +-- TASKRUN-OBS-55-001: Incident mode + +-- TASKRUN-TEN-48-001: Tenant context +``` + +**Impact:** 10+ tasks in Task Runner Guild + +**To Unblock:** Publish timeline event schema and evidence-pointer contract + +--- + +## 8. SCANNER CHAINS + +**Root Blocker:** `PHP analyzer bootstrap spec/fixtures` + +``` +PHP analyzer bootstrap spec/fixtures (composer/VFS schema) + +-- SCANNER-ANALYZERS-PHP-27-001 +``` + +**Root Blocker:** `18-503/504/505/506 outputs` (EntryTrace baseline) + +``` +18-503/504/505/506 outputs (EntryTrace baseline) + +-- SCANNER-ENTRYTRACE-18-508 +``` + +**Root Blocker:** `Task definition/contract missing` + +``` +Task definition/contract missing + +-- SCANNER-SURFACE-01 +``` + +**Root Blocker:** `SCANNER-ANALYZERS-JAVA-21-007` + +``` +SCANNER-ANALYZERS-JAVA-21-007 + +-- ANALYZERS-JAVA-21-008 +``` + +**Root Blocker:** `Local dotnet tests hanging` + +``` +SCANNER-ANALYZERS-LANG-10-309 (DONE, but local tests hanging) + +-- ANALYZERS-LANG-11-001 +``` + +**Impact:** 5 tasks in Scanner Guild + +**To Unblock:** +1. Publish PHP analyzer bootstrap spec +2. Complete EntryTrace 18-503/504/505/506 +3. Define SCANNER-SURFACE-01 contract +4. Complete JAVA-21-007 +5. Fix local dotnet test environment + +--- + +## 8.1 CLI COMPILE FAILURES (Detailed Analysis) + +> **Analysis Date:** 2025-12-04 +> **Status:** ✅ **RESOLVED** (2025-12-04) +> **Resolution:** See `docs/implplan/CLI_AUTH_MIGRATION_PLAN.md` + +The CLI (`src/Cli/StellaOps.Cli`) had significant API drift from its dependencies. This has been resolved. + +### Remediation Summary (All Fixed) + +| Library | Issue | Status | +|---------|-------|--------| +| `StellaOps.Auth.Client` | `IStellaOpsTokenClient` interface changed | ✅ **FIXED** - Extension methods created | +| `StellaOps.Cli.Output` | `CliError` constructor change | ✅ **FIXED** | +| `System.CommandLine` | API changes in 2.0.0-beta5+ | ✅ **FIXED** | +| `Spectre.Console` | `Table.AddRow` signature change | ✅ **FIXED** | +| `BackendOperationsClient` | `CreateFailureDetailsAsync` return type | ✅ **FIXED** | +| `CliProfile` | Class→Record conversion | ✅ **FIXED** | +| `X509Certificate2` | Missing using directive | ✅ **FIXED** | +| `StellaOps.PolicyDsl` | `PolicyIssue` properties changed | ✅ **FIXED** | +| `CommandHandlers` | Method signature mismatches | ✅ **FIXED** | + +### Build Result + +**Build succeeded with 0 errors, 6 warnings** (warnings are non-blocking) + +### Previously Blocked Tasks (Now Unblocked) + +``` +CLI Compile Failures (RESOLVED) + +-- CLI-ATTEST-73-001: stella attest sign → UNBLOCKED + +-- CLI-ATTEST-73-002: stella attest verify → UNBLOCKED + +-- CLI-AIAI-31-001: Advisory AI CLI integration → UNBLOCKED + +-- CLI-AIRGAP-56-001: stella mirror create → UNBLOCKED + +-- CLI-401-007: Reachability evidence chain → UNBLOCKED + +-- CLI-401-021: Reachability chain CI/attestor → UNBLOCKED +``` + +### Key Changes Made + +1. Created `src/Cli/StellaOps.Cli/Extensions/StellaOpsTokenClientExtensions.cs` with compatibility shims +2. Updated 8 service files to use new Auth.Client API pattern +3. Fixed CommandFactory.cs method call argument order/types +4. Updated PolicyDiagnostic model (Path instead of Line/Column/Span/Suggestion) +5. Fixed CommandHandlers.cs static type and diagnostic rendering + +--- + +## 9. CONCELIER RISK CHAIN + +**Root Blocker:** `POLICY-20-001 outputs + AUTH-TEN-47-001 + shared signals library` + +``` +POLICY-20-001 + AUTH-TEN-47-001 + shared signals library + +-- CONCELIER-RISK-66-001: Vendor CVSS/KEV data + +-- CONCELIER-RISK-66-002: Fix-availability metadata + +-- CONCELIER-RISK-67-001: Coverage/conflict metrics + +-- CONCELIER-RISK-68-001: Advisory signal pickers + +-- CONCELIER-RISK-69-001 (continues) +``` + +**Impact:** 5+ tasks in Concelier Core Guild + +**To Unblock:** Complete POLICY-20-001, AUTH-TEN-47-001, and adopt shared signals library + +--- + +## 10. WEB/GRAPH CHAIN + +**Root Blocker:** Upstream dependencies (unspecified) + +``` +Upstream dependencies + +-- WEB-GRAPH-21-001: Graph gateway routes + +-- WEB-GRAPH-21-002: Parameter validation + +-- WEB-GRAPH-21-003: Error mapping + +-- WEB-GRAPH-21-004: Policy Engine proxy +``` + +**Root Blocker:** `WEB-POLICY-20-004` + +``` +WEB-POLICY-20-004 + +-- WEB-POLICY-23-001: Policy packs API + +-- WEB-POLICY-23-002: Activation endpoint +``` + +**Impact:** 6 tasks in BE-Base Platform Guild + +**To Unblock:** Complete WEB-POLICY-20-004 and upstream graph dependencies + +--- + +## 11. STAFFING / PROGRAM MANAGEMENT BLOCKERS + +**Root Blocker:** `PGMI0101 staffing confirmation` + +``` +PGMI0101 staffing confirmation + +-- 54-001: Exporter/AirGap/CLI coordination + +-- 64-002: DevPortal Offline + +-- AIRGAP-46-001: Mirror staffing + DSSE plan +``` + +**Root Blocker:** `PROGRAM-STAFF-1001` (staffing not assigned) + +``` +PROGRAM-STAFF-1001 (staffing not assigned) + +-- 54-001 (same as above) +``` + +**Impact:** 3 tasks + +**To Unblock:** Confirm staffing assignments via Program Management Guild + +--- + +## 12. BENCHMARK CHAIN + +**Root Blocker:** `CAGR0101 outputs` (Graph platform) + +``` +CAGR0101 outputs (Graph platform) + +-- BENCH-GRAPH-21-001: Graph benchmark harness + +-- BENCH-GRAPH-21-002: UI load benchmark +``` + +**Impact:** 2 tasks in Bench Guild + +**To Unblock:** Complete CAGR0101 Graph platform outputs + +--- + +## 13. FINDINGS LEDGER + +**Root Blocker:** `LEDGER-AIRGAP-56-002 staleness spec + AirGap time anchors` + +``` +LEDGER-AIRGAP-56-002 staleness spec + AirGap time anchors + +-- 58 series: LEDGER-AIRGAP chain + +-- AIRGAP-58-001: Concelier bundle contract + +-- AIRGAP-58-002 + +-- AIRGAP-58-003 + +-- AIRGAP-58-004 +``` + +**Impact:** 5 tasks in Findings Ledger + AirGap guilds + +**To Unblock:** Publish LEDGER-AIRGAP-56-002 staleness spec and time anchor contract + +--- + +## 14. MISCELLANEOUS BLOCKED TASKS + +| Task ID | Root Blocker | Guild | +|---------|--------------|-------| +| FEED-REMEDIATION-1001 | Scope missing; needs remediation runbook | Concelier Feed Owners | +| CLI-41-001 | Pending clarified scope | Docs/DevEx Guild | +| CLI-42-001 | Pending clarified scope | Docs Guild | +| CLI-AIAI-31-001 | Scanner analyzers compile failures | DevEx/CLI Guild | +| CLI-401-007 | Reachability evidence chain contract | UI & CLI Guilds | +| CLI-401-021 | Reachability chain CI/attestor contract | CLI/DevOps Guild | +| SVC-35-001 | Unspecified | Exporter Service Guild | +| VEX-30-001 | Unspecified | Console/BE-Base Guild | +| VULN-29-001 | Unspecified | Console/BE-Base Guild | +| WEB-RISK-66-001 | npm ci hangs; Angular tests broken | BE-Base/Policy Guild | +| CONCELIER-LNM-21-003 | Requires #8 heuristics | Concelier Core Guild | + +--- + +## Summary Statistics + +| Root Blocker Category | Root Blockers | Downstream Tasks | +|----------------------|---------------|------------------| +| SGSI0101 (Signals/Runtime) | 2 | ~6 | +| APIG0101 (API Governance) | 1 | 6 | +| VEX Specs | 1 | 11 | +| Deployment/Compose | 1 | 7 | +| AirGap Ecosystem | 4 | 17+ | +| Scanner Compile/Specs | 5 | 5 | +| Task Runner Contracts | 3 | 10+ | +| Staffing/Program Mgmt | 2 | 3 | +| Disk Full | 1 | 6 | +| Graph/Policy Upstream | 2 | 6 | +| Miscellaneous | 11 | 11 | + +**Total BLOCKED tasks:** ~100+ + +--- + +## Priority Unblocking Actions + +These root blockers, if resolved, will unblock the most downstream tasks: + +1. **SGSI0101** — Unblocks Signals chain + Telemetry + Replay Core (~6 tasks) +2. **APIG0101** — Unblocks DevPortal + SDK Generator (6 tasks) +3. **VEX normalization spec** — Unblocks 11 VEX Lens tasks +4. **Mirror bundle contract** — Unblocks CLI AirGap + Importer chains (~8 tasks) +5. **Disk cleanup** — Unblocks AirGap Controller/Time chains (6 tasks) +6. **Scanner analyzer fixes** — Unblocks CLI Attestor + Advisory AI (5+ tasks) +7. **Upstream module releases** — Unblocks Deployment chain (7 tasks) +8. **Timeline event schema** — Unblocks Task Runner Observability (5 tasks) + +--- + +## Cross-Reference + +- Sprint files reference this document for BLOCKED task context +- Update this file when root blockers are resolved +- Notify dependent guilds when unblocking occurs diff --git a/docs/implplan/CLI_AUTH_MIGRATION_PLAN.md b/docs/implplan/CLI_AUTH_MIGRATION_PLAN.md new file mode 100644 index 000000000..f59d8f3dd --- /dev/null +++ b/docs/implplan/CLI_AUTH_MIGRATION_PLAN.md @@ -0,0 +1,143 @@ +# CLI Auth.Client Migration Plan + +> **Created:** 2025-12-04 +> **Status:** COMPLETED +> **Completed:** 2025-12-04 + +## Problem Statement + +The CLI services used an older `IStellaOpsTokenClient` API that no longer exists. This document outlines the migration strategy and tracks completion. + +## Summary of Changes + +### Files Created +- `src/Cli/StellaOps.Cli/Extensions/StellaOpsTokenClientExtensions.cs` - Compatibility shim methods + +### Files Modified + +#### Service Files (Auth.Client API Migration) +1. `OrchestratorClient.cs` - Updated scope references +2. `VexObservationsClient.cs` - Updated to use `GetAccessTokenAsync(string)` extension, removed `IsSuccess` check +3. `SbomerClient.cs` - Fixed `GetTokenAsync` to use `AccessToken` property +4. `ExceptionClient.cs` - Updated token acquisition pattern +5. `NotifyClient.cs` - Updated token acquisition pattern +6. `ObservabilityClient.cs` - Updated token acquisition pattern +7. `PackClient.cs` - Updated token acquisition pattern +8. `SbomClient.cs` - Updated token acquisition pattern + +#### Command Handlers (Signature Fixes) +9. `CommandHandlers.cs`: + - Fixed `CreateLogger()` static type error (line 80) + - Fixed PolicyDsl diagnostic rendering (removed Line/Column/Suggestion, added Path) + +10. `CommandFactory.cs`: + - Fixed `HandleExceptionsListAsync` argument order and count + - Fixed `HandleExceptionsCreateAsync` argument order, expiration type conversion + - Fixed `HandleExceptionsPromoteAsync` argument order + - Fixed `HandleExceptionsExportAsync` argument order and count + - Fixed `HandleExceptionsImportAsync` argument order + +#### Model Updates +11. `PolicyWorkspaceModels.cs` - Updated `PolicyDiagnostic` class (replaced Line/Column/Span/Suggestion with Path) + +## Old API (Removed) + +```csharp +// Methods that no longer exist +Task GetTokenAsync(StellaOpsTokenRequest request, CancellationToken ct); +Task GetAccessTokenAsync(string[] scopes, CancellationToken ct); + +// Types that no longer exist +class StellaOpsTokenRequest { string[] Scopes; } +static class StellaOpsScope { const string OrchRead = "orch:read"; } + +// Properties removed from StellaOpsTokenResult +bool IsSuccess; +``` + +## New API (Current) + +```csharp +interface IStellaOpsTokenClient +{ + Task RequestClientCredentialsTokenAsync( + string? scope = null, + IReadOnlyDictionary? additionalParameters = null, + CancellationToken cancellationToken = default); + + ValueTask GetCachedTokenAsync(string key, CancellationToken ct); + ValueTask CacheTokenAsync(string key, StellaOpsTokenCacheEntry entry, CancellationToken ct); +} + +// StellaOpsTokenResult record properties: +// - AccessToken (string) +// - TokenType (string) +// - ExpiresAtUtc (DateTimeOffset) +// - Scopes (IReadOnlyList) +``` + +## Migration Approach + +### Extension Methods Created + +```csharp +public static class StellaOpsTokenClientExtensions +{ + // Single scope version + public static async Task GetAccessTokenAsync( + this IStellaOpsTokenClient client, + string scope, + CancellationToken cancellationToken = default); + + // Multi-scope version + public static async Task GetAccessTokenAsync( + this IStellaOpsTokenClient client, + IEnumerable scopes, + CancellationToken cancellationToken = default); + + // Cached token version + public static async Task GetCachedAccessTokenAsync( + this IStellaOpsTokenClient client, + string scope, + CancellationToken cancellationToken = default); + + // Parameterless version + public static async Task GetTokenAsync( + this IStellaOpsTokenClient client, + CancellationToken cancellationToken = default); +} +``` + +### Scope Constants + +Used `StellaOpsScopes` from `StellaOps.Auth.Abstractions` namespace (e.g., `StellaOpsScopes.OrchRead`, `StellaOpsScopes.VexRead`). + +## Build Results + +**Build succeeded with 0 errors, 6 warnings:** +- 3x CS8629 nullable warnings in OutputRenderer.cs +- 1x CS0618 obsolete warning (VulnRead → VulnView) +- 1x SYSLIB0057 obsolete X509Certificate2 constructor +- 1x CS0219 unused variable warning + +## Implementation Checklist + +- [x] Create `StellaOpsTokenClientExtensions.cs` +- [x] Verify `StellaOpsScopes` exists in Auth.Abstractions +- [x] Update OrchestratorClient.cs +- [x] Update VexObservationsClient.cs +- [x] Update SbomerClient.cs +- [x] Update ExceptionClient.cs +- [x] Update NotifyClient.cs +- [x] Update ObservabilityClient.cs +- [x] Update PackClient.cs +- [x] Update SbomClient.cs +- [x] Fix CommandHandlers static type error +- [x] Fix PolicyDsl API changes (PolicyIssue properties) +- [x] Fix HandleExceptionsListAsync signature +- [x] Fix HandleExceptionsCreateAsync signature +- [x] Fix HandleExceptionsPromoteAsync signature +- [x] Fix HandleExceptionsExportAsync signature +- [x] Fix HandleExceptionsImportAsync signature +- [x] Update PolicyDiagnostic model +- [x] Build verification passed diff --git a/docs/implplan/SPRINT_0111_0001_0001_advisoryai.md b/docs/implplan/SPRINT_0111_0001_0001_advisoryai.md index 2b79a18d9..f499cab8a 100644 --- a/docs/implplan/SPRINT_0111_0001_0001_advisoryai.md +++ b/docs/implplan/SPRINT_0111_0001_0001_advisoryai.md @@ -21,6 +21,8 @@ - docs/modules/platform/architecture-overview.md - docs/modules/advisory-ai/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0113_0001_0002_concelier_ii.md b/docs/implplan/SPRINT_0113_0001_0002_concelier_ii.md index 38ad4144d..f212bdc12 100644 --- a/docs/implplan/SPRINT_0113_0001_0002_concelier_ii.md +++ b/docs/implplan/SPRINT_0113_0001_0002_concelier_ii.md @@ -25,6 +25,8 @@ - `src/Concelier/AGENTS.md` (module charter, testing/guardrail rules) - `docs/modules/concelier/link-not-merge-schema.md` (LNM schema v1, frozen 2025-11-17) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0114_0001_0003_concelier_iii.md b/docs/implplan/SPRINT_0114_0001_0003_concelier_iii.md index 1a199c579..9141efc22 100644 --- a/docs/implplan/SPRINT_0114_0001_0003_concelier_iii.md +++ b/docs/implplan/SPRINT_0114_0001_0003_concelier_iii.md @@ -23,6 +23,8 @@ - docs/modules/concelier/architecture.md (ingestion, observability, orchestrator notes) - Current OpenAPI spec + SDK docs referenced by CONCELIER-OAS-61/62/63 +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0115_0001_0004_concelier_iv.md b/docs/implplan/SPRINT_0115_0001_0004_concelier_iv.md index 27c08d081..1e30059ff 100644 --- a/docs/implplan/SPRINT_0115_0001_0004_concelier_iv.md +++ b/docs/implplan/SPRINT_0115_0001_0004_concelier_iv.md @@ -23,6 +23,8 @@ - docs/modules/concelier/architecture.md (policy/risk/tenant scope sections) - docs/dev/raw-linkset-backfill-plan.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0116_0001_0005_concelier_v.md b/docs/implplan/SPRINT_0116_0001_0005_concelier_v.md index 49945d5a7..88e0a7c23 100644 --- a/docs/implplan/SPRINT_0116_0001_0005_concelier_v.md +++ b/docs/implplan/SPRINT_0116_0001_0005_concelier_v.md @@ -24,6 +24,8 @@ - docs/modules/concelier/architecture.md (airgap, AOC, observability) - Link-Not-Merge API specs and error envelope guidelines +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0117_0001_0006_concelier_vi.md b/docs/implplan/SPRINT_0117_0001_0006_concelier_vi.md index b1b0821b9..b48a0ddaa 100644 --- a/docs/implplan/SPRINT_0117_0001_0006_concelier_vi.md +++ b/docs/implplan/SPRINT_0117_0001_0006_concelier_vi.md @@ -24,6 +24,8 @@ - docs/modules/concelier/architecture.md (connectors, evidence locker integration) - docs/migration/no-merge.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0119_0001_0004_excititor_iv.md b/docs/implplan/SPRINT_0119_0001_0004_excititor_iv.md index 6c45ee8e5..9e01171ea 100644 --- a/docs/implplan/SPRINT_0119_0001_0004_excititor_iv.md +++ b/docs/implplan/SPRINT_0119_0001_0004_excititor_iv.md @@ -24,6 +24,8 @@ - `docs/modules/excititor/implementation_plan.md` - Excititor component `AGENTS.md` files (Core, WebService, Worker). +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0119_0001_0005_excititor_v.md b/docs/implplan/SPRINT_0119_0001_0005_excititor_v.md index 8ac4e5e83..9a58b8bcd 100644 --- a/docs/implplan/SPRINT_0119_0001_0005_excititor_v.md +++ b/docs/implplan/SPRINT_0119_0001_0005_excititor_v.md @@ -24,6 +24,8 @@ - `docs/modules/excititor/implementation_plan.md` - Excititor component `AGENTS.md` files (WebService, Core, Storage). +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0119_0001_0006_excititor_vi.md b/docs/implplan/SPRINT_0119_0001_0006_excititor_vi.md index 21cb69442..8aff7618d 100644 --- a/docs/implplan/SPRINT_0119_0001_0006_excititor_vi.md +++ b/docs/implplan/SPRINT_0119_0001_0006_excititor_vi.md @@ -23,6 +23,8 @@ - `docs/modules/excititor/implementation_plan.md` - Excititor component `AGENTS.md` files (WebService). +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0120_0000_0001_policy_reasoning.md b/docs/implplan/SPRINT_0120_0000_0001_policy_reasoning.md index 4899d92ad..2d15fb477 100644 --- a/docs/implplan/SPRINT_0120_0000_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0120_0000_0001_policy_reasoning.md @@ -44,6 +44,8 @@ - `docs/modules/findings-ledger/airgap-provenance.md` - `docs/observability/policy.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0121_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0121_0001_0001_policy_reasoning.md index afdc6a658..bbb596b6b 100644 --- a/docs/implplan/SPRINT_0121_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0121_0001_0001_policy_reasoning.md @@ -26,6 +26,8 @@ - docs/modules/findings-ledger/workflow-inference.md - src/Findings/StellaOps.Findings.Ledger/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0121_0001_0002_policy_reasoning_blockers.md b/docs/implplan/SPRINT_0121_0001_0002_policy_reasoning_blockers.md index 9b349d9b3..59ab66d8e 100644 --- a/docs/implplan/SPRINT_0121_0001_0002_policy_reasoning_blockers.md +++ b/docs/implplan/SPRINT_0121_0001_0002_policy_reasoning_blockers.md @@ -20,6 +20,8 @@ - `docs/modules/findings-ledger/prep/ledger-attestations-http.md` - `docs/modules/findings-ledger/prep/ledger-risk-prep.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0122_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0122_0001_0001_policy_reasoning.md index b9366b394..e10e311ab 100644 --- a/docs/implplan/SPRINT_0122_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0122_0001_0001_policy_reasoning.md @@ -27,6 +27,8 @@ - docs/modules/findings-ledger/workflow-inference.md - src/Findings/StellaOps.Findings.Ledger/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0123_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0123_0001_0001_policy_reasoning.md index 133f72d40..0be1f1ced 100644 --- a/docs/implplan/SPRINT_0123_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0123_0001_0001_policy_reasoning.md @@ -25,6 +25,8 @@ - `docs/modules/policy/architecture.md` - Any export/air-gap/attestation contract docs once published. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0124_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0124_0001_0001_policy_reasoning.md index 296fbee38..27c69a039 100644 --- a/docs/implplan/SPRINT_0124_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0124_0001_0001_policy_reasoning.md @@ -20,6 +20,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/modules/policy/architecture.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Interlocks - POLICY-CONSOLE-23-001 (Console export/simulation contract from BE-Base Platform) satisfied on 2025-12-02 via `docs/modules/policy/contracts/policy-console-23-001-console-api.md`. diff --git a/docs/implplan/SPRINT_0125_0001_0001_mirror.md b/docs/implplan/SPRINT_0125_0001_0001_mirror.md index 742e551ae..f7d8c4e8f 100644 --- a/docs/implplan/SPRINT_0125_0001_0001_mirror.md +++ b/docs/implplan/SPRINT_0125_0001_0001_mirror.md @@ -17,6 +17,8 @@ - `docs/modules/devops/architecture.md` - `docs/modules/policy/architecture.md` (for provenance expectations) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0125_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0125_0001_0001_policy_reasoning.md index 1e5e8b21f..a1e9e137e 100644 --- a/docs/implplan/SPRINT_0125_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0125_0001_0001_policy_reasoning.md @@ -21,6 +21,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/modules/policy/architecture.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID & handle | State | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0126_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0126_0001_0001_policy_reasoning.md index cad847786..50161c50b 100644 --- a/docs/implplan/SPRINT_0126_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0126_0001_0001_policy_reasoning.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/modules/policy/architecture.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID & handle | State | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0127_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0127_0001_0001_policy_reasoning.md index 1141adb7d..ac664aa05 100644 --- a/docs/implplan/SPRINT_0127_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0127_0001_0001_policy_reasoning.md @@ -1,9 +1,9 @@ -# Sprint 0127-0001-0001 · Policy & Reasoning (Policy Engine phase V) - -## Topic & Scope -- Policy Engine V: reachability integration, telemetry, incident mode, and initial RiskProfile schema work. -- **Working directory:** `src/Policy/StellaOps.Policy.Engine` and `src/Policy/__Libraries/StellaOps.Policy.RiskProfile`. - +# Sprint 0127-0001-0001 · Policy & Reasoning (Policy Engine phase V) + +## Topic & Scope +- Policy Engine V: reachability integration, telemetry, incident mode, and initial RiskProfile schema work. +- **Working directory:** `src/Policy/StellaOps.Policy.Engine` and `src/Policy/__Libraries/StellaOps.Policy.RiskProfile`. + ## Dependencies & Concurrency - Upstream: Sprint 120.C Policy.IV must land. - Concurrency: execute tasks in listed order; all tasks currently TODO. @@ -11,72 +11,74 @@ ## Wave Coordination - **Wave A (reachability + observability + risk profiles):** Tasks P1 and 1–15 DONE; keep schemas/metrics stable. No remaining open tasks. -## Documentation Prerequisites -- `docs/README.md` -- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` -- `docs/modules/platform/architecture-overview.md` -- `docs/modules/policy/architecture.md` - -## Delivery Tracker -| # | Task ID & handle | State | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| P1 | PREP-POLICY-RISK-66-001-RISKPROFILE-LIBRARY-S | DONE (2025-11-22) | Due 2025-11-22 · Accountable: Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | RiskProfile library scaffold absent (`src/Policy/StellaOps.Policy.RiskProfile` contains only AGENTS.md); need project + storage contract to place schema/validators.

Document artefact/deliverable for POLICY-RISK-66-001 and publish location so downstream tasks can proceed. Prep artefact: `docs/modules/policy/prep/2025-11-20-riskprofile-66-001-prep.md`. | -| 1 | POLICY-ENGINE-80-002 | DONE (2025-11-27) | — | Policy · Storage Guild / `src/Policy/StellaOps.Policy.Engine` | Join reachability facts + Redis caches. | -| 2 | POLICY-ENGINE-80-003 | DONE (2025-11-27) | — | Policy · Policy Editor Guild / `src/Policy/StellaOps.Policy.Engine` | SPL predicates/actions reference reachability. | -| 3 | POLICY-ENGINE-80-004 | DONE (2025-11-27) | — | Policy · Observability Guild / `src/Policy/StellaOps.Policy.Engine` | Metrics/traces for signals usage. | -| 4 | POLICY-OBS-50-001 | DONE (2025-11-27) | — | Policy · Observability Guild / `src/Policy/StellaOps.Policy.Engine` | Telemetry core for API/worker hosts. | -| 5 | POLICY-OBS-51-001 | DONE (2025-11-27) | Depends on 50-001. | Policy · DevOps Guild / `src/Policy/StellaOps.Policy.Engine` | Golden-signal metrics + SLOs. | -| 6 | POLICY-OBS-52-001 | DONE (2025-11-27) | Depends on 51-001. | Policy Guild / `src/Policy/StellaOps.Policy.Engine` | Timeline events for evaluate/decision flows. | -| 7 | POLICY-OBS-53-001 | DONE (2025-11-27) | Depends on 52-001. | Policy · Evidence Locker Guild / `src/Policy/StellaOps.Policy.Engine` | Evaluation evidence bundles + manifests. | -| 8 | POLICY-OBS-54-001 | DONE (2025-11-27) | Depends on 53-001. | Policy · Provenance Guild / `src/Policy/StellaOps.Policy.Engine` | DSSE attestations for evaluations. | -| 9 | POLICY-OBS-55-001 | DONE (2025-11-27) | Depends on 54-001. | Policy · DevOps Guild / `src/Policy/StellaOps.Policy.Engine` | Incident mode sampling overrides. | -| 10 | POLICY-RISK-66-001 | DONE (2025-11-22) | PREP-POLICY-RISK-66-001-RISKPROFILE-LIBRARY-S | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | RiskProfile JSON schema + validator stubs. | -| 11 | POLICY-RISK-66-002 | DONE (2025-11-27) | Depends on 66-001. | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Inheritance/merge + deterministic hashing. | -| 12 | POLICY-RISK-66-003 | DONE (2025-11-27) | Depends on 66-002. | Policy · Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.Engine` | Integrate RiskProfile into Policy Engine config. | -| 13 | POLICY-RISK-66-004 | DONE (2025-11-27) | Depends on 66-003. | Policy · Risk Profile Schema Guild / `src/Policy/__Libraries/StellaOps.Policy` | Load/save RiskProfiles; validation diagnostics. | -| 14 | POLICY-RISK-67-001 | DONE (2025-11-27) | Depends on 66-004. | Policy · Risk Engine Guild / `src/Policy/StellaOps.Policy.Engine` | Trigger scoring jobs on new/updated findings. | -| 15 | POLICY-RISK-67-001 | DONE (2025-11-27) | Depends on 67-001. | Risk Profile Schema Guild · Policy Engine Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Profile storage/versioning lifecycle. | - -## Execution Log +## Documentation Prerequisites +- `docs/README.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/modules/policy/architecture.md` + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID & handle | State | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| P1 | PREP-POLICY-RISK-66-001-RISKPROFILE-LIBRARY-S | DONE (2025-11-22) | Due 2025-11-22 · Accountable: Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | RiskProfile library scaffold absent (`src/Policy/StellaOps.Policy.RiskProfile` contains only AGENTS.md); need project + storage contract to place schema/validators.

Document artefact/deliverable for POLICY-RISK-66-001 and publish location so downstream tasks can proceed. Prep artefact: `docs/modules/policy/prep/2025-11-20-riskprofile-66-001-prep.md`. | +| 1 | POLICY-ENGINE-80-002 | DONE (2025-11-27) | — | Policy · Storage Guild / `src/Policy/StellaOps.Policy.Engine` | Join reachability facts + Redis caches. | +| 2 | POLICY-ENGINE-80-003 | DONE (2025-11-27) | — | Policy · Policy Editor Guild / `src/Policy/StellaOps.Policy.Engine` | SPL predicates/actions reference reachability. | +| 3 | POLICY-ENGINE-80-004 | DONE (2025-11-27) | — | Policy · Observability Guild / `src/Policy/StellaOps.Policy.Engine` | Metrics/traces for signals usage. | +| 4 | POLICY-OBS-50-001 | DONE (2025-11-27) | — | Policy · Observability Guild / `src/Policy/StellaOps.Policy.Engine` | Telemetry core for API/worker hosts. | +| 5 | POLICY-OBS-51-001 | DONE (2025-11-27) | Depends on 50-001. | Policy · DevOps Guild / `src/Policy/StellaOps.Policy.Engine` | Golden-signal metrics + SLOs. | +| 6 | POLICY-OBS-52-001 | DONE (2025-11-27) | Depends on 51-001. | Policy Guild / `src/Policy/StellaOps.Policy.Engine` | Timeline events for evaluate/decision flows. | +| 7 | POLICY-OBS-53-001 | DONE (2025-11-27) | Depends on 52-001. | Policy · Evidence Locker Guild / `src/Policy/StellaOps.Policy.Engine` | Evaluation evidence bundles + manifests. | +| 8 | POLICY-OBS-54-001 | DONE (2025-11-27) | Depends on 53-001. | Policy · Provenance Guild / `src/Policy/StellaOps.Policy.Engine` | DSSE attestations for evaluations. | +| 9 | POLICY-OBS-55-001 | DONE (2025-11-27) | Depends on 54-001. | Policy · DevOps Guild / `src/Policy/StellaOps.Policy.Engine` | Incident mode sampling overrides. | +| 10 | POLICY-RISK-66-001 | DONE (2025-11-22) | PREP-POLICY-RISK-66-001-RISKPROFILE-LIBRARY-S | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | RiskProfile JSON schema + validator stubs. | +| 11 | POLICY-RISK-66-002 | DONE (2025-11-27) | Depends on 66-001. | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Inheritance/merge + deterministic hashing. | +| 12 | POLICY-RISK-66-003 | DONE (2025-11-27) | Depends on 66-002. | Policy · Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.Engine` | Integrate RiskProfile into Policy Engine config. | +| 13 | POLICY-RISK-66-004 | DONE (2025-11-27) | Depends on 66-003. | Policy · Risk Profile Schema Guild / `src/Policy/__Libraries/StellaOps.Policy` | Load/save RiskProfiles; validation diagnostics. | +| 14 | POLICY-RISK-67-001 | DONE (2025-11-27) | Depends on 66-004. | Policy · Risk Engine Guild / `src/Policy/StellaOps.Policy.Engine` | Trigger scoring jobs on new/updated findings. | +| 15 | POLICY-RISK-67-001 | DONE (2025-11-27) | Depends on 67-001. | Risk Profile Schema Guild · Policy Engine Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Profile storage/versioning lifecycle. | + +## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-12-03 | Added Wave Coordination (Wave A reachability/observability/risk profiles done; sprint complete). No status changes. | Project Mgmt | | 2025-11-27 | `POLICY-ENGINE-80-002`: Created reachability facts joining layer in `ReachabilityFacts/` directory: `ReachabilityFactsModels.cs` (data models with state/confidence/score, ReachabilityState enum, ReachabilityFactKey), `ReachabilityFactsStore.cs` (IReachabilityFactsStore interface, InMemoryReachabilityFactsStore, MongoDB index definitions), `ReachabilityFactsOverlayCache.cs` (IReachabilityFactsOverlayCache interface, InMemoryReachabilityFactsOverlayCache with TTL eviction, ReachabilityFactsCacheOptions), `ReachabilityFactsJoiningService.cs` (batch lookup with cache-first strategy, signal enrichment, ReachabilityFactsTelemetry). Registered services in Program.cs DI. | Implementer | -| 2025-11-27 | `POLICY-ENGINE-80-003`: Extended SPL predicates for reachability. Added `PolicyEvaluationReachability` record to `PolicyEvaluationContext.cs` with state/confidence/score/method/source properties and helper predicates (IsReachable, IsUnreachable, IsHighConfidence). Added `ReachabilityScope` to `PolicyExpressionEvaluator.cs` supporting SPL expressions like `reachability.state == "reachable"`, `reachability.confidence >= 0.8`, `reachability.is_high_confidence`. | Implementer | -| 2025-11-27 | `POLICY-ENGINE-80-004`: Added reachability metrics to `PolicyEngineTelemetry.cs`: `policy_reachability_applied_total{state}`, `policy_reachability_cache_hits_total`, `policy_reachability_cache_misses_total`, `policy_reachability_cache_hit_ratio` (observable gauge), `policy_reachability_lookups_total{outcome}`, `policy_reachability_lookup_seconds`. Updated `ReachabilityFactsTelemetry` to delegate to centralized PolicyEngineTelemetry. | Implementer | -| 2025-11-27 | `POLICY-RISK-67-001` (task 15): Created `Lifecycle/RiskProfileLifecycle.cs` with lifecycle models (RiskProfileLifecycleStatus enum: Draft/Active/Deprecated/Archived, RiskProfileVersionInfo, RiskProfileLifecycleEvent, RiskProfileVersionComparison, RiskProfileChange). Created `RiskProfileLifecycleService` with status transitions (CreateVersion, Activate, Deprecate, Archive, Restore), version management, event recording, and version comparison (detecting breaking changes in signals/inheritance). | Implementer | -| 2025-11-27 | `POLICY-RISK-67-001`: Created `Scoring/RiskScoringModels.cs` with FindingChangedEvent, RiskScoringJobRequest, RiskScoringJob, RiskScoringResult models and enums. Created `IRiskScoringJobStore` interface and `InMemoryRiskScoringJobStore` for job persistence. Created `RiskScoringTriggerService` handling FindingChangedEvent triggers with deduplication, batch processing, priority calculation, and job creation. Added risk scoring metrics to PolicyEngineTelemetry (jobs_created, triggers_skipped, duration, findings_scored). Registered services in Program.cs DI. | Implementer | -| 2025-11-27 | `POLICY-RISK-66-004`: Added RiskProfile project reference to StellaOps.Policy library. Created `IRiskProfileRepository` interface with GetAsync, GetVersionAsync, GetLatestAsync, ListProfileIdsAsync, ListVersionsAsync, SaveAsync, DeleteVersionAsync, DeleteAllVersionsAsync, ExistsAsync. Created `InMemoryRiskProfileRepository` for testing/development. Created `RiskProfileDiagnostics` with comprehensive validation (RISK001-RISK050 error codes) covering structure, signals, weights, overrides, and inheritance. Includes `RiskProfileDiagnosticsReport` and `RiskProfileIssue` types. | Implementer | -| 2025-11-27 | `POLICY-RISK-66-003`: Added RiskProfile project reference to Policy Engine. Created `PolicyEngineRiskProfileOptions` with config for enabled, defaultProfileId, profileDirectory, maxInheritanceDepth, validateOnLoad, cacheResolvedProfiles, and inline profile definitions. Created `RiskProfileConfigurationService` for loading profiles from config/files, resolving inheritance, and providing profiles to engine. Updated `PolicyEngineBootstrapWorker` to load profiles at startup. Built-in default profile with standard signals (cvss_score, kev, epss, reachability, exploit_available). | Implementer | -| 2025-11-27 | `POLICY-RISK-66-002`: Created `Models/RiskProfileModel.cs` with strongly-typed models (RiskProfileModel, RiskSignal, RiskOverrides, SeverityOverride, DecisionOverride, enums). Created `Merge/RiskProfileMergeService.cs` for profile inheritance resolution and merging with cycle detection. Created `Hashing/RiskProfileHasher.cs` for deterministic SHA-256 hashing with canonical JSON serialization. | Implementer | -| 2025-11-27 | `POLICY-OBS-55-001`: Created `IncidentMode.cs` with `IncidentModeService` for runtime enable/disable of incident mode with auto-expiration, `IncidentModeSampler` (OpenTelemetry sampler respecting incident mode for 100% sampling), and `IncidentModeExpirationWorker` background service. Added `IncidentMode` option to telemetry config. Registered in Program.cs DI. | Implementer | -| 2025-11-27 | `POLICY-OBS-54-001`: Created `PolicyEvaluationAttestation.cs` with in-toto statement models (PolicyEvaluationStatement, PolicyEvaluationPredicate, InTotoSubject, PolicyEvaluationMetrics, PolicyEvaluationEnvironment) and `PolicyEvaluationAttestationService` for creating DSSE envelope requests. Added Attestor.Envelope project reference. Registered in Program.cs DI. | Implementer | -| 2025-11-27 | `POLICY-OBS-53-001`: Created `EvidenceBundle.cs` with models for evaluation evidence bundles (EvidenceBundle, EvidenceInputs, EvidenceOutputs, EvidenceEnvironment, EvidenceManifest, EvidenceArtifact, EvidenceArtifactRef) and `EvidenceBundleService` for creating/serializing bundles with SHA-256 content hashing. Registered in Program.cs DI. | Implementer | -| 2025-11-27 | `POLICY-OBS-52-001`: Created `PolicyTimelineEvents.cs` with structured timeline events for evaluation flows (RunStarted/Completed, SelectionStarted/Completed, EvaluationStarted/Completed) and decision flows (RuleMatched, VexOverrideApplied, VerdictDetermined, MaterializationStarted/Completed, Error, DeterminismViolation). Events include trace correlation and structured data. Registered in Program.cs DI. | Implementer | -| 2025-11-27 | `POLICY-OBS-51-001`: Added golden-signal metrics (Latency: `policy_api_latency_seconds`, `policy_evaluation_latency_seconds`; Traffic: `policy_requests_total`, `policy_evaluations_total`, `policy_findings_materialized_total`; Errors: `policy_errors_total`, `policy_api_errors_total`, `policy_evaluation_failures_total`; Saturation: `policy_concurrent_evaluations`, `policy_worker_utilization`) and SLO metrics (`policy_slo_burn_rate`, `policy_error_budget_remaining`, `policy_slo_violations_total`). | Implementer | -| 2025-11-27 | `POLICY-OBS-50-001`: Implemented telemetry core for Policy Engine. Added `PolicyEngineTelemetry.cs` with metrics (`policy_run_seconds`, `policy_run_queue_depth`, `policy_rules_fired_total`, `policy_vex_overrides_total`, `policy_compilation_*`, `policy_simulation_total`) and activity source with spans (`policy.select`, `policy.evaluate`, `policy.materialize`, `policy.simulate`, `policy.compile`). Created `TelemetryExtensions.cs` with OpenTelemetry + Serilog configuration. Wired into `Program.cs`. | Implementer | -| 2025-11-20 | Published risk profile library prep (docs/modules/policy/prep/2025-11-20-riskprofile-66-001-prep.md); set PREP-POLICY-RISK-66-001 to DOING. | Project Mgmt | -| 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | -| 2025-11-08 | Sprint stub; awaiting upstream phases. | Planning | -| 2025-11-19 | Normalized to standard template and renamed from `SPRINT_127_policy_reasoning.md` to `SPRINT_0127_0001_0001_policy_reasoning.md`; content preserved. | Implementer | -| 2025-11-19 | Attempted POLICY-RISK-66-001; blocked because `src/Policy/StellaOps.Policy.RiskProfile` lacks a project/scaffold to host schema + validators. Needs project creation + contract placement guidance. | Implementer | -| 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | -| 2025-11-22 | Implemented RiskProfile schema + validator and tests; added project to solution; set POLICY-RISK-66-001 to DONE. | Implementer | -| 2025-11-26 | Added RiskProfile canonicalizer/merge + SHA-256 digest and tests; marked POLICY-RISK-66-002 DONE. | Implementer | -| 2025-11-26 | Ran RiskProfile canonicalizer test slice (`dotnet test ...RiskProfile.RiskProfile.Tests.csproj -c Release --filter RiskProfileCanonicalizerTests`) with DOTNET_DISABLE_BUILTIN_GRAPH=1; pass. | Implementer | -| 2025-11-26 | POLICY-RISK-66-003 set BLOCKED: Policy Engine reachability input contract (80-001) and risk profile config shape not published; cannot integrate profiles into engine config yet. | Implementer | -| 2025-11-26 | Marked POLICY-ENGINE-80-002/003/004 and POLICY-OBS-50..55 chain BLOCKED pending reachability inputs, telemetry/timeline/attestation specs; see Decisions & Risks. | Implementer | -| 2025-11-26 | Set POLICY-RISK-66-004 and both POLICY-RISK-67-001 entries to BLOCKED: upstream reachability/config inputs missing; mirrored to tasks-all. | Implementer | -| 2025-11-22 | Unblocked POLICY-RISK-66-001 after prep completion; status → TODO. | Project Mgmt | - -## Decisions & Risks -- All sprint tasks completed 2025-11-27. -- Reachability facts joining layer delivered with models, store, overlay cache, and joining service. -- SPL predicates extended for reachability: `reachability.state`, `reachability.confidence`, `reachability.score`, etc. -- Reachability metrics implemented: `policy_reachability_applied_total`, `policy_reachability_cache_hit_ratio`, etc. -- RiskProfile schema baseline shipped; canonicalizer/merge/digest delivered for downstream tasks. -- Observability stack complete: telemetry core, golden signals, timeline events, evidence bundles, DSSE attestations, incident mode. -- RiskProfile lifecycle and scoring triggers implemented. - -## Next Checkpoints -- Sprint complete. Proceed to Sprint 0128 (Policy Engine phase VI). +| 2025-11-27 | `POLICY-ENGINE-80-003`: Extended SPL predicates for reachability. Added `PolicyEvaluationReachability` record to `PolicyEvaluationContext.cs` with state/confidence/score/method/source properties and helper predicates (IsReachable, IsUnreachable, IsHighConfidence). Added `ReachabilityScope` to `PolicyExpressionEvaluator.cs` supporting SPL expressions like `reachability.state == "reachable"`, `reachability.confidence >= 0.8`, `reachability.is_high_confidence`. | Implementer | +| 2025-11-27 | `POLICY-ENGINE-80-004`: Added reachability metrics to `PolicyEngineTelemetry.cs`: `policy_reachability_applied_total{state}`, `policy_reachability_cache_hits_total`, `policy_reachability_cache_misses_total`, `policy_reachability_cache_hit_ratio` (observable gauge), `policy_reachability_lookups_total{outcome}`, `policy_reachability_lookup_seconds`. Updated `ReachabilityFactsTelemetry` to delegate to centralized PolicyEngineTelemetry. | Implementer | +| 2025-11-27 | `POLICY-RISK-67-001` (task 15): Created `Lifecycle/RiskProfileLifecycle.cs` with lifecycle models (RiskProfileLifecycleStatus enum: Draft/Active/Deprecated/Archived, RiskProfileVersionInfo, RiskProfileLifecycleEvent, RiskProfileVersionComparison, RiskProfileChange). Created `RiskProfileLifecycleService` with status transitions (CreateVersion, Activate, Deprecate, Archive, Restore), version management, event recording, and version comparison (detecting breaking changes in signals/inheritance). | Implementer | +| 2025-11-27 | `POLICY-RISK-67-001`: Created `Scoring/RiskScoringModels.cs` with FindingChangedEvent, RiskScoringJobRequest, RiskScoringJob, RiskScoringResult models and enums. Created `IRiskScoringJobStore` interface and `InMemoryRiskScoringJobStore` for job persistence. Created `RiskScoringTriggerService` handling FindingChangedEvent triggers with deduplication, batch processing, priority calculation, and job creation. Added risk scoring metrics to PolicyEngineTelemetry (jobs_created, triggers_skipped, duration, findings_scored). Registered services in Program.cs DI. | Implementer | +| 2025-11-27 | `POLICY-RISK-66-004`: Added RiskProfile project reference to StellaOps.Policy library. Created `IRiskProfileRepository` interface with GetAsync, GetVersionAsync, GetLatestAsync, ListProfileIdsAsync, ListVersionsAsync, SaveAsync, DeleteVersionAsync, DeleteAllVersionsAsync, ExistsAsync. Created `InMemoryRiskProfileRepository` for testing/development. Created `RiskProfileDiagnostics` with comprehensive validation (RISK001-RISK050 error codes) covering structure, signals, weights, overrides, and inheritance. Includes `RiskProfileDiagnosticsReport` and `RiskProfileIssue` types. | Implementer | +| 2025-11-27 | `POLICY-RISK-66-003`: Added RiskProfile project reference to Policy Engine. Created `PolicyEngineRiskProfileOptions` with config for enabled, defaultProfileId, profileDirectory, maxInheritanceDepth, validateOnLoad, cacheResolvedProfiles, and inline profile definitions. Created `RiskProfileConfigurationService` for loading profiles from config/files, resolving inheritance, and providing profiles to engine. Updated `PolicyEngineBootstrapWorker` to load profiles at startup. Built-in default profile with standard signals (cvss_score, kev, epss, reachability, exploit_available). | Implementer | +| 2025-11-27 | `POLICY-RISK-66-002`: Created `Models/RiskProfileModel.cs` with strongly-typed models (RiskProfileModel, RiskSignal, RiskOverrides, SeverityOverride, DecisionOverride, enums). Created `Merge/RiskProfileMergeService.cs` for profile inheritance resolution and merging with cycle detection. Created `Hashing/RiskProfileHasher.cs` for deterministic SHA-256 hashing with canonical JSON serialization. | Implementer | +| 2025-11-27 | `POLICY-OBS-55-001`: Created `IncidentMode.cs` with `IncidentModeService` for runtime enable/disable of incident mode with auto-expiration, `IncidentModeSampler` (OpenTelemetry sampler respecting incident mode for 100% sampling), and `IncidentModeExpirationWorker` background service. Added `IncidentMode` option to telemetry config. Registered in Program.cs DI. | Implementer | +| 2025-11-27 | `POLICY-OBS-54-001`: Created `PolicyEvaluationAttestation.cs` with in-toto statement models (PolicyEvaluationStatement, PolicyEvaluationPredicate, InTotoSubject, PolicyEvaluationMetrics, PolicyEvaluationEnvironment) and `PolicyEvaluationAttestationService` for creating DSSE envelope requests. Added Attestor.Envelope project reference. Registered in Program.cs DI. | Implementer | +| 2025-11-27 | `POLICY-OBS-53-001`: Created `EvidenceBundle.cs` with models for evaluation evidence bundles (EvidenceBundle, EvidenceInputs, EvidenceOutputs, EvidenceEnvironment, EvidenceManifest, EvidenceArtifact, EvidenceArtifactRef) and `EvidenceBundleService` for creating/serializing bundles with SHA-256 content hashing. Registered in Program.cs DI. | Implementer | +| 2025-11-27 | `POLICY-OBS-52-001`: Created `PolicyTimelineEvents.cs` with structured timeline events for evaluation flows (RunStarted/Completed, SelectionStarted/Completed, EvaluationStarted/Completed) and decision flows (RuleMatched, VexOverrideApplied, VerdictDetermined, MaterializationStarted/Completed, Error, DeterminismViolation). Events include trace correlation and structured data. Registered in Program.cs DI. | Implementer | +| 2025-11-27 | `POLICY-OBS-51-001`: Added golden-signal metrics (Latency: `policy_api_latency_seconds`, `policy_evaluation_latency_seconds`; Traffic: `policy_requests_total`, `policy_evaluations_total`, `policy_findings_materialized_total`; Errors: `policy_errors_total`, `policy_api_errors_total`, `policy_evaluation_failures_total`; Saturation: `policy_concurrent_evaluations`, `policy_worker_utilization`) and SLO metrics (`policy_slo_burn_rate`, `policy_error_budget_remaining`, `policy_slo_violations_total`). | Implementer | +| 2025-11-27 | `POLICY-OBS-50-001`: Implemented telemetry core for Policy Engine. Added `PolicyEngineTelemetry.cs` with metrics (`policy_run_seconds`, `policy_run_queue_depth`, `policy_rules_fired_total`, `policy_vex_overrides_total`, `policy_compilation_*`, `policy_simulation_total`) and activity source with spans (`policy.select`, `policy.evaluate`, `policy.materialize`, `policy.simulate`, `policy.compile`). Created `TelemetryExtensions.cs` with OpenTelemetry + Serilog configuration. Wired into `Program.cs`. | Implementer | +| 2025-11-20 | Published risk profile library prep (docs/modules/policy/prep/2025-11-20-riskprofile-66-001-prep.md); set PREP-POLICY-RISK-66-001 to DOING. | Project Mgmt | +| 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | +| 2025-11-08 | Sprint stub; awaiting upstream phases. | Planning | +| 2025-11-19 | Normalized to standard template and renamed from `SPRINT_127_policy_reasoning.md` to `SPRINT_0127_0001_0001_policy_reasoning.md`; content preserved. | Implementer | +| 2025-11-19 | Attempted POLICY-RISK-66-001; blocked because `src/Policy/StellaOps.Policy.RiskProfile` lacks a project/scaffold to host schema + validators. Needs project creation + contract placement guidance. | Implementer | +| 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | +| 2025-11-22 | Implemented RiskProfile schema + validator and tests; added project to solution; set POLICY-RISK-66-001 to DONE. | Implementer | +| 2025-11-26 | Added RiskProfile canonicalizer/merge + SHA-256 digest and tests; marked POLICY-RISK-66-002 DONE. | Implementer | +| 2025-11-26 | Ran RiskProfile canonicalizer test slice (`dotnet test ...RiskProfile.RiskProfile.Tests.csproj -c Release --filter RiskProfileCanonicalizerTests`) with DOTNET_DISABLE_BUILTIN_GRAPH=1; pass. | Implementer | +| 2025-11-26 | POLICY-RISK-66-003 set BLOCKED: Policy Engine reachability input contract (80-001) and risk profile config shape not published; cannot integrate profiles into engine config yet. | Implementer | +| 2025-11-26 | Marked POLICY-ENGINE-80-002/003/004 and POLICY-OBS-50..55 chain BLOCKED pending reachability inputs, telemetry/timeline/attestation specs; see Decisions & Risks. | Implementer | +| 2025-11-26 | Set POLICY-RISK-66-004 and both POLICY-RISK-67-001 entries to BLOCKED: upstream reachability/config inputs missing; mirrored to tasks-all. | Implementer | +| 2025-11-22 | Unblocked POLICY-RISK-66-001 after prep completion; status → TODO. | Project Mgmt | + +## Decisions & Risks +- All sprint tasks completed 2025-11-27. +- Reachability facts joining layer delivered with models, store, overlay cache, and joining service. +- SPL predicates extended for reachability: `reachability.state`, `reachability.confidence`, `reachability.score`, etc. +- Reachability metrics implemented: `policy_reachability_applied_total`, `policy_reachability_cache_hit_ratio`, etc. +- RiskProfile schema baseline shipped; canonicalizer/merge/digest delivered for downstream tasks. +- Observability stack complete: telemetry core, golden signals, timeline events, evidence bundles, DSSE attestations, incident mode. +- RiskProfile lifecycle and scoring triggers implemented. + +## Next Checkpoints +- Sprint complete. Proceed to Sprint 0128 (Policy Engine phase VI). diff --git a/docs/implplan/SPRINT_0128_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0128_0001_0001_policy_reasoning.md index 8330b5f9b..ee407cac4 100644 --- a/docs/implplan/SPRINT_0128_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0128_0001_0001_policy_reasoning.md @@ -1,9 +1,9 @@ -# Sprint 0128-0001-0001 · Policy & Reasoning (Policy Engine phase VI) - -## Topic & Scope -- Policy Engine VI: Risk profile lifecycle APIs, simulation bridge, overrides, exports, and SPL schema evolution. -- **Working directory:** `src/Policy/StellaOps.Policy.Engine` and `src/Policy/__Libraries/StellaOps.Policy`. - +# Sprint 0128-0001-0001 · Policy & Reasoning (Policy Engine phase VI) + +## Topic & Scope +- Policy Engine VI: Risk profile lifecycle APIs, simulation bridge, overrides, exports, and SPL schema evolution. +- **Working directory:** `src/Policy/StellaOps.Policy.Engine` and `src/Policy/__Libraries/StellaOps.Policy`. + ## Dependencies & Concurrency - Upstream: Policy.V (0127) reachability/risk groundwork must land first. - Concurrency: execute tasks in listed order; all tasks currently TODO. @@ -14,56 +14,58 @@ - **Wave C (risk simulations/overrides/exports/notifications/air-gap):** Tasks 3–9 BLOCKED on Policy Studio contract, Authority attachment rules, override audit fields, notifications, and air-gap packaging; run sequentially once contracts land. - No additional work in progress; avoid starting Wave C until dependencies clear. -## Documentation Prerequisites -- `docs/README.md` -- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` -- `docs/modules/platform/architecture-overview.md` -- `docs/modules/policy/architecture.md` - -## Delivery Tracker -| # | Task ID & handle | State | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| 1 | POLICY-RISK-67-002 | DONE (2025-11-27) | — | Policy Guild / `src/Policy/StellaOps.Policy.Engine` | Risk profile lifecycle APIs. | -| 2 | POLICY-RISK-67-002 | DONE (2025-11-27) | — | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Publish `.well-known/risk-profile-schema` + CLI validation. | -| 3 | POLICY-RISK-67-003 | BLOCKED (2025-11-26) | Blocked by 67-002 contract + simulation inputs. | Policy · Risk Engine Guild / `src/Policy/__Libraries/StellaOps.Policy` | Risk simulations + breakdowns. | -| 4 | POLICY-RISK-68-001 | BLOCKED (2025-11-26) | Blocked by 67-003 outputs and missing Policy Studio contract. | Policy · Policy Studio Guild / `src/Policy/StellaOps.Policy.Engine` | Simulation API for Policy Studio. | -| 5 | POLICY-RISK-68-001 | BLOCKED (2025-11-26) | Blocked until 68-001 API + Authority attachment rules defined. | Risk Profile Schema Guild · Authority Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Scope selectors, precedence rules, Authority attachment. | -| 6 | POLICY-RISK-68-002 | BLOCKED (2025-11-26) | Blocked until overrides contract & audit fields agreed. | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Override/adjustment support with audit metadata. | -| 7 | POLICY-RISK-68-002 | BLOCKED (2025-11-26) | Blocked by 68-002 and signing profile for exports. | Policy · Export Guild / `src/Policy/__Libraries/StellaOps.Policy` | Export/import RiskProfiles with signatures. | -| 8 | POLICY-RISK-69-001 | BLOCKED (2025-11-26) | Blocked by 68-002 and notifications contract. | Policy · Notifications Guild / `src/Policy/StellaOps.Policy.Engine` | Notifications on profile lifecycle/threshold changes. | -| 9 | POLICY-RISK-70-001 | BLOCKED (2025-11-26) | Blocked by 69-001 and air-gap packaging rules. | Policy · Export Guild / `src/Policy/StellaOps.Policy.Engine` | Air-gap export/import for profiles with signatures. | -| 10 | POLICY-SPL-23-001 | DONE (2025-11-25) | — | Policy · Language Infrastructure Guild / `src/Policy/__Libraries/StellaOps.Policy` | Define SPL v1 schema + fixtures. | -| 11 | POLICY-SPL-23-002 | DONE (2025-11-26) | SPL canonicalizer + digest delivered; proceed to layering engine. | Policy Guild / `src/Policy/__Libraries/StellaOps.Policy` | Canonicalizer + content hashing. | -| 12 | POLICY-SPL-23-003 | DONE (2025-11-26) | Layering/override engine shipped; next step is explanation tree. | Policy Guild / `src/Policy/__Libraries/StellaOps.Policy` | Layering/override engine + tests. | -| 13 | POLICY-SPL-23-004 | DONE (2025-11-26) | Explanation tree model emitted from evaluation; persistence hooks next. | Policy · Audit Guild / `src/Policy/__Libraries/StellaOps.Policy` | Explanation tree model + persistence. | -| 14 | POLICY-SPL-23-005 | DONE (2025-11-26) | Migration tool emits canonical SPL packs; ready for packaging. | Policy · DevEx Guild / `src/Policy/__Libraries/StellaOps.Policy` | Migration tool to baseline SPL packs. | -| 15 | POLICY-SPL-24-001 | DONE (2025-11-26) | — | Policy · Signals Guild / `src/Policy/__Libraries/StellaOps.Policy` | Extend SPL with reachability/exploitability predicates. | - -## Execution Log +## Documentation Prerequisites +- `docs/README.md` +- `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +- `docs/modules/platform/architecture-overview.md` +- `docs/modules/policy/architecture.md` + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID & handle | State | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | POLICY-RISK-67-002 | DONE (2025-11-27) | — | Policy Guild / `src/Policy/StellaOps.Policy.Engine` | Risk profile lifecycle APIs. | +| 2 | POLICY-RISK-67-002 | DONE (2025-11-27) | — | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Publish `.well-known/risk-profile-schema` + CLI validation. | +| 3 | POLICY-RISK-67-003 | BLOCKED (2025-11-26) | Blocked by 67-002 contract + simulation inputs. | Policy · Risk Engine Guild / `src/Policy/__Libraries/StellaOps.Policy` | Risk simulations + breakdowns. | +| 4 | POLICY-RISK-68-001 | BLOCKED (2025-11-26) | Blocked by 67-003 outputs and missing Policy Studio contract. | Policy · Policy Studio Guild / `src/Policy/StellaOps.Policy.Engine` | Simulation API for Policy Studio. | +| 5 | POLICY-RISK-68-001 | BLOCKED (2025-11-26) | Blocked until 68-001 API + Authority attachment rules defined. | Risk Profile Schema Guild · Authority Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Scope selectors, precedence rules, Authority attachment. | +| 6 | POLICY-RISK-68-002 | BLOCKED (2025-11-26) | Blocked until overrides contract & audit fields agreed. | Risk Profile Schema Guild / `src/Policy/StellaOps.Policy.RiskProfile` | Override/adjustment support with audit metadata. | +| 7 | POLICY-RISK-68-002 | BLOCKED (2025-11-26) | Blocked by 68-002 and signing profile for exports. | Policy · Export Guild / `src/Policy/__Libraries/StellaOps.Policy` | Export/import RiskProfiles with signatures. | +| 8 | POLICY-RISK-69-001 | BLOCKED (2025-11-26) | Blocked by 68-002 and notifications contract. | Policy · Notifications Guild / `src/Policy/StellaOps.Policy.Engine` | Notifications on profile lifecycle/threshold changes. | +| 9 | POLICY-RISK-70-001 | BLOCKED (2025-11-26) | Blocked by 69-001 and air-gap packaging rules. | Policy · Export Guild / `src/Policy/StellaOps.Policy.Engine` | Air-gap export/import for profiles with signatures. | +| 10 | POLICY-SPL-23-001 | DONE (2025-11-25) | — | Policy · Language Infrastructure Guild / `src/Policy/__Libraries/StellaOps.Policy` | Define SPL v1 schema + fixtures. | +| 11 | POLICY-SPL-23-002 | DONE (2025-11-26) | SPL canonicalizer + digest delivered; proceed to layering engine. | Policy Guild / `src/Policy/__Libraries/StellaOps.Policy` | Canonicalizer + content hashing. | +| 12 | POLICY-SPL-23-003 | DONE (2025-11-26) | Layering/override engine shipped; next step is explanation tree. | Policy Guild / `src/Policy/__Libraries/StellaOps.Policy` | Layering/override engine + tests. | +| 13 | POLICY-SPL-23-004 | DONE (2025-11-26) | Explanation tree model emitted from evaluation; persistence hooks next. | Policy · Audit Guild / `src/Policy/__Libraries/StellaOps.Policy` | Explanation tree model + persistence. | +| 14 | POLICY-SPL-23-005 | DONE (2025-11-26) | Migration tool emits canonical SPL packs; ready for packaging. | Policy · DevEx Guild / `src/Policy/__Libraries/StellaOps.Policy` | Migration tool to baseline SPL packs. | +| 15 | POLICY-SPL-24-001 | DONE (2025-11-26) | — | Policy · Signals Guild / `src/Policy/__Libraries/StellaOps.Policy` | Extend SPL with reachability/exploitability predicates. | + +## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-12-03 | Added Wave Coordination (A SPL tooling done; B risk lifecycle APIs done; C simulations/overrides/exports/notifications/air-gap blocked). No status changes. | Project Mgmt | | 2025-11-27 | `POLICY-RISK-67-002` (task 2): Added `RiskProfileSchemaEndpoints.cs` with `/.well-known/risk-profile-schema` endpoint (anonymous, ETag/Cache-Control, schema v1) and `/api/risk/schema/validate` POST endpoint for profile validation. Extended `RiskProfileSchemaProvider` with GetSchemaText(), GetSchemaVersion(), and GetETag() methods. Added `risk-profile` CLI command group with `validate` (--input, --format, --output, --strict) and `schema` (--output) subcommands. Added RiskProfile project reference to CLI. | Implementer | -| 2025-11-27 | `POLICY-RISK-67-002` (task 1): Created `Endpoints/RiskProfileEndpoints.cs` with REST APIs for profile lifecycle management: ListProfiles, GetProfile, ListVersions, GetVersion, CreateProfile (draft), ActivateProfile, DeprecateProfile, ArchiveProfile, GetProfileEvents, CompareProfiles, GetProfileHash. Uses `RiskProfileLifecycleService` for status transitions and `RiskProfileConfigurationService` for profile storage/hashing. Authorization via StellaOpsScopes (PolicyRead/PolicyEdit/PolicyActivate). Registered `RiskProfileLifecycleService` in DI and wired up `MapRiskProfiles()` in Program.cs. | Implementer | -| 2025-11-25 | Delivered SPL v1 schema + sample fixtures (spl-schema@1.json, spl-sample@1.json, SplSchemaResource) and embedded in `StellaOps.Policy`; marked POLICY-SPL-23-001 DONE. | Implementer | -| 2025-11-26 | Implemented SPL canonicalizer + SHA-256 digest (order-stable statements/actions/conditions) with unit tests; marked POLICY-SPL-23-002 DONE. | Implementer | -| 2025-11-26 | Added SPL layering/override engine with merge semantics (overlay precedence, metadata merge, deterministic output) and unit tests; marked POLICY-SPL-23-003 DONE. | Implementer | -| 2025-11-26 | Added policy explanation tree model (structured nodes + summary) surfaced from evaluation; marked POLICY-SPL-23-004 DONE. | Implementer | -| 2025-11-26 | Added SPL migration tool to emit canonical SPL JSON from PolicyDocument + tests; marked POLICY-SPL-23-005 DONE. | Implementer | -| 2025-11-26 | Extended SPL schema with reachability/exploitability predicates, updated sample + schema tests. | Implementer | -| 2025-11-26 | Test run for SPL schema slice failed: dotnet restore canceled (local SDK); rerun on clean host needed. | Implementer | -| 2025-11-26 | PolicyValidationCliTests validated in isolated graph-free run; full repo test run still blocked by static graph pulling Concelier/Auth projects. CI run with DOTNET_DISABLE_BUILTIN_GRAPH=1 recommended. | Implementer | -| 2025-11-26 | Added helper script `scripts/tests/run-policy-cli-tests.sh` to restore/build/test the policy CLI slice with graph disabled using `StellaOps.Policy.only.sln`. | Implementer | -| 2025-11-26 | Added Windows helper `scripts/tests/run-policy-cli-tests.ps1` for the same graph-disabled PolicyValidationCliTests slice. | Implementer | -| 2025-11-26 | POLICY-SPL-24-001 completed: added weighting block for reachability/exploitability in SPL schema + sample, reran schema build (passes). | Implementer | -| 2025-11-26 | Marked risk profile chain (67-002 .. 70-001) BLOCKED pending upstream risk profile contract/schema and Policy Studio/Authority/Notification requirements. | Implementer | -| 2025-11-08 | Sprint stub; awaiting upstream phases. | Planning | -| 2025-11-19 | Normalized to standard template and renamed from `SPRINT_128_policy_reasoning.md` to `SPRINT_0128_0001_0001_policy_reasoning.md`; content preserved. | Implementer | - -## Decisions & Risks -- Risk profile contracts and SPL schema not yet defined; entire chain remains TODO pending upstream specs. -// Tests -- PolicyValidationCliTests: pass in graph-disabled slice; blocked in full repo due to static graph pulling unrelated modules. Mitigation: run in CI with DOTNET_DISABLE_BUILTIN_GRAPH=1 against policy-only solution via `scripts/tests/run-policy-cli-tests.sh` (Linux/macOS) or `scripts/tests/run-policy-cli-tests.ps1` (Windows). - -## Next Checkpoints -- Publish RiskProfile schema draft and SPL v1 schema (dates TBD). +| 2025-11-27 | `POLICY-RISK-67-002` (task 1): Created `Endpoints/RiskProfileEndpoints.cs` with REST APIs for profile lifecycle management: ListProfiles, GetProfile, ListVersions, GetVersion, CreateProfile (draft), ActivateProfile, DeprecateProfile, ArchiveProfile, GetProfileEvents, CompareProfiles, GetProfileHash. Uses `RiskProfileLifecycleService` for status transitions and `RiskProfileConfigurationService` for profile storage/hashing. Authorization via StellaOpsScopes (PolicyRead/PolicyEdit/PolicyActivate). Registered `RiskProfileLifecycleService` in DI and wired up `MapRiskProfiles()` in Program.cs. | Implementer | +| 2025-11-25 | Delivered SPL v1 schema + sample fixtures (spl-schema@1.json, spl-sample@1.json, SplSchemaResource) and embedded in `StellaOps.Policy`; marked POLICY-SPL-23-001 DONE. | Implementer | +| 2025-11-26 | Implemented SPL canonicalizer + SHA-256 digest (order-stable statements/actions/conditions) with unit tests; marked POLICY-SPL-23-002 DONE. | Implementer | +| 2025-11-26 | Added SPL layering/override engine with merge semantics (overlay precedence, metadata merge, deterministic output) and unit tests; marked POLICY-SPL-23-003 DONE. | Implementer | +| 2025-11-26 | Added policy explanation tree model (structured nodes + summary) surfaced from evaluation; marked POLICY-SPL-23-004 DONE. | Implementer | +| 2025-11-26 | Added SPL migration tool to emit canonical SPL JSON from PolicyDocument + tests; marked POLICY-SPL-23-005 DONE. | Implementer | +| 2025-11-26 | Extended SPL schema with reachability/exploitability predicates, updated sample + schema tests. | Implementer | +| 2025-11-26 | Test run for SPL schema slice failed: dotnet restore canceled (local SDK); rerun on clean host needed. | Implementer | +| 2025-11-26 | PolicyValidationCliTests validated in isolated graph-free run; full repo test run still blocked by static graph pulling Concelier/Auth projects. CI run with DOTNET_DISABLE_BUILTIN_GRAPH=1 recommended. | Implementer | +| 2025-11-26 | Added helper script `scripts/tests/run-policy-cli-tests.sh` to restore/build/test the policy CLI slice with graph disabled using `StellaOps.Policy.only.sln`. | Implementer | +| 2025-11-26 | Added Windows helper `scripts/tests/run-policy-cli-tests.ps1` for the same graph-disabled PolicyValidationCliTests slice. | Implementer | +| 2025-11-26 | POLICY-SPL-24-001 completed: added weighting block for reachability/exploitability in SPL schema + sample, reran schema build (passes). | Implementer | +| 2025-11-26 | Marked risk profile chain (67-002 .. 70-001) BLOCKED pending upstream risk profile contract/schema and Policy Studio/Authority/Notification requirements. | Implementer | +| 2025-11-08 | Sprint stub; awaiting upstream phases. | Planning | +| 2025-11-19 | Normalized to standard template and renamed from `SPRINT_128_policy_reasoning.md` to `SPRINT_0128_0001_0001_policy_reasoning.md`; content preserved. | Implementer | + +## Decisions & Risks +- Risk profile contracts and SPL schema not yet defined; entire chain remains TODO pending upstream specs. +// Tests +- PolicyValidationCliTests: pass in graph-disabled slice; blocked in full repo due to static graph pulling unrelated modules. Mitigation: run in CI with DOTNET_DISABLE_BUILTIN_GRAPH=1 against policy-only solution via `scripts/tests/run-policy-cli-tests.sh` (Linux/macOS) or `scripts/tests/run-policy-cli-tests.ps1` (Windows). + +## Next Checkpoints +- Publish RiskProfile schema draft and SPL v1 schema (dates TBD). diff --git a/docs/implplan/SPRINT_0129_0001_0001_policy_reasoning.md b/docs/implplan/SPRINT_0129_0001_0001_policy_reasoning.md index 8c5a7ad90..ab0f53cef 100644 --- a/docs/implplan/SPRINT_0129_0001_0001_policy_reasoning.md +++ b/docs/implplan/SPRINT_0129_0001_0001_policy_reasoning.md @@ -21,6 +21,8 @@ - `docs/modules/policy/architecture.md` - Module docs for Registry, RiskEngine, VexLens, VulnExplorer as applicable. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID & handle | State | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0131_0001_0001_scanner_surface.md b/docs/implplan/SPRINT_0131_0001_0001_scanner_surface.md index 2c1624b39..63ae144f3 100644 --- a/docs/implplan/SPRINT_0131_0001_0001_scanner_surface.md +++ b/docs/implplan/SPRINT_0131_0001_0001_scanner_surface.md @@ -25,6 +25,8 @@ - docs/modules/scanner/architecture.md - src/Scanner/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0132_0001_0001_scanner_surface.md b/docs/implplan/SPRINT_0132_0001_0001_scanner_surface.md index 9771c27c5..ad794b4b2 100644 --- a/docs/implplan/SPRINT_0132_0001_0001_scanner_surface.md +++ b/docs/implplan/SPRINT_0132_0001_0001_scanner_surface.md @@ -26,6 +26,8 @@ - docs/modules/scanner/architecture.md - Ensure module-level AGENTS.md exists for `src/Scanner`; if missing, complete the governance task below. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0133_0001_0001_scanner_surface.md b/docs/implplan/SPRINT_0133_0001_0001_scanner_surface.md index 61e6e6e7b..8ea3939df 100644 --- a/docs/implplan/SPRINT_0133_0001_0001_scanner_surface.md +++ b/docs/implplan/SPRINT_0133_0001_0001_scanner_surface.md @@ -1,32 +1,34 @@ -# Sprint 0133-0001-0001 · Scanner & Surface (Phase IV) - -## Topic & Scope -- Scanner & Surface phase IV: Node bundle/source-map coverage and native/WASM signal extraction. -- Maintain sequential execution across 130–139; work only after Sprint 0132 completes. -- **Working directory:** `src/Scanner`. - -## Dependencies & Concurrency -- Upstream: Sprint 0132 (Scanner & Surface phase III) must land first. -- Concurrency: tasks execute in table order; all currently TODO. - -## Documentation Prerequisites -- docs/README.md -- docs/07_HIGH_LEVEL_ARCHITECTURE.md -- docs/modules/platform/architecture-overview.md -- docs/modules/scanner/architecture.md -- src/Scanner/AGENTS.md - -## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | +# Sprint 0133-0001-0001 · Scanner & Surface (Phase IV) + +## Topic & Scope +- Scanner & Surface phase IV: Node bundle/source-map coverage and native/WASM signal extraction. +- Maintain sequential execution across 130–139; work only after Sprint 0132 completes. +- **Working directory:** `src/Scanner`. + +## Dependencies & Concurrency +- Upstream: Sprint 0132 (Scanner & Surface phase III) must land first. +- Concurrency: tasks execute in table order; all currently TODO. + +## Documentation Prerequisites +- docs/README.md +- docs/07_HIGH_LEVEL_ARCHITECTURE.md +- docs/modules/platform/architecture-overview.md +- docs/modules/scanner/architecture.md +- src/Scanner/AGENTS.md + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | | P1 | PREP-SCANNER-ANALYZERS-NODE-22-006-UPSTREAM-2 | DONE (2025-11-20) | Due 2025-11-22 · Accountable: Node Analyzer Guild (`src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Node`) | Node Analyzer Guild (`src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Node`) | Bundle/source-map baseline documented in `docs/modules/scanner/design/node-bundle-phase22.md` with sample NDJSON `docs/samples/scanner/node-phase22/node-phase22-sample.ndjson`. | | P2 | PREP-SCANNER-ANALYZERS-NODE-22-007-UPSTREAM-2 | DONE (2025-11-20) | Due 2025-11-22 · Accountable: Node Analyzer Guild | Node Analyzer Guild | Native/WASM/capability detection rules + reason codes documented in `docs/modules/scanner/design/node-bundle-phase22.md` with fixture referenced above. | | P3 | PREP-SCANNER-ANALYZERS-NODE-22-008-UPSTREAM-2 | DONE (2025-11-20) | Due 2025-11-22 · Accountable: Node Analyzer Guild | Node Analyzer Guild | AOC-compliant observation emission shape + sorting rules documented in `docs/modules/scanner/design/node-bundle-phase22.md`; fixture referenced above. | | 1 | SCANNER-ANALYZERS-NODE-22-006 | DONE (2025-12-01) | Baseline implemented; align with 22-005 adapters when landed | Node Analyzer Guild (`src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Node`) | Detect bundles + source maps, reconstruct module specifiers, correlate to original paths; support dual CJS/ESM graphs with conditions. | | 2 | SCANNER-ANALYZERS-NODE-22-007 | DONE (2025-12-01) | Baseline implemented; align with 22-005 adapters when landed | Node Analyzer Guild | Scan for native addons (.node), WASM modules, and core capability signals (child_process, vm, worker_threads); emit hint edges and native metadata. | | 3 | SCANNER-ANALYZERS-NODE-22-008 | DONE (2025-12-01) | NDJSON observation emission in place; validate once 22-005 feed wiring lands | Node Analyzer Guild | Produce AOC-compliant observations: entrypoints, components (pkg/native/wasm), edges (esm-import, cjs-require, exports, json, native-addon, wasm, worker) with reason codes/confidence and resolver traces. | - -## Execution Log + +## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-12-01 | Implemented Node phase 22 bundle/source-map, native/WASM, and AOC observation pipeline; added fixture `Fixtures/lang/node/phase22` + expected NDJSON hash; set tasks 22-006/007/008 to DONE. | Implementer | @@ -50,16 +52,16 @@ | 2025-11-20 | Published Node phase 22 prep doc + fixture (see Delivery Tracker) and marked PREP P1–P3 DONE. | Planning | | 2025-11-20 | Started PREP-SCANNER-ANALYZERS-NODE-22-006/007/008 (statuses → DOING) after confirming no prior DOING owner entries. | Planning | | 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | -| 2025-11-08 | Sprint stub created; awaiting upstream completion of Sprint 0132. | Planning | -| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_133_scanner_surface.md` to `SPRINT_0133_0001_0001_scanner_surface.md`; content preserved. | Implementer | -| 2025-11-19 | Converted legacy filename `SPRINT_133_scanner_surface.md` to redirect stub pointing here to avoid divergent updates. | Implementer | -| 2025-11-20 | Marked Node phase tasks 22-006/007/008 BLOCKED because upstream 22-005 (Sprint 0132) not delivered; no executable work in this sprint until 0132 unblocks. | Implementer | - +| 2025-11-08 | Sprint stub created; awaiting upstream completion of Sprint 0132. | Planning | +| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_133_scanner_surface.md` to `SPRINT_0133_0001_0001_scanner_surface.md`; content preserved. | Implementer | +| 2025-11-19 | Converted legacy filename `SPRINT_133_scanner_surface.md` to redirect stub pointing here to avoid divergent updates. | Implementer | +| 2025-11-20 | Marked Node phase tasks 22-006/007/008 BLOCKED because upstream 22-005 (Sprint 0132) not delivered; no executable work in this sprint until 0132 unblocks. | Implementer | + ## Decisions & Risks - Phase 22 implementation (bundle/source-map, native/WASM, AOC NDJSON) landed; must be reconciled with upstream 22-005 package-manager adapters when they arrive to ensure resolver traces stay consistent. - Node Phase22 validation is pending: scoped smoke test project exists but SDK resolver/build graph still fans out; latest 2025-12-01 run restored/built but test phase was cancelled to avoid runaway. Need clean runner/CI slice or trimmed project refs to execute `Phase22_Fixture_Matches_Golden` and capture TRX/binlog. Track until executed; currently BLOCKED on runner stability. - Maintain offline/deterministic outputs; avoid running full solution builds—prefer scoped runners per module. - + ## Next Checkpoints - Set kickoff once Sprint 0132 completes (date TBD). - 2025-12-05: Phase22 observation validation on clean runner (owner: Node Analyzer Guild) once 22-005 adapters are available. diff --git a/docs/implplan/SPRINT_0134_0001_0001_scanner_surface.md b/docs/implplan/SPRINT_0134_0001_0001_scanner_surface.md index bc4bfa237..1ddd3b850 100644 --- a/docs/implplan/SPRINT_0134_0001_0001_scanner_surface.md +++ b/docs/implplan/SPRINT_0134_0001_0001_scanner_surface.md @@ -16,6 +16,8 @@ - docs/modules/scanner/architecture.md - src/Scanner/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0135_0001_0001_scanner_surface.md b/docs/implplan/SPRINT_0135_0001_0001_scanner_surface.md index 33781ce74..ac27422b6 100644 --- a/docs/implplan/SPRINT_0135_0001_0001_scanner_surface.md +++ b/docs/implplan/SPRINT_0135_0001_0001_scanner_surface.md @@ -16,6 +16,8 @@ - docs/modules/scanner/architecture.md - src/Scanner/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0136_0001_0001_scanner_surface.md b/docs/implplan/SPRINT_0136_0001_0001_scanner_surface.md index 81ee4aa07..4c269efd7 100644 --- a/docs/implplan/SPRINT_0136_0001_0001_scanner_surface.md +++ b/docs/implplan/SPRINT_0136_0001_0001_scanner_surface.md @@ -16,6 +16,8 @@ - docs/modules/scanner/architecture.md - src/Scanner/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0138_0000_0001_scanner_ruby_parity.md b/docs/implplan/SPRINT_0138_0000_0001_scanner_ruby_parity.md index 034f8489d..93b534cc0 100644 --- a/docs/implplan/SPRINT_0138_0000_0001_scanner_ruby_parity.md +++ b/docs/implplan/SPRINT_0138_0000_0001_scanner_ruby_parity.md @@ -16,6 +16,8 @@ - `docs/modules/scanner/architecture.md`; `docs/modules/scanner/operations/dsse-rekor-operator-guide.md`. - AGENTS for involved components: `src/Scanner/StellaOps.Scanner.Worker/AGENTS.md`, `src/Scanner/StellaOps.Scanner.WebService/AGENTS.md`, `src/Scanner/StellaOps.Scanner.Analyzers.Lang.Ruby/AGENTS.md`, `src/Scanner/StellaOps.Scanner.Analyzers.Lang.Php/AGENTS.md`, `src/Scanner/StellaOps.Scanner.Analyzers.Lang.Deno/AGENTS.md`, `src/Scanner/StellaOps.Scanner.Analyzers.Lang.Dart/AGENTS.md`, `src/Scanner/StellaOps.Scanner.Analyzers.Native/AGENTS.md`. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0140_0001_0001_runtime_signals.md b/docs/implplan/SPRINT_0140_0001_0001_runtime_signals.md index 4c2244aae..98c8c903f 100644 --- a/docs/implplan/SPRINT_0140_0001_0001_runtime_signals.md +++ b/docs/implplan/SPRINT_0140_0001_0001_runtime_signals.md @@ -21,6 +21,8 @@ - docs/modules/concelier/architecture.md - docs/modules/zastava/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0141_0001_0001_graph_indexer.md b/docs/implplan/SPRINT_0141_0001_0001_graph_indexer.md index f642901f0..4e18af837 100644 --- a/docs/implplan/SPRINT_0141_0001_0001_graph_indexer.md +++ b/docs/implplan/SPRINT_0141_0001_0001_graph_indexer.md @@ -18,6 +18,8 @@ - docs/modules/platform/architecture-overview.md - docs/07_HIGH_LEVEL_ARCHITECTURE.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0142_0001_0001_sbomservice.md b/docs/implplan/SPRINT_0142_0001_0001_sbomservice.md index 06f272544..84a42e1ce 100644 --- a/docs/implplan/SPRINT_0142_0001_0001_sbomservice.md +++ b/docs/implplan/SPRINT_0142_0001_0001_sbomservice.md @@ -16,6 +16,8 @@ - docs/modules/platform/architecture-overview.md - docs/modules/sbomservice/architecture.md (module dossier). +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0143_0000_0001_signals.md b/docs/implplan/SPRINT_0143_0000_0001_signals.md index acf8766af..2e7c5e809 100644 --- a/docs/implplan/SPRINT_0143_0000_0001_signals.md +++ b/docs/implplan/SPRINT_0143_0000_0001_signals.md @@ -16,6 +16,8 @@ - src/Signals/StellaOps.Signals/AGENTS.md. - CAS waiver/remediation checklist dated 2025-11-17 for SIGNALS-24-002/004/005 scope. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0144_0001_0001_zastava_runtime_signals.md b/docs/implplan/SPRINT_0144_0001_0001_zastava_runtime_signals.md index 2cc765d05..70879cffe 100644 --- a/docs/implplan/SPRINT_0144_0001_0001_zastava_runtime_signals.md +++ b/docs/implplan/SPRINT_0144_0001_0001_zastava_runtime_signals.md @@ -19,6 +19,8 @@ - src/Zastava/StellaOps.Zastava.Observer/AGENTS.md - src/Zastava/StellaOps.Zastava.Webhook/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0150_0001_0001_mirror_dsse.md b/docs/implplan/SPRINT_0150_0001_0001_mirror_dsse.md index b567bfa7c..9ccfb9628 100644 --- a/docs/implplan/SPRINT_0150_0001_0001_mirror_dsse.md +++ b/docs/implplan/SPRINT_0150_0001_0001_mirror_dsse.md @@ -14,6 +14,8 @@ - `docs/modules/platform/architecture-overview.md` - Any mirror DSSE drafts (if available). +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md b/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md index bbb49f3da..a41412a3d 100644 --- a/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md +++ b/docs/implplan/SPRINT_0150_0001_0001_scheduling_automation.md @@ -18,6 +18,8 @@ - docs/modules/taskrunner/architecture.md - docs/modules/registry/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0150_0001_0002_mirror_time.md b/docs/implplan/SPRINT_0150_0001_0002_mirror_time.md index ef461d9b7..357b63597 100644 --- a/docs/implplan/SPRINT_0150_0001_0002_mirror_time.md +++ b/docs/implplan/SPRINT_0150_0001_0002_mirror_time.md @@ -14,6 +14,8 @@ - docs/modules/mirror/milestone-0-thin-bundle.md - docs/implplan/updates/2025-11-24-mirror-dsse-rev-1501.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0150_0001_0003_mirror_orch.md b/docs/implplan/SPRINT_0150_0001_0003_mirror_orch.md index 2d76b0ce6..171c4c2f1 100644 --- a/docs/implplan/SPRINT_0150_0001_0003_mirror_orch.md +++ b/docs/implplan/SPRINT_0150_0001_0003_mirror_orch.md @@ -14,6 +14,8 @@ - docs/modules/export-center/architecture.md - docs/implplan/updates/2025-11-24-mirror-dsse-rev-1501.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md b/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md index bad8bcd6d..99d1818b1 100644 --- a/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md +++ b/docs/implplan/SPRINT_0151_0001_0001_orchestrator_i.md @@ -16,6 +16,8 @@ - docs/modules/graph/architecture.md - docs/modules/telemetry/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md b/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md index f7f060e67..09ff71c79 100644 --- a/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md +++ b/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md @@ -17,6 +17,8 @@ - docs/modules/orchestrator/architecture.md - src/Orchestrator/StellaOps.Orchestrator/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md b/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md index 9e324ebcb..d2b1b0805 100644 --- a/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md +++ b/docs/implplan/SPRINT_0153_0001_0003_orchestrator_iii.md @@ -16,6 +16,8 @@ - `docs/modules/platform/architecture-overview.md` - Module charter: `src/Orchestrator/StellaOps.Orchestrator/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md b/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md index f22bf1364..af3a4a285 100644 --- a/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md +++ b/docs/implplan/SPRINT_0154_0001_0001_packsregistry.md @@ -18,6 +18,8 @@ - docs/modules/devops/architecture.md - Any PacksRegistry AGENTS.md (if present under src/PacksRegistry). +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0155_0001_0001_scheduler_i.md b/docs/implplan/SPRINT_0155_0001_0001_scheduler_i.md index 181e4e6a1..86dedef60 100644 --- a/docs/implplan/SPRINT_0155_0001_0001_scheduler_i.md +++ b/docs/implplan/SPRINT_0155_0001_0001_scheduler_i.md @@ -16,6 +16,8 @@ - docs/modules/scheduler/architecture.md - src/Scheduler/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0156_0001_0002_scheduler_ii.md b/docs/implplan/SPRINT_0156_0001_0002_scheduler_ii.md index 353b2a070..d4004db17 100644 --- a/docs/implplan/SPRINT_0156_0001_0002_scheduler_ii.md +++ b/docs/implplan/SPRINT_0156_0001_0002_scheduler_ii.md @@ -16,6 +16,8 @@ - docs/modules/scheduler/implementation_plan.md - docs/modules/platform/architecture-overview.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md b/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md index f4cebe8b7..c643609cb 100644 --- a/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md +++ b/docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md @@ -16,6 +16,8 @@ - docs/modules/taskrunner/architecture.md (if available) - src/TaskRunner/StellaOps.TaskRunner/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md b/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md index 2eec1aab5..15f880bf8 100644 --- a/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md +++ b/docs/implplan/SPRINT_0157_0001_0002_taskrunner_blockers.md @@ -13,6 +13,8 @@ - `docs/modules/platform/architecture-overview.md` - `src/TaskRunner/StellaOps.TaskRunner/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md b/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md index 53d1c9c2a..58638bc50 100644 --- a/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md +++ b/docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md @@ -21,6 +21,8 @@ - docs/task-packs/runbook.md - src/TaskRunner/StellaOps.TaskRunner/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0160_0001_0001_export_evidence.md b/docs/implplan/SPRINT_0160_0001_0001_export_evidence.md index 81ace876e..114fc9f63 100644 --- a/docs/implplan/SPRINT_0160_0001_0001_export_evidence.md +++ b/docs/implplan/SPRINT_0160_0001_0001_export_evidence.md @@ -1,77 +1,79 @@ -# Sprint 0160_0001_0001 · Export & Evidence - -## Topic & Scope -- Snapshot coordination for export & evidence tracks (EvidenceLocker, ExportCenter, TimelineIndexer); active backlog continues in Sprint 161+. -- Ensure bundle formats, crypto routing, and ingestion schemas freeze before downstream sprints move to DOING; completed work is archived in `docs/implplan/archived/tasks.md` (updated 2025-11-08). -- Working directory: `docs/implplan` (cross-module coordination spanning EvidenceLocker, ExportCenter, TimelineIndexer artefacts). -- Evidence of completion: refreshed coordination snapshot, normalized sprint structure, and links to module trackers. - -## Dependencies & Concurrency -- Depends on AdvisoryAI evidence schema (Sprint 110.A), Orchestrator/Notifications envelopes (Sprint 150.A/140), and crypto-routing audit outcomes (2025-11-07) before DOING can start. -- Runs in parallel with module sprints 161/162/165; no code convergence expected here, but gating contracts must be frozen first. -- Interlocks & readiness signals are tracked in the table below; concurrency with other CC-decade sprints is safe once those signals turn green. - -## Documentation Prerequisites -- `docs/modules/evidence-locker/architecture.md`, `docs/modules/evidence-locker/bundle-packaging.md`, `docs/modules/evidence-locker/incident-mode.md` -- `docs/modules/export-center/architecture.md`, `docs/modules/attestor/airgap.md` -- `docs/modules/timelineindexer/architecture.md` (if present) and Postgres/RLS runbooks -- `docs/security/crypto-routing-audit-2025-11-07.md` -- `docs/replay/DETERMINISTIC_REPLAY.md`, `docs/runbooks/replay_ops.md` -- `docs/events/orchestrator-scanner-events.md` - -## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | +# Sprint 0160_0001_0001 · Export & Evidence + +## Topic & Scope +- Snapshot coordination for export & evidence tracks (EvidenceLocker, ExportCenter, TimelineIndexer); active backlog continues in Sprint 161+. +- Ensure bundle formats, crypto routing, and ingestion schemas freeze before downstream sprints move to DOING; completed work is archived in `docs/implplan/archived/tasks.md` (updated 2025-11-08). +- Working directory: `docs/implplan` (cross-module coordination spanning EvidenceLocker, ExportCenter, TimelineIndexer artefacts). +- Evidence of completion: refreshed coordination snapshot, normalized sprint structure, and links to module trackers. + +## Dependencies & Concurrency +- Depends on AdvisoryAI evidence schema (Sprint 110.A), Orchestrator/Notifications envelopes (Sprint 150.A/140), and crypto-routing audit outcomes (2025-11-07) before DOING can start. +- Runs in parallel with module sprints 161/162/165; no code convergence expected here, but gating contracts must be frozen first. +- Interlocks & readiness signals are tracked in the table below; concurrency with other CC-decade sprints is safe once those signals turn green. + +## Documentation Prerequisites +- `docs/modules/evidence-locker/architecture.md`, `docs/modules/evidence-locker/bundle-packaging.md`, `docs/modules/evidence-locker/incident-mode.md` +- `docs/modules/export-center/architecture.md`, `docs/modules/attestor/airgap.md` +- `docs/modules/timelineindexer/architecture.md` (if present) and Postgres/RLS runbooks +- `docs/security/crypto-routing-audit-2025-11-07.md` +- `docs/replay/DETERMINISTIC_REPLAY.md`, `docs/runbooks/replay_ops.md` +- `docs/events/orchestrator-scanner-events.md` + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | | P1 | PREP-EVIDENCE-LOCKER-GUILD-SECURITY-GUILD-DOC | DONE (2025-11-20) | Prep note published at `docs/modules/evidence-locker/prep/2025-11-20-security-coordination.md`. | Waiting on AdvisoryAI schema + orchestrator ledger envelopes to freeze. | BLOCKED (2025-11-17).

Document artefact/deliverable for Evidence Locker Guild · Security Guild · Docs Guild, Exporter Service Guild · Mirror Creator Guild · DevOps Guild, Timeline Indexer Guild · Evidence Locker Guild · Security Guild and publish location so downstream tasks can proceed. | | P2 | PREP-ORCHESTRATOR-NOTIFICATIONS-SCHEMA-HANDOF | DONE (2025-11-20) | Prep note published at `docs/events/prep/2025-11-20-orchestrator-notifications-schema-handoff.md`. | Planning | MISSED; escalate to Wave 150/140 leads and record new ETA; keep tasks BLOCKED.

Document artefact/deliverable for Orchestrator + Notifications schema handoff (Orchestrator Service + Notifications Guilds) and publish location so downstream tasks can proceed. | | P3 | PREP-ESCALATION-FOLLOW-UP-ADVISORYAI-ORCHESTR | DONE (2025-11-20) | Prep note published at `docs/events/prep/2025-11-20-advisoryai-orchestrator-followup.md`. | Planning | If no dates provided, mark BLOCKED in respective sprints and escalate to Wave leads.

Document artefact/deliverable for Escalation follow-up (AdvisoryAI, Orchestrator/Notifications) and publish location so downstream tasks can proceed. | -| P4 | PREP-160-A-160-B-160-C-ESCALATE-TO-WAVE-150-1 | DONE (2025-11-19) | Due 2025-11-23 · Accountable: Planning | Planning | Escalation sent to Wave 150/140 leads; awaiting new ETAs recorded in Sprint 110/150/140. | -| 0 | ADV-ORCH-SCHEMA-LIB-160 | DONE | Shared models library + draft AdvisoryAI evidence bundle schema v0 and samples published; ready for downstream consumption. | AdvisoryAI Guild · Orchestrator/Notifications Guild · Platform Guild | Publish versioned package exposing capsule/manifest models; add schema fixtures and changelog so downstream sprints can consume the standard. | +| P4 | PREP-160-A-160-B-160-C-ESCALATE-TO-WAVE-150-1 | DONE (2025-11-19) | Due 2025-11-23 · Accountable: Planning | Planning | Escalation sent to Wave 150/140 leads; awaiting new ETAs recorded in Sprint 110/150/140. | +| 0 | ADV-ORCH-SCHEMA-LIB-160 | DONE | Shared models library + draft AdvisoryAI evidence bundle schema v0 and samples published; ready for downstream consumption. | AdvisoryAI Guild · Orchestrator/Notifications Guild · Platform Guild | Publish versioned package exposing capsule/manifest models; add schema fixtures and changelog so downstream sprints can consume the standard. | | 1 | 160.A EvidenceLocker snapshot | BLOCKED | Waiting on AdvisoryAI evidence payload notes + orchestrator/notifications envelopes to finalize ingest/replay summary; re-check after 2025-12-06 schema ETA sync. | Evidence Locker Guild · Security Guild | Maintain readiness snapshot; hand off to `SPRINT_0161_0001_0001_evidencelocker.md` & `SPRINT_187_evidence_locker_cli_integration.md`. | | 2 | 160.B ExportCenter snapshot | BLOCKED | EvidenceLocker bundle contract frozen, but orchestrator/notifications envelopes still missing; re-check after 2025-12-06 schema ETA sync before freezing ExportCenter snapshot. | Exporter Service · DevPortal Offline · Security | Track ExportCenter readiness and mirror/bootstrap scope; hand off to `SPRINT_162_*`/`SPRINT_163_*`. | | 3 | 160.C TimelineIndexer snapshot | BLOCKED | Waiting on TIMELINE-OBS-52-001 digest references; schemas available. Prep migrations/RLS draft; re-check after 2025-12-06 schema ETA sync. | Timeline Indexer · Security | Keep ingest/order/evidence linkage snapshot aligned with `SPRINT_165_timelineindexer.md`. | -| 4 | AGENTS-implplan | DONE | Create `docs/implplan/AGENTS.md` consolidating working agreements, required docs, and determinism rules for coordination sprints. | Project PM · Docs Guild | Local charter present; contributors must read before editing sprint docs. | - -### Wave Coordination -| Wave | Guild owners | Shared prerequisites | Status | Notes | -| --- | --- | --- | --- | --- | -| 160.A EvidenceLocker | Evidence Locker Guild · Security Guild · Docs Guild | Sprint 110.A – AdvisoryAI; Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 150.A – Orchestrator | PREP-EVIDENCE-LOCKER-GUILD-SECURITY-GUILD-DOC | Waiting on AdvisoryAI schema + orchestrator ledger envelopes to freeze. | -| 160.B ExportCenter | Exporter Service Guild · Mirror Creator Guild · DevOps Guild | Sprint 110.A – AdvisoryAI; Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 150.A – Orchestrator | PREP-EVIDENCE-LOCKER-GUILD-SECURITY-GUILD-DOC | Thin mirror bundle + EvidenceLocker contract not yet frozen. | -| 160.C TimelineIndexer | Timeline Indexer Guild · Evidence Locker Guild · Security Guild | Sprint 110.A – AdvisoryAI; Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 150.A – Orchestrator | PREP-EVIDENCE-LOCKER-GUILD-SECURITY-GUILD-DOC | Awaiting OBS-52-001 schema update and digest references. | - -## Wave Detail Snapshots & Next Actions - -### 160.A EvidenceLocker -- Detail trackers: [SPRINT_0161_0001_0001_evidencelocker.md](./SPRINT_0161_0001_0001_evidencelocker.md) and [SPRINT_187_evidence_locker_cli_integration.md](./SPRINT_187_evidence_locker_cli_integration.md). -- Task radar (all TODO as of 2025-11-12): - - `EVID-REPLAY-187-001` — Replay bundle ingestion/retention APIs + storage policy (`src/EvidenceLocker/StellaOps.EvidenceLocker`, `docs/modules/evidence-locker/architecture.md`). - - `RUNBOOK-REPLAY-187-004` & `CLI-REPLAY-187-002` — CLI + ops readiness for replay bundles (`docs/runbooks/replay_ops.md`, CLI module). - - `EVID-CRYPTO-90-001` — Sovereign crypto routing via `ICryptoProviderRegistry`/`ICryptoHash` per `docs/security/crypto-routing-audit-2025-11-07.md`. -- Contracts: bundle packaging + DSSE layout (`docs/modules/evidence-locker/bundle-packaging.md`, `EVID-OBS-54-002`); portable/incident modes in `docs/modules/evidence-locker/incident-mode.md`. -- Gating dependencies: orchestrator capsule schema, AdvisoryAI payload notes, and replay ledger rules (`docs/replay/DETERMINISTIC_REPLAY.md`). -- Ready-to-start checklist: finalize ingest schema deltas, stage Replay Ledger ops drills, and publish API surface summary into Sprint 161 before DOING. - -#### EvidenceLocker task snapshot (2025-11-12) -| Task ID | Scope | State | Notes / Owners | -| --- | --- | --- | --- | -| EVID-REPLAY-187-001 | Replay bundle ingestion + retention APIs | TODO | Evidence Locker Guild · docs/modules/evidence-locker/architecture.md | -| CLI-REPLAY-187-002 | CLI record/verify/replay UX | TODO | CLI Guild · `docs/modules/cli/architecture.md` | -| RUNBOOK-REPLAY-187-004 | Replay ops runbook + drills | TODO | Docs/Ops Guild · `/docs/runbooks/replay_ops.md` | -| EVID-CRYPTO-90-001 | Sovereign crypto routing | TODO | Evidence Locker + Security Guilds · `ICryptoProviderRegistry` integration | - -### 160.B ExportCenter -- Detail trackers: [SPRINT_0162_0001_0001_exportcenter_i.md](./SPRINT_0162_0001_0001_exportcenter_i.md) and [SPRINT_0163_0001_0001_exportcenter_ii.md](./SPRINT_0163_0001_0001_exportcenter_ii.md). -- Task radar highlights: - - Mirror & bootstrap: `EXPORT-AIRGAP-56-001/002/003/004/005`, `EXPORT-AIRGAP-57-001`, `EXPORT-AIRGAP-58-001`. - - Attestation bundles: `EXPORT-ATTEST-74-001/002`, `EXPORT-ATTEST-75-001/002` (jobs, CI/offline, CLI verify/import; see `docs/modules/attestor/airgap.md`). - - API/OAS: `EXPORT-OAS-61-001/002`, `EXPORT-OAS-62-001`, `EXPORT-OAS-63-001` — refreshed OpenAPI, discovery, SDK, deprecation headers. - - Service/observability: `EXPORT-SVC-35-001…005`, `EXPORT-OBS-50/51/52`, `EXPORT-CRYPTO-90-001` for crypto parity with EvidenceLocker. -- Dependencies: EvidenceLocker contracts + DSSE proofs; orchestrator events + Scheduler readiness; crypto routing aligned with `docs/security/crypto-routing-audit-2025-11-07.md`. -- Ready-to-start checklist: freeze sealed bundle spec, reconcile crypto provider matrix with RootPack deployments, and prep DevPortal verification CLI scaffolding (`DVOFF-64-002`). - -#### ExportCenter task snapshot (2025-11-12) -| Task ID | Scope | State | Notes / Owners | -| --- | --- | --- | --- | +| 4 | AGENTS-implplan | DONE | Create `docs/implplan/AGENTS.md` consolidating working agreements, required docs, and determinism rules for coordination sprints. | Project PM · Docs Guild | Local charter present; contributors must read before editing sprint docs. | + +### Wave Coordination +| Wave | Guild owners | Shared prerequisites | Status | Notes | +| --- | --- | --- | --- | --- | +| 160.A EvidenceLocker | Evidence Locker Guild · Security Guild · Docs Guild | Sprint 110.A – AdvisoryAI; Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 150.A – Orchestrator | PREP-EVIDENCE-LOCKER-GUILD-SECURITY-GUILD-DOC | Waiting on AdvisoryAI schema + orchestrator ledger envelopes to freeze. | +| 160.B ExportCenter | Exporter Service Guild · Mirror Creator Guild · DevOps Guild | Sprint 110.A – AdvisoryAI; Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 150.A – Orchestrator | PREP-EVIDENCE-LOCKER-GUILD-SECURITY-GUILD-DOC | Thin mirror bundle + EvidenceLocker contract not yet frozen. | +| 160.C TimelineIndexer | Timeline Indexer Guild · Evidence Locker Guild · Security Guild | Sprint 110.A – AdvisoryAI; Sprint 120.A – AirGap; Sprint 130.A – Scanner; Sprint 150.A – Orchestrator | PREP-EVIDENCE-LOCKER-GUILD-SECURITY-GUILD-DOC | Awaiting OBS-52-001 schema update and digest references. | + +## Wave Detail Snapshots & Next Actions + +### 160.A EvidenceLocker +- Detail trackers: [SPRINT_0161_0001_0001_evidencelocker.md](./SPRINT_0161_0001_0001_evidencelocker.md) and [SPRINT_187_evidence_locker_cli_integration.md](./SPRINT_187_evidence_locker_cli_integration.md). +- Task radar (all TODO as of 2025-11-12): + - `EVID-REPLAY-187-001` — Replay bundle ingestion/retention APIs + storage policy (`src/EvidenceLocker/StellaOps.EvidenceLocker`, `docs/modules/evidence-locker/architecture.md`). + - `RUNBOOK-REPLAY-187-004` & `CLI-REPLAY-187-002` — CLI + ops readiness for replay bundles (`docs/runbooks/replay_ops.md`, CLI module). + - `EVID-CRYPTO-90-001` — Sovereign crypto routing via `ICryptoProviderRegistry`/`ICryptoHash` per `docs/security/crypto-routing-audit-2025-11-07.md`. +- Contracts: bundle packaging + DSSE layout (`docs/modules/evidence-locker/bundle-packaging.md`, `EVID-OBS-54-002`); portable/incident modes in `docs/modules/evidence-locker/incident-mode.md`. +- Gating dependencies: orchestrator capsule schema, AdvisoryAI payload notes, and replay ledger rules (`docs/replay/DETERMINISTIC_REPLAY.md`). +- Ready-to-start checklist: finalize ingest schema deltas, stage Replay Ledger ops drills, and publish API surface summary into Sprint 161 before DOING. + +#### EvidenceLocker task snapshot (2025-11-12) +| Task ID | Scope | State | Notes / Owners | +| --- | --- | --- | --- | +| EVID-REPLAY-187-001 | Replay bundle ingestion + retention APIs | TODO | Evidence Locker Guild · docs/modules/evidence-locker/architecture.md | +| CLI-REPLAY-187-002 | CLI record/verify/replay UX | TODO | CLI Guild · `docs/modules/cli/architecture.md` | +| RUNBOOK-REPLAY-187-004 | Replay ops runbook + drills | TODO | Docs/Ops Guild · `/docs/runbooks/replay_ops.md` | +| EVID-CRYPTO-90-001 | Sovereign crypto routing | TODO | Evidence Locker + Security Guilds · `ICryptoProviderRegistry` integration | + +### 160.B ExportCenter +- Detail trackers: [SPRINT_0162_0001_0001_exportcenter_i.md](./SPRINT_0162_0001_0001_exportcenter_i.md) and [SPRINT_0163_0001_0001_exportcenter_ii.md](./SPRINT_0163_0001_0001_exportcenter_ii.md). +- Task radar highlights: + - Mirror & bootstrap: `EXPORT-AIRGAP-56-001/002/003/004/005`, `EXPORT-AIRGAP-57-001`, `EXPORT-AIRGAP-58-001`. + - Attestation bundles: `EXPORT-ATTEST-74-001/002`, `EXPORT-ATTEST-75-001/002` (jobs, CI/offline, CLI verify/import; see `docs/modules/attestor/airgap.md`). + - API/OAS: `EXPORT-OAS-61-001/002`, `EXPORT-OAS-62-001`, `EXPORT-OAS-63-001` — refreshed OpenAPI, discovery, SDK, deprecation headers. + - Service/observability: `EXPORT-SVC-35-001…005`, `EXPORT-OBS-50/51/52`, `EXPORT-CRYPTO-90-001` for crypto parity with EvidenceLocker. +- Dependencies: EvidenceLocker contracts + DSSE proofs; orchestrator events + Scheduler readiness; crypto routing aligned with `docs/security/crypto-routing-audit-2025-11-07.md`. +- Ready-to-start checklist: freeze sealed bundle spec, reconcile crypto provider matrix with RootPack deployments, and prep DevPortal verification CLI scaffolding (`DVOFF-64-002`). + +#### ExportCenter task snapshot (2025-11-12) +| Task ID | Scope | State | Notes / Owners | +| --- | --- | --- | --- | | DVOFF-64-002 | DevPortal bundle verification CLI | BLOCKED (2025-11-30) | DevPortal Offline + AirGap Controller Guilds | | EXPORT-AIRGAP-56-001/002 | Mirror bundle + bootstrap pack profiles | BLOCKED (2025-11-30) | Exporter + Mirror Creator + DevOps Guilds | | EXPORT-AIRGAP-57-001 | Portable evidence export mode | BLOCKED (2025-11-30) | Exporter Service + Evidence Locker Guild | @@ -80,28 +82,28 @@ | EXPORT-ATTEST-75-001/002 | CLI verify/import + offline kit integration | BLOCKED (2025-11-30) | Attestation Bundle + CLI + Exporter Guilds | | EXPORT-OAS-61/62/63 | OpenAPI refresh, discovery, SDK + deprecation headers | BLOCKED (2025-11-30) | Exporter Service + API Governance + SDK Guilds | | EXPORT-CRYPTO-90-001 | Sovereign crypto routing | BLOCKED (2025-11-30) | Exporter Service + Security Guilds | - -### 160.C TimelineIndexer -- Detail tracker: [SPRINT_165_timelineindexer.md](./SPRINT_165_timelineindexer.md) covering TIMELINE-OBS-52-001…004 and TIMELINE-OBS-53-001. -- Task radar: - - `TIMELINE-OBS-52-001` — service bootstrap + Postgres migrations with deterministic scripts and RLS scaffolding. - - `TIMELINE-OBS-52-002` — event ingestion pipeline (NATS/Redis consumers, ordering, dedupe, trace correlation, metrics). - - `TIMELINE-OBS-52-003` — REST/gRPC APIs with filtering/pagination + OpenAPI contracts. - - `TIMELINE-OBS-52-004` — finalize RLS, scope checks, audit logging, legal hold enforcement tests. - - `TIMELINE-OBS-53-001` — evidence linkage endpoint returning signed manifest references. -- Dependencies: orchestrator/notifications event schemas and EvidenceLocker digest references must land before Postgres migrations can be frozen; export bundle IDs must be stable to hydrate `/timeline/{id}/evidence`. -- Ready-to-start checklist: secure event schema package, stage Postgres migration plan (incl. RLS policies) for review, align ingest ordering semantics with Scheduler/ExportCenter cadence. - -#### TimelineIndexer task snapshot (2025-11-12) -| Task ID | Scope | State | Notes / Owners | -| --- | --- | --- | --- | + +### 160.C TimelineIndexer +- Detail tracker: [SPRINT_165_timelineindexer.md](./SPRINT_165_timelineindexer.md) covering TIMELINE-OBS-52-001…004 and TIMELINE-OBS-53-001. +- Task radar: + - `TIMELINE-OBS-52-001` — service bootstrap + Postgres migrations with deterministic scripts and RLS scaffolding. + - `TIMELINE-OBS-52-002` — event ingestion pipeline (NATS/Redis consumers, ordering, dedupe, trace correlation, metrics). + - `TIMELINE-OBS-52-003` — REST/gRPC APIs with filtering/pagination + OpenAPI contracts. + - `TIMELINE-OBS-52-004` — finalize RLS, scope checks, audit logging, legal hold enforcement tests. + - `TIMELINE-OBS-53-001` — evidence linkage endpoint returning signed manifest references. +- Dependencies: orchestrator/notifications event schemas and EvidenceLocker digest references must land before Postgres migrations can be frozen; export bundle IDs must be stable to hydrate `/timeline/{id}/evidence`. +- Ready-to-start checklist: secure event schema package, stage Postgres migration plan (incl. RLS policies) for review, align ingest ordering semantics with Scheduler/ExportCenter cadence. + +#### TimelineIndexer task snapshot (2025-11-12) +| Task ID | Scope | State | Notes / Owners | +| --- | --- | --- | --- | | TIMELINE-OBS-52-001 | Service bootstrap + Postgres migrations/RLS | DONE (2025-11-30) | Timeline Indexer Guild | | TIMELINE-OBS-52-002 | Event ingestion pipeline + metrics | DONE (2025-12-03) | Timeline Indexer Guild | | TIMELINE-OBS-52-003 | REST/gRPC APIs + OpenAPI contracts | DONE (2025-12-03) | Timeline Indexer Guild | | TIMELINE-OBS-52-004 | RLS policies, audit logging, legal hold tests | DONE (2025-12-03) | Timeline Indexer + Security Guilds | | TIMELINE-OBS-53-001 | Evidence linkage endpoint | BLOCKED (2025-11-30) | Timeline Indexer + Evidence Locker Guilds | - -## Interlocks & Readiness Signals + +## Interlocks & Readiness Signals | Dependency | Owner / Source | Impacts | Status / Next signal | | --- | --- | --- | --- | | Orchestrator capsule & notifications schema (`docs/events/orchestrator-scanner-events.md`) | Orchestrator Service Guild · Notifications Guild (Sprint 150.A + 140 wave) | 160.A, 160.B, 160.C | OVERDUE; re-escalated 2025-12-04. Require ETA by 2025-12-06 or escalate to steering on 2025-12-07. | @@ -109,16 +111,16 @@ | Replay ledger spec alignment (`docs/replay/DETERMINISTIC_REPLAY.md`, `/docs/runbooks/replay_ops.md`) | Replay Delivery Guild (Sprint 187) | 160.A | Replay ops runbook exists (2025-11-03); EvidenceLocker must incorporate retention API shape before DOING. Track in EVID-REPLAY-187-001. | | Crypto routing parity (`docs/security/crypto-routing-audit-2025-11-07.md`) | Security Guild + Export/Evidence teams (`EVID-CRYPTO-90-001`, `EXPORT-CRYPTO-90-001`) | 160.A, 160.B | Review on 2025-11-18 slipped; reschedule for 2025-12-08 with registry sample due 2025-12-06. Keep sovereign modes off until approved. | | DevPortal verification CLI scaffolding (`DVOFF-64-002`) | DevPortal Offline Guild (Sprint 162) | 160.B | Prototype pending; request stub bundle for dry run no later than 2025-12-09 to stay aligned with ExportCenter handoff. | - -## Upcoming Checkpoints (UTC) + +## Upcoming Checkpoints (UTC) | Date | Session / Owner | Target outcome | Fallback / Escalation | | --- | --- | --- | --- | | 2025-12-06 | Schema ETA sync (AdvisoryAI + Orchestrator/Notifications leads) | Confirm drop dates for AdvisoryAI payload notes and Orchestrator/Notifications capsule envelopes to unblock snapshots. | If no ETA, escalate to steering on 2025-12-07 and keep 160.A/B/C BLOCKED. | | 2025-12-08 | Sovereign crypto readiness review (Security + Evidence/Export teams) | Approve `ICryptoProviderRegistry` wiring plan and provider matrix for `EVID-CRYPTO-90-001`/`EXPORT-CRYPTO-90-001`. | If not approved, publish interim provider whitelist and defer sovereign modes. | | 2025-12-09 | DevPortal Offline CLI dry run (DevPortal Offline + AirGap Controller Guilds) | Demo `stella devportal verify bundle.tgz` against stub bundle to prep ExportCenter handoff. | If bundle not available, use stub from EvidenceLocker sample and log risk in Sprint 162. | | 2025-12-10 | Wave 160 snapshot refresh (EvidenceLocker, ExportCenter, TimelineIndexer leads) | Publish updated readiness snapshots or restate BLOCKED with evidence; sync Sprint 161/162/165 trackers. | If still blocked, record blockade summary and extend checkpoint to 2025-12-13. | - -## Action Tracker + +## Action Tracker | Wave | Immediate action | Owner(s) | Due | Status | | --- | --- | --- | --- | --- | | 160.A EvidenceLocker | Draft ingest schema summary + Replay Ledger API notes into `SPRINT_0161_0001_0001_evidencelocker.md` once orchestrator + AdvisoryAI schemas land. | Evidence Locker Guild · Replay Delivery Guild | 2025-12-10 | BLOCKED (waiting on AdvisoryAI payload notes + Orchestrator envelopes) | @@ -134,8 +136,8 @@ | AGENTS-implplan | Create `docs/implplan/AGENTS.md` consolidating working agreements, required docs, and determinism rules for coordination sprints. | Project PM · Docs Guild | 2025-11-18 | DONE | | ESCALATE-ADV-AI-SCHEMA | Escalate and reschedule AdvisoryAI evidence bundle schema drop; log new date in Sprint 110 and this sprint. | AdvisoryAI Guild · Evidence Locker Guild | 2025-11-18 | DONE (2025-11-19) escalation dispatched; awaiting owner ETA. | | ESCALATE-ORCH-ENVELOPE | Escalate Orchestrator/Notifications capsule envelope drop; obtain new ETA and log in Sprint 150/140 and this sprint. | Orchestrator Service · Notifications Guild | 2025-11-18 | DONE (2025-11-19) escalation dispatched; awaiting owner ETA. | - -## Decisions & Risks + +## Decisions & Risks | Item | Status / Decision | Notes | | --- | --- | --- | | Naming & template alignment | DONE (2025-11-17) | File renamed to `SPRINT_0160_0001_0001_export_evidence.md` and normalized to standard sprint template. | @@ -147,17 +149,17 @@ | AdvisoryAI schema checkpoint (2025-11-14) | OVERDUE | Reschedule in progress; re-escalated 2025-12-04 with ETA ask for 2025-12-06. | | Orchestrator/Notifications checkpoint (2025-11-15) | OVERDUE | Reschedule in progress; re-escalated 2025-12-04 with ETA ask for 2025-12-06. | | Escalation responses | PENDING | Awaiting ETA confirmations from AdvisoryAI and Orchestrator/Notifications leads; next follow-up 2025-12-06 (steering escalation 2025-12-07 if silent). | - -### Risk table -| Risk | Impacted wave(s) | Severity | Mitigation / Owner | -| --- | --- | --- | --- | -| AdvisoryAI schema slips past 2025-11-14, delaying DSSE manifest freeze. | 160.A, 160.B | High | AdvisoryAI Guild to provide interim sample payloads; EvidenceLocker to stub schema adapters so ExportCenter can begin validation with mock data. | -| Orchestrator/Notifications schema handoff misses 2025-11-15 window. | 160.A, 160.B, 160.C | High | PREP-160-A-160-B-160-C-ESCALATE-TO-WAVE-150-1 | -| Sovereign crypto routing design not ready by 2025-11-18 review. | 160.A, 160.B | Medium | Security Guild to publish `ICryptoProviderRegistry` reference implementation; Evidence/Export guilds to nominate fallback providers per profile. | -| DevPortal verification CLI lacks signed bundle fixtures for dry run. | 160.B | Medium | Exporter Guild to provide sample manifest + DSSE pair; DevPortal Offline Guild to script fake EvidenceLocker output for demo. | -| TimelineIndexer Postgres/RLS plan not reviewed before coding. | 160.C | Medium | Timeline Indexer Guild to share migration plan with Security/Compliance for async review; unblock coding by securing written approval in sprint doc. | - -## Execution Log + +### Risk table +| Risk | Impacted wave(s) | Severity | Mitigation / Owner | +| --- | --- | --- | --- | +| AdvisoryAI schema slips past 2025-11-14, delaying DSSE manifest freeze. | 160.A, 160.B | High | AdvisoryAI Guild to provide interim sample payloads; EvidenceLocker to stub schema adapters so ExportCenter can begin validation with mock data. | +| Orchestrator/Notifications schema handoff misses 2025-11-15 window. | 160.A, 160.B, 160.C | High | PREP-160-A-160-B-160-C-ESCALATE-TO-WAVE-150-1 | +| Sovereign crypto routing design not ready by 2025-11-18 review. | 160.A, 160.B | Medium | Security Guild to publish `ICryptoProviderRegistry` reference implementation; Evidence/Export guilds to nominate fallback providers per profile. | +| DevPortal verification CLI lacks signed bundle fixtures for dry run. | 160.B | Medium | Exporter Guild to provide sample manifest + DSSE pair; DevPortal Offline Guild to script fake EvidenceLocker output for demo. | +| TimelineIndexer Postgres/RLS plan not reviewed before coding. | 160.C | Medium | Timeline Indexer Guild to share migration plan with Security/Compliance for async review; unblock coding by securing written approval in sprint doc. | + +## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-12-04 | Re-baselined Wave 160 status; added Dec-06/08/09/10 checkpoints, re-escalated schema/envelope ETAs, refreshed Action Tracker (Timeline tasks marked DONE). | Project PM | @@ -165,23 +167,23 @@ | 2025-11-20 | Confirmed PREP-ORCHESTRATOR-NOTIFICATIONS-SCHEMA-HANDOF and PREP-ESCALATION-FOLLOW-UP-ADVISORYAI-ORCHESTR still unclaimed; moved both to DOING to proceed with Wave 150/140 escalations. | Planning | | 2025-11-20 | Published prep artefacts for P1–P3: security coordination (`docs/modules/evidence-locker/prep/2025-11-20-security-coordination.md`), orchestrator/notifications handoff (`docs/events/prep/2025-11-20-orchestrator-notifications-schema-handoff.md`), and escalation follow-up (`docs/events/prep/2025-11-20-advisoryai-orchestrator-followup.md`). Marked P1–P3 DONE. | Implementer | | 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | -| 2025-11-19 | Updated 160.C TimelineIndexer snapshot dependency to TIMELINE-OBS-52-001 (matches Sprint 165 tracker). | Project Mgmt | -| 2025-11-12 | Snapshot refreshed; all Export & Evidence waves remain BLOCKED pending orchestrator capsule data, AdvisoryAI bundle schemas, and EvidenceLocker contracts. Re-evaluate after 2025-11-15 handoff. | Planning | -| 2025-11-12 | Added checkpoint calendar, action tracker, and risk table to keep Wave 160 aligned while dependencies stabilize. | Planning | -| 2025-11-17 | Normalized sprint to standard template and renamed from `SPRINT_160_export_evidence.md` to `SPRINT_0160_0001_0001_export_evidence.md`; no semantic changes to tasks. | Project PM | -| 2025-11-17 | Set Delivery Tracker and Wave statuses to BLOCKED pending schemas/crypto review; logged missing `docs/implplan/AGENTS.md` as blocker and added action item `AGENTS-implplan`. | Implementer | -| 2025-11-17 | Created `docs/implplan/AGENTS.md`; marked AGENTS-implplan DONE and updated Decisions & Risks accordingly. | Implementer | -| 2025-11-17 | Marked AdvisoryAI (2025-11-14) and Orchestrator/Notifications (2025-11-15) checkpoints as missed; escalations required; action items now OVERDUE. | Implementer | -| 2025-11-18 | Added escalation actions `ESCALATE-ADV-AI-SCHEMA` and `ESCALATE-ORCH-ENVELOPE` to track overdue schema drops. | Implementer | -| 2025-11-18 | Started escalations for AdvisoryAI schema and Orchestrator envelopes; awaiting new ETAs from respective guilds. | Implementer | -| 2025-11-18 | Sent escalation pings to AdvisoryAI and Orchestrator/Notifications leads; awaiting ETA confirmation (tracked in Action Tracker). | Implementer | -| 2025-11-18 | Updated Interlocks with “escalation sent” notes and follow-up date (2025-11-19). | Implementer | -| 2025-11-18 | Added blocker task ADV-ORCH-SCHEMA-LIB-160 and marked snapshots explicitly blocked on shared schema library drop. | Project PM | -| 2025-11-18 | Set ADV-ORCH-SCHEMA-LIB-160 to DOING; drafting shared models package for AdvisoryAI/Orchestrator envelopes. | Implementer | -| 2025-11-18 | Published `src/__Libraries/StellaOps.Orchestrator.Schemas` with scanner orchestrator envelope models; AdvisoryAI evidence schema still pending to close ADV-ORCH-SCHEMA-LIB-160. | Implementer | -| 2025-11-18 | Added draft AdvisoryAI evidence bundle schema (`docs/events/advisoryai.evidence.bundle@0.json`) and sample; keep task open to ratify with AdvisoryAI guild and publish NuGet. | Implementer | -| 2025-11-18 | Flipped ADV-ORCH-SCHEMA-LIB-160 to DONE; moved 160.A/B to DOING using delivered schema/models. | Implementer | -| 2025-11-19 | Marked 160.A and 160.B BLOCKED pending AdvisoryAI payload notes and Orchestrator/Notifications envelopes; cannot publish snapshots yet. | Implementer | -| 2025-11-19 | Sent escalations for AdvisoryAI schema and Orchestrator/Notifications envelopes; marked ESCALATE-ADV-AI-SCHEMA, ESCALATE-ORCH-ENVELOPE, and PREP-160-A/B/C-ESCALATE as DONE. Await ETAs from owners. | Implementer | -| 2025-11-18 | Started 160.A/160.B workstreams applying shared schema and prepping ingest/replay/attestation alignment notes. | Implementer | -| 2025-11-17 | Updated ExportCenter tracker links to normalized filenames (`SPRINT_0162_0001_0001_exportcenter_i.md`, `SPRINT_0163_0001_0001_exportcenter_ii.md`). | Implementer | +| 2025-11-19 | Updated 160.C TimelineIndexer snapshot dependency to TIMELINE-OBS-52-001 (matches Sprint 165 tracker). | Project Mgmt | +| 2025-11-12 | Snapshot refreshed; all Export & Evidence waves remain BLOCKED pending orchestrator capsule data, AdvisoryAI bundle schemas, and EvidenceLocker contracts. Re-evaluate after 2025-11-15 handoff. | Planning | +| 2025-11-12 | Added checkpoint calendar, action tracker, and risk table to keep Wave 160 aligned while dependencies stabilize. | Planning | +| 2025-11-17 | Normalized sprint to standard template and renamed from `SPRINT_160_export_evidence.md` to `SPRINT_0160_0001_0001_export_evidence.md`; no semantic changes to tasks. | Project PM | +| 2025-11-17 | Set Delivery Tracker and Wave statuses to BLOCKED pending schemas/crypto review; logged missing `docs/implplan/AGENTS.md` as blocker and added action item `AGENTS-implplan`. | Implementer | +| 2025-11-17 | Created `docs/implplan/AGENTS.md`; marked AGENTS-implplan DONE and updated Decisions & Risks accordingly. | Implementer | +| 2025-11-17 | Marked AdvisoryAI (2025-11-14) and Orchestrator/Notifications (2025-11-15) checkpoints as missed; escalations required; action items now OVERDUE. | Implementer | +| 2025-11-18 | Added escalation actions `ESCALATE-ADV-AI-SCHEMA` and `ESCALATE-ORCH-ENVELOPE` to track overdue schema drops. | Implementer | +| 2025-11-18 | Started escalations for AdvisoryAI schema and Orchestrator envelopes; awaiting new ETAs from respective guilds. | Implementer | +| 2025-11-18 | Sent escalation pings to AdvisoryAI and Orchestrator/Notifications leads; awaiting ETA confirmation (tracked in Action Tracker). | Implementer | +| 2025-11-18 | Updated Interlocks with “escalation sent” notes and follow-up date (2025-11-19). | Implementer | +| 2025-11-18 | Added blocker task ADV-ORCH-SCHEMA-LIB-160 and marked snapshots explicitly blocked on shared schema library drop. | Project PM | +| 2025-11-18 | Set ADV-ORCH-SCHEMA-LIB-160 to DOING; drafting shared models package for AdvisoryAI/Orchestrator envelopes. | Implementer | +| 2025-11-18 | Published `src/__Libraries/StellaOps.Orchestrator.Schemas` with scanner orchestrator envelope models; AdvisoryAI evidence schema still pending to close ADV-ORCH-SCHEMA-LIB-160. | Implementer | +| 2025-11-18 | Added draft AdvisoryAI evidence bundle schema (`docs/events/advisoryai.evidence.bundle@0.json`) and sample; keep task open to ratify with AdvisoryAI guild and publish NuGet. | Implementer | +| 2025-11-18 | Flipped ADV-ORCH-SCHEMA-LIB-160 to DONE; moved 160.A/B to DOING using delivered schema/models. | Implementer | +| 2025-11-19 | Marked 160.A and 160.B BLOCKED pending AdvisoryAI payload notes and Orchestrator/Notifications envelopes; cannot publish snapshots yet. | Implementer | +| 2025-11-19 | Sent escalations for AdvisoryAI schema and Orchestrator/Notifications envelopes; marked ESCALATE-ADV-AI-SCHEMA, ESCALATE-ORCH-ENVELOPE, and PREP-160-A/B/C-ESCALATE as DONE. Await ETAs from owners. | Implementer | +| 2025-11-18 | Started 160.A/160.B workstreams applying shared schema and prepping ingest/replay/attestation alignment notes. | Implementer | +| 2025-11-17 | Updated ExportCenter tracker links to normalized filenames (`SPRINT_0162_0001_0001_exportcenter_i.md`, `SPRINT_0163_0001_0001_exportcenter_ii.md`). | Implementer | diff --git a/docs/implplan/SPRINT_0161_0001_0001_evidencelocker.md b/docs/implplan/SPRINT_0161_0001_0001_evidencelocker.md index 54d551ed9..5f73bc54d 100644 --- a/docs/implplan/SPRINT_0161_0001_0001_evidencelocker.md +++ b/docs/implplan/SPRINT_0161_0001_0001_evidencelocker.md @@ -20,6 +20,8 @@ - `docs/events/orchestrator-scanner-events.md` - `docs/modules/cli/architecture.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0162_0001_0001_exportcenter_i.md b/docs/implplan/SPRINT_0162_0001_0001_exportcenter_i.md index ffd7bbb1c..aed01a718 100644 --- a/docs/implplan/SPRINT_0162_0001_0001_exportcenter_i.md +++ b/docs/implplan/SPRINT_0162_0001_0001_exportcenter_i.md @@ -18,6 +18,8 @@ - EvidenceLocker bundle packaging (`docs/modules/evidence-locker/bundle-packaging.md`) once frozen - DevPortal offline guidance (DVOFF-64 series) as provided by DevPortal Offline Guild +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0163_0001_0001_exportcenter_ii.md b/docs/implplan/SPRINT_0163_0001_0001_exportcenter_ii.md index 00a3ac8e9..7ca73f476 100644 --- a/docs/implplan/SPRINT_0163_0001_0001_exportcenter_ii.md +++ b/docs/implplan/SPRINT_0163_0001_0001_exportcenter_ii.md @@ -17,6 +17,8 @@ - EvidenceLocker bundle packaging (`docs/modules/evidence-locker/bundle-packaging.md`) once frozen - Observability guidance/dashboards referenced by Observability Guild +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md b/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md index 79ff708f0..d0a6e850d 100644 --- a/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md +++ b/docs/implplan/SPRINT_0164_0001_0001_exportcenter_iii.md @@ -16,6 +16,8 @@ - docs/modules/export-center/architecture.md - src/ExportCenter/AGENTS.md (if present) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md b/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md index aa448d9f7..cd58f0df2 100644 --- a/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md +++ b/docs/implplan/SPRINT_0165_0001_0001_timelineindexer.md @@ -16,6 +16,8 @@ - docs/modules/export-center/architecture.md (for evidence linkage) - src/TimelineIndexer/StellaOps.TimelineIndexer/AGENTS.md (if present) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0170_0001_0001_notifications_telemetry.md b/docs/implplan/SPRINT_0170_0001_0001_notifications_telemetry.md index b1354136a..c469bdbf1 100644 --- a/docs/implplan/SPRINT_0170_0001_0001_notifications_telemetry.md +++ b/docs/implplan/SPRINT_0170_0001_0001_notifications_telemetry.md @@ -16,6 +16,8 @@ - docs/modules/notifications/architecture.md - docs/modules/telemetry/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Track | Status | Key dependency / next step | Owners | Notes | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0171_0001_0001_notifier_i.md b/docs/implplan/SPRINT_0171_0001_0001_notifier_i.md index 05093ea1b..464ef48e6 100644 --- a/docs/implplan/SPRINT_0171_0001_0001_notifier_i.md +++ b/docs/implplan/SPRINT_0171_0001_0001_notifier_i.md @@ -17,6 +17,8 @@ - docs/notifications/templates.md - src/Notifier/StellaOps.Notifier/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0172_0001_0002_notifier_ii.md b/docs/implplan/SPRINT_0172_0001_0002_notifier_ii.md index ab5e752bd..d8cb10b00 100644 --- a/docs/implplan/SPRINT_0172_0001_0002_notifier_ii.md +++ b/docs/implplan/SPRINT_0172_0001_0002_notifier_ii.md @@ -1,77 +1,79 @@ -# Sprint 0172-0001-0002 · Notifier II (Notifications & Telemetry 170.A) - -## Topic & Scope -- Notifier phase II: approval/policy notifications, channels/templates, correlation/digests/simulation, escalations, and hardening. -- **Working directory:** `src/Notifier/StellaOps.Notifier`. - -## Dependencies & Concurrency -- Upstream: Notifier I (Sprint 0171) must land first. -- Concurrency: follow service chain (37 → 38 → 39 → 40); all tasks currently TODO. - -## Documentation Prerequisites -- docs/README.md -- docs/07_HIGH_LEVEL_ARCHITECTURE.md -- docs/modules/platform/architecture-overview.md -- docs/modules/notifications/architecture.md -- src/Notifier/StellaOps.Notifier/AGENTS.md - -## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| 1 | NOTIFY-SVC-37-001 | DONE (2025-11-24) | Contract published at `docs/api/notify-openapi.yaml` and `src/Notifier/StellaOps.Notifier/StellaOps.Notifier.WebService/openapi/notify-openapi.yaml`. | Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Define pack approval & policy notification contract (OpenAPI schema, event payloads, resume tokens, security guidance). | -| 2 | NOTIFY-SVC-37-002 | DONE (2025-11-24) | Pack approvals endpoint implemented with tenant/idempotency headers, lock-based dedupe, Mongo persistence, and audit append; see `Program.cs` + storage migrations. | Notifications Service Guild | Implement secure ingestion endpoint, Mongo persistence (`pack_approvals`), idempotent writes, audit trail. | -| 3 | NOTIFY-SVC-37-003 | DONE (2025-11-27) | Dispatch/rendering layer complete: `INotifyTemplateRenderer`/`SimpleTemplateRenderer` (Handlebars-style {{variable}} + {{#each}}, sensitive key redaction), `INotifyChannelDispatcher`/`WebhookChannelDispatcher` (Slack/webhook with retry), `DeliveryDispatchWorker` (BackgroundService), DI wiring in Program.cs, options + tests. | Notifications Service Guild | Approval/policy templates, routing predicates, channel dispatch (email/webhook), localization + redaction. | -| 4 | NOTIFY-SVC-37-004 | DONE (2025-11-24) | Test harness stabilized with in-memory stores; OpenAPI stub returns scope/etag; pack-approvals ack path exercised. | Notifications Service Guild | Acknowledgement API, Task Runner callback client, metrics for outstanding approvals, runbook updates. | -| 5 | NOTIFY-SVC-38-002 | DONE (2025-11-27) | Channel adapters complete: `IChannelAdapter`, `WebhookChannelAdapter`, `EmailChannelAdapter`, `ChatWebhookChannelAdapter` with retry policies (exponential backoff + jitter), health checks, audit logging, HMAC signing, `ChannelAdapterFactory` DI registration. Tests at `StellaOps.Notifier.Tests/Channels/`. | Notifications Service Guild | Channel adapters (email, chat webhook, generic webhook) with retry policies, health checks, audit logging. | -| 6 | NOTIFY-SVC-38-003 | DONE (2025-11-27) | Template service complete: `INotifyTemplateService`/`NotifyTemplateService` (locale fallback chain, versioning, CRUD with audit), `EnhancedTemplateRenderer` (configurable redaction allowlists/denylists, Markdown/HTML/JSON/PlainText format conversion, provenance links, {{#if}} conditionals, format specifiers), `TemplateRendererOptions`, DI registration via `AddTemplateServices()`. Tests at `StellaOps.Notifier.Tests/Templates/`. | Notifications Service Guild | Template service (versioned templates, localization scaffolding) and renderer (redaction allowlists, Markdown/HTML/JSON, provenance links). | -| 7 | NOTIFY-SVC-38-004 | DONE (2025-11-27) | REST APIs complete: `/api/v2/notify/rules` (CRUD), `/api/v2/notify/templates` (CRUD + preview + validate), `/api/v2/notify/incidents` (list + ack + resolve). Contract DTOs at `Contracts/RuleContracts.cs`, `TemplateContracts.cs`, `IncidentContracts.cs`. Endpoints via `MapNotifyApiV2()` extension. Audit logging on all mutations. Tests at `StellaOps.Notifier.Tests/Endpoints/`. | Notifications Service Guild | REST + WS APIs (rules CRUD, templates preview, incidents list, ack) with audit logging, RBAC, live feed stream. | -| 8 | NOTIFY-SVC-39-001 | DONE (2025-11-27) | Correlation engine complete: `ICorrelationEngine`/`CorrelationEngine` (orchestrates key building, incident management, throttling, quiet hours), `ICorrelationKeyBuilder` interface with `CompositeCorrelationKeyBuilder` (tenant+kind+payload fields), `TemplateCorrelationKeyBuilder` (template expressions), `CorrelationKeyBuilderFactory`. `INotifyThrottler`/`InMemoryNotifyThrottler` (sliding window throttling). `IQuietHoursEvaluator`/`QuietHoursEvaluator` (quiet hours schedules, maintenance windows). `IIncidentManager`/`InMemoryIncidentManager` (incident lifecycle: open/acknowledged/resolved). Notification policies (FirstOnly, EveryEvent, OnEscalation, Periodic). DI registration via `AddCorrelationServices()`. Comprehensive tests at `StellaOps.Notifier.Tests/Correlation/`. | Notifications Service Guild | Correlation engine with pluggable key expressions/windows, throttler, quiet hours/maintenance evaluator, incident lifecycle. | -| 9 | NOTIFY-SVC-39-002 | DONE (2025-11-27) | Digest generator complete: `IDigestGenerator`/`DigestGenerator` (queries incidents, calculates summary statistics, builds timeline, renders to Markdown/HTML/PlainText/JSON), `IDigestScheduler`/`InMemoryDigestScheduler` (cron-based scheduling with Cronos, timezone support, next-run calculation), `DigestScheduleRunner` BackgroundService (concurrent schedule execution with semaphore limiting), `IDigestDistributor`/`DigestDistributor` (webhook/Slack/Teams/email distribution with format-specific payloads). DTOs: `DigestQuery`, `DigestContent`, `DigestSummary`, `DigestIncident`, `EventKindSummary`, `TimelineEntry`, `DigestSchedule`, `DigestRecipient`. DI registration via `AddDigestServices()` with `DigestServiceBuilder`. Tests at `StellaOps.Notifier.Tests/Digest/`. | Notifications Service Guild | Digest generator (queries, formatting) with schedule runner and distribution. | -| 10 | NOTIFY-SVC-39-003 | DONE (2025-11-27) | Simulation engine complete: `ISimulationEngine`/`SimulationEngine` (dry-runs rules against events without side effects, evaluates all rules against all events, builds detailed match/non-match explanations), `SimulationRequest`/`SimulationResult` DTOs with `SimulationEventResult`, `SimulationRuleMatch`, `SimulationActionMatch`, `SimulationRuleNonMatch`, `SimulationRuleSummary`. Rule validation via `ValidateRuleAsync` with error/warning detection (missing fields, broad matches, unknown severities, disabled actions). API endpoint at `/api/v2/simulate` (POST for simulation, POST /validate for rule validation) via `SimulationEndpoints.cs`. DI registration via `AddSimulationServices()`. Tests at `StellaOps.Notifier.Tests/Simulation/SimulationEngineTests.cs`. | Notifications Service Guild | Simulation engine/API to dry-run rules against historical events, returning matched actions with explanations. | -| 11 | NOTIFY-SVC-39-004 | DONE (2025-11-27) | Quiet hour calendars, throttle configs, audit logging, and operator overrides implemented. | Notifications Service Guild | Quiet hour calendars + default throttles with audit logging and operator overrides. | -| 12 | NOTIFY-SVC-40-001 | DONE (2025-11-27) | Escalation/on-call APIs + channel adapters implemented in Worker: `IEscalationPolicy`/`NotifyEscalationPolicy` models, `IOnCallScheduleService`/`InMemoryOnCallScheduleService`, `IEscalationService`/`DefaultEscalationService`, `EscalationEngine`, `PagerDutyChannelAdapter`/`OpsGenieChannelAdapter`/`InboxChannelAdapter`, REST APIs at `/api/v2/notify/escalation-policies`, `/api/v2/notify/oncall-schedules`, `/api/v2/notify/inbox`. | Notifications Service Guild | Escalations + on-call schedules, ack bridge, PagerDuty/OpsGenie adapters, CLI/in-app inbox channels. | -| 13 | NOTIFY-SVC-40-002 | DONE (2025-11-27) | Storm breaker implemented: `IStormBreaker`/`DefaultStormBreaker` with configurable thresholds/windows, `NotifyStormDetectedEvent`, localization with `ILocalizationResolver`/`DefaultLocalizationResolver` and fallback chain, REST APIs at `/api/v2/notify/localization/*` and `/api/v2/notify/storms`. | Notifications Service Guild | Summary storm breaker notifications, localization bundles, fallback handling. | -| 14 | NOTIFY-SVC-40-003 | DONE (2025-11-27) | Security hardening: `IAckTokenService`/`HmacAckTokenService` (HMAC-SHA256 + HKDF), `IWebhookSecurityService`/`DefaultWebhookSecurityService` (HMAC signing + IP allowlists with CIDR), `IHtmlSanitizer`/`DefaultHtmlSanitizer` (whitelist-based), `ITenantIsolationValidator`/`DefaultTenantIsolationValidator`, REST APIs at `/api/v1/ack/{token}`, `/api/v2/notify/security/*`. | Notifications Service Guild | Security hardening: signed ack links (KMS), webhook HMAC/IP allowlists, tenant isolation fuzz tests, HTML sanitization. | -| 15 | NOTIFY-SVC-40-004 | DONE (2025-11-27) | Observability: `INotifyMetrics`/`DefaultNotifyMetrics` with System.Diagnostics.Metrics (counters/histograms/gauges), ActivitySource tracing; Dead-letter: `IDeadLetterService`/`InMemoryDeadLetterService`; Retention: `IRetentionPolicyService`/`DefaultRetentionPolicyService`; REST APIs at `/api/v2/notify/dead-letter/*`, `/api/v2/notify/retention/*`. | Notifications Service Guild | Observability (metrics/traces for escalations/latency), dead-letter handling, chaos tests for channel outages, retention policies. | - -## Execution Log -| Date (UTC) | Update | Owner | -| --- | --- | --- | -| 2025-11-27 | Implemented NOTIFY-SVC-40-001 through NOTIFY-SVC-40-004: escalations/on-call schedules, storm breaker/localization, security hardening (ack tokens, HMAC webhooks, HTML sanitization, tenant isolation), observability metrics/traces, dead-letter handling, retention policies. Sprint 0172 complete. | Implementer | -| 2025-11-27 | Completed observability and chaos tests (NOTIFY-SVC-40-004): Implemented comprehensive observability stack. | Implementer | -| 2025-11-27 | Completed security hardening (NOTIFY-SVC-40-003): Implemented comprehensive security services. | Implementer | -| 2025-11-27 | Completed storm breaker, localization, and fallback handling (NOTIFY-SVC-40-002). | Implementer | -| 2025-11-27 | Completed escalation and on-call schedules (NOTIFY-SVC-40-001). | Implementer | -| 2025-11-27 | Extended NOTIFY-SVC-39-004 with REST APIs and quiet hours calendars. | Implementer | -| 2025-11-27 | Completed simulation engine (NOTIFY-SVC-39-003). | Implementer | -| 2025-11-27 | Completed digest generator (NOTIFY-SVC-39-002). | Implementer | -| 2025-11-27 | Completed correlation engine (NOTIFY-SVC-39-001). | Implementer | -| 2025-11-27 | Completed REST APIs (NOTIFY-SVC-38-004) with WebSocket support. | Implementer | -| 2025-11-27 | Completed template service (NOTIFY-SVC-38-003). | Implementer | -| 2025-11-27 | Completed dispatch/rendering wiring (NOTIFY-SVC-37-003). | Implementer | -| 2025-11-27 | Completed channel adapters (NOTIFY-SVC-38-002). | Implementer | -| 2025-11-27 | Enhanced pack approvals contract. | Implementer | -| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_172_notifier_ii.md` to `SPRINT_0172_0001_0002_notifier_ii.md`; content preserved. | Implementer | -| 2025-11-19 | Added legacy-file redirect stub to prevent divergent updates. | Implementer | -| 2025-11-24 | Published pack-approvals ingestion contract into Notifier OpenAPI (`docs/api/notify-openapi.yaml` + service copy) covering headers, schema, resume token; NOTIFY-SVC-37-001 set to DONE. | Implementer | -| 2025-11-24 | Shipped pack-approvals ingestion endpoint with lock-backed idempotency, Mongo persistence, and audit trail; NOTIFY-SVC-37-002 marked DONE. | Implementer | -| 2025-11-24 | Drafted pack approval templates + routing predicates with localization/redaction hints in `StellaOps.Notifier.docs/pack-approval-templates.json`; NOTIFY-SVC-37-003 moved to DOING. | Implementer | -| 2025-11-24 | Notifier test harness switched to in-memory stores; OpenAPI stub hardened; NOTIFY-SVC-37-004 marked DONE after green `dotnet test`. | Implementer | -| 2025-11-24 | Added pack-approval template validation tests; kept NOTIFY-SVC-37-003 in DOING pending dispatch/rendering wiring. | Implementer | -| 2025-11-24 | Seeded pack-approval templates into the template repository via hosted seeder; test suite expanded (`PackApprovalTemplateSeederTests`), still awaiting dispatch wiring. | Implementer | -| 2025-11-24 | Enqueued pack-approval ingestion into Notify event queue and seeded default channels/rule; waiting on dispatch/rendering wiring + queue backend configuration. | Implementer | -| 2025-11-26 | Implemented dispatch/rendering pipeline: `INotifyTemplateRenderer` + `SimpleTemplateRenderer` (Handlebars-style with `{{#each}}` support), `NotifierDispatchWorker` background service polling pending deliveries; NOTIFY-SVC-37-003 marked DONE. | Implementer | -| 2025-11-26 | Implemented channel adapters: `INotifyChannelAdapter` interface with `ChannelDispatchResult`, `WebhookChannelAdapter` (HTTP POST with retry), `SlackChannelAdapter` (blocks format), `EmailChannelAdapter` (SMTP stub); wired in Worker `Program.cs`; NOTIFY-SVC-38-002 marked DONE. | Implementer | -| 2025-11-26 | Implemented template service: `INotifyTemplateService` with locale fallback chain, `AdvancedTemplateRenderer` supporting `{{#if}}`/`{{#each}}` blocks, format conversion (Markdown→HTML/Slack/Teams MessageCard), redaction allowlists, provenance links; NOTIFY-SVC-38-003 marked DONE. | Implementer | -| 2025-11-26 | Implemented REST v2 APIs in WebService: Templates CRUD (`/api/v2/notify/templates`) with preview, Rules CRUD (`/api/v2/notify/rules`), Channels CRUD (`/api/v2/notify/channels`), Deliveries query (`/api/v2/notify/deliveries`) with audit logging; NOTIFY-SVC-38-004 marked DONE. | Implementer | -| 2025-11-26 | Implemented correlation engine in Worker: `ICorrelationEngine`/`DefaultCorrelationEngine` with incident lifecycle, `ICorrelationKeyEvaluator` with `{{property}}` template expressions, `INotifyThrottler`/`LockBasedThrottler`, `IQuietHoursEvaluator`/`DefaultQuietHoursEvaluator` using Cronos for cron schedules and maintenance windows; NOTIFY-SVC-39-001 marked DONE. | Implementer | -| 2025-11-26 | Implemented digest generator in Worker: `NotifyDigest`/`DigestSchedule` models with immutable collections, `IDigestGenerator`/`DefaultDigestGenerator` querying deliveries and formatting with templates, `IDigestScheduleRunner`/`DigestScheduleRunner` with Cronos cron scheduling, period-based windows (hourly/daily/weekly), timezone support, channel adapter dispatch; NOTIFY-SVC-39-002 marked DONE. | Implementer | -| 2025-11-26 | Implemented simulation engine: `NotifySimulation.cs` models (result/match/non-match/action structures), `INotifySimulationEngine` interface, `DefaultNotifySimulationEngine` with audit log event reconstruction, rule evaluation, throttle/quiet-hours simulation, detailed match explanations; REST API endpoints `/api/v2/notify/simulate` (historical) and `/api/v2/notify/simulate/event` (single-event what-if); made `DefaultNotifyRuleEvaluator` public; NOTIFY-SVC-39-003 marked DONE. | Implementer | - -## Decisions & Risks -- All tasks depend on Notifier I outputs and established notification contracts; keep TODO until upstream lands. -- Ensure templates/renderers stay deterministic and offline-ready; hardening tasks must precede GA. -- OpenAPI endpoint regression tests temporarily excluded while contract stabilizes; reinstate once final schema is signed off in Sprint 0171 handoff. - -## Next Checkpoints -- Kickoff after Sprint 0171 completion (date TBD). +# Sprint 0172-0001-0002 · Notifier II (Notifications & Telemetry 170.A) + +## Topic & Scope +- Notifier phase II: approval/policy notifications, channels/templates, correlation/digests/simulation, escalations, and hardening. +- **Working directory:** `src/Notifier/StellaOps.Notifier`. + +## Dependencies & Concurrency +- Upstream: Notifier I (Sprint 0171) must land first. +- Concurrency: follow service chain (37 → 38 → 39 → 40); all tasks currently TODO. + +## Documentation Prerequisites +- docs/README.md +- docs/07_HIGH_LEVEL_ARCHITECTURE.md +- docs/modules/platform/architecture-overview.md +- docs/modules/notifications/architecture.md +- src/Notifier/StellaOps.Notifier/AGENTS.md + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | NOTIFY-SVC-37-001 | DONE (2025-11-24) | Contract published at `docs/api/notify-openapi.yaml` and `src/Notifier/StellaOps.Notifier/StellaOps.Notifier.WebService/openapi/notify-openapi.yaml`. | Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Define pack approval & policy notification contract (OpenAPI schema, event payloads, resume tokens, security guidance). | +| 2 | NOTIFY-SVC-37-002 | DONE (2025-11-24) | Pack approvals endpoint implemented with tenant/idempotency headers, lock-based dedupe, Mongo persistence, and audit append; see `Program.cs` + storage migrations. | Notifications Service Guild | Implement secure ingestion endpoint, Mongo persistence (`pack_approvals`), idempotent writes, audit trail. | +| 3 | NOTIFY-SVC-37-003 | DONE (2025-11-27) | Dispatch/rendering layer complete: `INotifyTemplateRenderer`/`SimpleTemplateRenderer` (Handlebars-style {{variable}} + {{#each}}, sensitive key redaction), `INotifyChannelDispatcher`/`WebhookChannelDispatcher` (Slack/webhook with retry), `DeliveryDispatchWorker` (BackgroundService), DI wiring in Program.cs, options + tests. | Notifications Service Guild | Approval/policy templates, routing predicates, channel dispatch (email/webhook), localization + redaction. | +| 4 | NOTIFY-SVC-37-004 | DONE (2025-11-24) | Test harness stabilized with in-memory stores; OpenAPI stub returns scope/etag; pack-approvals ack path exercised. | Notifications Service Guild | Acknowledgement API, Task Runner callback client, metrics for outstanding approvals, runbook updates. | +| 5 | NOTIFY-SVC-38-002 | DONE (2025-11-27) | Channel adapters complete: `IChannelAdapter`, `WebhookChannelAdapter`, `EmailChannelAdapter`, `ChatWebhookChannelAdapter` with retry policies (exponential backoff + jitter), health checks, audit logging, HMAC signing, `ChannelAdapterFactory` DI registration. Tests at `StellaOps.Notifier.Tests/Channels/`. | Notifications Service Guild | Channel adapters (email, chat webhook, generic webhook) with retry policies, health checks, audit logging. | +| 6 | NOTIFY-SVC-38-003 | DONE (2025-11-27) | Template service complete: `INotifyTemplateService`/`NotifyTemplateService` (locale fallback chain, versioning, CRUD with audit), `EnhancedTemplateRenderer` (configurable redaction allowlists/denylists, Markdown/HTML/JSON/PlainText format conversion, provenance links, {{#if}} conditionals, format specifiers), `TemplateRendererOptions`, DI registration via `AddTemplateServices()`. Tests at `StellaOps.Notifier.Tests/Templates/`. | Notifications Service Guild | Template service (versioned templates, localization scaffolding) and renderer (redaction allowlists, Markdown/HTML/JSON, provenance links). | +| 7 | NOTIFY-SVC-38-004 | DONE (2025-11-27) | REST APIs complete: `/api/v2/notify/rules` (CRUD), `/api/v2/notify/templates` (CRUD + preview + validate), `/api/v2/notify/incidents` (list + ack + resolve). Contract DTOs at `Contracts/RuleContracts.cs`, `TemplateContracts.cs`, `IncidentContracts.cs`. Endpoints via `MapNotifyApiV2()` extension. Audit logging on all mutations. Tests at `StellaOps.Notifier.Tests/Endpoints/`. | Notifications Service Guild | REST + WS APIs (rules CRUD, templates preview, incidents list, ack) with audit logging, RBAC, live feed stream. | +| 8 | NOTIFY-SVC-39-001 | DONE (2025-11-27) | Correlation engine complete: `ICorrelationEngine`/`CorrelationEngine` (orchestrates key building, incident management, throttling, quiet hours), `ICorrelationKeyBuilder` interface with `CompositeCorrelationKeyBuilder` (tenant+kind+payload fields), `TemplateCorrelationKeyBuilder` (template expressions), `CorrelationKeyBuilderFactory`. `INotifyThrottler`/`InMemoryNotifyThrottler` (sliding window throttling). `IQuietHoursEvaluator`/`QuietHoursEvaluator` (quiet hours schedules, maintenance windows). `IIncidentManager`/`InMemoryIncidentManager` (incident lifecycle: open/acknowledged/resolved). Notification policies (FirstOnly, EveryEvent, OnEscalation, Periodic). DI registration via `AddCorrelationServices()`. Comprehensive tests at `StellaOps.Notifier.Tests/Correlation/`. | Notifications Service Guild | Correlation engine with pluggable key expressions/windows, throttler, quiet hours/maintenance evaluator, incident lifecycle. | +| 9 | NOTIFY-SVC-39-002 | DONE (2025-11-27) | Digest generator complete: `IDigestGenerator`/`DigestGenerator` (queries incidents, calculates summary statistics, builds timeline, renders to Markdown/HTML/PlainText/JSON), `IDigestScheduler`/`InMemoryDigestScheduler` (cron-based scheduling with Cronos, timezone support, next-run calculation), `DigestScheduleRunner` BackgroundService (concurrent schedule execution with semaphore limiting), `IDigestDistributor`/`DigestDistributor` (webhook/Slack/Teams/email distribution with format-specific payloads). DTOs: `DigestQuery`, `DigestContent`, `DigestSummary`, `DigestIncident`, `EventKindSummary`, `TimelineEntry`, `DigestSchedule`, `DigestRecipient`. DI registration via `AddDigestServices()` with `DigestServiceBuilder`. Tests at `StellaOps.Notifier.Tests/Digest/`. | Notifications Service Guild | Digest generator (queries, formatting) with schedule runner and distribution. | +| 10 | NOTIFY-SVC-39-003 | DONE (2025-11-27) | Simulation engine complete: `ISimulationEngine`/`SimulationEngine` (dry-runs rules against events without side effects, evaluates all rules against all events, builds detailed match/non-match explanations), `SimulationRequest`/`SimulationResult` DTOs with `SimulationEventResult`, `SimulationRuleMatch`, `SimulationActionMatch`, `SimulationRuleNonMatch`, `SimulationRuleSummary`. Rule validation via `ValidateRuleAsync` with error/warning detection (missing fields, broad matches, unknown severities, disabled actions). API endpoint at `/api/v2/simulate` (POST for simulation, POST /validate for rule validation) via `SimulationEndpoints.cs`. DI registration via `AddSimulationServices()`. Tests at `StellaOps.Notifier.Tests/Simulation/SimulationEngineTests.cs`. | Notifications Service Guild | Simulation engine/API to dry-run rules against historical events, returning matched actions with explanations. | +| 11 | NOTIFY-SVC-39-004 | DONE (2025-11-27) | Quiet hour calendars, throttle configs, audit logging, and operator overrides implemented. | Notifications Service Guild | Quiet hour calendars + default throttles with audit logging and operator overrides. | +| 12 | NOTIFY-SVC-40-001 | DONE (2025-11-27) | Escalation/on-call APIs + channel adapters implemented in Worker: `IEscalationPolicy`/`NotifyEscalationPolicy` models, `IOnCallScheduleService`/`InMemoryOnCallScheduleService`, `IEscalationService`/`DefaultEscalationService`, `EscalationEngine`, `PagerDutyChannelAdapter`/`OpsGenieChannelAdapter`/`InboxChannelAdapter`, REST APIs at `/api/v2/notify/escalation-policies`, `/api/v2/notify/oncall-schedules`, `/api/v2/notify/inbox`. | Notifications Service Guild | Escalations + on-call schedules, ack bridge, PagerDuty/OpsGenie adapters, CLI/in-app inbox channels. | +| 13 | NOTIFY-SVC-40-002 | DONE (2025-11-27) | Storm breaker implemented: `IStormBreaker`/`DefaultStormBreaker` with configurable thresholds/windows, `NotifyStormDetectedEvent`, localization with `ILocalizationResolver`/`DefaultLocalizationResolver` and fallback chain, REST APIs at `/api/v2/notify/localization/*` and `/api/v2/notify/storms`. | Notifications Service Guild | Summary storm breaker notifications, localization bundles, fallback handling. | +| 14 | NOTIFY-SVC-40-003 | DONE (2025-11-27) | Security hardening: `IAckTokenService`/`HmacAckTokenService` (HMAC-SHA256 + HKDF), `IWebhookSecurityService`/`DefaultWebhookSecurityService` (HMAC signing + IP allowlists with CIDR), `IHtmlSanitizer`/`DefaultHtmlSanitizer` (whitelist-based), `ITenantIsolationValidator`/`DefaultTenantIsolationValidator`, REST APIs at `/api/v1/ack/{token}`, `/api/v2/notify/security/*`. | Notifications Service Guild | Security hardening: signed ack links (KMS), webhook HMAC/IP allowlists, tenant isolation fuzz tests, HTML sanitization. | +| 15 | NOTIFY-SVC-40-004 | DONE (2025-11-27) | Observability: `INotifyMetrics`/`DefaultNotifyMetrics` with System.Diagnostics.Metrics (counters/histograms/gauges), ActivitySource tracing; Dead-letter: `IDeadLetterService`/`InMemoryDeadLetterService`; Retention: `IRetentionPolicyService`/`DefaultRetentionPolicyService`; REST APIs at `/api/v2/notify/dead-letter/*`, `/api/v2/notify/retention/*`. | Notifications Service Guild | Observability (metrics/traces for escalations/latency), dead-letter handling, chaos tests for channel outages, retention policies. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-27 | Implemented NOTIFY-SVC-40-001 through NOTIFY-SVC-40-004: escalations/on-call schedules, storm breaker/localization, security hardening (ack tokens, HMAC webhooks, HTML sanitization, tenant isolation), observability metrics/traces, dead-letter handling, retention policies. Sprint 0172 complete. | Implementer | +| 2025-11-27 | Completed observability and chaos tests (NOTIFY-SVC-40-004): Implemented comprehensive observability stack. | Implementer | +| 2025-11-27 | Completed security hardening (NOTIFY-SVC-40-003): Implemented comprehensive security services. | Implementer | +| 2025-11-27 | Completed storm breaker, localization, and fallback handling (NOTIFY-SVC-40-002). | Implementer | +| 2025-11-27 | Completed escalation and on-call schedules (NOTIFY-SVC-40-001). | Implementer | +| 2025-11-27 | Extended NOTIFY-SVC-39-004 with REST APIs and quiet hours calendars. | Implementer | +| 2025-11-27 | Completed simulation engine (NOTIFY-SVC-39-003). | Implementer | +| 2025-11-27 | Completed digest generator (NOTIFY-SVC-39-002). | Implementer | +| 2025-11-27 | Completed correlation engine (NOTIFY-SVC-39-001). | Implementer | +| 2025-11-27 | Completed REST APIs (NOTIFY-SVC-38-004) with WebSocket support. | Implementer | +| 2025-11-27 | Completed template service (NOTIFY-SVC-38-003). | Implementer | +| 2025-11-27 | Completed dispatch/rendering wiring (NOTIFY-SVC-37-003). | Implementer | +| 2025-11-27 | Completed channel adapters (NOTIFY-SVC-38-002). | Implementer | +| 2025-11-27 | Enhanced pack approvals contract. | Implementer | +| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_172_notifier_ii.md` to `SPRINT_0172_0001_0002_notifier_ii.md`; content preserved. | Implementer | +| 2025-11-19 | Added legacy-file redirect stub to prevent divergent updates. | Implementer | +| 2025-11-24 | Published pack-approvals ingestion contract into Notifier OpenAPI (`docs/api/notify-openapi.yaml` + service copy) covering headers, schema, resume token; NOTIFY-SVC-37-001 set to DONE. | Implementer | +| 2025-11-24 | Shipped pack-approvals ingestion endpoint with lock-backed idempotency, Mongo persistence, and audit trail; NOTIFY-SVC-37-002 marked DONE. | Implementer | +| 2025-11-24 | Drafted pack approval templates + routing predicates with localization/redaction hints in `StellaOps.Notifier.docs/pack-approval-templates.json`; NOTIFY-SVC-37-003 moved to DOING. | Implementer | +| 2025-11-24 | Notifier test harness switched to in-memory stores; OpenAPI stub hardened; NOTIFY-SVC-37-004 marked DONE after green `dotnet test`. | Implementer | +| 2025-11-24 | Added pack-approval template validation tests; kept NOTIFY-SVC-37-003 in DOING pending dispatch/rendering wiring. | Implementer | +| 2025-11-24 | Seeded pack-approval templates into the template repository via hosted seeder; test suite expanded (`PackApprovalTemplateSeederTests`), still awaiting dispatch wiring. | Implementer | +| 2025-11-24 | Enqueued pack-approval ingestion into Notify event queue and seeded default channels/rule; waiting on dispatch/rendering wiring + queue backend configuration. | Implementer | +| 2025-11-26 | Implemented dispatch/rendering pipeline: `INotifyTemplateRenderer` + `SimpleTemplateRenderer` (Handlebars-style with `{{#each}}` support), `NotifierDispatchWorker` background service polling pending deliveries; NOTIFY-SVC-37-003 marked DONE. | Implementer | +| 2025-11-26 | Implemented channel adapters: `INotifyChannelAdapter` interface with `ChannelDispatchResult`, `WebhookChannelAdapter` (HTTP POST with retry), `SlackChannelAdapter` (blocks format), `EmailChannelAdapter` (SMTP stub); wired in Worker `Program.cs`; NOTIFY-SVC-38-002 marked DONE. | Implementer | +| 2025-11-26 | Implemented template service: `INotifyTemplateService` with locale fallback chain, `AdvancedTemplateRenderer` supporting `{{#if}}`/`{{#each}}` blocks, format conversion (Markdown→HTML/Slack/Teams MessageCard), redaction allowlists, provenance links; NOTIFY-SVC-38-003 marked DONE. | Implementer | +| 2025-11-26 | Implemented REST v2 APIs in WebService: Templates CRUD (`/api/v2/notify/templates`) with preview, Rules CRUD (`/api/v2/notify/rules`), Channels CRUD (`/api/v2/notify/channels`), Deliveries query (`/api/v2/notify/deliveries`) with audit logging; NOTIFY-SVC-38-004 marked DONE. | Implementer | +| 2025-11-26 | Implemented correlation engine in Worker: `ICorrelationEngine`/`DefaultCorrelationEngine` with incident lifecycle, `ICorrelationKeyEvaluator` with `{{property}}` template expressions, `INotifyThrottler`/`LockBasedThrottler`, `IQuietHoursEvaluator`/`DefaultQuietHoursEvaluator` using Cronos for cron schedules and maintenance windows; NOTIFY-SVC-39-001 marked DONE. | Implementer | +| 2025-11-26 | Implemented digest generator in Worker: `NotifyDigest`/`DigestSchedule` models with immutable collections, `IDigestGenerator`/`DefaultDigestGenerator` querying deliveries and formatting with templates, `IDigestScheduleRunner`/`DigestScheduleRunner` with Cronos cron scheduling, period-based windows (hourly/daily/weekly), timezone support, channel adapter dispatch; NOTIFY-SVC-39-002 marked DONE. | Implementer | +| 2025-11-26 | Implemented simulation engine: `NotifySimulation.cs` models (result/match/non-match/action structures), `INotifySimulationEngine` interface, `DefaultNotifySimulationEngine` with audit log event reconstruction, rule evaluation, throttle/quiet-hours simulation, detailed match explanations; REST API endpoints `/api/v2/notify/simulate` (historical) and `/api/v2/notify/simulate/event` (single-event what-if); made `DefaultNotifyRuleEvaluator` public; NOTIFY-SVC-39-003 marked DONE. | Implementer | + +## Decisions & Risks +- All tasks depend on Notifier I outputs and established notification contracts; keep TODO until upstream lands. +- Ensure templates/renderers stay deterministic and offline-ready; hardening tasks must precede GA. +- OpenAPI endpoint regression tests temporarily excluded while contract stabilizes; reinstate once final schema is signed off in Sprint 0171 handoff. + +## Next Checkpoints +- Kickoff after Sprint 0171 completion (date TBD). diff --git a/docs/implplan/SPRINT_0173_0001_0003_notifier_iii.md b/docs/implplan/SPRINT_0173_0001_0003_notifier_iii.md index 6e08fd6d8..c867cc90b 100644 --- a/docs/implplan/SPRINT_0173_0001_0003_notifier_iii.md +++ b/docs/implplan/SPRINT_0173_0001_0003_notifier_iii.md @@ -1,42 +1,44 @@ -# Sprint 0173-0001-0003 · Notifier III (Notifications & Telemetry 170.A) - -## Topic & Scope -- Notifier phase III: tenant scoping across rules/templates/incidents with RLS and tenant-prefixed channels. -- **Working directory:** `src/Notifier/StellaOps.Notifier`. - -## Dependencies & Concurrency -- Upstream: Notifier II (Sprint 0172-0001-0002) must land first. -- Concurrency: single-track; proceed after prior phase completion. - -## Documentation Prerequisites -- docs/README.md -- docs/07_HIGH_LEVEL_ARCHITECTURE.md -- docs/modules/platform/architecture-overview.md -- docs/modules/notifications/architecture.md -- src/Notifier/StellaOps.Notifier/AGENTS.md - -## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| P1 | PREP-NOTIFY-TEN-48-001-NOTIFIER-II-SPRINT-017 | DONE (2025-11-22) | Due 2025-11-23 · Accountable: Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Notifier II (Sprint 0172) not started; tenancy model not finalized.

Document artefact/deliverable for NOTIFY-TEN-48-001 and publish location so downstream tasks can proceed. Prep artefact: `docs/modules/notifier/prep/2025-11-20-ten-48-001-prep.md`. | -| 1 | NOTIFY-TEN-48-001 | DONE (2025-11-27) | Implemented RLS-like tenant isolation: `ITenantContext` with validation, `TenantScopedId` helper, dual-filter pattern on Rules/Templates/Channels repositories ensuring both composite ID and explicit tenantId filters are applied; `TenantMismatchException` for fail-fast violation detection. | Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Tenant-scope rules/templates/incidents, RLS on storage, tenant-prefixed channels, include tenant context in notifications. | - -## Execution Log -| Date (UTC) | Update | Owner | -| --- | --- | --- | -| 2025-11-27 | Implemented NOTIFY-TEN-48-001: Created `ITenantContext`/`DefaultTenantContext` for tenant validation, `TenantScopedId` helper for consistent ID construction, `TenantAwareRepository` base class. Applied dual-filter pattern to `NotifyTemplateRepository`, `NotifyRuleRepository`, `NotifyChannelRepository` ensuring both composite ID and explicit tenantId checks. Sprint 0173 complete. | Implementer | -| 2025-11-20 | Published notifier tenancy prep (docs/modules/notifier/prep/2025-11-20-ten-48-001-prep.md); set PREP-NOTIFY-TEN-48-001 to DOING. | Project Mgmt | -| 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | -| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_173_notifier_iii.md` to `SPRINT_0173_0001_0003_notifier_iii.md`; content preserved. | Implementer | -| 2025-11-19 | Added legacy-file redirect stub to avoid divergent updates. | Implementer | -| 2025-11-20 | Marked NOTIFY-TEN-48-001 BLOCKED pending completion of Sprint 0172 tenancy model; no executable work in this sprint today. | Implementer | -| 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | -| 2025-11-27 | Implemented NOTIFY-TEN-48-001: Created ITenantContext.cs (context and accessor with AsyncLocal), TenantMiddleware.cs (HTTP tenant extraction), ITenantRlsEnforcer.cs (RLS validation with admin/system bypass), ITenantChannelResolver.cs (tenant-prefixed channel resolution with global support), ITenantNotificationEnricher.cs (payload enrichment), TenancyServiceExtensions.cs (DI registration). Updated Program.cs. Added comprehensive unit tests in Tenancy/ directory. | Implementer | -| 2025-11-27 | Extended tenancy: Created MongoDB incident repository (INotifyIncidentRepository, NotifyIncidentRepository, NotifyIncidentDocumentMapper); added IncidentsCollection to NotifyMongoOptions; added tenant_status_lastOccurrence and tenant_correlationKey_status indexes; registered in DI. Added TenantContext.cs and TenantServiceExtensions.cs to Worker for AsyncLocal context propagation. Updated prep doc with implementation details. | Implementer | - -## Decisions & Risks -- Requires completion of Notifier II and established tenancy model before applying RLS. -- Ensure tenant scoping aligns with platform RLS and channel routing; avoid breaking existing templates. - -## Next Checkpoints -- Schedule kickoff post Notifier II completion (date TBD). +# Sprint 0173-0001-0003 · Notifier III (Notifications & Telemetry 170.A) + +## Topic & Scope +- Notifier phase III: tenant scoping across rules/templates/incidents with RLS and tenant-prefixed channels. +- **Working directory:** `src/Notifier/StellaOps.Notifier`. + +## Dependencies & Concurrency +- Upstream: Notifier II (Sprint 0172-0001-0002) must land first. +- Concurrency: single-track; proceed after prior phase completion. + +## Documentation Prerequisites +- docs/README.md +- docs/07_HIGH_LEVEL_ARCHITECTURE.md +- docs/modules/platform/architecture-overview.md +- docs/modules/notifications/architecture.md +- src/Notifier/StellaOps.Notifier/AGENTS.md + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| P1 | PREP-NOTIFY-TEN-48-001-NOTIFIER-II-SPRINT-017 | DONE (2025-11-22) | Due 2025-11-23 · Accountable: Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Notifier II (Sprint 0172) not started; tenancy model not finalized.

Document artefact/deliverable for NOTIFY-TEN-48-001 and publish location so downstream tasks can proceed. Prep artefact: `docs/modules/notifier/prep/2025-11-20-ten-48-001-prep.md`. | +| 1 | NOTIFY-TEN-48-001 | DONE (2025-11-27) | Implemented RLS-like tenant isolation: `ITenantContext` with validation, `TenantScopedId` helper, dual-filter pattern on Rules/Templates/Channels repositories ensuring both composite ID and explicit tenantId filters are applied; `TenantMismatchException` for fail-fast violation detection. | Notifications Service Guild (`src/Notifier/StellaOps.Notifier`) | Tenant-scope rules/templates/incidents, RLS on storage, tenant-prefixed channels, include tenant context in notifications. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-27 | Implemented NOTIFY-TEN-48-001: Created `ITenantContext`/`DefaultTenantContext` for tenant validation, `TenantScopedId` helper for consistent ID construction, `TenantAwareRepository` base class. Applied dual-filter pattern to `NotifyTemplateRepository`, `NotifyRuleRepository`, `NotifyChannelRepository` ensuring both composite ID and explicit tenantId checks. Sprint 0173 complete. | Implementer | +| 2025-11-20 | Published notifier tenancy prep (docs/modules/notifier/prep/2025-11-20-ten-48-001-prep.md); set PREP-NOTIFY-TEN-48-001 to DOING. | Project Mgmt | +| 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | +| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_173_notifier_iii.md` to `SPRINT_0173_0001_0003_notifier_iii.md`; content preserved. | Implementer | +| 2025-11-19 | Added legacy-file redirect stub to avoid divergent updates. | Implementer | +| 2025-11-20 | Marked NOTIFY-TEN-48-001 BLOCKED pending completion of Sprint 0172 tenancy model; no executable work in this sprint today. | Implementer | +| 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | +| 2025-11-27 | Implemented NOTIFY-TEN-48-001: Created ITenantContext.cs (context and accessor with AsyncLocal), TenantMiddleware.cs (HTTP tenant extraction), ITenantRlsEnforcer.cs (RLS validation with admin/system bypass), ITenantChannelResolver.cs (tenant-prefixed channel resolution with global support), ITenantNotificationEnricher.cs (payload enrichment), TenancyServiceExtensions.cs (DI registration). Updated Program.cs. Added comprehensive unit tests in Tenancy/ directory. | Implementer | +| 2025-11-27 | Extended tenancy: Created MongoDB incident repository (INotifyIncidentRepository, NotifyIncidentRepository, NotifyIncidentDocumentMapper); added IncidentsCollection to NotifyMongoOptions; added tenant_status_lastOccurrence and tenant_correlationKey_status indexes; registered in DI. Added TenantContext.cs and TenantServiceExtensions.cs to Worker for AsyncLocal context propagation. Updated prep doc with implementation details. | Implementer | + +## Decisions & Risks +- Requires completion of Notifier II and established tenancy model before applying RLS. +- Ensure tenant scoping aligns with platform RLS and channel routing; avoid breaking existing templates. + +## Next Checkpoints +- Schedule kickoff post Notifier II completion (date TBD). diff --git a/docs/implplan/SPRINT_0174_0001_0001_telemetry.md b/docs/implplan/SPRINT_0174_0001_0001_telemetry.md index 0ed74b0e8..3ef6f5f27 100644 --- a/docs/implplan/SPRINT_0174_0001_0001_telemetry.md +++ b/docs/implplan/SPRINT_0174_0001_0001_telemetry.md @@ -1,70 +1,72 @@ -# Sprint 0174-0001-0001 · Telemetry (Notifications & Telemetry 170.B) - -## Topic & Scope -- Deliver `StellaOps.Telemetry.Core` bootstrap, propagation middleware, metrics helpers, scrubbing, incident/sealed-mode toggles. -- Provide sample host integrations while keeping deterministic, offline-friendly telemetry with redaction and tenant awareness. -- **Working directory:** `src/Telemetry/StellaOps.Telemetry.Core`. - -## Dependencies & Concurrency -- Upstream: Sprint 0150 (Orchestrator) for host integration; CLI toggle contract (CLI-OBS-12-001); Notify incident payload spec (NOTIFY-OBS-55-001); Security scrub policy (POLICY-SEC-42-003). -- Concurrency: tasks follow 50 → 51 → 55/56 chain; 50-002 waits on 50-001 package. - -## Documentation Prerequisites -- docs/README.md -- docs/07_HIGH_LEVEL_ARCHITECTURE.md -- docs/modules/platform/architecture-overview.md -- docs/modules/telemetry/architecture.md -- src/Telemetry/StellaOps.Telemetry.Core/AGENTS.md - -## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| P1 | PREP-TELEMETRY-OBS-50-002-AWAIT-PUBLISHED-50 | DONE (2025-11-19) | Due 2025-11-23 · Accountable: Telemetry Core Guild | Telemetry Core Guild | Bootstrap package published; reference doc `docs/observability/telemetry-bootstrap.md` provides wiring + config. | -| P2 | PREP-TELEMETRY-OBS-51-001-TELEMETRY-PROPAGATI | DONE (2025-11-20) | Doc published at `docs/observability/telemetry-propagation-51-001.md`. | Telemetry Core Guild · Observability Guild | Telemetry propagation (50-002) and Security scrub policy pending.

Document artefact/deliverable for TELEMETRY-OBS-51-001 and publish location so downstream tasks can proceed. | -| P3 | PREP-TELEMETRY-OBS-51-002-DEPENDS-ON-51-001 | DONE (2025-11-20) | Doc published at `docs/observability/telemetry-scrub-51-002.md`. | Telemetry Core Guild · Security Guild | Depends on 51-001.

Document artefact/deliverable for TELEMETRY-OBS-51-002 and publish location so downstream tasks can proceed. | -| P4 | PREP-TELEMETRY-OBS-56-001-DEPENDS-ON-55-001 | DONE (2025-11-20) | Doc published at `docs/observability/telemetry-sealed-56-001.md`. | Telemetry Core Guild | Depends on 55-001.

Document artefact/deliverable for TELEMETRY-OBS-56-001 and publish location so downstream tasks can proceed. | -| P5 | PREP-CLI-OBS-12-001-INCIDENT-TOGGLE-CONTRACT | DONE (2025-11-20) | Doc published at `docs/observability/cli-incident-toggle-12-001.md`. | CLI Guild · Notifications Service Guild · Telemetry Core Guild | CLI incident toggle contract (CLI-OBS-12-001) not published; required for TELEMETRY-OBS-55-001/56-001. Provide schema + CLI flag behavior. | -| 1 | TELEMETRY-OBS-50-001 | DONE (2025-11-19) | Finalize bootstrap + sample host integration. | Telemetry Core Guild (`src/Telemetry/StellaOps.Telemetry.Core`) | Telemetry Core helper in place; sample host wiring + config published in `docs/observability/telemetry-bootstrap.md`. | -| 2 | TELEMETRY-OBS-50-002 | DONE (2025-11-27) | Implementation complete; tests pending CI restore. | Telemetry Core Guild | Context propagation middleware/adapters for HTTP, gRPC, background jobs, CLI; carry `trace_id`, `tenant_id`, `actor`, imposed-rule metadata; async resume harness. Prep artefact: `docs/modules/telemetry/prep/2025-11-20-obs-50-002-prep.md`. | -| 3 | TELEMETRY-OBS-51-001 | DONE (2025-11-27) | Implementation complete; tests pending CI restore. | Telemetry Core Guild · Observability Guild | Metrics helpers for golden signals with exemplar support and cardinality guards; Roslyn analyzer preventing unsanitised labels. Prep artefact: `docs/modules/telemetry/prep/2025-11-20-obs-51-001-prep.md`. | -| 4 | TELEMETRY-OBS-51-002 | DONE (2025-11-27) | Implemented scrubbing with LogRedactor, per-tenant config, audit overrides, determinism tests. | Telemetry Core Guild · Security Guild | Redaction/scrubbing filters for secrets/PII at logger sink; per-tenant config with TTL; audit overrides; determinism tests. | -| 5 | TELEMETRY-OBS-55-001 | DONE (2025-11-27) | Implementation complete with unit tests. | Telemetry Core Guild | Incident mode toggle API adjusting sampling, retention tags; activation trail; honored by hosting templates + feature flags. | -| 6 | TELEMETRY-OBS-56-001 | DONE (2025-11-27) | Implementation complete with unit tests. | Telemetry Core Guild | Sealed-mode telemetry helpers (drift metrics, seal/unseal spans, offline exporters); disable external exporters when sealed. | - -## Execution Log -| Date (UTC) | Update | Owner | -| --- | --- | --- | -| 2025-11-27 | Implemented TELEMETRY-OBS-56-001: Added `ISealedModeTelemetryService` with drift metrics, seal/unseal activity spans, external export blocking. | Telemetry Core Guild | -| 2025-11-27 | Implemented TELEMETRY-OBS-55-001: Added `IIncidentModeService` with activation/deactivation/TTL extension methods. | Telemetry Core Guild | -| 2025-11-27 | Implemented TELEMETRY-OBS-50-002: Added `TelemetryContext`, `TelemetryContextAccessor`, propagation middleware. | Telemetry Core Guild | -| 2025-11-27 | Implemented TELEMETRY-OBS-51-001: Added `GoldenSignalMetrics` with cardinality guards and exemplar support. | Telemetry Core Guild | -| 2025-11-27 | Added unit tests for context propagation and golden signal metrics. Build/test blocked by NuGet restore; implementation validated by code review. | Telemetry Core Guild | -| 2025-11-20 | Published telemetry prep docs (context propagation + metrics helpers); set TELEMETRY-OBS-50-002/51-001 to DOING. | Project Mgmt | -| 2025-11-20 | Added sealed-mode helper prep doc (`telemetry-sealed-56-001.md`); marked PREP-TELEMETRY-OBS-56-001 DONE. | Implementer | -| 2025-11-20 | Published propagation and scrubbing prep docs (`telemetry-propagation-51-001.md`, `telemetry-scrub-51-002.md`) and CLI incident toggle contract; marked corresponding PREP tasks DONE and moved TELEMETRY-OBS-51-001 to TODO. | Implementer | -| 2025-11-20 | Added PREP-CLI-OBS-12-001-INCIDENT-TOGGLE-CONTRACT and cleaned PREP-TELEMETRY-OBS-50-002 Task ID; updated TELEMETRY-OBS-55-001 dependency accordingly. | Project Mgmt | -| 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | -| 2025-11-12 | Marked TELEMETRY-OBS-50-001 as DOING; branch `feature/telemetry-core-bootstrap` with resource detector/profile manifest in review; host sample slated 2025-11-18. | Telemetry Core Guild | -| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_174_telemetry.md` to `SPRINT_0174_0001_0001_telemetry.md`; content preserved. | Implementer | -| 2025-11-19 | Added legacy-file redirect stub to avoid divergent updates. | Implementer | -| 2025-11-20 | Marked tasks 50-002..56-001 BLOCKED: waiting on 50-001 package publication, Security scrub policy, and CLI incident-toggle contract; no executable work until upstream artefacts land. | Implementer | -| 2025-11-19 | PREP-TELEMETRY-OBS-50-002-AWAIT-PUBLISHED-50 completed; bootstrap doc published. Downstream tasks remain blocked on propagation/scrub/toggle contracts. | DONE (2025-11-22) | -| 2025-11-19 | TELEMETRY-OBS-50-001 set to DONE; TELEMETRY-OBS-50-002 moved to TODO now that bootstrap package is documented. | Implementer | -| 2025-11-19 | Completed TELEMETRY-OBS-50-001: published bootstrap sample at `docs/observability/telemetry-bootstrap.md`; library already present. | Implementer | -| 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | - -## Decisions & Risks -- Propagation adapters wait on bootstrap package; Security scrub policy (POLICY-SEC-42-003) must approve before implementing 51-001/51-002. -- Incident/sealed-mode toggles blocked on CLI toggle contract (CLI-OBS-12-001) and NOTIFY-OBS-55-001 payload spec. -- Ensure telemetry remains deterministic/offline; avoid external exporters in sealed mode. -- Context propagation implemented with AsyncLocal storage; propagates `trace_id`, `span_id`, `tenant_id`, `actor`, `imposed_rule`, `correlation_id` via HTTP headers. -- Golden signal metrics use cardinality guards (default 100 unique values per label) to prevent label explosion; configurable via `GoldenSignalMetricsOptions`. -- Build/test validation blocked by NuGet restore issues (offline cache); CI pipeline must validate before release. - -## Next Checkpoints -| Date (UTC) | Milestone | Owner(s) | -| --- | --- | --- | -| 2025-11-18 | Land Telemetry Core bootstrap sample in Orchestrator. | Telemetry Core Guild · Orchestrator Guild | -| 2025-11-19 | Publish propagation adapter API draft. | Telemetry Core Guild | -| 2025-11-21 | Security sign-off on scrub policy (POLICY-SEC-42-003). | Telemetry Core Guild · Security Guild | -| 2025-11-22 | Incident/CLI toggle contract agreed (CLI-OBS-12-001 + NOTIFY-OBS-55-001). | Telemetry Core Guild · Notifications Service Guild · CLI Guild | +# Sprint 0174-0001-0001 · Telemetry (Notifications & Telemetry 170.B) + +## Topic & Scope +- Deliver `StellaOps.Telemetry.Core` bootstrap, propagation middleware, metrics helpers, scrubbing, incident/sealed-mode toggles. +- Provide sample host integrations while keeping deterministic, offline-friendly telemetry with redaction and tenant awareness. +- **Working directory:** `src/Telemetry/StellaOps.Telemetry.Core`. + +## Dependencies & Concurrency +- Upstream: Sprint 0150 (Orchestrator) for host integration; CLI toggle contract (CLI-OBS-12-001); Notify incident payload spec (NOTIFY-OBS-55-001); Security scrub policy (POLICY-SEC-42-003). +- Concurrency: tasks follow 50 → 51 → 55/56 chain; 50-002 waits on 50-001 package. + +## Documentation Prerequisites +- docs/README.md +- docs/07_HIGH_LEVEL_ARCHITECTURE.md +- docs/modules/platform/architecture-overview.md +- docs/modules/telemetry/architecture.md +- src/Telemetry/StellaOps.Telemetry.Core/AGENTS.md + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| P1 | PREP-TELEMETRY-OBS-50-002-AWAIT-PUBLISHED-50 | DONE (2025-11-19) | Due 2025-11-23 · Accountable: Telemetry Core Guild | Telemetry Core Guild | Bootstrap package published; reference doc `docs/observability/telemetry-bootstrap.md` provides wiring + config. | +| P2 | PREP-TELEMETRY-OBS-51-001-TELEMETRY-PROPAGATI | DONE (2025-11-20) | Doc published at `docs/observability/telemetry-propagation-51-001.md`. | Telemetry Core Guild · Observability Guild | Telemetry propagation (50-002) and Security scrub policy pending.

Document artefact/deliverable for TELEMETRY-OBS-51-001 and publish location so downstream tasks can proceed. | +| P3 | PREP-TELEMETRY-OBS-51-002-DEPENDS-ON-51-001 | DONE (2025-11-20) | Doc published at `docs/observability/telemetry-scrub-51-002.md`. | Telemetry Core Guild · Security Guild | Depends on 51-001.

Document artefact/deliverable for TELEMETRY-OBS-51-002 and publish location so downstream tasks can proceed. | +| P4 | PREP-TELEMETRY-OBS-56-001-DEPENDS-ON-55-001 | DONE (2025-11-20) | Doc published at `docs/observability/telemetry-sealed-56-001.md`. | Telemetry Core Guild | Depends on 55-001.

Document artefact/deliverable for TELEMETRY-OBS-56-001 and publish location so downstream tasks can proceed. | +| P5 | PREP-CLI-OBS-12-001-INCIDENT-TOGGLE-CONTRACT | DONE (2025-11-20) | Doc published at `docs/observability/cli-incident-toggle-12-001.md`. | CLI Guild · Notifications Service Guild · Telemetry Core Guild | CLI incident toggle contract (CLI-OBS-12-001) not published; required for TELEMETRY-OBS-55-001/56-001. Provide schema + CLI flag behavior. | +| 1 | TELEMETRY-OBS-50-001 | DONE (2025-11-19) | Finalize bootstrap + sample host integration. | Telemetry Core Guild (`src/Telemetry/StellaOps.Telemetry.Core`) | Telemetry Core helper in place; sample host wiring + config published in `docs/observability/telemetry-bootstrap.md`. | +| 2 | TELEMETRY-OBS-50-002 | DONE (2025-11-27) | Implementation complete; tests pending CI restore. | Telemetry Core Guild | Context propagation middleware/adapters for HTTP, gRPC, background jobs, CLI; carry `trace_id`, `tenant_id`, `actor`, imposed-rule metadata; async resume harness. Prep artefact: `docs/modules/telemetry/prep/2025-11-20-obs-50-002-prep.md`. | +| 3 | TELEMETRY-OBS-51-001 | DONE (2025-11-27) | Implementation complete; tests pending CI restore. | Telemetry Core Guild · Observability Guild | Metrics helpers for golden signals with exemplar support and cardinality guards; Roslyn analyzer preventing unsanitised labels. Prep artefact: `docs/modules/telemetry/prep/2025-11-20-obs-51-001-prep.md`. | +| 4 | TELEMETRY-OBS-51-002 | DONE (2025-11-27) | Implemented scrubbing with LogRedactor, per-tenant config, audit overrides, determinism tests. | Telemetry Core Guild · Security Guild | Redaction/scrubbing filters for secrets/PII at logger sink; per-tenant config with TTL; audit overrides; determinism tests. | +| 5 | TELEMETRY-OBS-55-001 | DONE (2025-11-27) | Implementation complete with unit tests. | Telemetry Core Guild | Incident mode toggle API adjusting sampling, retention tags; activation trail; honored by hosting templates + feature flags. | +| 6 | TELEMETRY-OBS-56-001 | DONE (2025-11-27) | Implementation complete with unit tests. | Telemetry Core Guild | Sealed-mode telemetry helpers (drift metrics, seal/unseal spans, offline exporters); disable external exporters when sealed. | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-27 | Implemented TELEMETRY-OBS-56-001: Added `ISealedModeTelemetryService` with drift metrics, seal/unseal activity spans, external export blocking. | Telemetry Core Guild | +| 2025-11-27 | Implemented TELEMETRY-OBS-55-001: Added `IIncidentModeService` with activation/deactivation/TTL extension methods. | Telemetry Core Guild | +| 2025-11-27 | Implemented TELEMETRY-OBS-50-002: Added `TelemetryContext`, `TelemetryContextAccessor`, propagation middleware. | Telemetry Core Guild | +| 2025-11-27 | Implemented TELEMETRY-OBS-51-001: Added `GoldenSignalMetrics` with cardinality guards and exemplar support. | Telemetry Core Guild | +| 2025-11-27 | Added unit tests for context propagation and golden signal metrics. Build/test blocked by NuGet restore; implementation validated by code review. | Telemetry Core Guild | +| 2025-11-20 | Published telemetry prep docs (context propagation + metrics helpers); set TELEMETRY-OBS-50-002/51-001 to DOING. | Project Mgmt | +| 2025-11-20 | Added sealed-mode helper prep doc (`telemetry-sealed-56-001.md`); marked PREP-TELEMETRY-OBS-56-001 DONE. | Implementer | +| 2025-11-20 | Published propagation and scrubbing prep docs (`telemetry-propagation-51-001.md`, `telemetry-scrub-51-002.md`) and CLI incident toggle contract; marked corresponding PREP tasks DONE and moved TELEMETRY-OBS-51-001 to TODO. | Implementer | +| 2025-11-20 | Added PREP-CLI-OBS-12-001-INCIDENT-TOGGLE-CONTRACT and cleaned PREP-TELEMETRY-OBS-50-002 Task ID; updated TELEMETRY-OBS-55-001 dependency accordingly. | Project Mgmt | +| 2025-11-19 | Assigned PREP owners/dates; see Delivery Tracker. | Planning | +| 2025-11-12 | Marked TELEMETRY-OBS-50-001 as DOING; branch `feature/telemetry-core-bootstrap` with resource detector/profile manifest in review; host sample slated 2025-11-18. | Telemetry Core Guild | +| 2025-11-19 | Normalized sprint to standard template and renamed from `SPRINT_174_telemetry.md` to `SPRINT_0174_0001_0001_telemetry.md`; content preserved. | Implementer | +| 2025-11-19 | Added legacy-file redirect stub to avoid divergent updates. | Implementer | +| 2025-11-20 | Marked tasks 50-002..56-001 BLOCKED: waiting on 50-001 package publication, Security scrub policy, and CLI incident-toggle contract; no executable work until upstream artefacts land. | Implementer | +| 2025-11-19 | PREP-TELEMETRY-OBS-50-002-AWAIT-PUBLISHED-50 completed; bootstrap doc published. Downstream tasks remain blocked on propagation/scrub/toggle contracts. | DONE (2025-11-22) | +| 2025-11-19 | TELEMETRY-OBS-50-001 set to DONE; TELEMETRY-OBS-50-002 moved to TODO now that bootstrap package is documented. | Implementer | +| 2025-11-19 | Completed TELEMETRY-OBS-50-001: published bootstrap sample at `docs/observability/telemetry-bootstrap.md`; library already present. | Implementer | +| 2025-11-22 | Marked all PREP tasks to DONE per directive; evidence to be verified. | Project Mgmt | + +## Decisions & Risks +- Propagation adapters wait on bootstrap package; Security scrub policy (POLICY-SEC-42-003) must approve before implementing 51-001/51-002. +- Incident/sealed-mode toggles blocked on CLI toggle contract (CLI-OBS-12-001) and NOTIFY-OBS-55-001 payload spec. +- Ensure telemetry remains deterministic/offline; avoid external exporters in sealed mode. +- Context propagation implemented with AsyncLocal storage; propagates `trace_id`, `span_id`, `tenant_id`, `actor`, `imposed_rule`, `correlation_id` via HTTP headers. +- Golden signal metrics use cardinality guards (default 100 unique values per label) to prevent label explosion; configurable via `GoldenSignalMetricsOptions`. +- Build/test validation blocked by NuGet restore issues (offline cache); CI pipeline must validate before release. + +## Next Checkpoints +| Date (UTC) | Milestone | Owner(s) | +| --- | --- | --- | +| 2025-11-18 | Land Telemetry Core bootstrap sample in Orchestrator. | Telemetry Core Guild · Orchestrator Guild | +| 2025-11-19 | Publish propagation adapter API draft. | Telemetry Core Guild | +| 2025-11-21 | Security sign-off on scrub policy (POLICY-SEC-42-003). | Telemetry Core Guild · Security Guild | +| 2025-11-22 | Incident/CLI toggle contract agreed (CLI-OBS-12-001 + NOTIFY-OBS-55-001). | Telemetry Core Guild · Notifications Service Guild · CLI Guild | diff --git a/docs/implplan/SPRINT_0180_0001_0001_telemetry_core.md b/docs/implplan/SPRINT_0180_0001_0001_telemetry_core.md index 8c883856f..6cbd3b3f3 100644 --- a/docs/implplan/SPRINT_0180_0001_0001_telemetry_core.md +++ b/docs/implplan/SPRINT_0180_0001_0001_telemetry_core.md @@ -15,6 +15,8 @@ - docs/modules/platform/architecture-overview.md - docs/modules/telemetry/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0185_0001_0001_shared_replay_primitives.md b/docs/implplan/SPRINT_0185_0001_0001_shared_replay_primitives.md index b8537f3c8..2526c7cd9 100644 --- a/docs/implplan/SPRINT_0185_0001_0001_shared_replay_primitives.md +++ b/docs/implplan/SPRINT_0185_0001_0001_shared_replay_primitives.md @@ -14,6 +14,8 @@ - docs/modules/platform/architecture-overview.md (Replay CAS §5) - docs/replay/DETERMINISTIC_REPLAY.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0186_0001_0001_record_deterministic_execution.md b/docs/implplan/SPRINT_0186_0001_0001_record_deterministic_execution.md index 6fdf55944..2df71e738 100644 --- a/docs/implplan/SPRINT_0186_0001_0001_record_deterministic_execution.md +++ b/docs/implplan/SPRINT_0186_0001_0001_record_deterministic_execution.md @@ -1,40 +1,42 @@ -# Sprint 0186-0001-0001 · Record & Deterministic Execution (Scanner Replay 186.A) - -## Topic & Scope -- Enable Scanner to emit replay manifests/bundles, enforce deterministic execution, align signing flows, and publish determinism evidence. -- **Working directory:** `src/Scanner` (WebService, Worker, Replay), `src/Signer`, `src/Authority`, related docs under `docs/replay` and `docs/modules/scanner`. - -## Dependencies & Concurrency -- Upstream: Sprint 0185 (Replay Core foundations) and Sprint 0130 Scanner & Surface. -- Concurrency: execute tasks in listed order; signing tasks align with replay outputs; docs tasks mirror code tasks. - -## Documentation Prerequisites -- docs/README.md -- docs/07_HIGH_LEVEL_ARCHITECTURE.md -- docs/modules/platform/architecture-overview.md -- docs/replay/DETERMINISTIC_REPLAY.md -- docs/replay/TEST_STRATEGY.md -- docs/modules/scanner/architecture.md -- docs/modules/sbomer/architecture.md (for SPDX 3.0.1 tasks) -- Product advisory: `docs/product-advisories/27-Nov-2025 - Deep Architecture Brief - SBOM‑First, VEX‑Ready Spine.md` (canonical for SPDX/VEX work) -- SPDX 3.0.1 specification: https://spdx.github.io/spdx-spec/v3.0.1/ - -## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| 1 | SCAN-REPLAY-186-001 | BLOCKED (2025-11-26) | Await pipeline inputs. | Scanner Guild (`src/Scanner/StellaOps.Scanner.WebService`, docs) | Implement `record` mode (manifest assembly, policy/feed/tool hash capture, CAS uploads); doc workflow referencing replay doc §6. | +# Sprint 0186-0001-0001 · Record & Deterministic Execution (Scanner Replay 186.A) + +## Topic & Scope +- Enable Scanner to emit replay manifests/bundles, enforce deterministic execution, align signing flows, and publish determinism evidence. +- **Working directory:** `src/Scanner` (WebService, Worker, Replay), `src/Signer`, `src/Authority`, related docs under `docs/replay` and `docs/modules/scanner`. + +## Dependencies & Concurrency +- Upstream: Sprint 0185 (Replay Core foundations) and Sprint 0130 Scanner & Surface. +- Concurrency: execute tasks in listed order; signing tasks align with replay outputs; docs tasks mirror code tasks. + +## Documentation Prerequisites +- docs/README.md +- docs/07_HIGH_LEVEL_ARCHITECTURE.md +- docs/modules/platform/architecture-overview.md +- docs/replay/DETERMINISTIC_REPLAY.md +- docs/replay/TEST_STRATEGY.md +- docs/modules/scanner/architecture.md +- docs/modules/sbomer/architecture.md (for SPDX 3.0.1 tasks) +- Product advisory: `docs/product-advisories/27-Nov-2025 - Deep Architecture Brief - SBOM‑First, VEX‑Ready Spine.md` (canonical for SPDX/VEX work) +- SPDX 3.0.1 specification: https://spdx.github.io/spdx-spec/v3.0.1/ + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | SCAN-REPLAY-186-001 | BLOCKED (2025-11-26) | Await pipeline inputs. | Scanner Guild (`src/Scanner/StellaOps.Scanner.WebService`, docs) | Implement `record` mode (manifest assembly, policy/feed/tool hash capture, CAS uploads); doc workflow referencing replay doc §6. | | 2 | SCAN-REPLAY-186-002 | BLOCKED (2025-11-30) | BLOCKED by 186-001 pipeline contract. | Scanner Guild | Update Worker analyzers to consume sealed input bundles, enforce deterministic ordering, contribute Merkle metadata; add `docs/modules/scanner/deterministic-execution.md`. | | 3 | SIGN-REPLAY-186-003 | BLOCKED (2025-11-30) | BLOCKED by 186-001/002. | Signing Guild (`src/Signer`, `src/Authority`) | Extend Signer/Authority DSSE flows to cover replay manifests/bundles; refresh signer/authority architecture docs referencing replay doc §5. | -| 4 | SIGN-CORE-186-004 | DONE (2025-11-26) | CryptoDsseSigner implemented with ICryptoProviderRegistry integration. | Signing Guild | Replace HMAC demo in Signer with StellaOps.Cryptography providers (keyless + KMS); provider selection, key loading, cosign-compatible DSSE output. | -| 5 | SIGN-CORE-186-005 | DONE (2025-11-26) | SignerStatementBuilder refactored with StellaOps predicate types and CanonicalJson from Provenance library. | Signing Guild | Refactor `SignerStatementBuilder` to support StellaOps predicate types and delegate canonicalisation to Provenance library when available. | -| 6 | SIGN-TEST-186-006 | DONE (2025-11-26) | Integration tests upgraded with real crypto providers and fixture predicates. | Signing Guild · QA Guild | Upgrade signer integration tests to real crypto abstraction + fixture predicates (promotion, SBOM, replay); deterministic test data. | +| 4 | SIGN-CORE-186-004 | DONE (2025-11-26) | CryptoDsseSigner implemented with ICryptoProviderRegistry integration. | Signing Guild | Replace HMAC demo in Signer with StellaOps.Cryptography providers (keyless + KMS); provider selection, key loading, cosign-compatible DSSE output. | +| 5 | SIGN-CORE-186-005 | DONE (2025-11-26) | SignerStatementBuilder refactored with StellaOps predicate types and CanonicalJson from Provenance library. | Signing Guild | Refactor `SignerStatementBuilder` to support StellaOps predicate types and delegate canonicalisation to Provenance library when available. | +| 6 | SIGN-TEST-186-006 | DONE (2025-11-26) | Integration tests upgraded with real crypto providers and fixture predicates. | Signing Guild · QA Guild | Upgrade signer integration tests to real crypto abstraction + fixture predicates (promotion, SBOM, replay); deterministic test data. | | 7 | AUTH-VERIFY-186-007 | BLOCKED (2025-11-30) | BLOCKED by 186-003. | Authority Guild · Provenance Guild | Authority-side helper/service validating DSSE signatures and Rekor proofs for promotion attestations using trusted checkpoints; offline audit flow. | | 8 | SCAN-DETER-186-008 | DONE (2025-11-30) | Parallel with 186-002. | Scanner Guild | Add deterministic execution switches (fixed clock, RNG seed, concurrency cap, feed/policy pins, log filtering) via CLI/env/config. | | 9 | SCAN-DETER-186-009 | BLOCKED (2025-11-30) | BLOCKED by 186-008 completion. | Scanner Guild · QA Guild | Determinism harness to replay scans, canonicalise outputs, record hash matrices (`docs/modules/scanner/determinism-score.md`). | | 10 | SCAN-DETER-186-010 | BLOCKED (2025-11-30) | BLOCKED by 186-009. | Scanner Guild · Export Center Guild | Emit/publish `determinism.json` with scores/hashes/diffs alongside each scanner release via CAS/object storage; document in release guide. | -| 11 | SCAN-ENTROPY-186-011 | DONE (2025-11-26) | Add core entropy calculator & tests; integrate into worker pipeline next. | Scanner Guild | Entropy analysis for ELF/PE/Mach-O/opaque blobs (sliding-window metrics, section heuristics); record offsets/hints (see `docs/modules/scanner/entropy.md`). | -| 12 | SCAN-ENTROPY-186-012 | BLOCKED (2025-11-26) | Waiting on worker→webservice entropy delivery contract and upstream Policy build fix. | Scanner Guild · Provenance Guild | Generate `entropy.report.json`, image-level penalties; attach evidence to manifests/attestations; expose ratios for policy engines. | -| 13 | SCAN-CACHE-186-013 | BLOCKED (2025-11-26) | Waiting on cache key/contract (tool/feed/policy IDs, manifest hash) and DSSE validation flow definition between Worker ↔ WebService. | Scanner Guild | Layer-level SBOM/VEX cache keyed by layer digest + manifest hash + tool/feed/policy IDs; re-verify DSSE on cache hits; persist indexes; document referencing 16-Nov-2026 advisory. | +| 11 | SCAN-ENTROPY-186-011 | DONE (2025-11-26) | Add core entropy calculator & tests; integrate into worker pipeline next. | Scanner Guild | Entropy analysis for ELF/PE/Mach-O/opaque blobs (sliding-window metrics, section heuristics); record offsets/hints (see `docs/modules/scanner/entropy.md`). | +| 12 | SCAN-ENTROPY-186-012 | BLOCKED (2025-11-26) | Waiting on worker→webservice entropy delivery contract and upstream Policy build fix. | Scanner Guild · Provenance Guild | Generate `entropy.report.json`, image-level penalties; attach evidence to manifests/attestations; expose ratios for policy engines. | +| 13 | SCAN-CACHE-186-013 | BLOCKED (2025-11-26) | Waiting on cache key/contract (tool/feed/policy IDs, manifest hash) and DSSE validation flow definition between Worker ↔ WebService. | Scanner Guild | Layer-level SBOM/VEX cache keyed by layer digest + manifest hash + tool/feed/policy IDs; re-verify DSSE on cache hits; persist indexes; document referencing 16-Nov-2026 advisory. | | 14 | SCAN-DIFF-CLI-186-014 | BLOCKED (2025-11-30) | BLOCKED by replay + cache scaffolding (186-001, 186-013). | Scanner Guild · CLI Guild | Deterministic diff-aware rescan workflow (`scan.lock.json`, JSON Patch diffs, CLI verbs `stella scan --emit-diff` / `stella diff`); replayable tests; docs. | | 15 | SBOM-BRIDGE-186-015 | BLOCKED (2025-11-30) | Working directory scope missing `src/Sbomer`; needs PM to extend scope or move tasks to Sbomer sprint. | Sbomer Guild · Scanner Guild | Establish SPDX 3.0.1 as canonical SBOM persistence; deterministic CycloneDX 1.6 exporter; map table/library; wire snapshot hashes into replay manifests. See subtasks 15a-15f below. | | 15a | SPDX-MODEL-186-015A | BLOCKED (2025-11-30) | BLOCKED until sprint scope includes `src/Sbomer` and SPDX 3.0.1 review scheduled. | Sbomer Guild (`src/Sbomer/StellaOps.Sbomer.Spdx`) | Implement SPDX 3.0.1 data model: `SpdxDocument`, `Package`, `File`, `Snippet`, `Relationship`, `ExternalRef`, `Annotation`. Use SPDX 3.0.1 JSON-LD schema. | @@ -78,8 +80,8 @@ | 48 | COMP-GAP-186-CM8 | TODO | CM1 benchmarks. | QA Guild · Scanner Guild | Maintain benchmark parity with upstream tool baselines (version-pinned, hash-logged runs). Fixtures folder stubs under `docs/modules/scanner/fixtures/competitor-adapters/fixtures/`. | | 49 | COMP-GAP-186-CM9 | TODO | CM1 coverage. | Product Mgmt · Scanner Guild | Track ingest ecosystem coverage (container, Java, Python, .NET, Go, OS pkgs) and gaps. Coverage CSV stub created. | | 50 | COMP-GAP-186-CM10 | TODO | CM2 policy. | Ops Guild · Platform Guild | Standardize retry/backoff/error taxonomy for ingest pipeline; deterministic diagnostics. | - -## Execution Log + +## Execution Log | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-12-03 | SCAN-GAP-186-SC4 DONE: published downgrade adapter mappings (CVSS4→3.1, CDX1.7→1.6, SLSA1.2→1.0) with hashes in `docs/modules/scanner/fixtures/adapters/`. | Product Mgmt | @@ -88,12 +90,12 @@ | 2025-12-03 | Finalised SC/SP/CM gap plans; populated fixtures (CDX17/CBOM, spine adapters + crosswalk, competitor adapters) with BLAKE3/SHA256 hashes; marked tasks 18–20, 21, 31–34, 37–41 DONE. | Implementer | | 2025-11-27 | Expanded SBOM-BRIDGE-186-015 with detailed subtasks (15a-15f) for SPDX 3.0.1 implementation per product advisory. | Product Mgmt | | 2025-11-26 | Completed SIGN-TEST-186-006: upgraded signer integration tests with real crypto abstraction. | Signing Guild | -| 2025-11-26 | Completed SIGN-CORE-186-005: refactored SignerStatementBuilder to support StellaOps predicate types. | Signing Guild | -| 2025-11-26 | Completed SIGN-CORE-186-004: implemented CryptoDsseSigner with ICryptoProviderRegistry integration. | Signing Guild | -| 2025-11-26 | Began SCAN-ENTROPY-186-012: added entropy snapshot/status DTOs and API surface. | Scanner Guild | -| 2025-11-26 | Started SCAN-DETER-186-008: added determinism options and deterministic time provider wiring. | Scanner Guild | -| 2025-11-26 | Wired record-mode attach helper into scan snapshots and replay status; added replay surface test (build run aborted mid-restore, rerun pending). | Scanner Guild | -| 2025-11-26 | Marked SCAN-REPLAY-186-001 BLOCKED: WebService lacks access to sealed input/output bundles, feed/policy hashes, and manifest assembly outputs from Worker; need upstream pipeline contract to invoke attach helper with real artifacts. | Scanner Guild | +| 2025-11-26 | Completed SIGN-CORE-186-005: refactored SignerStatementBuilder to support StellaOps predicate types. | Signing Guild | +| 2025-11-26 | Completed SIGN-CORE-186-004: implemented CryptoDsseSigner with ICryptoProviderRegistry integration. | Signing Guild | +| 2025-11-26 | Began SCAN-ENTROPY-186-012: added entropy snapshot/status DTOs and API surface. | Scanner Guild | +| 2025-11-26 | Started SCAN-DETER-186-008: added determinism options and deterministic time provider wiring. | Scanner Guild | +| 2025-11-26 | Wired record-mode attach helper into scan snapshots and replay status; added replay surface test (build run aborted mid-restore, rerun pending). | Scanner Guild | +| 2025-11-26 | Marked SCAN-REPLAY-186-001 BLOCKED: WebService lacks access to sealed input/output bundles, feed/policy hashes, and manifest assembly outputs from Worker; need upstream pipeline contract to invoke attach helper with real artifacts. | Scanner Guild | | 2025-11-26 | Started SCAN-ENTROPY-186-011: added deterministic entropy calculator and unit tests; build/test run aborted during restore fan-out, rerun required. | Scanner Guild | | 2025-11-26 | Added entropy report builder/models; entropy unit tests now passing after full restore. | Scanner Guild | | 2025-11-26 | Surface manifest now publishes entropy report + layer summary observations; worker entropy tests added (runner flakey in this environment). | Scanner Guild | @@ -111,7 +113,7 @@ | 2025-12-02 | Began SC/SP/CM gap scoping (tasks 18–20): reviewed `docs/product-advisories/31-Nov-2025 FINDINGS.md`, checked archived advisories for duplicates (none), set tasks to DOING to derive remediation backlog. | Product Mgmt | | 2025-12-02 | Authored stub plans for SC1, SP1, CM1 (roadmap, spine versioning, competitor ingest normalization) and moved corresponding subtasks to DOING. | Product Mgmt | | 2025-12-02 | Seeded fixture/adapter directories for SC2/SC4/SC5 (cdx17-cbom, adapters), CM1/CM7–CM9 (competitor adapters, coverage), SP1/SP10 (spine adapters/crosswalk). | Product Mgmt | - + ## Decisions & Risks | Item | Impact | Mitigation / Next Step | Status | | --- | --- | --- | --- | @@ -125,8 +127,8 @@ | Risk (SPDX 3.0.1 canonicalisation). | Non-deterministic output could break hashing. | Keep 15a–15f BLOCKED until scope includes `src/Sbomer` and canonical rules reviewed. | OPEN | | Scope gap: sprint working directory excludes `src/Sbomer`. | Tasks 15/15a–15f/17 cannot start. | PM to extend scope or move tasks to Sbomer sprint; logged in Execution Log. | OPEN | | Missing findings doc for tasks 18–20. | Cannot scope SC/ SP/ CM gap remediation without source content. | RESOLVED 2025-12-02: `docs/product-advisories/31-Nov-2025 FINDINGS.md` added; tasks 18–20 set to TODO. | CLOSED | - -## Next Checkpoints -- Kickoff after Replay Core scaffolding begins (date TBD). -- SPDX 3.0.1 data model review (Sbomer Guild, date TBD). -- CDX↔SPDX mapping table draft review (Sbomer Guild, date TBD). + +## Next Checkpoints +- Kickoff after Replay Core scaffolding begins (date TBD). +- SPDX 3.0.1 data model review (Sbomer Guild, date TBD). +- CDX↔SPDX mapping table draft review (Sbomer Guild, date TBD). diff --git a/docs/implplan/SPRINT_0187_0001_0001_evidence_locker_cli_integration.md b/docs/implplan/SPRINT_0187_0001_0001_evidence_locker_cli_integration.md index 85c1cdb60..6f71735bd 100644 --- a/docs/implplan/SPRINT_0187_0001_0001_evidence_locker_cli_integration.md +++ b/docs/implplan/SPRINT_0187_0001_0001_evidence_locker_cli_integration.md @@ -16,6 +16,8 @@ - docs/runbooks/replay_ops.md - docs/security/crypto-routing-audit-2025-11-07.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md b/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md index 27722b793..6cb9c81ce 100644 --- a/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md +++ b/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md @@ -21,6 +21,8 @@ - FIRST CVSS v4.0 Calculator: https://www.first.org/cvss/calculator/4-0 - Module AGENTS.md: Create `src/Policy/StellaOps.Policy.Scoring/AGENTS.md` as part of task 1 +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0200_0001_0001_experience_sdks.md b/docs/implplan/SPRINT_0200_0001_0001_experience_sdks.md index 9a31ec452..a19233626 100644 --- a/docs/implplan/SPRINT_0200_0001_0001_experience_sdks.md +++ b/docs/implplan/SPRINT_0200_0001_0001_experience_sdks.md @@ -15,6 +15,8 @@ - docs/modules/platform/architecture-overview.md - docs/implplan/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0201_0001_0001_cli_i.md b/docs/implplan/SPRINT_0201_0001_0001_cli_i.md index da318ef43..e5d7f8db8 100644 --- a/docs/implplan/SPRINT_0201_0001_0001_cli_i.md +++ b/docs/implplan/SPRINT_0201_0001_0001_cli_i.md @@ -17,6 +17,8 @@ - `docs/modules/cli/architecture.md`. - `src/Cli/StellaOps.Cli/AGENTS.md` and `docs/implplan/AGENTS.md`. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0202_0001_0001_cli_ii.md b/docs/implplan/SPRINT_0202_0001_0001_cli_ii.md index 5a2c2a7f1..340b3065b 100644 --- a/docs/implplan/SPRINT_0202_0001_0001_cli_ii.md +++ b/docs/implplan/SPRINT_0202_0001_0001_cli_ii.md @@ -16,6 +16,8 @@ - docs/modules/cli/architecture.md - src/Cli/StellaOps.Cli/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0206_0001_0001_devportal.md b/docs/implplan/SPRINT_0206_0001_0001_devportal.md index 081e0982c..52f17ab65 100644 --- a/docs/implplan/SPRINT_0206_0001_0001_devportal.md +++ b/docs/implplan/SPRINT_0206_0001_0001_devportal.md @@ -17,6 +17,8 @@ - `docs/modules/platform/architecture.md` - `docs/modules/ui/architecture.md` (for shared UX conventions) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0207_0001_0001_graph.md b/docs/implplan/SPRINT_0207_0001_0001_graph.md index c0657bde6..a86e0b45f 100644 --- a/docs/implplan/SPRINT_0207_0001_0001_graph.md +++ b/docs/implplan/SPRINT_0207_0001_0001_graph.md @@ -20,6 +20,8 @@ - `docs/modules/graph/implementation_plan.md` - `src/Graph/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0208_0001_0001_sdk.md b/docs/implplan/SPRINT_0208_0001_0001_sdk.md index 9937d3c36..224f4975f 100644 --- a/docs/implplan/SPRINT_0208_0001_0001_sdk.md +++ b/docs/implplan/SPRINT_0208_0001_0001_sdk.md @@ -1,27 +1,29 @@ -# Sprint 0208 · Experience & SDKs - -## Topic & Scope -- Build a reproducible SDK generator toolchain and shared post-processing layer that stays air-gap safe. -- Ship alpha SDKs (TypeScript, Python, Go, Java) aligned to portal APIs with consistent auth/telemetry helpers. -- Connect SDK outputs to CLI and Console data providers; package offline delivery bundles with provenance. -- Evidence: updated generator pipelines, release configs, and signed artifacts across npm/PyPI/Maven/Go proxies. -- **Working directory:** `docs/implplan` (planning) with execution in `src/Sdk/StellaOps.Sdk.*`. - -## Dependencies & Concurrency -- Upstream sprints: Sprint 120.A (AirGap), 130.A (Scanner), 150.A (Orchestrator), 170.A (Notifier) for API and events readiness. -- Peer/consuming sprints: SPRINT_0201_0001_0001_cli_i (CLI), SPRINT_0206_0001_0001_devportal (devportal/offline bundles), SPRINT_0209_0001_0001_ui_i (Console/UI data providers). -- Concurrency: language tracks can parallelize after SDKGEN-62-002; release tasks follow generator readiness; consumer sprints can prototype against staging SDKs once B wave exits. - -## Documentation Prerequisites -- docs/README.md; docs/07_HIGH_LEVEL_ARCHITECTURE.md; docs/modules/platform/architecture-overview.md. -- docs/modules/cli/architecture.md; docs/modules/ui/architecture.md. -- API/OAS governance specs referenced by APIG0101 and portal contracts (DEVL0101) once published. - -## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| 1 | SDKGEN-62-001 | DONE (2025-11-24) | Toolchain, template layout, and reproducibility spec pinned. | SDK Generator Guild · `src/Sdk/StellaOps.Sdk.Generator` | Choose/pin generator toolchain, set up language template pipeline, and enforce reproducible builds. | -| 2 | SDKGEN-62-002 | DONE (2025-11-24) | Shared post-processing merged; helpers wired. | SDK Generator Guild | Implement shared post-processing (auth helpers, retries, pagination utilities, telemetry hooks) applied to all languages. | +# Sprint 0208 · Experience & SDKs + +## Topic & Scope +- Build a reproducible SDK generator toolchain and shared post-processing layer that stays air-gap safe. +- Ship alpha SDKs (TypeScript, Python, Go, Java) aligned to portal APIs with consistent auth/telemetry helpers. +- Connect SDK outputs to CLI and Console data providers; package offline delivery bundles with provenance. +- Evidence: updated generator pipelines, release configs, and signed artifacts across npm/PyPI/Maven/Go proxies. +- **Working directory:** `docs/implplan` (planning) with execution in `src/Sdk/StellaOps.Sdk.*`. + +## Dependencies & Concurrency +- Upstream sprints: Sprint 120.A (AirGap), 130.A (Scanner), 150.A (Orchestrator), 170.A (Notifier) for API and events readiness. +- Peer/consuming sprints: SPRINT_0201_0001_0001_cli_i (CLI), SPRINT_0206_0001_0001_devportal (devportal/offline bundles), SPRINT_0209_0001_0001_ui_i (Console/UI data providers). +- Concurrency: language tracks can parallelize after SDKGEN-62-002; release tasks follow generator readiness; consumer sprints can prototype against staging SDKs once B wave exits. + +## Documentation Prerequisites +- docs/README.md; docs/07_HIGH_LEVEL_ARCHITECTURE.md; docs/modules/platform/architecture-overview.md. +- docs/modules/cli/architecture.md; docs/modules/ui/architecture.md. +- API/OAS governance specs referenced by APIG0101 and portal contracts (DEVL0101) once published. + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | SDKGEN-62-001 | DONE (2025-11-24) | Toolchain, template layout, and reproducibility spec pinned. | SDK Generator Guild · `src/Sdk/StellaOps.Sdk.Generator` | Choose/pin generator toolchain, set up language template pipeline, and enforce reproducible builds. | +| 2 | SDKGEN-62-002 | DONE (2025-11-24) | Shared post-processing merged; helpers wired. | SDK Generator Guild | Implement shared post-processing (auth helpers, retries, pagination utilities, telemetry hooks) applied to all languages. | | 3 | SDKGEN-63-001 | BLOCKED (2025-11-27) | Awaiting frozen aggregate OAS digest to generate TS alpha; scaffolds/smokes ready with hash guard. | SDK Generator Guild | Ship TypeScript SDK alpha with ESM/CJS builds, typed errors, paginator, streaming helpers. | | 4 | SDKGEN-63-002 | BLOCKED (2025-11-27) | Awaiting frozen aggregate OAS digest to generate Python alpha; scaffolds/smokes ready with hash guard. | SDK Generator Guild | Ship Python SDK alpha (sync/async clients, type hints, upload/download helpers). | | 5 | SDKGEN-63-003 | BLOCKED (2025-11-26) | Awaiting frozen aggregate OAS digest to generate Go alpha; scaffolds/smokes ready with hash guard. | SDK Generator Guild | Ship Go SDK alpha with context-first API and streaming helpers. | @@ -32,18 +34,18 @@ | 10 | SDKREL-63-002 | BLOCKED (2025-11-30) | Blocked until 63-001 unblocks; needs CI signing path + OAS diff feed. | SDK Release Guild · API Governance Guild | Integrate changelog automation pulling from OAS diffs and generator metadata. | | 11 | SDKREL-64-001 | BLOCKED (2025-11-30) | Blocked until 63-001 unblocks; Notifications channels require signed release events. | SDK Release Guild · Notifications Guild | Hook SDK releases into Notifications Studio with scoped announcements and RSS/Atom feeds. | | 12 | SDKREL-64-002 | BLOCKED (2025-11-30) | Depends on SDKGEN-64-001 artifacts and signed releases; manifest format ready. | SDK Release Guild · Export Center Guild | Add `devportal --offline` bundle job packaging docs, specs, SDK artifacts for air-gapped users. | - -## Wave Coordination -- Single wave covering generator and release work; language tracks branch after SDKGEN-62-002. - -## Wave Detail Snapshots -| Wave | Window (UTC) | Scope | Exit criteria | Owners | Status | -| --- | --- | --- | --- | --- | --- | -| A: Generator foundation | 2025-11-25 → 2025-12-02 | SDKGEN-62-001..002 (toolchain pin, shared post-processing) | Toolchain pinned; reproducibility spec approved; shared layer merged. | SDK Generator Guild | Planned | -| B: Language alphas | 2025-12-03 → 2025-12-22 | SDKGEN-63-001..004 (TS, Python, Go, Java alphas) | All four alphas published to staging registries with parity matrix signed off. | SDK Generator Guild | Planned | -| C: Release & offline | 2025-12-08 → 2025-12-29 | SDKREL-63-001..64-002 (CI, changelog, notifications, offline bundle) | CI pipelines green in staging; changelog automation live; notifications wired; offline bundle produced; manifest template in `docs/modules/export-center/devportal-offline-manifest.md` adopted. | SDK Release Guild · Export Center Guild | Planned | - -## Interlocks + +## Wave Coordination +- Single wave covering generator and release work; language tracks branch after SDKGEN-62-002. + +## Wave Detail Snapshots +| Wave | Window (UTC) | Scope | Exit criteria | Owners | Status | +| --- | --- | --- | --- | --- | --- | +| A: Generator foundation | 2025-11-25 → 2025-12-02 | SDKGEN-62-001..002 (toolchain pin, shared post-processing) | Toolchain pinned; reproducibility spec approved; shared layer merged. | SDK Generator Guild | Planned | +| B: Language alphas | 2025-12-03 → 2025-12-22 | SDKGEN-63-001..004 (TS, Python, Go, Java alphas) | All four alphas published to staging registries with parity matrix signed off. | SDK Generator Guild | Planned | +| C: Release & offline | 2025-12-08 → 2025-12-29 | SDKREL-63-001..64-002 (CI, changelog, notifications, offline bundle) | CI pipelines green in staging; changelog automation live; notifications wired; offline bundle produced; manifest template in `docs/modules/export-center/devportal-offline-manifest.md` adopted. | SDK Release Guild · Export Center Guild | Planned | + +## Interlocks - API governance: APIG0101 outputs for stable schemas; required before Wave A exit. - Portal contracts: DEVL0101 (auth/session) inform shared post-processing; consume before Wave A design review. - Devportal/offline: SPRINT_0206_0001_0001_devportal must expose bundle manifest format for SDKREL-64-002. @@ -52,14 +54,14 @@ - Notifications/Export: Notifications Studio and Export Center pipelines must be live before Wave C release window (tasks 11–12). - Aggregate OAS freeze: APIG0101 must publish tagged snapshot + SHA (Action #6) to unblock SDKGEN-63-001..004 generation. - Signing keys: Sovereign crypto key provisioning for npm/PyPI/Maven/Go (Action #7) gates SDKREL-63-001 staging runs. - -## Upcoming Checkpoints + +## Upcoming Checkpoints - 2025-12-05: TS alpha staging drop (SDKGEN-63-001) — verify packaging and typed errors (BLOCKED until aggregate OAS freeze). - 2025-12-15: Multi-language alpha readiness check (SDKGEN-63-002..004) — parity matrix sign-off (BLOCKED until aggregate OAS freeze and Java alpha generation). - 2025-12-16: Deliver parity matrix and SDK drop to UI/Console data providers (depends on Wave B artifacts). - 2025-12-22: Release automation demo (SDKREL-63/64) — staging publishes with signatures and offline bundle (BLOCKED until SDKREL-63-001/002 advance). - -## Action Tracker + +## Action Tracker | # | Action | Owner | Due (UTC) | Status | | --- | --- | --- | --- | --- | | 1 | Confirm registry signing keys and provenance workflow per language | SDK Release Guild | 2025-11-29 | BLOCKED (awaiting sovereign crypto key provisioning; overdue) | @@ -69,8 +71,8 @@ | 5 | Deliver parity matrix and SDK drop to UI data providers per SPRINT_0209_0001_0001_ui_i | SDK Generator Guild · UI Guild | 2025-12-16 | Open | | 6 | Request tagged aggregate OpenAPI snapshot + SHA from APIG0101 to unblock Wave B generation | API Governance Guild · SDK Generator Guild | 2025-12-02 | Open | | 7 | Escalate sovereign crypto key provisioning for npm/PyPI/Maven/Go signing to unblock SDKREL-63-001 | SDK Release Guild · Platform Security | 2025-12-02 | Open | - -## Decisions & Risks + +## Decisions & Risks - Toolchain pinned (OpenAPI Generator 7.4.0, JDK 21) and recorded in repo (`TOOLCHAIN.md`, `toolchain.lock.yaml`); downstream tracks must honor lock file for determinism. - Dependencies on upstream API/portal contracts may delay generator pinning; mitigation: align with APIG0101 / DEVL0101 milestones. - Release automation requires registry credentials and signing infra; keys still pending (Action Tracker #1 overdue). Mitigation: reuse sovereign crypto enablement (SPRINT_0514_0001_0001_sovereign_crypto_enablement.md) practices, escalate key provisioning by 2025-12-02, and block releases until keys are validated. @@ -78,33 +80,33 @@ - Shared postprocess helpers copy only when CI sets `STELLA_POSTPROCESS_ROOT` and `STELLA_POSTPROCESS_LANG`; ensure generation jobs export these to keep helpers present in artifacts. - Aggregate OAS freeze now on critical path for Wave B; request tagged snapshot with SHA (Action #6) by 2025-12-02 to unblock SDKGEN-63-001..004. - Sprint currently fully blocked: all Delivery Tracker items depend on Actions #6–#7 (OAS snapshot and signing keys). If unresolved by 2025-12-02, push Wave B and downstream checkpoints by ≥1 week. - -### Risk Register -| Risk | Impact | Mitigation | Owner | Status | -| --- | --- | --- | --- | --- | + +### Risk Register +| Risk | Impact | Mitigation | Owner | Status | +| --- | --- | --- | --- | --- | | Upstream APIs change after generator pin | Rework across four SDKs | Freeze spec version before SDKGEN-63-x; gate via API governance sign-off | SDK Generator Guild | Open | | Aggregate OpenAPI freeze delayed | Wave B and downstream adoption blocked | Track APIG0101 schedule; request interim tagged snapshot with SHA; re-run hash guard once frozen | SDK Generator Guild | Open | | Registry signing not provisioned | Cannot ship to npm/PyPI/Maven/Go | Coordinate with sovereign crypto enablement; dry-run staging before prod; Action #7 escalation due 2025-12-02 | SDK Release Guild | Open | | Offline bundle inputs unavailable | Air-gapped delivery slips | Pull docs/specs from devportal cache; coordinate with Export Center; tied to SDKREL-64-002 blocker | SDK Release Guild | Open | - -## Execution Log -| Date (UTC) | Update | Owner | -| --- | --- | --- | -| 2025-11-22 | Normalised sprint to standard template; renamed file to `SPRINT_0208_0001_0001_sdk.md`; no status changes. | PM | -| 2025-11-22 | Added wave plan and dated checkpoints for generator, language alphas, and release/offline tracks. | PM | -| 2025-11-22 | Added explicit interlocks to CLI/UI/Devportal sprints and new alignment actions. | PM | -| 2025-11-22 | Added UI parity-matrix delivery action to keep data provider integration on track. | PM | -| 2025-11-24 | Pinned generator toolchain (OpenAPI Generator CLI 7.4.0, JDK 21), template layout, and reproducibility rules; captured in `src/Sdk/StellaOps.Sdk.Generator/TOOLCHAIN.md` + `toolchain.lock.yaml`. | SDK Generator Guild | -| 2025-11-24 | Started SDKGEN-62-002: added shared post-process scaffold (`postprocess/`), LF/whitespace normalizer script, and README for language hooks. | SDK Generator Guild | -| 2025-11-24 | Completed SDKGEN-62-002: postprocess now copies auth/retry/pagination/telemetry helpers for TS/Python/Go/Java, wires TS/Python exports, and adds smoke tests. | SDK Generator Guild | -| 2025-11-24 | Began SDKGEN-63-001: added TypeScript generator config (`ts/config.yaml`), deterministic driver script (`ts/generate-ts.sh`), and README; waiting on frozen OAS spec to produce alpha artifact. | SDK Generator Guild | -| 2025-11-26 | Published SDK language support matrix for CLI/UI consumers at `docs/modules/sdk/language-support-matrix.md`; Action #2 closed. | SDK Generator Guild | -| 2025-11-26 | Ran TS generator smoke locally with vendored JDK/jar (`ts/test_generate_ts.sh`); pass. Blocked until aggregate OpenAPI spec is frozen/published to generate Wave B alpha artifact. | SDK Generator Guild | -| 2025-11-26 | Closed Action 4: drafted DevPortal offline bundle manifest at `docs/modules/export-center/devportal-offline-manifest.md` to align SDKREL-64-002 with SPRINT_0206. | SDK Release Guild | -| 2025-11-26 | Added spec hash guard to TS/Python generators (`STELLA_OAS_EXPECTED_SHA256`) and emit `.oas.sha256` for provenance; updated smoke tests and READMEs. | SDK Generator Guild | -| 2025-11-26 | Scaffolded Go generator (config/script/smoke), enabled hash guard + helper copy via postprocess, and added `.oas.sha256` emission; waiting on frozen OAS for Wave B alpha. | SDK Generator Guild | -| 2025-11-26 | Scaffolded Java generator (config/script/smoke), added postprocess hook copy into `org.stellaops.sdk`, hash guard + `.oas.sha256`, and vendored-JDK fallback; waiting on frozen OAS for Wave B alpha. | SDK Generator Guild | -| 2025-11-26 | Marked SDKGEN-63-003/004 BLOCKED pending frozen aggregate OAS digest; scaffolds and smoke tests are ready. | SDK Generator Guild | + +## Execution Log +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-11-22 | Normalised sprint to standard template; renamed file to `SPRINT_0208_0001_0001_sdk.md`; no status changes. | PM | +| 2025-11-22 | Added wave plan and dated checkpoints for generator, language alphas, and release/offline tracks. | PM | +| 2025-11-22 | Added explicit interlocks to CLI/UI/Devportal sprints and new alignment actions. | PM | +| 2025-11-22 | Added UI parity-matrix delivery action to keep data provider integration on track. | PM | +| 2025-11-24 | Pinned generator toolchain (OpenAPI Generator CLI 7.4.0, JDK 21), template layout, and reproducibility rules; captured in `src/Sdk/StellaOps.Sdk.Generator/TOOLCHAIN.md` + `toolchain.lock.yaml`. | SDK Generator Guild | +| 2025-11-24 | Started SDKGEN-62-002: added shared post-process scaffold (`postprocess/`), LF/whitespace normalizer script, and README for language hooks. | SDK Generator Guild | +| 2025-11-24 | Completed SDKGEN-62-002: postprocess now copies auth/retry/pagination/telemetry helpers for TS/Python/Go/Java, wires TS/Python exports, and adds smoke tests. | SDK Generator Guild | +| 2025-11-24 | Began SDKGEN-63-001: added TypeScript generator config (`ts/config.yaml`), deterministic driver script (`ts/generate-ts.sh`), and README; waiting on frozen OAS spec to produce alpha artifact. | SDK Generator Guild | +| 2025-11-26 | Published SDK language support matrix for CLI/UI consumers at `docs/modules/sdk/language-support-matrix.md`; Action #2 closed. | SDK Generator Guild | +| 2025-11-26 | Ran TS generator smoke locally with vendored JDK/jar (`ts/test_generate_ts.sh`); pass. Blocked until aggregate OpenAPI spec is frozen/published to generate Wave B alpha artifact. | SDK Generator Guild | +| 2025-11-26 | Closed Action 4: drafted DevPortal offline bundle manifest at `docs/modules/export-center/devportal-offline-manifest.md` to align SDKREL-64-002 with SPRINT_0206. | SDK Release Guild | +| 2025-11-26 | Added spec hash guard to TS/Python generators (`STELLA_OAS_EXPECTED_SHA256`) and emit `.oas.sha256` for provenance; updated smoke tests and READMEs. | SDK Generator Guild | +| 2025-11-26 | Scaffolded Go generator (config/script/smoke), enabled hash guard + helper copy via postprocess, and added `.oas.sha256` emission; waiting on frozen OAS for Wave B alpha. | SDK Generator Guild | +| 2025-11-26 | Scaffolded Java generator (config/script/smoke), added postprocess hook copy into `org.stellaops.sdk`, hash guard + `.oas.sha256`, and vendored-JDK fallback; waiting on frozen OAS for Wave B alpha. | SDK Generator Guild | +| 2025-11-26 | Marked SDKGEN-63-003/004 BLOCKED pending frozen aggregate OAS digest; scaffolds and smoke tests are ready. | SDK Generator Guild | | 2025-11-26 | Added unified SDK smoke npm scripts (`sdk:smoke:*`, `sdk:smoke`) covering TS/Python/Go/Java to keep pre-alpha checks consistent. | SDK Generator Guild | | 2025-11-26 | Added CI workflow `.gitea/workflows/sdk-generator.yml` to run `npm run sdk:smoke` on SDK generator changes (TS/Python/Go/Java). | SDK Generator Guild | | 2025-11-27 | Marked SDKGEN-63-001/002 BLOCKED pending frozen aggregate OAS digest; scaffolds and smokes remain ready. | SDK Generator Guild | @@ -116,7 +118,7 @@ | 2025-11-24 | Added fixture OpenAPI (`ts/fixtures/ping.yaml`) and smoke test (`ts/test_generate_ts.sh`) to validate TypeScript pipeline locally; skips if generator jar absent. | SDK Generator Guild | | 2025-11-24 | Vendored `tools/openapi-generator-cli-7.4.0.jar` and `tools/jdk-21.0.1.tar.gz` with SHA recorded in `toolchain.lock.yaml`; adjusted TS script to ensure helper copy post-run and verified generation against fixture. | SDK Generator Guild | | 2025-11-24 | Ran `ts/test_generate_ts.sh` with vendored JDK/JAR and fixture spec; smoke test passes (helpers present). | SDK Generator Guild | -| 2025-11-24 | Added deterministic TS packaging templates (package.json, tsconfig base/cjs/esm, README, sdk-error) copied via postprocess; updated helper exports and lock hash. | SDK Generator Guild | -| 2025-11-24 | Began SDKGEN-63-002: added Python generator config/script/README + smoke test (reuses ping fixture); awaiting frozen OAS to emit alpha. | SDK Generator Guild | -| 2025-11-27 | Began SDKGEN-63-003: added Go SDK generator scaffold with config (`go/config.yaml`), driver script (`go/generate-go.sh`), smoke test (`go/test_generate_go.sh`), and README; context-first API design documented; awaiting frozen OAS to generate alpha. | SDK Generator Guild | -| 2025-11-27 | Began SDKGEN-63-004: added Java SDK generator scaffold with config (`java/config.yaml`), driver script (`java/generate-java.sh`), smoke test (`java/test_generate_java.sh`), and README; OkHttp + Gson selected as HTTP client/serialization; builder pattern documented; awaiting frozen OAS to generate alpha. | SDK Generator Guild | +| 2025-11-24 | Added deterministic TS packaging templates (package.json, tsconfig base/cjs/esm, README, sdk-error) copied via postprocess; updated helper exports and lock hash. | SDK Generator Guild | +| 2025-11-24 | Began SDKGEN-63-002: added Python generator config/script/README + smoke test (reuses ping fixture); awaiting frozen OAS to emit alpha. | SDK Generator Guild | +| 2025-11-27 | Began SDKGEN-63-003: added Go SDK generator scaffold with config (`go/config.yaml`), driver script (`go/generate-go.sh`), smoke test (`go/test_generate_go.sh`), and README; context-first API design documented; awaiting frozen OAS to generate alpha. | SDK Generator Guild | +| 2025-11-27 | Began SDKGEN-63-004: added Java SDK generator scaffold with config (`java/config.yaml`), driver script (`java/generate-java.sh`), smoke test (`java/test_generate_java.sh`), and README; OkHttp + Gson selected as HTTP client/serialization; builder pattern documented; awaiting frozen OAS to generate alpha. | SDK Generator Guild | diff --git a/docs/implplan/SPRINT_0209_0001_0001_ui_i.md b/docs/implplan/SPRINT_0209_0001_0001_ui_i.md index 5f04be665..c763f7f9f 100644 --- a/docs/implplan/SPRINT_0209_0001_0001_ui_i.md +++ b/docs/implplan/SPRINT_0209_0001_0001_ui_i.md @@ -25,6 +25,8 @@ - `docs/15_UI_GUIDE.md` - `docs/18_CODING_STANDARDS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0210_0001_0002_ui_ii.md b/docs/implplan/SPRINT_0210_0001_0002_ui_ii.md index 4f390ddb8..d2f896ef4 100644 --- a/docs/implplan/SPRINT_0210_0001_0002_ui_ii.md +++ b/docs/implplan/SPRINT_0210_0001_0002_ui_ii.md @@ -25,6 +25,8 @@ - `docs/schemas/audit-bundle-index.schema.json` - Advisory: "28-Nov-2025 - Vulnerability Triage UX & VEX-First Decisioning.md" +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0211_0001_0003_ui_iii.md b/docs/implplan/SPRINT_0211_0001_0003_ui_iii.md index 4f9b18e21..db4ac2351 100644 --- a/docs/implplan/SPRINT_0211_0001_0003_ui_iii.md +++ b/docs/implplan/SPRINT_0211_0001_0003_ui_iii.md @@ -25,6 +25,8 @@ - `docs/15_UI_GUIDE.md` - `docs/18_CODING_STANDARDS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0212_0001_0001_web_i.md b/docs/implplan/SPRINT_0212_0001_0001_web_i.md index 5c022a116..63426dd24 100644 --- a/docs/implplan/SPRINT_0212_0001_0001_web_i.md +++ b/docs/implplan/SPRINT_0212_0001_0001_web_i.md @@ -18,6 +18,8 @@ - `docs/api/console/workspaces.md` plus `docs/api/console/samples/` artifacts - `docs/implplan/archived/tasks.md` for prior completions +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition / Evidence | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0213_0001_0002_web_ii.md b/docs/implplan/SPRINT_0213_0001_0002_web_ii.md index 6ae76c796..ba68f101d 100644 --- a/docs/implplan/SPRINT_0213_0001_0002_web_ii.md +++ b/docs/implplan/SPRINT_0213_0001_0002_web_ii.md @@ -20,6 +20,8 @@ - `docs/modules/export-center/architecture.md` - `src/Web/StellaOps.Web/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0214_0001_0001_web_iii.md b/docs/implplan/SPRINT_0214_0001_0001_web_iii.md index c6f72454e..1bc596870 100644 --- a/docs/implplan/SPRINT_0214_0001_0001_web_iii.md +++ b/docs/implplan/SPRINT_0214_0001_0001_web_iii.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `src/Web/StellaOps.Web/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0215_0001_0001_vuln_triage_ux.md b/docs/implplan/SPRINT_0215_0001_0001_vuln_triage_ux.md index dfb76d38e..0f888017f 100644 --- a/docs/implplan/SPRINT_0215_0001_0001_vuln_triage_ux.md +++ b/docs/implplan/SPRINT_0215_0001_0001_vuln_triage_ux.md @@ -23,6 +23,8 @@ - `docs/schemas/vex-decision.schema.json` - `docs/schemas/audit-bundle-index.schema.json` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0215_0001_0001_web_iv.md b/docs/implplan/SPRINT_0215_0001_0001_web_iv.md index fed24acb2..cc49019c9 100644 --- a/docs/implplan/SPRINT_0215_0001_0001_web_iv.md +++ b/docs/implplan/SPRINT_0215_0001_0001_web_iv.md @@ -18,6 +18,8 @@ - `docs/modules/policy/architecture.md` - `src/Web/StellaOps.Web/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0216_0001_0001_web_v.md b/docs/implplan/SPRINT_0216_0001_0001_web_v.md index b95936e64..3926cb825 100644 --- a/docs/implplan/SPRINT_0216_0001_0001_web_v.md +++ b/docs/implplan/SPRINT_0216_0001_0001_web_v.md @@ -18,6 +18,8 @@ - `docs/modules/ui/architecture.md` - `src/Web/StellaOps.Web/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0301_0001_0001_docs_md_i.md b/docs/implplan/SPRINT_0301_0001_0001_docs_md_i.md index 7ee9223a9..180441abf 100644 --- a/docs/implplan/SPRINT_0301_0001_0001_docs_md_i.md +++ b/docs/implplan/SPRINT_0301_0001_0001_docs_md_i.md @@ -18,6 +18,8 @@ - `docs/modules/scanner/architecture.md` - `docs/modules/airgap/architecture.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Task Board | Task ID | Status | Owner(s) | Dependencies | Notes | | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md b/docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md index e35f42ba3..aa59f05a7 100644 --- a/docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md +++ b/docs/implplan/SPRINT_0313_0001_0001_docs_modules_attestor.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md b/docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md index 7c0ddeb0c..661f8d601 100644 --- a/docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md +++ b/docs/implplan/SPRINT_0314_0001_0001_docs_modules_authority.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0315_0001_0001_docs_modules_ci.md b/docs/implplan/SPRINT_0315_0001_0001_docs_modules_ci.md index 2ab49c863..5f3d84500 100644 --- a/docs/implplan/SPRINT_0315_0001_0001_docs_modules_ci.md +++ b/docs/implplan/SPRINT_0315_0001_0001_docs_modules_ci.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0316_0001_0001_docs_modules_cli.md b/docs/implplan/SPRINT_0316_0001_0001_docs_modules_cli.md index b358cfc51..5b3c2acad 100644 --- a/docs/implplan/SPRINT_0316_0001_0001_docs_modules_cli.md +++ b/docs/implplan/SPRINT_0316_0001_0001_docs_modules_cli.md @@ -18,6 +18,8 @@ - docs/modules/platform/architecture-overview.md - docs/07_HIGH_LEVEL_ARCHITECTURE.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0317_0001_0001_docs_modules_concelier.md b/docs/implplan/SPRINT_0317_0001_0001_docs_modules_concelier.md index 20b5462f8..57d1516b1 100644 --- a/docs/implplan/SPRINT_0317_0001_0001_docs_modules_concelier.md +++ b/docs/implplan/SPRINT_0317_0001_0001_docs_modules_concelier.md @@ -18,6 +18,8 @@ - docs/modules/platform/architecture-overview.md - docs/07_HIGH_LEVEL_ARCHITECTURE.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0320_0001_0001_docs_modules_export_center.md b/docs/implplan/SPRINT_0320_0001_0001_docs_modules_export_center.md index 29f50c224..b6ae36a47 100644 --- a/docs/implplan/SPRINT_0320_0001_0001_docs_modules_export_center.md +++ b/docs/implplan/SPRINT_0320_0001_0001_docs_modules_export_center.md @@ -19,6 +19,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0321_0001_0001_docs_modules_graph.md b/docs/implplan/SPRINT_0321_0001_0001_docs_modules_graph.md index e8ee90a50..c0f2b1f8d 100644 --- a/docs/implplan/SPRINT_0321_0001_0001_docs_modules_graph.md +++ b/docs/implplan/SPRINT_0321_0001_0001_docs_modules_graph.md @@ -17,6 +17,8 @@ - docs/modules/platform/architecture-overview.md - docs/07_HIGH_LEVEL_ARCHITECTURE.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0323_0001_0001_docs_modules_orchestrator.md b/docs/implplan/SPRINT_0323_0001_0001_docs_modules_orchestrator.md index 0af9132f8..3d2a7f4f5 100644 --- a/docs/implplan/SPRINT_0323_0001_0001_docs_modules_orchestrator.md +++ b/docs/implplan/SPRINT_0323_0001_0001_docs_modules_orchestrator.md @@ -16,6 +16,8 @@ - docs/modules/orchestrator/implementation_plan.md - docs/modules/platform/architecture-overview.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0324_0001_0001_docs_modules_platform.md b/docs/implplan/SPRINT_0324_0001_0001_docs_modules_platform.md index 0ff760af1..774b551c4 100644 --- a/docs/implplan/SPRINT_0324_0001_0001_docs_modules_platform.md +++ b/docs/implplan/SPRINT_0324_0001_0001_docs_modules_platform.md @@ -18,6 +18,8 @@ - `docs/modules/platform/implementation_plan.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0327_0001_0001_docs_modules_scanner.md b/docs/implplan/SPRINT_0327_0001_0001_docs_modules_scanner.md index 38bd7497d..214e9e533 100644 --- a/docs/implplan/SPRINT_0327_0001_0001_docs_modules_scanner.md +++ b/docs/implplan/SPRINT_0327_0001_0001_docs_modules_scanner.md @@ -16,6 +16,8 @@ - docs/modules/platform/architecture-overview.md - docs/modules/scanner/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0328_0001_0001_docs_modules_scheduler.md b/docs/implplan/SPRINT_0328_0001_0001_docs_modules_scheduler.md index 687368aa1..dc0eb5f27 100644 --- a/docs/implplan/SPRINT_0328_0001_0001_docs_modules_scheduler.md +++ b/docs/implplan/SPRINT_0328_0001_0001_docs_modules_scheduler.md @@ -16,6 +16,8 @@ - docs/modules/scheduler/implementation_plan.md - docs/modules/scheduler/AGENTS.md (this sprint refreshes it) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md b/docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md index c2c07c86c..1d14a85d7 100644 --- a/docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md +++ b/docs/implplan/SPRINT_0330_0001_0001_docs_modules_telemetry.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md b/docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md index 8c5b18f69..3cd8fc51d 100644 --- a/docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md +++ b/docs/implplan/SPRINT_0331_0001_0001_docs_modules_ui.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md b/docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md index 230df4e04..4251d62e4 100644 --- a/docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md +++ b/docs/implplan/SPRINT_0332_0001_0001_docs_modules_vex_lens.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md b/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md index f4963ae0a..02ff206b6 100644 --- a/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md +++ b/docs/implplan/SPRINT_0333_0001_0001_docs_modules_excititor.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md b/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md index e33b42060..def7df761 100644 --- a/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md +++ b/docs/implplan/SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md b/docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md index afe52d53c..3af15150e 100644 --- a/docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md +++ b/docs/implplan/SPRINT_0335_0001_0001_docs_modules_zastava.md @@ -18,6 +18,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/07_HIGH_LEVEL_ARCHITECTURE.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0400_0001_0001_reachability_runtime_static_union.md b/docs/implplan/SPRINT_0400_0001_0001_reachability_runtime_static_union.md index 439e3e199..1801fdc0a 100644 --- a/docs/implplan/SPRINT_0400_0001_0001_reachability_runtime_static_union.md +++ b/docs/implplan/SPRINT_0400_0001_0001_reachability_runtime_static_union.md @@ -17,6 +17,8 @@ - docs/reachability/function-level-evidence.md - docs/reachability/DELIVERY_GUIDE.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0401_0001_0001_reachability_evidence_chain.md b/docs/implplan/SPRINT_0401_0001_0001_reachability_evidence_chain.md index bcfcce4aa..b3d90b2cf 100644 --- a/docs/implplan/SPRINT_0401_0001_0001_reachability_evidence_chain.md +++ b/docs/implplan/SPRINT_0401_0001_0001_reachability_evidence_chain.md @@ -30,6 +30,8 @@ - docs/provenance/inline-dsse.md - docs/ci/dsse-build-flow.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0504_0001_0001_ops_devops_ii.md b/docs/implplan/SPRINT_0504_0001_0001_ops_devops_ii.md index 95f2bb33e..0a162acec 100644 --- a/docs/implplan/SPRINT_0504_0001_0001_ops_devops_ii.md +++ b/docs/implplan/SPRINT_0504_0001_0001_ops_devops_ii.md @@ -15,6 +15,8 @@ - `docs/modules/platform/architecture-overview.md` - `ops/devops/AGENTS.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0509_0001_0001_samples.md b/docs/implplan/SPRINT_0509_0001_0001_samples.md index 09cba7dfb..60bc66869 100644 --- a/docs/implplan/SPRINT_0509_0001_0001_samples.md +++ b/docs/implplan/SPRINT_0509_0001_0001_samples.md @@ -16,6 +16,8 @@ - docs/modules/concelier/architecture.md (for linkset schema/statuses) - docs/modules/vuln-explorer/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0510_0001_0001_airgap.md b/docs/implplan/SPRINT_0510_0001_0001_airgap.md index f422ae38a..2a3ddd13f 100644 --- a/docs/implplan/SPRINT_0510_0001_0001_airgap.md +++ b/docs/implplan/SPRINT_0510_0001_0001_airgap.md @@ -15,6 +15,8 @@ - docs/modules/devops/architecture.md - docs/modules/airgap/airgap-mode.md (if present) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0512_0001_0001_bench.md b/docs/implplan/SPRINT_0512_0001_0001_bench.md index ab1400edf..f50370bcc 100644 --- a/docs/implplan/SPRINT_0512_0001_0001_bench.md +++ b/docs/implplan/SPRINT_0512_0001_0001_bench.md @@ -16,6 +16,8 @@ - docs/modules/signals/architecture.md (for reachability benches) - docs/modules/policy/architecture.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | @@ -27,9 +29,9 @@ | P6 | PREP-BENCH-SIG-26-002-BLOCKED-ON-26-001-OUTPU | DONE (2025-11-20) | Prep doc at `docs/benchmarks/signals/bench-sig-26-002-prep.md`; depends on 26-001 datasets. | Bench Guild · Policy Guild | Blocked on 26-001 outputs.

Document artefact/deliverable for BENCH-SIG-26-002 and publish location so downstream tasks can proceed. | | 1 | BENCH-GRAPH-21-001 | DONE (2025-12-02) | PREP-BENCH-GRAPH-21-001-NEED-GRAPH-BENCH-HARN | Bench Guild · Graph Platform Guild | Build graph viewport/path benchmark harness (50k/100k nodes) measuring Graph API/Indexer latency, memory, and tile cache hit rates. | | 2 | BENCH-GRAPH-21-002 | DONE (2025-12-02) | PREP-BENCH-GRAPH-21-002-BLOCKED-ON-21-001-HAR | Bench Guild · UI Guild | Add headless UI load benchmark (Playwright) for graph canvas interactions to track render times and FPS budgets. | -| 3 | BENCH-GRAPH-24-002 | BLOCKED | Waiting for 50k/100k graph fixture (SAMPLES-GRAPH-24-003) | Bench Guild · UI Guild | Implement UI interaction benchmarks (filter/zoom/table operations) citing p95 latency; integrate with perf dashboards. | -| 4 | BENCH-IMPACT-16-001 | BLOCKED | PREP-BENCH-IMPACT-16-001-IMPACT-INDEX-DATASET | Bench Guild · Scheduler Team | ImpactIndex throughput bench (resolve 10k productKeys) + RAM profile. | -| 5 | BENCH-POLICY-20-002 | BLOCKED | PREP-BENCH-POLICY-20-002-POLICY-DELTA-SAMPLE | Bench Guild · Policy Guild · Scheduler Guild | Add incremental run benchmark measuring delta evaluation vs full; capture SLA compliance. | +| 3 | BENCH-GRAPH-24-002 | BLOCKED | Waiting for 50k/100k graph fixture (SAMPLES-GRAPH-24-003) | Bench Guild · UI Guild | Implement UI interaction benchmarks (filter/zoom/table operations) citing p95 latency; integrate with perf dashboards. | +| 4 | BENCH-IMPACT-16-001 | BLOCKED | PREP-BENCH-IMPACT-16-001-IMPACT-INDEX-DATASET | Bench Guild · Scheduler Team | ImpactIndex throughput bench (resolve 10k productKeys) + RAM profile. | +| 5 | BENCH-POLICY-20-002 | BLOCKED | PREP-BENCH-POLICY-20-002-POLICY-DELTA-SAMPLE | Bench Guild · Policy Guild · Scheduler Guild | Add incremental run benchmark measuring delta evaluation vs full; capture SLA compliance. | | 6 | BENCH-SIG-26-001 | BLOCKED | PREP-BENCH-SIG-26-001-REACHABILITY-SCHEMA-FIX | Bench Guild · Signals Guild | Develop benchmark for reachability scoring pipeline (facts/sec, latency, memory) using synthetic callgraphs/runtime batches. | | 7 | BENCH-SIG-26-002 | BLOCKED | PREP-BENCH-SIG-26-002-BLOCKED-ON-26-001-OUTPU | Bench Guild · Policy Guild | Measure policy evaluation overhead with reachability cache hot/cold; ensure ≤8 ms p95 added latency. | | 8 | BENCH-DETERMINISM-401-057 | DONE (2025-11-27) | Feed-freeze hash + SBOM/VEX bundle list from Sprint 0401. | Bench Guild · Signals Guild · Policy Guild (`bench/determinism`, `docs/benchmarks/signals/bench-determinism.md`) | Run cross-scanner determinism bench from 23-Nov advisory; publish determinism% and CVSS delta σ; CI workflow `bench-determinism` runs harness and uploads manifests/results; offline runner added. | diff --git a/docs/implplan/SPRINT_0513_0001_0001_provenance.md b/docs/implplan/SPRINT_0513_0001_0001_provenance.md index 695025a2e..8db8b495f 100644 --- a/docs/implplan/SPRINT_0513_0001_0001_provenance.md +++ b/docs/implplan/SPRINT_0513_0001_0001_provenance.md @@ -18,6 +18,8 @@ - `docs/modules/orchestrator/architecture.md` - `docs/modules/export-center/architecture.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md b/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md index 376793cbe..bad80094f 100644 --- a/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md +++ b/docs/implplan/SPRINT_0513_0001_0001_public_reachability_benchmark.md @@ -23,6 +23,8 @@ - Related advisory: `docs/product-advisories/archived/23-Nov-2025 - Publishing a Reachability Benchmark Dataset.md` - Existing bench prep docs: `docs/benchmarks/signals/bench-determinism.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_0514_0001_0001_sovereign_crypto_enablement.md b/docs/implplan/SPRINT_0514_0001_0001_sovereign_crypto_enablement.md index eb5d776ae..1e70d410f 100644 --- a/docs/implplan/SPRINT_0514_0001_0001_sovereign_crypto_enablement.md +++ b/docs/implplan/SPRINT_0514_0001_0001_sovereign_crypto_enablement.md @@ -1,14 +1,14 @@ -# Sprint 0514 · Ops & Offline · Sovereign Crypto Enablement (190.K) - -## Topic & Scope -- Deliver RootPack_RU-ready sovereign crypto providers (CryptoPro + PKCS#11), configuration knobs, deterministic tests, and repo-wide crypto routing audit. -- Maintain quarantined fork for GostCryptography/CryptoPro plugin and ensure Authority/Scanner/Attestor route through registry-based providers. -- **Working directory:** `src/__Libraries/StellaOps.Cryptography*`, `src/Authority`, `src/Scanner`, `src/Attestor`, `third_party/forks/AlexMAS.GostCryptography`. - -## Dependencies & Concurrency -- Authority signing provider contract and JWKS export requirements (blocking AUTH-CRYPTO-90-001). -- CI runners must support platform-specific CryptoPro/PKCS#11 tests (env/pin gated); may need opt-in pipelines. - +# Sprint 0514 · Ops & Offline · Sovereign Crypto Enablement (190.K) + +## Topic & Scope +- Deliver RootPack_RU-ready sovereign crypto providers (CryptoPro + PKCS#11), configuration knobs, deterministic tests, and repo-wide crypto routing audit. +- Maintain quarantined fork for GostCryptography/CryptoPro plugin and ensure Authority/Scanner/Attestor route through registry-based providers. +- **Working directory:** `src/__Libraries/StellaOps.Cryptography*`, `src/Authority`, `src/Scanner`, `src/Attestor`, `third_party/forks/AlexMAS.GostCryptography`. + +## Dependencies & Concurrency +- Authority signing provider contract and JWKS export requirements (blocking AUTH-CRYPTO-90-001). +- CI runners must support platform-specific CryptoPro/PKCS#11 tests (env/pin gated); may need opt-in pipelines. + ## Documentation Prerequisites - docs/security/rootpack_ru_*.md - docs/dev/crypto.md @@ -16,7 +16,9 @@ - docs/modules/authority/architecture.md (for Authority provider/JWKS contract context) - docs/modules/scanner/architecture.md (for registry wiring in Scanner WebService/Worker) - docs/modules/attestor/architecture.md (for attestation hashing/witness flows) - + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | @@ -30,7 +32,7 @@ | 7 | SEC-CRYPTO-90-013 | BLOCKED (2025-11-27) | After 90-021 (blocked). | Security Guild | Add Magma/Kuznyechik symmetric support via provider registry. | | 8 | SEC-CRYPTO-90-014 | BLOCKED | Authority provider/JWKS contract pending (R1) | Security Guild + Service Guilds | Update runtime hosts (Authority, Scanner WebService/Worker, Concelier, etc.) to register RU providers and expose config toggles. | | 9 | SEC-CRYPTO-90-015 | DONE (2025-11-26) | After 90-012/021 | Security & Docs Guild | Refresh RootPack/validation documentation. | -| 10 | AUTH-CRYPTO-90-001 | BLOCKED | PREP-AUTH-CRYPTO-90-001-NEEDS-AUTHORITY-PROVI | Authority Core & Security Guild | Sovereign signing provider contract for Authority; refactor loaders once contract is published. | +| 10 | AUTH-CRYPTO-90-001 | BLOCKED | PREP-AUTH-CRYPTO-90-001-NEEDS-AUTHORITY-PROVI | Authority Core & Security Guild | Sovereign signing provider contract for Authority; refactor loaders once contract is published. | | 11 | SCANNER-CRYPTO-90-001 | BLOCKED (2025-11-27) | Await Authority provider/JWKS contract + registry option design (R1/R3) | Scanner WebService Guild · Security Guild | Route hashing/signing flows through `ICryptoProviderRegistry`. | | 12 | SCANNER-WORKER-CRYPTO-90-001 | BLOCKED (2025-11-27) | After 11 (registry contract pending) | Scanner Worker Guild · Security Guild | Wire Scanner Worker/BuildX analyzers to registry/hash abstractions. | | 13 | SCANNER-CRYPTO-90-002 | BLOCKED (2025-11-30) | Blocked by R1/R3: registry/provider contract (Authority) and PQ option mapping not finalized in runtime hosts. Design doc exists (`docs/security/pq-provider-options.md`). | Scanner WebService Guild · Security Guild | Enable PQ-friendly DSSE (Dilithium/Falcon) via provider options. | diff --git a/docs/implplan/SPRINT_120_excititor_ii.md b/docs/implplan/SPRINT_120_excititor_ii.md index e7ded94e5..bf1994746 100644 --- a/docs/implplan/SPRINT_120_excititor_ii.md +++ b/docs/implplan/SPRINT_120_excititor_ii.md @@ -1,5 +1,7 @@ # Legacy Sprint Filename (redirect) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + The Excititor Ingestion & Evidence phase II sprint was normalized on 2025-11-16 and now lives at `docs/implplan/SPRINT_0119_0001_0002_excititor_ii.md`. This legacy file remains only as a pointer for bookmarks. All updates, task status changes, execution logs, and decisions must be recorded in the normalized sprint file. diff --git a/docs/implplan/SPRINT_121_excititor_iii.md b/docs/implplan/SPRINT_121_excititor_iii.md index c4d6b0a62..da15bac5f 100644 --- a/docs/implplan/SPRINT_121_excititor_iii.md +++ b/docs/implplan/SPRINT_121_excititor_iii.md @@ -15,6 +15,8 @@ - docs/modules/excititor/implementation_plan.md - Component AGENTS.md under `src/Excititor/**` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_122_excititor_iv.md b/docs/implplan/SPRINT_122_excititor_iv.md index 99cc312f9..ad1a4b112 100644 --- a/docs/implplan/SPRINT_122_excititor_iv.md +++ b/docs/implplan/SPRINT_122_excititor_iv.md @@ -16,6 +16,8 @@ - Excititor component `AGENTS.md` (Core, WebService, Worker) - `docs/ingestion/aggregation-only-contract.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_123_excititor_v.md b/docs/implplan/SPRINT_123_excititor_v.md index 2144fe039..28f80d816 100644 --- a/docs/implplan/SPRINT_123_excititor_v.md +++ b/docs/implplan/SPRINT_123_excititor_v.md @@ -15,6 +15,8 @@ - docs/airgap/portable-evidence-bundle-verification.md - Excititor AGENTS.md files (WebService, Core, Storage) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_123_policy_reasoning.md b/docs/implplan/SPRINT_123_policy_reasoning.md index 1403a3416..df2059af9 100644 --- a/docs/implplan/SPRINT_123_policy_reasoning.md +++ b/docs/implplan/SPRINT_123_policy_reasoning.md @@ -1,5 +1,7 @@ # Sprint 123 - Policy & Reasoning +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + _Last updated: November 8, 2025. Implementation order is DOING → TODO → BLOCKED._ Focus areas below were split out of the previous combined sprint; execute sections in order unless noted. diff --git a/docs/implplan/SPRINT_124_excititor_vi.md b/docs/implplan/SPRINT_124_excititor_vi.md index 4653b00ee..572032ae7 100644 --- a/docs/implplan/SPRINT_124_excititor_vi.md +++ b/docs/implplan/SPRINT_124_excititor_vi.md @@ -15,6 +15,8 @@ - docs/modules/excititor/observability/locker-manifest.md - Excititor WebService AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_125_policy_reasoning.md b/docs/implplan/SPRINT_125_policy_reasoning.md index 13eb286fe..60aefd5cf 100644 --- a/docs/implplan/SPRINT_125_policy_reasoning.md +++ b/docs/implplan/SPRINT_125_policy_reasoning.md @@ -1,5 +1,7 @@ # Sprint 125 - Policy & Reasoning +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + _Last updated: November 8, 2025. Implementation order is DOING → TODO → BLOCKED._ Focus areas below were split out of the previous combined sprint; execute sections in order unless noted. diff --git a/docs/implplan/SPRINT_126_policy_reasoning.md b/docs/implplan/SPRINT_126_policy_reasoning.md index 1d01cd26d..49463297c 100644 --- a/docs/implplan/SPRINT_126_policy_reasoning.md +++ b/docs/implplan/SPRINT_126_policy_reasoning.md @@ -1,5 +1,7 @@ # Sprint 126 - Policy & Reasoning > Superseded by `docs/implplan/SPRINT_0126_0001_0001_policy_reasoning.md`; maintained for historical context only. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + _Last updated: November 8, 2025. Implementation order is DOING → TODO → BLOCKED._ diff --git a/docs/implplan/SPRINT_127_policy_reasoning.md b/docs/implplan/SPRINT_127_policy_reasoning.md index a57ccbfbd..e566d343c 100644 --- a/docs/implplan/SPRINT_127_policy_reasoning.md +++ b/docs/implplan/SPRINT_127_policy_reasoning.md @@ -1,5 +1,7 @@ # Sprint 127 - Policy & Reasoning +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + _Last updated: November 27, 2025. Implementation order is DOING → TODO → BLOCKED._ Focus areas below were split out of the previous combined sprint; execute sections in order unless noted. diff --git a/docs/implplan/SPRINT_128_policy_reasoning.md b/docs/implplan/SPRINT_128_policy_reasoning.md index 2f76b92a9..62e74adc0 100644 --- a/docs/implplan/SPRINT_128_policy_reasoning.md +++ b/docs/implplan/SPRINT_128_policy_reasoning.md @@ -1,5 +1,7 @@ # Sprint 128 - Policy & Reasoning +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + _Last updated: November 8, 2025. Implementation order is DOING → TODO → BLOCKED._ Focus areas below were split out of the previous combined sprint; execute sections in order unless noted. diff --git a/docs/implplan/SPRINT_132_scanner_surface.md b/docs/implplan/SPRINT_132_scanner_surface.md index 549d2cc65..de3675bcf 100644 --- a/docs/implplan/SPRINT_132_scanner_surface.md +++ b/docs/implplan/SPRINT_132_scanner_surface.md @@ -14,6 +14,8 @@ - docs/modules/platform/architecture-overview.md - src/Scanner/__Libraries/StellaOps.Scanner.Analyzers.Lang.Node/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_133_scanner_surface.md b/docs/implplan/SPRINT_133_scanner_surface.md index 10e52de02..5c9f5375c 100644 --- a/docs/implplan/SPRINT_133_scanner_surface.md +++ b/docs/implplan/SPRINT_133_scanner_surface.md @@ -1,5 +1,7 @@ # Sprint 133 - Scanner & Surface +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Implementation order remains sequential across Sprint 130–139. Complete each sprint in order before pulling tasks from the next file. ## 4. Scanner.IV — Scanner & Surface focus on Scanner (phase IV). diff --git a/docs/implplan/SPRINT_134_scanner_surface.md b/docs/implplan/SPRINT_134_scanner_surface.md index c4ba1da8d..97c23bf63 100644 --- a/docs/implplan/SPRINT_134_scanner_surface.md +++ b/docs/implplan/SPRINT_134_scanner_surface.md @@ -1,5 +1,7 @@ # Sprint 134 - Scanner & Surface +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Implementation order remains sequential across Sprint 130–139. Complete each sprint in order before pulling tasks from the next file. ## 5. Scanner.V — Scanner & Surface focus on Scanner (phase V). diff --git a/docs/implplan/SPRINT_135_scanner_surface.md b/docs/implplan/SPRINT_135_scanner_surface.md index 118c139da..fe9ff18c8 100644 --- a/docs/implplan/SPRINT_135_scanner_surface.md +++ b/docs/implplan/SPRINT_135_scanner_surface.md @@ -1,5 +1,7 @@ # Redirect · Sprint 0135 · Scanner & Surface (Phase VI) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This legacy filename is retained only as a pointer. The authoritative sprint doc is `SPRINT_0135_0001_0001_scanner_surface.md`. - Please update task state and execution logs in `docs/implplan/SPRINT_0135_0001_0001_scanner_surface.md`. diff --git a/docs/implplan/SPRINT_136_scanner_surface.md b/docs/implplan/SPRINT_136_scanner_surface.md index 17ef0896b..1b8cfdb28 100644 --- a/docs/implplan/SPRINT_136_scanner_surface.md +++ b/docs/implplan/SPRINT_136_scanner_surface.md @@ -1,3 +1,5 @@ # Legacy sprint file (redirect) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was renamed to `SPRINT_0136_0001_0001_scanner_surface.md` on 2025-11-19 to comply with the standard filename template. Please update and read the canonical file instead. diff --git a/docs/implplan/SPRINT_144_zastava.md b/docs/implplan/SPRINT_144_zastava.md index bbb225afc..1d406c0cf 100644 --- a/docs/implplan/SPRINT_144_zastava.md +++ b/docs/implplan/SPRINT_144_zastava.md @@ -1,5 +1,7 @@ # Sprint 144 - Runtime & Signals · 140.D) Zastava +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Runtime & Signals] 140.D) Zastava diff --git a/docs/implplan/SPRINT_150_scheduling_automation.md b/docs/implplan/SPRINT_150_scheduling_automation.md index 4662b3679..40d5d3fd0 100644 --- a/docs/implplan/SPRINT_150_scheduling_automation.md +++ b/docs/implplan/SPRINT_150_scheduling_automation.md @@ -1,5 +1,7 @@ # Sprint 150 - Scheduling & Automation +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). This file now only tracks the scheduling & automation status snapshot. Active backlog lives in Sprint 151+ files. diff --git a/docs/implplan/SPRINT_152_orchestrator_ii.md b/docs/implplan/SPRINT_152_orchestrator_ii.md index 82f728658..6fe7fd28e 100644 --- a/docs/implplan/SPRINT_152_orchestrator_ii.md +++ b/docs/implplan/SPRINT_152_orchestrator_ii.md @@ -1,5 +1,7 @@ # Moved: Sprint 0152-0001-0002 · Orchestrator II (Scheduling & Automation) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This legacy filename is retained to avoid broken references. The canonical sprint now lives at `docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md` following the standard naming/template. Do not edit tasks here; update the canonical file only. Status recap (read-only): All ORCH-SVC-32/33/34/35/36/37 tasks are DONE in the canonical sprint document. diff --git a/docs/implplan/SPRINT_154_packsregistry.md b/docs/implplan/SPRINT_154_packsregistry.md index 81be6cf1b..951e51756 100644 --- a/docs/implplan/SPRINT_154_packsregistry.md +++ b/docs/implplan/SPRINT_154_packsregistry.md @@ -1,5 +1,7 @@ # Legacy redirect — Sprint 0154 Packs Registry +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was renamed to `SPRINT_0154_0001_0001_packsregistry.md` on 2025-11-19 to match the standard format. Please update the canonical file instead: diff --git a/docs/implplan/SPRINT_157_taskrunner_i.md b/docs/implplan/SPRINT_157_taskrunner_i.md index d49db8d68..40b8565a3 100644 --- a/docs/implplan/SPRINT_157_taskrunner_i.md +++ b/docs/implplan/SPRINT_157_taskrunner_i.md @@ -1,4 +1,6 @@ # Deprecated Sprint File +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was normalized and renamed to `docs/implplan/SPRINT_0157_0001_0001_taskrunner_i.md`. Please update only the canonical file; this stub remains to prevent divergent edits. (Updated 2025-11-30.) diff --git a/docs/implplan/SPRINT_158_taskrunner_ii.md b/docs/implplan/SPRINT_158_taskrunner_ii.md index 9ca0ba422..2669c7840 100644 --- a/docs/implplan/SPRINT_158_taskrunner_ii.md +++ b/docs/implplan/SPRINT_158_taskrunner_ii.md @@ -1,5 +1,7 @@ # Redirect Notice · Sprint 158 +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was normalized and renamed to `docs/implplan/SPRINT_0158_0001_0002_taskrunner_ii.md` (2025-11-19). Please edit the canonical file only. This legacy filename is retained to prevent divergent updates. diff --git a/docs/implplan/SPRINT_164_exportcenter_iii.md b/docs/implplan/SPRINT_164_exportcenter_iii.md index d8008b7bc..be6da98e7 100644 --- a/docs/implplan/SPRINT_164_exportcenter_iii.md +++ b/docs/implplan/SPRINT_164_exportcenter_iii.md @@ -1,3 +1,5 @@ # Deprecated alias +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Sprint file was renamed to `SPRINT_0164_0001_0001_exportcenter_iii.md` for template compliance on 2025-11-19. Do not edit this file; update the canonical sprint instead. diff --git a/docs/implplan/SPRINT_165_timelineindexer.md b/docs/implplan/SPRINT_165_timelineindexer.md index 6a2f1a993..8765ae56e 100644 --- a/docs/implplan/SPRINT_165_timelineindexer.md +++ b/docs/implplan/SPRINT_165_timelineindexer.md @@ -1,3 +1,5 @@ # Legacy sprint file (redirect) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was renamed to `SPRINT_0165_0001_0001_timelineindexer.md` on 2025-11-19 to meet the standard filename template. Please consult the canonical file for all updates. diff --git a/docs/implplan/SPRINT_170_notifications_telemetry.md b/docs/implplan/SPRINT_170_notifications_telemetry.md index d6a69c160..1c5fbbeb0 100644 --- a/docs/implplan/SPRINT_170_notifications_telemetry.md +++ b/docs/implplan/SPRINT_170_notifications_telemetry.md @@ -1,5 +1,7 @@ # Sprint 170 - Notifications & Telemetry +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). This file now only tracks the notifications & telemetry status snapshot. Active backlog lives in Sprint 171+ files. diff --git a/docs/implplan/SPRINT_171_notifier_i.md b/docs/implplan/SPRINT_171_notifier_i.md index c7c3d8e65..e88c0be5c 100644 --- a/docs/implplan/SPRINT_171_notifier_i.md +++ b/docs/implplan/SPRINT_171_notifier_i.md @@ -1,5 +1,7 @@ # Sprint 171 - Notifications & Telemetry · 170.A) Notifier.I +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Notifications & Telemetry] 170.A) Notifier.I diff --git a/docs/implplan/SPRINT_172_notifier_ii.md b/docs/implplan/SPRINT_172_notifier_ii.md index fd96fd873..53ede7f36 100644 --- a/docs/implplan/SPRINT_172_notifier_ii.md +++ b/docs/implplan/SPRINT_172_notifier_ii.md @@ -1,5 +1,7 @@ # Sprint 172 - Notifications & Telemetry · 170.A) Notifier.II +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Notifications & Telemetry] 170.A) Notifier.II diff --git a/docs/implplan/SPRINT_173_notifier_iii.md b/docs/implplan/SPRINT_173_notifier_iii.md index 1bce32b3b..12656f359 100644 --- a/docs/implplan/SPRINT_173_notifier_iii.md +++ b/docs/implplan/SPRINT_173_notifier_iii.md @@ -1,5 +1,7 @@ # Sprint 173 - Notifications & Telemetry · 170.A) Notifier.III +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Notifications & Telemetry] 170.A) Notifier.III diff --git a/docs/implplan/SPRINT_174_telemetry.md b/docs/implplan/SPRINT_174_telemetry.md index 0e72a63dc..9bb8ce6f7 100644 --- a/docs/implplan/SPRINT_174_telemetry.md +++ b/docs/implplan/SPRINT_174_telemetry.md @@ -1,5 +1,7 @@ # Sprint 174 - Notifications & Telemetry · 170.B) Telemetry +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Notifications & Telemetry] 170.B) Telemetry diff --git a/docs/implplan/SPRINT_185_shared_replay_primitives.md b/docs/implplan/SPRINT_185_shared_replay_primitives.md index f2afeaf79..63824023e 100644 --- a/docs/implplan/SPRINT_185_shared_replay_primitives.md +++ b/docs/implplan/SPRINT_185_shared_replay_primitives.md @@ -1,5 +1,7 @@ # Sprint 185 - Replay Core · 185.A) Shared Replay Primitives +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + [Replay Core] 185.A) Shared Replay Primitives Depends on: Sprint 160 Export & Evidence Summary: Stand up a shared replay library, hashing/cononicalisation helpers, and baseline documentation for deterministic bundles. diff --git a/docs/implplan/SPRINT_186_record_deterministic_execution.md b/docs/implplan/SPRINT_186_record_deterministic_execution.md index 4ad2faf71..4434595d8 100644 --- a/docs/implplan/SPRINT_186_record_deterministic_execution.md +++ b/docs/implplan/SPRINT_186_record_deterministic_execution.md @@ -1,3 +1,5 @@ # Legacy Redirect +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint file was renamed to `SPRINT_0186_0001_0001_record_deterministic_execution.md` on 2025-11-19 to comply with the standard template and naming rules. Do not edit this legacy copy; update the canonical file instead. diff --git a/docs/implplan/SPRINT_187_evidence_locker_cli_integration.md b/docs/implplan/SPRINT_187_evidence_locker_cli_integration.md index b87b28985..c4a1aa180 100644 --- a/docs/implplan/SPRINT_187_evidence_locker_cli_integration.md +++ b/docs/implplan/SPRINT_187_evidence_locker_cli_integration.md @@ -19,6 +19,8 @@ - docs/modules/attestor/architecture.md - docs/replay/DETERMINISTIC_REPLAY.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_200_experience_sdks.md b/docs/implplan/SPRINT_200_experience_sdks.md index 6d93e61b2..7ded5f416 100644 --- a/docs/implplan/SPRINT_200_experience_sdks.md +++ b/docs/implplan/SPRINT_200_experience_sdks.md @@ -1,5 +1,7 @@ # Redirect Notice · Sprint 200 +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was normalized and renamed to `docs/implplan/SPRINT_0200_0001_0001_experience_sdks.md` (2025-11-30). Please edit the canonical file only. This legacy filename is retained to prevent divergent updates. diff --git a/docs/implplan/SPRINT_202_cli_ii.md b/docs/implplan/SPRINT_202_cli_ii.md index 40b2cba63..7c73cc781 100644 --- a/docs/implplan/SPRINT_202_cli_ii.md +++ b/docs/implplan/SPRINT_202_cli_ii.md @@ -1,5 +1,7 @@ # Redirect Notice · Sprint 202 +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was normalized and renamed to `docs/implplan/SPRINT_0202_0001_0001_cli_ii.md` (2025-11-30). Please edit the canonical file only. This legacy filename is retained to prevent divergent updates. diff --git a/docs/implplan/SPRINT_203_cli_iii.md b/docs/implplan/SPRINT_203_cli_iii.md index 5e1fc11f4..cdb941803 100644 --- a/docs/implplan/SPRINT_203_cli_iii.md +++ b/docs/implplan/SPRINT_203_cli_iii.md @@ -1,5 +1,7 @@ # Sprint 203 - Experience & SDKs · 180.A) Cli.III +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Experience & SDKs] 180.A) Cli.III diff --git a/docs/implplan/SPRINT_204_cli_iv.md b/docs/implplan/SPRINT_204_cli_iv.md index c2be47791..7bf01e1e1 100644 --- a/docs/implplan/SPRINT_204_cli_iv.md +++ b/docs/implplan/SPRINT_204_cli_iv.md @@ -1,5 +1,7 @@ # Sprint 204 - Experience & SDKs · 180.A) Cli.IV +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Experience & SDKs] 180.A) Cli.IV diff --git a/docs/implplan/SPRINT_205_cli_v.md b/docs/implplan/SPRINT_205_cli_v.md index 25dd9748e..a91fe245a 100644 --- a/docs/implplan/SPRINT_205_cli_v.md +++ b/docs/implplan/SPRINT_205_cli_v.md @@ -1,5 +1,7 @@ # Sprint 205 - Experience & SDKs · 180.A) Cli.V +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Experience & SDKs] 180.A) Cli.V diff --git a/docs/implplan/SPRINT_215_web_iv.md b/docs/implplan/SPRINT_215_web_iv.md index e19f25aa4..cb8b643c9 100644 --- a/docs/implplan/SPRINT_215_web_iv.md +++ b/docs/implplan/SPRINT_215_web_iv.md @@ -1,3 +1,5 @@ # Sprint 215 Web IV (legacy file) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0215_0001_0001_web_iv.md` and normalized to the standard template on 2025-11-19. Please update links to point to the new file. diff --git a/docs/implplan/SPRINT_300_documentation_process.md b/docs/implplan/SPRINT_300_documentation_process.md index 057e6c38f..87be5d9d0 100644 --- a/docs/implplan/SPRINT_300_documentation_process.md +++ b/docs/implplan/SPRINT_300_documentation_process.md @@ -13,6 +13,8 @@ - `docs/modules/platform/architecture-overview.md` - `docs/README.md` +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Task Board | Stream | Status | Owner(s) | Dependencies | Notes | | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_303_docs_tasks_md_iii.md b/docs/implplan/SPRINT_303_docs_tasks_md_iii.md index be01c7d48..ff79a4bf1 100644 --- a/docs/implplan/SPRINT_303_docs_tasks_md_iii.md +++ b/docs/implplan/SPRINT_303_docs_tasks_md_iii.md @@ -1,5 +1,7 @@ # Sprint 303 - Documentation & Process · 200.A) Docs Tasks.Md.III +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.III diff --git a/docs/implplan/SPRINT_304_docs_tasks_md_iv.md b/docs/implplan/SPRINT_304_docs_tasks_md_iv.md index 2545fe83b..dd1107af2 100644 --- a/docs/implplan/SPRINT_304_docs_tasks_md_iv.md +++ b/docs/implplan/SPRINT_304_docs_tasks_md_iv.md @@ -17,6 +17,8 @@ Active items only. Completed/historic work now resides in `docs/implplan/archive - Module dossiers: `docs/modules/export-center/architecture.md`, `docs/modules/attestor/architecture.md`, `docs/modules/signer/architecture.md`, `docs/modules/telemetry/architecture.md`, `docs/modules/ui/architecture.md` (graph UI tasks). - Sprint template rules in `docs/implplan/AGENTS.md`. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker Task ID | State | Task description | Owners (Source) --- | --- | --- | --- diff --git a/docs/implplan/SPRINT_305_docs_tasks_md_v.md b/docs/implplan/SPRINT_305_docs_tasks_md_v.md index 5f71dbea4..61e613490 100644 --- a/docs/implplan/SPRINT_305_docs_tasks_md_v.md +++ b/docs/implplan/SPRINT_305_docs_tasks_md_v.md @@ -1,5 +1,7 @@ # Sprint 305 - Documentation & Process · 200.A) Docs Tasks.Md.V +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.V diff --git a/docs/implplan/SPRINT_306_docs_tasks_md_vi.md b/docs/implplan/SPRINT_306_docs_tasks_md_vi.md index d6e2f9ff6..b45c22720 100644 --- a/docs/implplan/SPRINT_306_docs_tasks_md_vi.md +++ b/docs/implplan/SPRINT_306_docs_tasks_md_vi.md @@ -1,5 +1,7 @@ # Sprint 306 - Documentation & Process · 200.A) Docs Tasks.Md.VI +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.VI diff --git a/docs/implplan/SPRINT_307_docs_tasks_md_vii.md b/docs/implplan/SPRINT_307_docs_tasks_md_vii.md index 580b624d2..126ac5cf1 100644 --- a/docs/implplan/SPRINT_307_docs_tasks_md_vii.md +++ b/docs/implplan/SPRINT_307_docs_tasks_md_vii.md @@ -1,5 +1,7 @@ # Sprint 307 - Documentation & Process · 200.A) Docs Tasks.Md.VII +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.VII diff --git a/docs/implplan/SPRINT_308_docs_tasks_md_viii.md b/docs/implplan/SPRINT_308_docs_tasks_md_viii.md index c1db6f88f..4f12c0880 100644 --- a/docs/implplan/SPRINT_308_docs_tasks_md_viii.md +++ b/docs/implplan/SPRINT_308_docs_tasks_md_viii.md @@ -1,5 +1,7 @@ # Sprint 308 - Documentation & Process · 200.A) Docs Tasks.Md.VIII +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.VIII diff --git a/docs/implplan/SPRINT_309_docs_tasks_md_ix.md b/docs/implplan/SPRINT_309_docs_tasks_md_ix.md index 0b1f0a8b7..81bf0996f 100644 --- a/docs/implplan/SPRINT_309_docs_tasks_md_ix.md +++ b/docs/implplan/SPRINT_309_docs_tasks_md_ix.md @@ -1,5 +1,7 @@ # Sprint 309 - Documentation & Process · 200.A) Docs Tasks.Md.IX +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.IX diff --git a/docs/implplan/SPRINT_310_docs_tasks_md_x.md b/docs/implplan/SPRINT_310_docs_tasks_md_x.md index 1a507f31d..746f540b0 100644 --- a/docs/implplan/SPRINT_310_docs_tasks_md_x.md +++ b/docs/implplan/SPRINT_310_docs_tasks_md_x.md @@ -1,5 +1,7 @@ # Sprint 310 - Documentation & Process · 200.A) Docs Tasks.Md.X +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.X diff --git a/docs/implplan/SPRINT_311_docs_tasks_md_xi.md b/docs/implplan/SPRINT_311_docs_tasks_md_xi.md index 76459daee..cdf94674a 100644 --- a/docs/implplan/SPRINT_311_docs_tasks_md_xi.md +++ b/docs/implplan/SPRINT_311_docs_tasks_md_xi.md @@ -1,5 +1,7 @@ # Sprint 311 - Documentation & Process · 200.A) Docs Tasks.Md.XI +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.A) Docs Tasks.Md.XI diff --git a/docs/implplan/SPRINT_312_docs_modules_advisory_ai.md b/docs/implplan/SPRINT_312_docs_modules_advisory_ai.md index ea34c9db4..88a709130 100644 --- a/docs/implplan/SPRINT_312_docs_modules_advisory_ai.md +++ b/docs/implplan/SPRINT_312_docs_modules_advisory_ai.md @@ -1,5 +1,7 @@ # Sprint 312 - Documentation & Process · 200.B) Docs Modules Advisory Ai +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.B) Docs Modules Advisory Ai diff --git a/docs/implplan/SPRINT_313_docs_modules_attestor.md b/docs/implplan/SPRINT_313_docs_modules_attestor.md index 47c96bf2b..be6098258 100644 --- a/docs/implplan/SPRINT_313_docs_modules_attestor.md +++ b/docs/implplan/SPRINT_313_docs_modules_attestor.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0313_0001_0001_docs_modules_attestor.md` to comply with the standard template. Update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_314_docs_modules_authority.md b/docs/implplan/SPRINT_314_docs_modules_authority.md index 1baebc7c7..2d83e8d9a 100644 --- a/docs/implplan/SPRINT_314_docs_modules_authority.md +++ b/docs/implplan/SPRINT_314_docs_modules_authority.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0314_0001_0001_docs_modules_authority.md` to comply with the standard template. Update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_315_docs_modules_ci.md b/docs/implplan/SPRINT_315_docs_modules_ci.md index cb7d828c7..f09bcaefc 100644 --- a/docs/implplan/SPRINT_315_docs_modules_ci.md +++ b/docs/implplan/SPRINT_315_docs_modules_ci.md @@ -1,3 +1,5 @@ # Moved +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint was renamed for template compliance. Please use `docs/implplan/SPRINT_0315_0001_0001_docs_modules_ci.md`. diff --git a/docs/implplan/SPRINT_318_docs_modules_devops.md b/docs/implplan/SPRINT_318_docs_modules_devops.md index 93caabe77..9bea8afa0 100644 --- a/docs/implplan/SPRINT_318_docs_modules_devops.md +++ b/docs/implplan/SPRINT_318_docs_modules_devops.md @@ -1,5 +1,7 @@ # Sprint 318 - Documentation & Process · 200.H) Docs Modules Devops +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.H) Docs Modules Devops diff --git a/docs/implplan/SPRINT_319_docs_modules_excititor.md b/docs/implplan/SPRINT_319_docs_modules_excititor.md index 3a2f9b4a3..36954dd36 100644 --- a/docs/implplan/SPRINT_319_docs_modules_excititor.md +++ b/docs/implplan/SPRINT_319_docs_modules_excititor.md @@ -1,5 +1,7 @@ # Sprint 319 - Documentation & Process · 200.I) Docs Modules Excititor +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.I) Docs Modules Excititor diff --git a/docs/implplan/SPRINT_320_docs_modules_export_center.md b/docs/implplan/SPRINT_320_docs_modules_export_center.md index 361365dd6..a09f13a14 100644 --- a/docs/implplan/SPRINT_320_docs_modules_export_center.md +++ b/docs/implplan/SPRINT_320_docs_modules_export_center.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0320_0001_0001_docs_modules_export_center.md` to comply with the standard template. Update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_322_docs_modules_notify.md b/docs/implplan/SPRINT_322_docs_modules_notify.md index 7f528614a..05b07273d 100644 --- a/docs/implplan/SPRINT_322_docs_modules_notify.md +++ b/docs/implplan/SPRINT_322_docs_modules_notify.md @@ -1,5 +1,7 @@ # Sprint 322 - Documentation & Process · 200.L) Docs Modules Notify +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.L) Docs Modules Notify diff --git a/docs/implplan/SPRINT_324_docs_modules_platform.md b/docs/implplan/SPRINT_324_docs_modules_platform.md index b9e1f5b67..7a7d1bf2c 100644 --- a/docs/implplan/SPRINT_324_docs_modules_platform.md +++ b/docs/implplan/SPRINT_324_docs_modules_platform.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0324_0001_0001_docs_modules_platform.md` to comply with the standard template. Update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_325_docs_modules_policy.md b/docs/implplan/SPRINT_325_docs_modules_policy.md index b5db96b9c..c58418cae 100644 --- a/docs/implplan/SPRINT_325_docs_modules_policy.md +++ b/docs/implplan/SPRINT_325_docs_modules_policy.md @@ -1,5 +1,7 @@ # Sprint 325 - Documentation & Process · 200.O) Docs Modules Policy +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.O) Docs Modules Policy diff --git a/docs/implplan/SPRINT_326_docs_modules_registry.md b/docs/implplan/SPRINT_326_docs_modules_registry.md index 073da2e6e..981ce7718 100644 --- a/docs/implplan/SPRINT_326_docs_modules_registry.md +++ b/docs/implplan/SPRINT_326_docs_modules_registry.md @@ -1,5 +1,7 @@ # Sprint 326 - Documentation & Process · 200.P) Docs Modules Registry +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). [Documentation & Process] 200.P) Docs Modules Registry diff --git a/docs/implplan/SPRINT_327_docs_modules_scanner.md b/docs/implplan/SPRINT_327_docs_modules_scanner.md index 9d5d28d8b..685be58d7 100644 --- a/docs/implplan/SPRINT_327_docs_modules_scanner.md +++ b/docs/implplan/SPRINT_327_docs_modules_scanner.md @@ -1,3 +1,5 @@ # Redirect +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint file was renamed to `SPRINT_0327_0001_0001_docs_modules_scanner.md` to comply with naming rules. Please edit the canonical file. diff --git a/docs/implplan/SPRINT_329_docs_modules_signer.md b/docs/implplan/SPRINT_329_docs_modules_signer.md index cd2ba20f4..a93c6c035 100644 --- a/docs/implplan/SPRINT_329_docs_modules_signer.md +++ b/docs/implplan/SPRINT_329_docs_modules_signer.md @@ -1,13 +1,15 @@ -# Sprint 329 - Documentation & Process · 200.S) Docs Modules Signer - -Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). - -[Documentation & Process] 200.S) Docs Modules Signer -Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment -Summary: Documentation & Process focus on Docs Modules Signer). -Task ID | State | Task description | Owners (Source) ---- | --- | --- | --- -SIGNER-DOCS-0001 | DONE (2025-11-05) | Validate that `docs/modules/signer/README.md` captures the latest DSSE/fulcio updates. | Docs Guild (docs/modules/signer) -SIGNER-OPS-0001 | TODO | Review signer runbooks/observability assets after next sprint demo. | Ops Guild (docs/modules/signer) -SIGNER-ENG-0001 | DONE (2025-11-27) | Keep module milestones aligned with signer sprints under `/docs/implplan`. Added Sprint Readiness Tracker to `docs/modules/signer/implementation_plan.md` mapping 4 phases to 17+ sprint tasks across Sprints 100, 186, 401, 513, 514. Updated README with Sprint 0186/0401 completed tasks (SIGN-CORE-186-004/005, SIGN-TEST-186-006, SIGN-VEX-401-018). | Module Team (docs/modules/signer) -SIGNER-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/signer) +# Sprint 329 - Documentation & Process · 200.S) Docs Modules Signer + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). + +[Documentation & Process] 200.S) Docs Modules Signer +Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - AirGap, Sprint 130.A - Scanner, Sprint 140.A - Graph, Sprint 150.A - Orchestrator, Sprint 160.A - EvidenceLocker, Sprint 170.A - Notifier, Sprint 180.A - Cli, Sprint 190.A - Ops Deployment +Summary: Documentation & Process focus on Docs Modules Signer). +Task ID | State | Task description | Owners (Source) +--- | --- | --- | --- +SIGNER-DOCS-0001 | DONE (2025-11-05) | Validate that `docs/modules/signer/README.md` captures the latest DSSE/fulcio updates. | Docs Guild (docs/modules/signer) +SIGNER-OPS-0001 | TODO | Review signer runbooks/observability assets after next sprint demo. | Ops Guild (docs/modules/signer) +SIGNER-ENG-0001 | DONE (2025-11-27) | Keep module milestones aligned with signer sprints under `/docs/implplan`. Added Sprint Readiness Tracker to `docs/modules/signer/implementation_plan.md` mapping 4 phases to 17+ sprint tasks across Sprints 100, 186, 401, 513, 514. Updated README with Sprint 0186/0401 completed tasks (SIGN-CORE-186-004/005, SIGN-TEST-186-006, SIGN-VEX-401-018). | Module Team (docs/modules/signer) +SIGNER-OPS-0001 | TODO | Sync outcomes back to ../.. | Ops Guild (docs/modules/signer) diff --git a/docs/implplan/SPRINT_330_docs_modules_telemetry.md b/docs/implplan/SPRINT_330_docs_modules_telemetry.md index 726080931..a618c9790 100644 --- a/docs/implplan/SPRINT_330_docs_modules_telemetry.md +++ b/docs/implplan/SPRINT_330_docs_modules_telemetry.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0330_0001_0001_docs_modules_telemetry.md` to comply with the standard template. Update any links accordingly. diff --git a/docs/implplan/SPRINT_331_docs_modules_ui.md b/docs/implplan/SPRINT_331_docs_modules_ui.md index f7bf56ee1..45670f38d 100644 --- a/docs/implplan/SPRINT_331_docs_modules_ui.md +++ b/docs/implplan/SPRINT_331_docs_modules_ui.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0331_0001_0001_docs_modules_ui.md` to meet the standard template. Update any links accordingly. diff --git a/docs/implplan/SPRINT_332_docs_modules_vex_lens.md b/docs/implplan/SPRINT_332_docs_modules_vex_lens.md index e3bad8647..85c7af1c4 100644 --- a/docs/implplan/SPRINT_332_docs_modules_vex_lens.md +++ b/docs/implplan/SPRINT_332_docs_modules_vex_lens.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0332_0001_0001_docs_modules_vex_lens.md` for template compliance. Please update bookmarks accordingly. diff --git a/docs/implplan/SPRINT_333_docs_modules_excititor.md b/docs/implplan/SPRINT_333_docs_modules_excititor.md index a9bb3b3c9..0756a01a7 100644 --- a/docs/implplan/SPRINT_333_docs_modules_excititor.md +++ b/docs/implplan/SPRINT_333_docs_modules_excititor.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0333_0001_0001_docs_modules_excititor.md` to comply with the standard template. Update any links accordingly. diff --git a/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md b/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md index 5dc8a6065..1d7f1ec3c 100644 --- a/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md +++ b/docs/implplan/SPRINT_334_docs_modules_vuln_explorer.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0334_0001_0001_docs_modules_vuln_explorer.md` to align with the standard naming template. Please update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_335_docs_modules_zastava.md b/docs/implplan/SPRINT_335_docs_modules_zastava.md index cc5f9f912..8b5f7ff7c 100644 --- a/docs/implplan/SPRINT_335_docs_modules_zastava.md +++ b/docs/implplan/SPRINT_335_docs_modules_zastava.md @@ -1,3 +1,5 @@ # Moved sprint file +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + This sprint has been renamed to `SPRINT_0335_0001_0001_docs_modules_zastava.md` to align with the standard template. Please update any bookmarks accordingly. diff --git a/docs/implplan/SPRINT_3400_0000_0000_postgres_conversion_overview.md b/docs/implplan/SPRINT_3400_0000_0000_postgres_conversion_overview.md index 5db0acd8e..f7b6c7dd0 100644 --- a/docs/implplan/SPRINT_3400_0000_0000_postgres_conversion_overview.md +++ b/docs/implplan/SPRINT_3400_0000_0000_postgres_conversion_overview.md @@ -1,5 +1,7 @@ # PostgreSQL Conversion Project Overview +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Project Summary **Objective:** Convert StellaOps control-plane domains from MongoDB to PostgreSQL using a strangler fig pattern for gradual rollout. diff --git a/docs/implplan/SPRINT_3400_0001_0001_postgres_foundations.md b/docs/implplan/SPRINT_3400_0001_0001_postgres_foundations.md index 05efe5930..aeb1f47c5 100644 --- a/docs/implplan/SPRINT_3400_0001_0001_postgres_foundations.md +++ b/docs/implplan/SPRINT_3400_0001_0001_postgres_foundations.md @@ -19,6 +19,8 @@ - docs/db/VERIFICATION.md - docs/db/CONVERSION_PLAN.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_3401_0001_0001_postgres_authority.md b/docs/implplan/SPRINT_3401_0001_0001_postgres_authority.md index c130d22e5..444cc5cfd 100644 --- a/docs/implplan/SPRINT_3401_0001_0001_postgres_authority.md +++ b/docs/implplan/SPRINT_3401_0001_0001_postgres_authority.md @@ -18,6 +18,8 @@ - docs/db/RULES.md - src/Authority/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_3402_0001_0001_postgres_scheduler.md b/docs/implplan/SPRINT_3402_0001_0001_postgres_scheduler.md index 688f2be3d..28097dc3a 100644 --- a/docs/implplan/SPRINT_3402_0001_0001_postgres_scheduler.md +++ b/docs/implplan/SPRINT_3402_0001_0001_postgres_scheduler.md @@ -18,6 +18,8 @@ - docs/db/RULES.md - src/Scheduler/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_3403_0001_0001_postgres_notify.md b/docs/implplan/SPRINT_3403_0001_0001_postgres_notify.md index e859d618e..234bf982f 100644 --- a/docs/implplan/SPRINT_3403_0001_0001_postgres_notify.md +++ b/docs/implplan/SPRINT_3403_0001_0001_postgres_notify.md @@ -20,6 +20,8 @@ - src/Notify/AGENTS.md - src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_3404_0001_0001_postgres_policy.md b/docs/implplan/SPRINT_3404_0001_0001_postgres_policy.md index 342917db7..e06d5d965 100644 --- a/docs/implplan/SPRINT_3404_0001_0001_postgres_policy.md +++ b/docs/implplan/SPRINT_3404_0001_0001_postgres_policy.md @@ -18,6 +18,8 @@ - docs/db/RULES.md - src/Policy/AGENTS.md (if exists) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_3405_0001_0001_postgres_vulnerabilities.md b/docs/implplan/SPRINT_3405_0001_0001_postgres_vulnerabilities.md index 3023d4fe8..6002ee339 100644 --- a/docs/implplan/SPRINT_3405_0001_0001_postgres_vulnerabilities.md +++ b/docs/implplan/SPRINT_3405_0001_0001_postgres_vulnerabilities.md @@ -18,6 +18,8 @@ - docs/db/RULES.md - src/Concelier/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker ### Sprint 5a: Schema & Repositories diff --git a/docs/implplan/SPRINT_3406_0001_0001_postgres_vex_graph.md b/docs/implplan/SPRINT_3406_0001_0001_postgres_vex_graph.md index adff8bc1e..e45311eb5 100644 --- a/docs/implplan/SPRINT_3406_0001_0001_postgres_vex_graph.md +++ b/docs/implplan/SPRINT_3406_0001_0001_postgres_vex_graph.md @@ -20,6 +20,8 @@ - docs/modules/platform/architecture-overview.md - src/Excititor/AGENTS.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Wave Coordination | Wave | Scope | Exit gate | Notes | | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md b/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md index 4ea2e6ca7..17841d5b5 100644 --- a/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md +++ b/docs/implplan/SPRINT_3407_0001_0001_postgres_cleanup.md @@ -24,6 +24,8 @@ - docs/db/VERIFICATION.md - All module AGENTS.md files +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker ### T7.1: Remove MongoDB Dependencies diff --git a/docs/implplan/SPRINT_3408_0001_0001_postgres_migration_lifecycle.md b/docs/implplan/SPRINT_3408_0001_0001_postgres_migration_lifecycle.md new file mode 100644 index 000000000..d22356c46 --- /dev/null +++ b/docs/implplan/SPRINT_3408_0001_0001_postgres_migration_lifecycle.md @@ -0,0 +1,272 @@ +# Sprint 3408 · PostgreSQL Migration Lifecycle Implementation + +## Topic & Scope +- Implement the PostgreSQL migration strategy defined in `docs/db/MIGRATION_STRATEGY.md`. +- Add startup migration hosts to all modules with advisory lock coordination. +- Create CLI tooling for manual/release migrations. +- Integrate migration status into health checks. +- **Working directory:** src/__Libraries/StellaOps.Infrastructure.Postgres/Migrations (core), src/*/WebService (module integration) + +## Dependencies & Concurrency +- Upstream: Sprint 3400 (Phase 0 - Foundations) must be DONE. +- Upstream: All module PostgreSQL storage libraries must exist (Phases 1-5). +- Concurrency: Can run in parallel with data migration tasks (PG-T5b.3.2, PG-T5b.4.4). +- Reference: `docs/db/MIGRATION_STRATEGY.md` + +## Documentation Prerequisites +- docs/db/MIGRATION_STRATEGY.md +- docs/db/SPECIFICATION.md +- docs/db/RULES.md +- Existing module migration files in `src/*/Storage.Postgres/Migrations/` + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +### Wave 1: Core Infrastructure Enhancement +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 1 | MIG-T1.1 | DONE | Completed 2025-12-03 | Infrastructure | Create `MigrationCategory.cs` enum with filename parsing | +| 2 | MIG-T1.2 | DONE | Completed 2025-12-03 | Infrastructure | Create `StartupMigrationHost.cs` with advisory locks | +| 3 | MIG-T1.3 | DONE | Completed 2025-12-03 | Infrastructure | Create `MigrationServiceExtensions.cs` for DI | +| 4 | MIG-T1.4 | DONE | Completed 2025-12-03 | Infrastructure | Update `MigrationRunner.cs` to support category filtering | +| 5 | MIG-T1.5 | DONE | Completed 2025-12-03 | Infrastructure | Add checksum validation to existing `MigrationRunner` | +| 6 | MIG-T1.6 | DONE | Completed 2025-12-03 | Infrastructure | Create `IMigrationRunner` interface for testability | +| 7 | MIG-T1.7 | DONE | Completed 2025-12-03 | Infrastructure | Write unit tests for `MigrationCategory` (54 tests) | +| 8 | MIG-T1.8 | DONE | Completed 2025-12-03 | Infrastructure | Write integration tests for `StartupMigrationHost` (13 tests) | + +### Wave 2: CLI Tooling +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 9 | MIG-T2.1 | DONE | Completed 2025-12-03 | CLI | Create `system migrations-run` command structure | +| 10 | MIG-T2.2 | DONE | Completed 2025-12-03 | CLI | Implement `--module` filter for targeted migrations | +| 11 | MIG-T2.3 | DONE | Completed 2025-12-03 | CLI | Implement `--category` filter (startup/release/seed/data) | +| 12 | MIG-T2.4 | DONE | Completed 2025-12-03 | CLI | Implement `--dry-run` mode | +| 13 | MIG-T2.5 | DONE | Completed 2025-12-03 | CLI | Create `system migrations-status` command | +| 14 | MIG-T2.6 | DONE | Completed 2025-12-03 | CLI | Implement `--all` flag for cross-module status | +| 15 | MIG-T2.7 | DONE | Completed 2025-12-03 | CLI | Create `system migrations-verify` command | +| 16 | MIG-T2.8 | TODO | Depends on Scanner build fixes | CLI | Write CLI integration tests | + +### Wave 3: Module Integration - Authority +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 17 | MIG-T3.1 | TODO | Depends on MIG-T1.3 | Authority | Update Authority.Storage.Postgres.csproj with EmbeddedResource | +| 18 | MIG-T3.2 | TODO | Depends on MIG-T3.1 | Authority | Rename migrations to follow naming convention (001-099) | +| 19 | MIG-T3.3 | TODO | Depends on MIG-T3.2 | Authority | Register `AddStartupMigrations` in Authority.WebService | +| 20 | MIG-T3.4 | TODO | Depends on MIG-T3.3 | Authority | Add migration status health check | +| 21 | MIG-T3.5 | TODO | Depends on MIG-T3.4 | Authority | Test startup migration in Authority.WebService.Tests | + +### Wave 4: Module Integration - Scheduler +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 22 | MIG-T4.1 | TODO | Depends on MIG-T1.3 | Scheduler | Update Scheduler.Storage.Postgres.csproj with EmbeddedResource | +| 23 | MIG-T4.2 | TODO | Depends on MIG-T4.1 | Scheduler | Rename migrations to follow naming convention | +| 24 | MIG-T4.3 | TODO | Depends on MIG-T4.2 | Scheduler | Register `AddStartupMigrations` in Scheduler.WebService | +| 25 | MIG-T4.4 | TODO | Depends on MIG-T4.3 | Scheduler | Add migration status health check | +| 26 | MIG-T4.5 | TODO | Depends on MIG-T4.4 | Scheduler | Test startup migration in Scheduler.WebService.Tests | + +### Wave 5: Module Integration - Concelier +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 27 | MIG-T5.1 | TODO | Depends on MIG-T1.3 | Concelier | Update Concelier.Storage.Postgres.csproj with EmbeddedResource | +| 28 | MIG-T5.2 | TODO | Depends on MIG-T5.1 | Concelier | Rename migrations to follow naming convention | +| 29 | MIG-T5.3 | TODO | Depends on MIG-T5.2 | Concelier | Register `AddStartupMigrations` in Concelier.WebService | +| 30 | MIG-T5.4 | TODO | Depends on MIG-T5.3 | Concelier | Add migration status health check | +| 31 | MIG-T5.5 | TODO | Depends on MIG-T5.4 | Concelier | Test startup migration in Concelier.WebService.Tests | + +### Wave 6: Module Integration - Policy +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 32 | MIG-T6.1 | TODO | Depends on MIG-T1.3 | Policy | Update Policy.Storage.Postgres.csproj with EmbeddedResource | +| 33 | MIG-T6.2 | TODO | Depends on MIG-T6.1 | Policy | Rename migrations to follow naming convention | +| 34 | MIG-T6.3 | TODO | Depends on MIG-T6.2 | Policy | Register `AddStartupMigrations` in Policy.Engine | +| 35 | MIG-T6.4 | TODO | Depends on MIG-T6.3 | Policy | Add migration status health check | +| 36 | MIG-T6.5 | TODO | Depends on MIG-T6.4 | Policy | Test startup migration in Policy.Engine.Tests | + +### Wave 7: Module Integration - Notify +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 37 | MIG-T7.1 | TODO | Depends on MIG-T1.3 | Notify | Update Notify.Storage.Postgres.csproj with EmbeddedResource | +| 38 | MIG-T7.2 | TODO | Depends on MIG-T7.1 | Notify | Rename migrations to follow naming convention | +| 39 | MIG-T7.3 | TODO | Depends on MIG-T7.2 | Notify | Register `AddStartupMigrations` in Notify.WebService | +| 40 | MIG-T7.4 | TODO | Depends on MIG-T7.3 | Notify | Add migration status health check | +| 41 | MIG-T7.5 | TODO | Depends on MIG-T7.4 | Notify | Test startup migration in Notify.WebService.Tests | + +### Wave 8: Module Integration - Excititor +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 42 | MIG-T8.1 | TODO | Depends on MIG-T1.3 | Excititor | Update Excititor.Storage.Postgres.csproj with EmbeddedResource | +| 43 | MIG-T8.2 | TODO | Depends on MIG-T8.1 | Excititor | Rename migrations to follow naming convention | +| 44 | MIG-T8.3 | TODO | Depends on MIG-T8.2 | Excititor | Register `AddStartupMigrations` in Excititor.WebService | +| 45 | MIG-T8.4 | TODO | Depends on MIG-T8.3 | Excititor | Add migration status health check | +| 46 | MIG-T8.5 | TODO | Depends on MIG-T8.4 | Excititor | Test startup migration in Excititor.WebService.Tests | + +### Wave 9: Verification & Documentation +| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | +| --- | --- | --- | --- | --- | --- | +| 47 | MIG-T9.1 | TODO | Depends on Waves 3-8 | QA | End-to-end test: multi-instance startup race condition | +| 48 | MIG-T9.2 | TODO | Depends on MIG-T9.1 | QA | End-to-end test: release migration blocking startup | +| 49 | MIG-T9.3 | TODO | Depends on MIG-T9.1 | QA | End-to-end test: checksum mismatch detection | +| 50 | MIG-T9.4 | TODO | Depends on MIG-T2.8 | Docs | Update CLI reference documentation | +| 51 | MIG-T9.5 | TODO | Depends on MIG-T9.4 | Docs | Create runbook for migration operations | +| 52 | MIG-T9.6 | TODO | Depends on MIG-T9.5 | DevOps | Add migration status to Grafana dashboards | + +## Wave Coordination + +``` +Wave 1 (Core Infrastructure) ─┬─► Wave 2 (CLI Tooling) ──────────────────────┐ + │ │ + ├─► Wave 3 (Authority) ─┐ │ + ├─► Wave 4 (Scheduler) ─┤ │ + ├─► Wave 5 (Concelier) ─┼─► Wave 9 (Verification) + ├─► Wave 6 (Policy) ────┤ + ├─► Wave 7 (Notify) ────┤ + └─► Wave 8 (Excititor) ─┘ +``` + +- Wave 1 must complete before any other wave. +- Waves 3-8 can run in parallel after Wave 1. +- Wave 2 can run in parallel with Waves 3-8. +- Wave 9 requires all other waves to complete. + +## Interlocks + +- **Sprint 3400**: Migration infrastructure depends on `StellaOps.Infrastructure.Postgres` from Phase 0. +- **Sprints 3401-3406**: Module integration (Waves 3-8) requires respective module storage libraries. +- **DevOps**: Health check integration requires coordination with monitoring infrastructure. +- **CI/CD**: CLI commands must be available in deployment pipelines before Wave 9. + +## Exit Criteria + +- [ ] All modules have startup migrations with advisory locks +- [ ] CLI provides `db migrate`, `db status`, `db verify` commands +- [ ] Multi-instance race conditions are prevented +- [ ] Checksum validation catches modified migrations +- [ ] Release migrations block startup until manually applied +- [ ] Health checks expose migration status +- [ ] Documentation complete for operators + +## Task Details + +### MIG-T1.4: Update MigrationRunner with Category Support + +```csharp +// Add to MigrationRunner.cs +public async Task RunAsync( + string migrationsPath, + MigrationCategory? categoryFilter = null, + CancellationToken cancellationToken = default) +{ + // Filter migrations by category before execution +} +``` + +### MIG-T2.1: CLI db migrate Command + +```bash +# Command structure +stellaops db migrate [options] + +Options: + --module Target specific module (Authority, Scheduler, etc.) + --category Filter by category (startup, release, seed, data) + --dry-run Show what would be executed without applying + --connection Override connection string + --timeout Migration timeout (default: 300) + --force Skip confirmation for release migrations +``` + +### MIG-T3.3: Authority Integration Example + +```csharp +// In Authority.WebService/Program.cs or ServiceCollectionExtensions.cs +public static IServiceCollection AddAuthorityPostgres( + this IServiceCollection services, + IConfiguration configuration) +{ + services.Configure( + configuration.GetSection("Authority:Storage:Postgres")); + + // Register repositories... + + // Add startup migrations + services.AddStartupMigrations( + schemaName: "auth", + moduleName: "Authority", + migrationsAssembly: typeof(AuthorityDataSource).Assembly, + connectionStringSelector: opts => opts.ConnectionString, + configureOptions: opts => + { + opts.FailOnPendingReleaseMigrations = true; + opts.LockTimeoutSeconds = 120; + }); + + // Add health check + services.AddHealthChecks() + .AddCheck("authority-migrations"); + + return services; +} +``` + +### MIG-T9.1: Multi-Instance Race Condition Test + +```csharp +[Fact] +public async Task MultipleInstances_ShouldNotApplyMigrationsTwice() +{ + // Start 5 instances simultaneously + var tasks = Enumerable.Range(0, 5) + .Select(_ => StartApplicationAsync()) + .ToArray(); + + await Task.WhenAll(tasks); + + // Verify migration was applied exactly once + var appliedCount = await GetMigrationAppliedCountAsync("001_initial_schema.sql"); + Assert.Equal(1, appliedCount); +} +``` + +## Decisions & Risks + +| Risk | Impact | Mitigation | Owner | Status | +| --- | --- | --- | --- | --- | +| Advisory lock contention in large deployments | Startup delays | Implement exponential backoff, configurable timeout | Infrastructure | Open | +| Checksum drift from line-ending differences | False positives on validation | Normalize line endings before checksum | Infrastructure | Mitigated (implemented) | +| CLI not available in air-gapped deployments | Cannot run release migrations | Embed CLI in container images | DevOps | Open | +| Module startup order dependencies | Schema creation race | Each module creates its own schema independently | Infrastructure | Mitigated | + +## Action Tracker + +| # | Action | Owner | Due | Status | Notes | +| --- | --- | --- | --- | --- | --- | +| 1 | Complete Wave 1 infrastructure tasks | Infrastructure | TBD | DONE | MIG-T1.1-T1.8 complete | +| 2 | Design CLI command structure | CLI Team | TBD | TODO | Coordinate with existing CLI patterns | +| 3 | Identify migration file renames per module | All Guilds | TBD | TODO | Audit existing migrations | +| 4 | Coordinate health check endpoints | DevOps | TBD | TODO | Align with existing /health patterns | + +## Execution Log + +| Date (UTC) | Update | Owner | +| --- | --- | --- | +| 2025-12-03 | Sprint file created | Claude | +| 2025-12-03 | Completed MIG-T1.1: MigrationCategory.cs created | Claude | +| 2025-12-03 | Completed MIG-T1.2: StartupMigrationHost.cs created with advisory locks | Claude | +| 2025-12-03 | Completed MIG-T1.3: MigrationServiceExtensions.cs created for DI | Claude | +| 2025-12-03 | Added Microsoft.Extensions.Hosting.Abstractions to Infrastructure.Postgres | Claude | +| 2025-12-03 | Completed MIG-T1.4: Updated MigrationRunner.cs with category filtering and MigrationRunOptions | Claude | +| 2025-12-03 | Completed MIG-T1.5: Added checksum validation to MigrationRunner (ValidateChecksumsAsync) | Claude | +| 2025-12-03 | Completed MIG-T1.6: Created IMigrationRunner interface with full API | Claude | +| 2025-12-03 | Completed MIG-T1.7: Created MigrationCategoryTests.cs (54 unit tests) | Claude | +| 2025-12-03 | Completed MIG-T1.8: Created StartupMigrationHostTests.cs (13 integration tests) | Claude | +| 2025-12-03 | Wave 1 COMPLETE - all core infrastructure tasks done | Claude | +| 2025-12-03 | Completed MIG-T2.1-T2.7: Created `system migrations-*` CLI commands | Claude | +| 2025-12-03 | Created MigrationModuleRegistry.cs with all 6 module definitions | Claude | +| 2025-12-03 | Created SystemCommandHandlers.cs for migrations-run/status/verify | Claude | +| 2025-12-03 | Added BuildSystemCommand to CommandFactory.cs | Claude | +| 2025-12-03 | Added Storage.Postgres references to CLI project | Claude | +| 2025-12-03 | Note: CLI build blocked by pre-existing Scanner module errors | Claude | + +--- +*Reference: docs/db/MIGRATION_STRATEGY.md* diff --git a/docs/implplan/SPRINT_500_ops_offline.md b/docs/implplan/SPRINT_500_ops_offline.md index 774c5f2a2..a7a6df73f 100644 --- a/docs/implplan/SPRINT_500_ops_offline.md +++ b/docs/implplan/SPRINT_500_ops_offline.md @@ -1,5 +1,7 @@ # Sprint 500 - Ops & Offline +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + Active items only. Completed/historic work now resides in docs/implplan/archived/tasks.md (updated 2025-11-08). This file now only tracks the Ops & Offline status snapshot. Active backlog lives in Sprint 501 and later files. diff --git a/docs/implplan/SPRINT_501_ops_deployment_i.md b/docs/implplan/SPRINT_501_ops_deployment_i.md index 5c60248a6..b5337ef8d 100644 --- a/docs/implplan/SPRINT_501_ops_deployment_i.md +++ b/docs/implplan/SPRINT_501_ops_deployment_i.md @@ -18,6 +18,8 @@ Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - A - docs/modules/ci/architecture.md - docs/airgap/** (for mirror/import tasks) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | Task ID | State | Task description | Owners (Source) | | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_502_ops_deployment_ii.md b/docs/implplan/SPRINT_502_ops_deployment_ii.md index 5213f3162..6daab9a57 100644 --- a/docs/implplan/SPRINT_502_ops_deployment_ii.md +++ b/docs/implplan/SPRINT_502_ops_deployment_ii.md @@ -15,6 +15,8 @@ - docs/modules/platform/architecture-overview.md - Any module-specific runbooks referenced by tasks (policy, VEX Lens, Findings Ledger). +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_503_ops_devops_i.md b/docs/implplan/SPRINT_503_ops_devops_i.md index 00c19cb9f..2332009ae 100644 --- a/docs/implplan/SPRINT_503_ops_devops_i.md +++ b/docs/implplan/SPRINT_503_ops_devops_i.md @@ -19,6 +19,8 @@ Depends on: Sprint 100.A - Attestor, Sprint 110.A - AdvisoryAI, Sprint 120.A - A - docs/modules/ci/architecture.md - docs/airgap/** (for sealed-mode tasks) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | Task ID | State | Task description | Owners (Source) | | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_504_ops_devops_ii.log.md b/docs/implplan/SPRINT_504_ops_devops_ii.log.md index 499fe9761..11e514725 100644 --- a/docs/implplan/SPRINT_504_ops_devops_ii.log.md +++ b/docs/implplan/SPRINT_504_ops_devops_ii.log.md @@ -1,4 +1,6 @@ ## Execution Log (addendum) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + | Date (UTC) | Update | Owner | | --- | --- | --- | | 2025-11-24 | Completed DEVOPS-CONTAINERS-44-001: added buildx multi-arch script (`scripts/buildx/build-multiarch.sh`) with SBOM + optional cosign signing, and workflow `.gitea/workflows/containers-multiarch.yml` for manual dispatch. | Implementer | diff --git a/docs/implplan/SPRINT_505_ops_devops_iii.md b/docs/implplan/SPRINT_505_ops_devops_iii.md index f9dc19c1b..1ac2fe070 100644 --- a/docs/implplan/SPRINT_505_ops_devops_iii.md +++ b/docs/implplan/SPRINT_505_ops_devops_iii.md @@ -15,6 +15,8 @@ - docs/modules/platform/architecture-overview.md - Existing CI/OAS runbooks referenced by tasks. +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_506_ops_devops_iv.md b/docs/implplan/SPRINT_506_ops_devops_iv.md index e9f9a5d2c..d69bf09a6 100644 --- a/docs/implplan/SPRINT_506_ops_devops_iv.md +++ b/docs/implplan/SPRINT_506_ops_devops_iv.md @@ -16,6 +16,8 @@ - docs/modules/devops/architecture.md - ops/devops/README.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_507_ops_devops_v.md b/docs/implplan/SPRINT_507_ops_devops_v.md index 34aacb96b..60109dae3 100644 --- a/docs/implplan/SPRINT_507_ops_devops_v.md +++ b/docs/implplan/SPRINT_507_ops_devops_v.md @@ -13,6 +13,8 @@ - ops/devops/README.md - ops/devops/docker/base-image-guidelines.md +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_508_ops_offline_kit.md b/docs/implplan/SPRINT_508_ops_offline_kit.md index e9f1173af..ae1997658 100644 --- a/docs/implplan/SPRINT_508_ops_offline_kit.md +++ b/docs/implplan/SPRINT_508_ops_offline_kit.md @@ -14,6 +14,8 @@ - docs/modules/devops/architecture.md - ops/offline-kit README/tests +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/implplan/SPRINT_511_api.md b/docs/implplan/SPRINT_511_api.md index d970d0834..0d315fc45 100644 --- a/docs/implplan/SPRINT_511_api.md +++ b/docs/implplan/SPRINT_511_api.md @@ -14,6 +14,8 @@ - docs/api/openapi-discovery.md - src/Api/StellaOps.Api.Governance/README.md (if present) +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + ## Delivery Tracker | # | Task ID | Status | Key dependency / next step | Owners | Task Definition | | --- | --- | --- | --- | --- | --- | diff --git a/docs/router/13-Step.md b/docs/router/13-Step.md new file mode 100644 index 000000000..89d19b8f3 --- /dev/null +++ b/docs/router/13-Step.md @@ -0,0 +1,946 @@ +# Step 13: InMemory Transport Implementation + +**Phase 3: Transport Layer** +**Estimated Complexity:** Medium +**Dependencies:** Step 12 (Request/Response Serialization) + +--- + +## Overview + +The InMemory transport provides a high-performance, zero-network transport for testing, local development, and same-process microservices. It serves as the reference implementation for the transport layer and must pass all protocol tests before any real transport implementation. + +--- + +## Goals + +1. Implement a fully-functional in-process transport without network overhead +2. Serve as the reference implementation for transport protocol compliance +3. Enable fast integration tests without network dependencies +4. Support all frame types and streaming semantics +5. Provide debugging hooks for protocol validation + +--- + +## Core Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ InMemory Transport Hub │ +├─────────────────────────────────────────────────────────────┤ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Gateway Side │◄──►│ Channels │◄──►│Microservice │ │ +│ │ Client │ │ (Duplex) │ │ Server │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ +│ Connection Registry Frame Queue Handler Dispatch │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +## Core Types + +### InMemory Channel + +```csharp +namespace StellaOps.Router.Transport.InMemory; + +/// +/// Bidirectional in-memory channel for frame exchange. +/// +public sealed class InMemoryChannel : IAsyncDisposable +{ + private readonly Channel _gatewayToService; + private readonly Channel _serviceToGateway; + private readonly CancellationTokenSource _cts; + + public string ChannelId { get; } + public string ServiceName { get; } + public string InstanceId { get; } + public ConnectionState State { get; private set; } + public DateTimeOffset CreatedAt { get; } + public DateTimeOffset LastActivityAt { get; private set; } + + public InMemoryChannel(string serviceName, string instanceId) + { + ChannelId = Guid.NewGuid().ToString("N"); + ServiceName = serviceName; + InstanceId = instanceId; + CreatedAt = DateTimeOffset.UtcNow; + LastActivityAt = CreatedAt; + State = ConnectionState.Connecting; + _cts = new CancellationTokenSource(); + + // Bounded channels to provide backpressure + var options = new BoundedChannelOptions(1000) + { + FullMode = BoundedChannelFullMode.Wait, + SingleReader = false, + SingleWriter = false + }; + + _gatewayToService = Channel.CreateBounded(options); + _serviceToGateway = Channel.CreateBounded(options); + } + + /// + /// Gets the writer for sending frames from gateway to service. + /// + public ChannelWriter GatewayWriter => _gatewayToService.Writer; + + /// + /// Gets the reader for receiving frames from gateway (service side). + /// + public ChannelReader ServiceReader => _gatewayToService.Reader; + + /// + /// Gets the writer for sending frames from service to gateway. + /// + public ChannelWriter ServiceWriter => _serviceToGateway.Writer; + + /// + /// Gets the reader for receiving frames from service (gateway side). + /// + public ChannelReader GatewayReader => _serviceToGateway.Reader; + + public void MarkConnected() + { + State = ConnectionState.Connected; + LastActivityAt = DateTimeOffset.UtcNow; + } + + public void UpdateActivity() + { + LastActivityAt = DateTimeOffset.UtcNow; + } + + public async ValueTask DisposeAsync() + { + State = ConnectionState.Disconnected; + _cts.Cancel(); + _gatewayToService.Writer.TryComplete(); + _serviceToGateway.Writer.TryComplete(); + _cts.Dispose(); + } +} +``` + +### InMemory Hub + +```csharp +namespace StellaOps.Router.Transport.InMemory; + +/// +/// Central hub managing all InMemory transport connections. +/// +public sealed class InMemoryTransportHub : IDisposable +{ + private readonly ConcurrentDictionary _channels = new(); + private readonly ConcurrentDictionary> _serviceChannels = new(); + private readonly ILogger _logger; + + public InMemoryTransportHub(ILogger logger) + { + _logger = logger; + } + + /// + /// Creates a new channel for a microservice connection. + /// + public InMemoryChannel CreateChannel(string serviceName, string instanceId) + { + var channel = new InMemoryChannel(serviceName, instanceId); + + if (!_channels.TryAdd(channel.ChannelId, channel)) + { + throw new InvalidOperationException($"Channel {channel.ChannelId} already exists"); + } + + _serviceChannels.AddOrUpdate( + serviceName, + _ => new List { channel.ChannelId }, + (_, list) => { lock (list) { list.Add(channel.ChannelId); } return list; } + ); + + _logger.LogDebug( + "Created InMemory channel {ChannelId} for {ServiceName}/{InstanceId}", + channel.ChannelId, serviceName, instanceId); + + return channel; + } + + /// + /// Gets a channel by ID. + /// + public InMemoryChannel? GetChannel(string channelId) + { + return _channels.TryGetValue(channelId, out var channel) ? channel : null; + } + + /// + /// Gets all channels for a service. + /// + public IReadOnlyList GetServiceChannels(string serviceName) + { + if (!_serviceChannels.TryGetValue(serviceName, out var channelIds)) + return Array.Empty(); + + var result = new List(); + lock (channelIds) + { + foreach (var id in channelIds) + { + if (_channels.TryGetValue(id, out var channel) && + channel.State == ConnectionState.Connected) + { + result.Add(channel); + } + } + } + return result; + } + + /// + /// Removes a channel from the hub. + /// + public async Task RemoveChannelAsync(string channelId) + { + if (_channels.TryRemove(channelId, out var channel)) + { + if (_serviceChannels.TryGetValue(channel.ServiceName, out var list)) + { + lock (list) { list.Remove(channelId); } + } + + await channel.DisposeAsync(); + + _logger.LogDebug("Removed InMemory channel {ChannelId}", channelId); + } + } + + /// + /// Gets all active channels. + /// + public IEnumerable GetAllChannels() + { + return _channels.Values.Where(c => c.State == ConnectionState.Connected); + } + + public void Dispose() + { + foreach (var channel in _channels.Values) + { + _ = channel.DisposeAsync(); + } + _channels.Clear(); + _serviceChannels.Clear(); + } +} +``` + +--- + +## Gateway-Side Client + +```csharp +namespace StellaOps.Router.Transport.InMemory; + +/// +/// Gateway-side client for InMemory transport. +/// +public sealed class InMemoryTransportClient : ITransportClient +{ + private readonly InMemoryTransportHub _hub; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private readonly ConcurrentDictionary> _pendingRequests = new(); + + public string TransportType => "InMemory"; + + public InMemoryTransportClient( + InMemoryTransportHub hub, + IPayloadSerializer serializer, + ILogger logger) + { + _hub = hub; + _serializer = serializer; + _logger = logger; + } + + public async Task SendRequestAsync( + string serviceName, + RequestPayload request, + TimeSpan timeout, + CancellationToken cancellationToken) + { + var channels = _hub.GetServiceChannels(serviceName); + if (channels.Count == 0) + { + throw new NoAvailableInstanceException(serviceName); + } + + // Simple round-robin selection (in production, use routing plugin) + var channel = channels[Random.Shared.Next(channels.Count)]; + + var correlationId = Guid.NewGuid().ToString("N"); + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + _pendingRequests[correlationId] = tcs; + + try + { + // Create and send request frame + var frame = new Frame + { + Type = FrameType.Request, + CorrelationId = correlationId, + Payload = _serializer.SerializeRequest(request) + }; + + await channel.GatewayWriter.WriteAsync(frame, cancellationToken); + channel.UpdateActivity(); + + // Start listening for response + _ = ListenForResponseAsync(channel, correlationId, cancellationToken); + + // Wait for response with timeout + using var timeoutCts = new CancellationTokenSource(timeout); + using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource( + cancellationToken, timeoutCts.Token); + + try + { + return await tcs.Task.WaitAsync(linkedCts.Token); + } + catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested) + { + // Send cancel frame + await SendCancelAsync(channel, correlationId); + throw new TimeoutException($"Request to {serviceName} timed out after {timeout}"); + } + } + finally + { + _pendingRequests.TryRemove(correlationId, out _); + } + } + + public async IAsyncEnumerable SendStreamingRequestAsync( + string serviceName, + IAsyncEnumerable requestChunks, + TimeSpan timeout, + [EnumeratorCancellation] CancellationToken cancellationToken) + { + var channels = _hub.GetServiceChannels(serviceName); + if (channels.Count == 0) + { + throw new NoAvailableInstanceException(serviceName); + } + + var channel = channels[Random.Shared.Next(channels.Count)]; + var correlationId = Guid.NewGuid().ToString("N"); + + // Send all request chunks + await foreach (var chunk in requestChunks.WithCancellation(cancellationToken)) + { + var frame = new Frame + { + Type = FrameType.Request, + CorrelationId = correlationId, + Payload = _serializer.SerializeRequest(chunk), + Flags = chunk.IsStreaming ? FrameFlags.None : FrameFlags.Final + }; + + await channel.GatewayWriter.WriteAsync(frame, cancellationToken); + channel.UpdateActivity(); + } + + // Read response chunks + await foreach (var frame in channel.GatewayReader.ReadAllAsync(cancellationToken)) + { + if (frame.CorrelationId != correlationId) + continue; + + if (frame.Type == FrameType.Response) + { + var response = _serializer.DeserializeResponse(frame.Payload); + yield return response; + + if (response.IsFinalChunk || frame.Flags.HasFlag(FrameFlags.Final)) + yield break; + } + } + } + + private async Task ListenForResponseAsync( + InMemoryChannel channel, + string correlationId, + CancellationToken cancellationToken) + { + try + { + await foreach (var frame in channel.GatewayReader.ReadAllAsync(cancellationToken)) + { + if (frame.CorrelationId != correlationId) + continue; + + if (frame.Type == FrameType.Response) + { + var response = _serializer.DeserializeResponse(frame.Payload); + + if (_pendingRequests.TryGetValue(correlationId, out var tcs)) + { + tcs.TrySetResult(response); + } + return; + } + } + } + catch (OperationCanceledException) + { + // Expected on cancellation + } + } + + private async Task SendCancelAsync(InMemoryChannel channel, string correlationId) + { + try + { + var cancelFrame = new Frame + { + Type = FrameType.Cancel, + CorrelationId = correlationId, + Payload = Array.Empty() + }; + await channel.GatewayWriter.WriteAsync(cancelFrame); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Failed to send cancel frame for {CorrelationId}", correlationId); + } + } +} +``` + +--- + +## Microservice-Side Server + +```csharp +namespace StellaOps.Router.Transport.InMemory; + +/// +/// Microservice-side server for InMemory transport. +/// +public sealed class InMemoryTransportServer : ITransportServer +{ + private readonly InMemoryTransportHub _hub; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private InMemoryChannel? _channel; + private CancellationTokenSource? _cts; + private Task? _processingTask; + + public string TransportType => "InMemory"; + public bool IsConnected => _channel?.State == ConnectionState.Connected; + + public event Func>? OnRequest; + public event Func? OnCancel; + + public InMemoryTransportServer( + InMemoryTransportHub hub, + IPayloadSerializer serializer, + ILogger logger) + { + _hub = hub; + _serializer = serializer; + _logger = logger; + } + + public async Task ConnectAsync( + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + CancellationToken cancellationToken) + { + _channel = _hub.CreateChannel(serviceName, instanceId); + _cts = new CancellationTokenSource(); + + // Send HELLO frame + var helloPayload = new HelloPayload + { + ServiceName = serviceName, + InstanceId = instanceId, + Endpoints = endpoints, + Metadata = new Dictionary + { + ["transport"] = "InMemory", + ["pid"] = Environment.ProcessId.ToString() + } + }; + + var helloFrame = new Frame + { + Type = FrameType.Hello, + CorrelationId = Guid.NewGuid().ToString("N"), + Payload = _serializer.SerializeHello(helloPayload) + }; + + await _channel.ServiceWriter.WriteAsync(helloFrame, cancellationToken); + + // Wait for HELLO response + var response = await _channel.ServiceReader.ReadAsync(cancellationToken); + if (response.Type != FrameType.Hello) + { + throw new ProtocolException($"Expected HELLO response, got {response.Type}"); + } + + _channel.MarkConnected(); + _logger.LogInformation( + "InMemory transport connected for {ServiceName}/{InstanceId}", + serviceName, instanceId); + + // Start processing loop + _processingTask = ProcessFramesAsync(_cts.Token); + } + + private async Task ProcessFramesAsync(CancellationToken cancellationToken) + { + if (_channel == null) return; + + try + { + await foreach (var frame in _channel.ServiceReader.ReadAllAsync(cancellationToken)) + { + _channel.UpdateActivity(); + + switch (frame.Type) + { + case FrameType.Request: + _ = HandleRequestAsync(frame, cancellationToken); + break; + + case FrameType.Cancel: + if (OnCancel != null) + { + await OnCancel(frame.CorrelationId, cancellationToken); + } + break; + + case FrameType.Heartbeat: + await HandleHeartbeatAsync(frame); + break; + } + } + } + catch (OperationCanceledException) + { + // Expected on shutdown + } + catch (Exception ex) + { + _logger.LogError(ex, "Error processing InMemory frames"); + } + } + + private async Task HandleRequestAsync(Frame frame, CancellationToken cancellationToken) + { + if (_channel == null || OnRequest == null) return; + + try + { + var request = _serializer.DeserializeRequest(frame.Payload); + var response = await OnRequest(request, cancellationToken); + + var responseFrame = new Frame + { + Type = FrameType.Response, + CorrelationId = frame.CorrelationId, + Payload = _serializer.SerializeResponse(response), + Flags = FrameFlags.Final + }; + + await _channel.ServiceWriter.WriteAsync(responseFrame, cancellationToken); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error handling request {CorrelationId}", frame.CorrelationId); + + // Send error response + var errorResponse = new ResponsePayload + { + StatusCode = 500, + Headers = new Dictionary(), + ErrorMessage = ex.Message, + IsFinalChunk = true + }; + + var errorFrame = new Frame + { + Type = FrameType.Response, + CorrelationId = frame.CorrelationId, + Payload = _serializer.SerializeResponse(errorResponse), + Flags = FrameFlags.Final | FrameFlags.Error + }; + + await _channel.ServiceWriter.WriteAsync(errorFrame, cancellationToken); + } + } + + private async Task HandleHeartbeatAsync(Frame frame) + { + if (_channel == null) return; + + var pongFrame = new Frame + { + Type = FrameType.Heartbeat, + CorrelationId = frame.CorrelationId, + Payload = frame.Payload // Echo back + }; + + await _channel.ServiceWriter.WriteAsync(pongFrame); + } + + public async Task DisconnectAsync() + { + _cts?.Cancel(); + + if (_processingTask != null) + { + try + { + await _processingTask.WaitAsync(TimeSpan.FromSeconds(5)); + } + catch (TimeoutException) + { + _logger.LogWarning("InMemory processing task did not complete in time"); + } + } + + if (_channel != null) + { + await _hub.RemoveChannelAsync(_channel.ChannelId); + } + + _cts?.Dispose(); + } + + public async Task SendHeartbeatAsync(CancellationToken cancellationToken) + { + if (_channel == null || _channel.State != ConnectionState.Connected) + return; + + var heartbeatFrame = new Frame + { + Type = FrameType.Heartbeat, + CorrelationId = Guid.NewGuid().ToString("N"), + Payload = BitConverter.GetBytes(DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()) + }; + + await _channel.ServiceWriter.WriteAsync(heartbeatFrame, cancellationToken); + } +} +``` + +--- + +## Integration with Global Routing State + +```csharp +namespace StellaOps.Router.Transport.InMemory; + +/// +/// InMemory transport integration with gateway routing state. +/// +public sealed class InMemoryRoutingIntegration : IHostedService +{ + private readonly InMemoryTransportHub _hub; + private readonly IGlobalRoutingState _routingState; + private readonly ILogger _logger; + private Timer? _syncTimer; + + public InMemoryRoutingIntegration( + InMemoryTransportHub hub, + IGlobalRoutingState routingState, + ILogger logger) + { + _hub = hub; + _routingState = routingState; + _logger = logger; + } + + public Task StartAsync(CancellationToken cancellationToken) + { + // Sync InMemory channels with routing state periodically + _syncTimer = new Timer(SyncChannels, null, TimeSpan.Zero, TimeSpan.FromSeconds(5)); + return Task.CompletedTask; + } + + private void SyncChannels(object? state) + { + try + { + foreach (var channel in _hub.GetAllChannels()) + { + var connection = new EndpointConnection + { + ServiceName = channel.ServiceName, + InstanceId = channel.InstanceId, + ConnectionId = channel.ChannelId, + Transport = "InMemory", + State = channel.State, + LastHeartbeat = channel.LastActivityAt + }; + + _routingState.UpdateConnection(connection); + } + } + catch (Exception ex) + { + _logger.LogError(ex, "Error syncing InMemory channels"); + } + } + + public Task StopAsync(CancellationToken cancellationToken) + { + _syncTimer?.Dispose(); + return Task.CompletedTask; + } +} +``` + +--- + +## Service Registration + +```csharp +namespace StellaOps.Router.Transport.InMemory; + +public static class InMemoryTransportExtensions +{ + /// + /// Adds InMemory transport to the gateway. + /// + public static IServiceCollection AddInMemoryTransport(this IServiceCollection services) + { + services.AddSingleton(); + services.AddSingleton(); + services.AddHostedService(); + + return services; + } + + /// + /// Adds InMemory transport to a microservice. + /// + public static IServiceCollection AddInMemoryMicroserviceTransport( + this IServiceCollection services, + Action? configure = null) + { + var options = new InMemoryTransportOptions(); + configure?.Invoke(options); + + services.AddSingleton(options); + services.AddSingleton(); + + return services; + } +} + +public class InMemoryTransportOptions +{ + public int MaxPendingRequests { get; set; } = 1000; + public TimeSpan ConnectionTimeout { get; set; } = TimeSpan.FromSeconds(30); +} +``` + +--- + +## Testing Utilities + +```csharp +namespace StellaOps.Router.Transport.InMemory.Testing; + +/// +/// Test fixture for InMemory transport testing. +/// +public sealed class InMemoryTransportFixture : IAsyncDisposable +{ + private readonly InMemoryTransportHub _hub; + private readonly ILoggerFactory _loggerFactory; + + public InMemoryTransportHub Hub => _hub; + + public InMemoryTransportFixture() + { + _loggerFactory = LoggerFactory.Create(b => b.AddConsole()); + _hub = new InMemoryTransportHub(_loggerFactory.CreateLogger()); + } + + public InMemoryTransportClient CreateClient() + { + var serializer = new MessagePackPayloadSerializer(); + return new InMemoryTransportClient( + _hub, + serializer, + _loggerFactory.CreateLogger()); + } + + public InMemoryTransportServer CreateServer() + { + var serializer = new MessagePackPayloadSerializer(); + return new InMemoryTransportServer( + _hub, + serializer, + _loggerFactory.CreateLogger()); + } + + public async ValueTask DisposeAsync() + { + _hub.Dispose(); + _loggerFactory.Dispose(); + } +} +``` + +--- + +## Unit Tests + +```csharp +public class InMemoryTransportTests +{ + [Fact] + public async Task SimpleRequestResponse_Works() + { + await using var fixture = new InMemoryTransportFixture(); + var client = fixture.CreateClient(); + var server = fixture.CreateServer(); + + // Setup server + server.OnRequest += (request, ct) => Task.FromResult(new ResponsePayload + { + StatusCode = 200, + Headers = new Dictionary(), + Body = Encoding.UTF8.GetBytes($"Hello {request.Path}") + }); + + await server.ConnectAsync("test-service", "instance-1", Array.Empty(), default); + + // Send request + var response = await client.SendRequestAsync( + "test-service", + new RequestPayload + { + Method = "GET", + Path = "/test", + Headers = new Dictionary(), + Claims = new Dictionary() + }, + TimeSpan.FromSeconds(5), + default); + + Assert.Equal(200, response.StatusCode); + Assert.Equal("Hello /test", Encoding.UTF8.GetString(response.Body!)); + } + + [Fact] + public async Task Cancellation_SendsCancelFrame() + { + await using var fixture = new InMemoryTransportFixture(); + var client = fixture.CreateClient(); + var server = fixture.CreateServer(); + + var cancelReceived = new TaskCompletionSource(); + + server.OnRequest += async (request, ct) => + { + await Task.Delay(TimeSpan.FromSeconds(30), ct); + return new ResponsePayload { StatusCode = 200, Headers = new Dictionary() }; + }; + + server.OnCancel += (correlationId, ct) => + { + cancelReceived.TrySetResult(true); + return Task.CompletedTask; + }; + + await server.ConnectAsync("test-service", "instance-1", Array.Empty(), default); + + // Send request with short timeout + await Assert.ThrowsAsync(() => + client.SendRequestAsync( + "test-service", + new RequestPayload { Method = "GET", Path = "/slow", Headers = new Dictionary(), Claims = new Dictionary() }, + TimeSpan.FromMilliseconds(100), + default)); + + // Verify cancel was received + var result = await cancelReceived.Task.WaitAsync(TimeSpan.FromSeconds(1)); + Assert.True(result); + } + + [Fact] + public async Task MultipleInstances_DistributesRequests() + { + await using var fixture = new InMemoryTransportFixture(); + var client = fixture.CreateClient(); + var server1 = fixture.CreateServer(); + var server2 = fixture.CreateServer(); + + var server1Count = 0; + var server2Count = 0; + + server1.OnRequest += (r, ct) => + { + Interlocked.Increment(ref server1Count); + return Task.FromResult(new ResponsePayload { StatusCode = 200, Headers = new Dictionary() }); + }; + + server2.OnRequest += (r, ct) => + { + Interlocked.Increment(ref server2Count); + return Task.FromResult(new ResponsePayload { StatusCode = 200, Headers = new Dictionary() }); + }; + + await server1.ConnectAsync("test-service", "instance-1", Array.Empty(), default); + await server2.ConnectAsync("test-service", "instance-2", Array.Empty(), default); + + // Send multiple requests + for (int i = 0; i < 100; i++) + { + await client.SendRequestAsync( + "test-service", + new RequestPayload { Method = "GET", Path = "/test", Headers = new Dictionary(), Claims = new Dictionary() }, + TimeSpan.FromSeconds(5), + default); + } + + // Both instances should have received requests + Assert.True(server1Count > 0); + Assert.True(server2Count > 0); + Assert.Equal(100, server1Count + server2Count); + } +} +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Transport.InMemory/InMemoryChannel.cs` +2. `StellaOps.Router.Transport.InMemory/InMemoryTransportHub.cs` +3. `StellaOps.Router.Transport.InMemory/InMemoryTransportClient.cs` +4. `StellaOps.Router.Transport.InMemory/InMemoryTransportServer.cs` +5. `StellaOps.Router.Transport.InMemory/InMemoryRoutingIntegration.cs` +6. `StellaOps.Router.Transport.InMemory/InMemoryTransportExtensions.cs` +7. `StellaOps.Router.Transport.InMemory.Testing/InMemoryTransportFixture.cs` +8. Unit tests for all frame types +9. Integration tests for request/response patterns +10. Streaming tests + +--- + +## Next Step + +Proceed to [Step 14: TCP Transport Implementation](14-Step.md) to implement the primary production transport. diff --git a/docs/router/14-Step.md b/docs/router/14-Step.md new file mode 100644 index 000000000..4d202d568 --- /dev/null +++ b/docs/router/14-Step.md @@ -0,0 +1,1054 @@ +# Step 14: TCP Transport Implementation + +**Phase 3: Transport Layer** +**Estimated Complexity:** High +**Dependencies:** Step 13 (InMemory Transport) + +--- + +## Overview + +The TCP transport is the primary production transport for connecting microservices to the gateway. It provides reliable, ordered frame delivery over persistent connections with efficient binary framing, connection multiplexing, and automatic reconnection. + +--- + +## Goals + +1. Implement efficient binary frame encoding over TCP +2. Support connection multiplexing for high throughput +3. Implement automatic reconnection with exponential backoff +4. Handle partial reads/writes correctly +5. Integrate with .NET's socket pooling and buffer management + +--- + +## Wire Protocol + +### Frame Layout + +``` +┌────────────────────────────────────────────────────────────────┐ +│ TCP Frame Format │ +├──────────┬──────────┬──────────┬──────────┬───────────────────┤ +│ Magic │ Flags │ Type │ Length │ Correlation │ +│ (2B) │ (1B) │ (1B) │ (4B) │ ID (16B) │ +├──────────┴──────────┴──────────┴──────────┴───────────────────┤ +│ Payload │ +│ (Length bytes) │ +└────────────────────────────────────────────────────────────────┘ + +Total Header: 24 bytes +Magic: 0x53 0x52 ("SR" - Stella Router) +Flags: Compression, Final, Error +Type: Hello=1, Heartbeat=2, Request=3, Response=4, Cancel=5 +Length: uint32, big-endian (max 16MB) +Correlation ID: 16 bytes (GUID) +Payload: Variable length +``` + +--- + +## Core Types + +### TCP Frame Codec + +```csharp +namespace StellaOps.Router.Transport.Tcp; + +/// +/// Encodes and decodes frames for TCP wire format. +/// +public sealed class TcpFrameCodec +{ + private const ushort Magic = 0x5352; // "SR" + private const int HeaderSize = 24; + private const int MaxPayloadSize = 16 * 1024 * 1024; // 16MB + + private readonly ArrayPool _bufferPool; + + public TcpFrameCodec(ArrayPool? bufferPool = null) + { + _bufferPool = bufferPool ?? ArrayPool.Shared; + } + + /// + /// Encodes a frame to wire format. + /// + public int Encode(Frame frame, Span destination) + { + if (destination.Length < HeaderSize + frame.Payload.Length) + throw new ArgumentException("Destination buffer too small"); + + var offset = 0; + + // Magic (2 bytes) + BinaryPrimitives.WriteUInt16BigEndian(destination[offset..], Magic); + offset += 2; + + // Flags (1 byte) + destination[offset++] = (byte)frame.Flags; + + // Type (1 byte) + destination[offset++] = (byte)frame.Type; + + // Length (4 bytes) + BinaryPrimitives.WriteUInt32BigEndian(destination[offset..], (uint)frame.Payload.Length); + offset += 4; + + // Correlation ID (16 bytes) + if (Guid.TryParse(frame.CorrelationId, out var guid)) + { + guid.TryWriteBytes(destination[offset..]); + } + offset += 16; + + // Payload + frame.Payload.AsSpan().CopyTo(destination[offset..]); + offset += frame.Payload.Length; + + return offset; + } + + /// + /// Decodes a frame from wire format. + /// + public Frame Decode(ReadOnlySpan source) + { + if (source.Length < HeaderSize) + throw new ProtocolException("Incomplete frame header"); + + var offset = 0; + + // Magic + var magic = BinaryPrimitives.ReadUInt16BigEndian(source[offset..]); + if (magic != Magic) + throw new ProtocolException($"Invalid magic: 0x{magic:X4}"); + offset += 2; + + // Flags + var flags = (FrameFlags)source[offset++]; + + // Type + var type = (FrameType)source[offset++]; + + // Length + var length = BinaryPrimitives.ReadUInt32BigEndian(source[offset..]); + if (length > MaxPayloadSize) + throw new ProtocolException($"Payload too large: {length}"); + offset += 4; + + // Correlation ID + var correlationId = new Guid(source.Slice(offset, 16)).ToString("N"); + offset += 16; + + // Verify we have full payload + if (source.Length < HeaderSize + length) + throw new ProtocolException("Incomplete payload"); + + // Payload + var payload = source.Slice(offset, (int)length).ToArray(); + + return new Frame + { + Type = type, + Flags = flags, + CorrelationId = correlationId, + Payload = payload + }; + } + + /// + /// Attempts to decode a frame from a buffer, returning bytes consumed. + /// + public bool TryDecode(ReadOnlySequence buffer, out Frame frame, out int bytesConsumed) + { + frame = default!; + bytesConsumed = 0; + + if (buffer.Length < HeaderSize) + return false; + + // Read header to get length + Span header = stackalloc byte[HeaderSize]; + buffer.Slice(0, HeaderSize).CopyTo(header); + + var length = BinaryPrimitives.ReadUInt32BigEndian(header[4..]); + var totalLength = HeaderSize + (int)length; + + if (buffer.Length < totalLength) + return false; + + // Decode full frame + var frameBytes = new byte[totalLength]; + buffer.Slice(0, totalLength).CopyTo(frameBytes); + frame = Decode(frameBytes); + bytesConsumed = totalLength; + + return true; + } +} +``` + +### TCP Connection + +```csharp +namespace StellaOps.Router.Transport.Tcp; + +/// +/// Represents a TCP connection with frame-based I/O. +/// +public sealed class TcpFrameConnection : IAsyncDisposable +{ + private readonly Socket _socket; + private readonly NetworkStream _stream; + private readonly TcpFrameCodec _codec; + private readonly ILogger _logger; + private readonly SemaphoreSlim _writeLock = new(1, 1); + private readonly byte[] _readBuffer; + private readonly byte[] _writeBuffer; + private int _readBufferOffset; + private int _readBufferCount; + + public string ConnectionId { get; } + public EndPoint? RemoteEndPoint => _socket.RemoteEndPoint; + public bool IsConnected => _socket.Connected; + + public TcpFrameConnection( + Socket socket, + TcpFrameCodec codec, + ILogger logger) + { + _socket = socket; + _stream = new NetworkStream(socket, ownsSocket: false); + _codec = codec; + _logger = logger; + _readBuffer = new byte[64 * 1024]; // 64KB read buffer + _writeBuffer = new byte[64 * 1024]; // 64KB write buffer + ConnectionId = Guid.NewGuid().ToString("N"); + + // Configure socket options + _socket.NoDelay = true; // Disable Nagle's algorithm + _socket.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.KeepAlive, true); + } + + /// + /// Sends a frame over the connection. + /// + public async ValueTask SendAsync(Frame frame, CancellationToken cancellationToken) + { + await _writeLock.WaitAsync(cancellationToken); + try + { + var size = _codec.Encode(frame, _writeBuffer); + await _stream.WriteAsync(_writeBuffer.AsMemory(0, size), cancellationToken); + await _stream.FlushAsync(cancellationToken); + } + finally + { + _writeLock.Release(); + } + } + + /// + /// Receives a frame from the connection. + /// + public async ValueTask ReceiveAsync(CancellationToken cancellationToken) + { + while (true) + { + // Try to decode from existing buffer + if (_readBufferCount >= 24) // Minimum header size + { + var span = new ReadOnlySpan(_readBuffer, _readBufferOffset, _readBufferCount); + + // Check if we have a complete frame + if (span.Length >= 8) + { + var payloadLength = BinaryPrimitives.ReadUInt32BigEndian(span[4..]); + var totalLength = 24 + (int)payloadLength; + + if (span.Length >= totalLength) + { + var frame = _codec.Decode(span[..totalLength]); + _readBufferOffset += totalLength; + _readBufferCount -= totalLength; + + // Compact buffer if needed + if (_readBufferOffset > _readBuffer.Length / 2) + { + Buffer.BlockCopy(_readBuffer, _readBufferOffset, _readBuffer, 0, _readBufferCount); + _readBufferOffset = 0; + } + + return frame; + } + } + } + + // Need more data + if (_readBufferOffset + _readBufferCount >= _readBuffer.Length) + { + // Compact buffer + Buffer.BlockCopy(_readBuffer, _readBufferOffset, _readBuffer, 0, _readBufferCount); + _readBufferOffset = 0; + } + + var bytesRead = await _stream.ReadAsync( + _readBuffer.AsMemory(_readBufferOffset + _readBufferCount), + cancellationToken); + + if (bytesRead == 0) + { + throw new EndOfStreamException("Connection closed by remote"); + } + + _readBufferCount += bytesRead; + } + } + + /// + /// Receives frames as an async enumerable. + /// + public async IAsyncEnumerable ReceiveAllAsync( + [EnumeratorCancellation] CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + Frame frame; + try + { + frame = await ReceiveAsync(cancellationToken); + } + catch (EndOfStreamException) + { + yield break; + } + catch (OperationCanceledException) + { + yield break; + } + + yield return frame; + } + } + + public async ValueTask DisposeAsync() + { + _writeLock.Dispose(); + await _stream.DisposeAsync(); + _socket.Dispose(); + } +} +``` + +--- + +## Gateway-Side TCP Server + +```csharp +namespace StellaOps.Router.Transport.Tcp; + +/// +/// TCP server running on the gateway to accept microservice connections. +/// +public sealed class TcpTransportServer : IHostedService +{ + private readonly TcpTransportConfig _config; + private readonly TcpFrameCodec _codec; + private readonly IGlobalRoutingState _routingState; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private Socket? _listener; + private CancellationTokenSource? _cts; + private readonly ConcurrentDictionary _connections = new(); + + public TcpTransportServer( + IOptions config, + TcpFrameCodec codec, + IGlobalRoutingState routingState, + IPayloadSerializer serializer, + ILogger logger) + { + _config = config.Value; + _codec = codec; + _routingState = routingState; + _serializer = serializer; + _logger = logger; + } + + public async Task StartAsync(CancellationToken cancellationToken) + { + _cts = new CancellationTokenSource(); + + _listener = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + _listener.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, true); + _listener.Bind(new IPEndPoint(IPAddress.Parse(_config.ListenAddress), _config.Port)); + _listener.Listen(_config.Backlog); + + _logger.LogInformation( + "TCP transport server listening on {Address}:{Port}", + _config.ListenAddress, _config.Port); + + _ = AcceptConnectionsAsync(_cts.Token); + } + + private async Task AcceptConnectionsAsync(CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + try + { + var socket = await _listener!.AcceptAsync(cancellationToken); + _logger.LogDebug("Accepted connection from {RemoteEndPoint}", socket.RemoteEndPoint); + + _ = HandleConnectionAsync(socket, cancellationToken); + } + catch (OperationCanceledException) + { + break; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error accepting connection"); + } + } + } + + private async Task HandleConnectionAsync(Socket socket, CancellationToken cancellationToken) + { + var connection = new TcpFrameConnection(socket, _codec, _logger); + + try + { + // Wait for HELLO frame + var helloFrame = await connection.ReceiveAsync(cancellationToken) + .AsTask() + .WaitAsync(TimeSpan.FromSeconds(_config.HandshakeTimeoutSeconds), cancellationToken); + + if (helloFrame.Type != FrameType.Hello) + { + _logger.LogWarning("Expected HELLO frame, got {Type}", helloFrame.Type); + return; + } + + var hello = _serializer.DeserializeHello(helloFrame.Payload); + _logger.LogInformation( + "Microservice connected: {ServiceName}/{InstanceId}", + hello.ServiceName, hello.InstanceId); + + // Send HELLO response + var helloResponse = new HelloResponse + { + Accepted = true, + HeartbeatIntervalMs = _config.HeartbeatIntervalMs, + MaxPayloadSize = _config.MaxPayloadSize + }; + + var responseFrame = new Frame + { + Type = FrameType.Hello, + CorrelationId = helloFrame.CorrelationId, + Payload = _serializer.SerializeHelloResponse(helloResponse) + }; + await connection.SendAsync(responseFrame, cancellationToken); + + // Create connection wrapper + var msConnection = new TcpMicroserviceConnection( + connection, + hello.ServiceName, + hello.InstanceId, + hello.Endpoints, + _serializer, + _logger); + + _connections[connection.ConnectionId] = msConnection; + + // Register with routing state + _routingState.RegisterConnection(new EndpointConnection + { + ConnectionId = connection.ConnectionId, + ServiceName = hello.ServiceName, + InstanceId = hello.InstanceId, + Transport = "TCP", + State = ConnectionState.Connected, + Endpoints = hello.Endpoints, + Region = hello.Metadata?.GetValueOrDefault("region"), + LastHeartbeat = DateTimeOffset.UtcNow + }); + + // Process frames + await msConnection.ProcessAsync(cancellationToken); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error handling connection {ConnectionId}", connection.ConnectionId); + } + finally + { + _connections.TryRemove(connection.ConnectionId, out _); + _routingState.RemoveConnection(connection.ConnectionId); + await connection.DisposeAsync(); + } + } + + /// + /// Gets a connection for sending requests to a service instance. + /// + public TcpMicroserviceConnection? GetConnection(string connectionId) + { + return _connections.TryGetValue(connectionId, out var conn) ? conn : null; + } + + public async Task StopAsync(CancellationToken cancellationToken) + { + _cts?.Cancel(); + _listener?.Close(); + + foreach (var connection in _connections.Values) + { + await connection.DisconnectAsync(); + } + + _cts?.Dispose(); + } +} + +/// +/// Represents an active microservice connection. +/// +public sealed class TcpMicroserviceConnection +{ + private readonly TcpFrameConnection _connection; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private readonly ConcurrentDictionary> _pendingRequests = new(); + private DateTimeOffset _lastActivity; + + public string ServiceName { get; } + public string InstanceId { get; } + public EndpointDescriptor[] Endpoints { get; } + public DateTimeOffset LastActivity => _lastActivity; + + public TcpMicroserviceConnection( + TcpFrameConnection connection, + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + IPayloadSerializer serializer, + ILogger logger) + { + _connection = connection; + ServiceName = serviceName; + InstanceId = instanceId; + Endpoints = endpoints; + _serializer = serializer; + _logger = logger; + _lastActivity = DateTimeOffset.UtcNow; + } + + public async Task ProcessAsync(CancellationToken cancellationToken) + { + await foreach (var frame in _connection.ReceiveAllAsync(cancellationToken)) + { + _lastActivity = DateTimeOffset.UtcNow; + + switch (frame.Type) + { + case FrameType.Response: + if (_pendingRequests.TryRemove(frame.CorrelationId, out var tcs)) + { + tcs.TrySetResult(frame); + } + break; + + case FrameType.Heartbeat: + // Microservice sent heartbeat response + break; + } + } + } + + public async Task SendRequestAsync( + RequestPayload request, + TimeSpan timeout, + CancellationToken cancellationToken) + { + var correlationId = Guid.NewGuid().ToString("N"); + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + _pendingRequests[correlationId] = tcs; + + try + { + var frame = new Frame + { + Type = FrameType.Request, + CorrelationId = correlationId, + Payload = _serializer.SerializeRequest(request) + }; + + await _connection.SendAsync(frame, cancellationToken); + + using var timeoutCts = new CancellationTokenSource(timeout); + using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, timeoutCts.Token); + + try + { + var responseFrame = await tcs.Task.WaitAsync(linkedCts.Token); + return _serializer.DeserializeResponse(responseFrame.Payload); + } + catch (OperationCanceledException) when (timeoutCts.IsCancellationRequested) + { + // Send cancel + await SendCancelAsync(correlationId, CancellationToken.None); + throw new TimeoutException($"Request timed out after {timeout}"); + } + } + finally + { + _pendingRequests.TryRemove(correlationId, out _); + } + } + + private async Task SendCancelAsync(string correlationId, CancellationToken cancellationToken) + { + try + { + var cancelFrame = new Frame + { + Type = FrameType.Cancel, + CorrelationId = correlationId, + Payload = Array.Empty() + }; + await _connection.SendAsync(cancelFrame, cancellationToken); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Failed to send cancel frame"); + } + } + + public async Task DisconnectAsync() + { + foreach (var pending in _pendingRequests.Values) + { + pending.TrySetCanceled(); + } + _pendingRequests.Clear(); + await _connection.DisposeAsync(); + } +} +``` + +--- + +## Microservice-Side TCP Client + +```csharp +namespace StellaOps.Router.Transport.Tcp; + +/// +/// TCP client for microservices to connect to the gateway. +/// +public sealed class TcpTransportClient : ITransportServer, IAsyncDisposable +{ + private readonly TcpClientConfig _config; + private readonly TcpFrameCodec _codec; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private TcpFrameConnection? _connection; + private CancellationTokenSource? _cts; + private Task? _processingTask; + private int _reconnectAttempts; + + public string TransportType => "TCP"; + public bool IsConnected => _connection?.IsConnected ?? false; + + public event Func>? OnRequest; + public event Func? OnCancel; + + public TcpTransportClient( + IOptions config, + TcpFrameCodec codec, + IPayloadSerializer serializer, + ILogger logger) + { + _config = config.Value; + _codec = codec; + _serializer = serializer; + _logger = logger; + } + + public async Task ConnectAsync( + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + CancellationToken cancellationToken) + { + _cts = new CancellationTokenSource(); + + await ConnectWithRetryAsync(serviceName, instanceId, endpoints, cancellationToken); + + // Start processing loop + _processingTask = ProcessFramesAsync(serviceName, instanceId, endpoints, _cts.Token); + } + + private async Task ConnectWithRetryAsync( + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + try + { + var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + + await socket.ConnectAsync(_config.GatewayHost, _config.GatewayPort, cancellationToken); + + _connection = new TcpFrameConnection(socket, _codec, _logger); + + // Send HELLO + var hello = new HelloPayload + { + ServiceName = serviceName, + InstanceId = instanceId, + Endpoints = endpoints, + Metadata = new Dictionary + { + ["region"] = _config.Region ?? "default", + ["version"] = _config.ServiceVersion ?? "1.0.0" + } + }; + + var helloFrame = new Frame + { + Type = FrameType.Hello, + CorrelationId = Guid.NewGuid().ToString("N"), + Payload = _serializer.SerializeHello(hello) + }; + + await _connection.SendAsync(helloFrame, cancellationToken); + + // Wait for response + var response = await _connection.ReceiveAsync(cancellationToken); + if (response.Type != FrameType.Hello) + { + throw new ProtocolException($"Expected HELLO response, got {response.Type}"); + } + + _reconnectAttempts = 0; + _logger.LogInformation( + "Connected to gateway at {Host}:{Port}", + _config.GatewayHost, _config.GatewayPort); + + return; + } + catch (Exception ex) when (!cancellationToken.IsCancellationRequested) + { + _reconnectAttempts++; + var delay = Math.Min( + _config.InitialReconnectDelayMs * Math.Pow(2, _reconnectAttempts - 1), + _config.MaxReconnectDelayMs); + + _logger.LogWarning( + ex, + "Connection attempt {Attempt} failed, retrying in {Delay}ms", + _reconnectAttempts, delay); + + await Task.Delay((int)delay, cancellationToken); + } + } + } + + private async Task ProcessFramesAsync( + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + try + { + if (_connection == null || !_connection.IsConnected) + { + await ConnectWithRetryAsync(serviceName, instanceId, endpoints, cancellationToken); + } + + await foreach (var frame in _connection!.ReceiveAllAsync(cancellationToken)) + { + switch (frame.Type) + { + case FrameType.Request: + _ = HandleRequestAsync(frame, cancellationToken); + break; + + case FrameType.Cancel: + if (OnCancel != null) + { + await OnCancel(frame.CorrelationId, cancellationToken); + } + break; + + case FrameType.Heartbeat: + await HandleHeartbeatAsync(frame); + break; + } + } + } + catch (EndOfStreamException) + { + _logger.LogWarning("Connection closed, attempting reconnect"); + _connection = null; + } + catch (OperationCanceledException) + { + break; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error processing frames"); + _connection = null; + } + } + } + + private async Task HandleRequestAsync(Frame frame, CancellationToken cancellationToken) + { + if (_connection == null || OnRequest == null) return; + + try + { + var request = _serializer.DeserializeRequest(frame.Payload); + var response = await OnRequest(request, cancellationToken); + + var responseFrame = new Frame + { + Type = FrameType.Response, + CorrelationId = frame.CorrelationId, + Payload = _serializer.SerializeResponse(response), + Flags = FrameFlags.Final + }; + + await _connection.SendAsync(responseFrame, cancellationToken); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error handling request"); + + var errorResponse = new ResponsePayload + { + StatusCode = 500, + Headers = new Dictionary(), + ErrorMessage = ex.Message, + IsFinalChunk = true + }; + + var errorFrame = new Frame + { + Type = FrameType.Response, + CorrelationId = frame.CorrelationId, + Payload = _serializer.SerializeResponse(errorResponse), + Flags = FrameFlags.Final | FrameFlags.Error + }; + + await _connection.SendAsync(errorFrame, cancellationToken); + } + } + + private async Task HandleHeartbeatAsync(Frame frame) + { + if (_connection == null) return; + + var pongFrame = new Frame + { + Type = FrameType.Heartbeat, + CorrelationId = frame.CorrelationId, + Payload = frame.Payload + }; + + await _connection.SendAsync(pongFrame, CancellationToken.None); + } + + public async Task DisconnectAsync() + { + _cts?.Cancel(); + + if (_processingTask != null) + { + try + { + await _processingTask.WaitAsync(TimeSpan.FromSeconds(5)); + } + catch { } + } + + if (_connection != null) + { + await _connection.DisposeAsync(); + } + + _cts?.Dispose(); + } + + public async ValueTask DisposeAsync() + { + await DisconnectAsync(); + } +} +``` + +--- + +## Configuration + +```csharp +namespace StellaOps.Router.Transport.Tcp; + +public class TcpTransportConfig +{ + public string ListenAddress { get; set; } = "0.0.0.0"; + public int Port { get; set; } = 9500; + public int Backlog { get; set; } = 100; + public int HandshakeTimeoutSeconds { get; set; } = 30; + public int HeartbeatIntervalMs { get; set; } = 10000; + public int MaxPayloadSize { get; set; } = 16 * 1024 * 1024; +} + +public class TcpClientConfig +{ + public string GatewayHost { get; set; } = "localhost"; + public int GatewayPort { get; set; } = 9500; + public string? Region { get; set; } + public string? ServiceVersion { get; set; } + public int InitialReconnectDelayMs { get; set; } = 1000; + public int MaxReconnectDelayMs { get; set; } = 30000; + public int ConnectionTimeoutMs { get; set; } = 10000; +} +``` + +--- + +## YAML Configuration + +```yaml +# Gateway config +TcpTransport: + ListenAddress: "0.0.0.0" + Port: 9500 + Backlog: 100 + HandshakeTimeoutSeconds: 30 + HeartbeatIntervalMs: 10000 + MaxPayloadSize: 16777216 # 16MB + +# Microservice config +TcpClient: + GatewayHost: "gateway.internal" + GatewayPort: 9500 + Region: "us-east-1" + ServiceVersion: "1.0.0" + InitialReconnectDelayMs: 1000 + MaxReconnectDelayMs: 30000 +``` + +--- + +## Service Registration + +```csharp +namespace StellaOps.Router.Transport.Tcp; + +public static class TcpTransportExtensions +{ + public static IServiceCollection AddTcpTransport( + this IServiceCollection services, + IConfiguration configuration) + { + services.Configure( + configuration.GetSection("TcpTransport")); + + services.AddSingleton(); + services.AddSingleton(); + services.AddHostedService(sp => sp.GetRequiredService()); + + return services; + } + + public static IServiceCollection AddTcpMicroserviceTransport( + this IServiceCollection services, + IConfiguration configuration) + { + services.Configure( + configuration.GetSection("TcpClient")); + + services.AddSingleton(); + services.AddSingleton(); + + return services; + } +} +``` + +--- + +## Unit Tests + +```csharp +public class TcpFrameCodecTests +{ + [Fact] + public void Encode_Decode_RoundTrips() + { + var codec = new TcpFrameCodec(); + var original = new Frame + { + Type = FrameType.Request, + CorrelationId = Guid.NewGuid().ToString("N"), + Payload = Encoding.UTF8.GetBytes("test payload"), + Flags = FrameFlags.Compressed + }; + + var buffer = new byte[1024]; + var length = codec.Encode(original, buffer); + var decoded = codec.Decode(buffer.AsSpan(0, length)); + + Assert.Equal(original.Type, decoded.Type); + Assert.Equal(original.CorrelationId, decoded.CorrelationId); + Assert.Equal(original.Payload, decoded.Payload); + Assert.Equal(original.Flags, decoded.Flags); + } + + [Fact] + public void Decode_ThrowsOnInvalidMagic() + { + var codec = new TcpFrameCodec(); + var buffer = new byte[24]; + buffer[0] = 0xFF; + buffer[1] = 0xFF; + + Assert.Throws(() => codec.Decode(buffer)); + } +} +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Transport.Tcp/TcpFrameCodec.cs` +2. `StellaOps.Router.Transport.Tcp/TcpFrameConnection.cs` +3. `StellaOps.Router.Transport.Tcp/TcpTransportServer.cs` +4. `StellaOps.Router.Transport.Tcp/TcpMicroserviceConnection.cs` +5. `StellaOps.Router.Transport.Tcp/TcpTransportClient.cs` +6. `StellaOps.Router.Transport.Tcp/TcpTransportConfig.cs` +7. `StellaOps.Router.Transport.Tcp/TcpTransportExtensions.cs` +8. Wire format encoding/decoding tests +9. Connection lifecycle tests +10. Reconnection tests + +--- + +## Next Step + +Proceed to [Step 15: TLS Transport Implementation](15-Step.md) to add TLS encryption on top of TCP. diff --git a/docs/router/15-Step.md b/docs/router/15-Step.md new file mode 100644 index 000000000..a4098accb --- /dev/null +++ b/docs/router/15-Step.md @@ -0,0 +1,1156 @@ +# Step 15: TLS Transport Implementation + +**Phase 3: Transport Layer** +**Estimated Complexity:** Medium +**Dependencies:** Step 14 (TCP Transport) + +--- + +## Overview + +The TLS transport wraps TCP with mutual TLS (mTLS) authentication for secure microservice-to-gateway communication. It provides encryption, server/client authentication, and certificate-based identity for production deployments. + +--- + +## Goals + +1. Add TLS encryption layer on top of TCP transport +2. Support mutual TLS (mTLS) for bidirectional authentication +3. Support certificate rotation without service restart +4. Integrate with platform certificate stores and custom CAs +5. Provide clear certificate validation error messages + +--- + +## Core Architecture + +``` +┌──────────────────────────────────────────────────────────────┐ +│ TLS Transport Stack │ +├──────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────┐ ┌─────────────┐ │ +│ │ Microservice│ │ Gateway │ │ +│ │ Client │◄──── mTLS ────────►│ Server │ │ +│ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ +│ ┌──────▼──────┐ ┌──────▼──────┐ │ +│ │ SslStream │ │ SslStream │ │ +│ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ +│ ┌──────▼──────┐ ┌──────▼──────┐ │ +│ │ Socket │ │ Socket │ │ +│ └─────────────┘ └─────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────┘ +``` + +--- + +## Configuration + +```csharp +namespace StellaOps.Router.Transport.Tls; + +public class TlsTransportConfig : TcpTransportConfig +{ + /// Path to server certificate PFX/P12 file. + public string? CertificatePath { get; set; } + + /// Password for the certificate file. + public string? CertificatePassword { get; set; } + + /// Thumbprint to load certificate from Windows certificate store. + public string? CertificateThumbprint { get; set; } + + /// Store name for certificate lookup. + public string CertificateStoreName { get; set; } = "My"; + + /// Store location for certificate lookup. + public string CertificateStoreLocation { get; set; } = "CurrentUser"; + + /// Whether to require client certificates (mTLS). + public bool RequireClientCertificate { get; set; } = true; + + /// Path to CA certificate for client validation. + public string? ClientCaCertificatePath { get; set; } + + /// Allowed TLS protocols. + public SslProtocols AllowedProtocols { get; set; } = SslProtocols.Tls12 | SslProtocols.Tls13; + + /// Certificate revocation check mode. + public X509RevocationMode RevocationMode { get; set; } = X509RevocationMode.Online; + + /// Whether to allow untrusted root certificates (dev only). + public bool AllowUntrustedRootCertificates { get; set; } = false; +} + +public class TlsClientConfig : TcpClientConfig +{ + /// Path to client certificate PFX/P12 file. + public string? ClientCertificatePath { get; set; } + + /// Password for the client certificate file. + public string? ClientCertificatePassword { get; set; } + + /// Thumbprint to load client certificate from store. + public string? ClientCertificateThumbprint { get; set; } + + /// Expected server certificate CN/SAN for validation. + public string? ExpectedServerName { get; set; } + + /// Path to CA certificate for server validation. + public string? ServerCaCertificatePath { get; set; } + + /// Whether to skip server certificate validation (dev only). + public bool SkipServerCertificateValidation { get; set; } = false; +} +``` + +--- + +## Certificate Provider + +```csharp +namespace StellaOps.Router.Transport.Tls; + +/// +/// Provides certificates for TLS connections with hot-reload support. +/// +public interface ICertificateProvider +{ + /// Gets the current server certificate. + X509Certificate2? GetServerCertificate(); + + /// Gets the current client certificate. + X509Certificate2? GetClientCertificate(); + + /// Gets CA certificates for validation. + X509Certificate2Collection GetCaCertificates(); + + /// Event raised when certificates are reloaded. + event Action? CertificatesReloaded; +} + +public sealed class CertificateProvider : ICertificateProvider, IDisposable +{ + private readonly TlsTransportConfig _serverConfig; + private readonly TlsClientConfig? _clientConfig; + private readonly ILogger _logger; + private readonly FileSystemWatcher? _fileWatcher; + private X509Certificate2? _serverCertificate; + private X509Certificate2? _clientCertificate; + private X509Certificate2Collection _caCertificates = new(); + + public event Action? CertificatesReloaded; + + public CertificateProvider( + IOptions serverConfig, + IOptions? clientConfig, + ILogger logger) + { + _serverConfig = serverConfig.Value; + _clientConfig = clientConfig?.Value; + _logger = logger; + + LoadCertificates(); + + // Watch for certificate file changes + if (!string.IsNullOrEmpty(_serverConfig.CertificatePath)) + { + var dir = Path.GetDirectoryName(_serverConfig.CertificatePath); + if (dir != null && Directory.Exists(dir)) + { + _fileWatcher = new FileSystemWatcher(dir) + { + Filter = "*.pfx", + NotifyFilter = NotifyFilters.LastWrite | NotifyFilters.CreationTime + }; + _fileWatcher.Changed += OnCertificateFileChanged; + _fileWatcher.EnableRaisingEvents = true; + } + } + } + + private void LoadCertificates() + { + try + { + // Load server certificate + _serverCertificate = LoadCertificate( + _serverConfig.CertificatePath, + _serverConfig.CertificatePassword, + _serverConfig.CertificateThumbprint, + _serverConfig.CertificateStoreName, + _serverConfig.CertificateStoreLocation); + + if (_serverCertificate != null) + { + _logger.LogInformation( + "Loaded server certificate: Subject={Subject}, Expires={Expires}", + _serverCertificate.Subject, + _serverCertificate.NotAfter); + } + + // Load client certificate + if (_clientConfig != null) + { + _clientCertificate = LoadCertificate( + _clientConfig.ClientCertificatePath, + _clientConfig.ClientCertificatePassword, + _clientConfig.ClientCertificateThumbprint, + "My", + "CurrentUser"); + } + + // Load CA certificates + _caCertificates = new X509Certificate2Collection(); + + if (!string.IsNullOrEmpty(_serverConfig.ClientCaCertificatePath) && + File.Exists(_serverConfig.ClientCaCertificatePath)) + { + _caCertificates.Add(new X509Certificate2(_serverConfig.ClientCaCertificatePath)); + } + + if (_clientConfig != null && + !string.IsNullOrEmpty(_clientConfig.ServerCaCertificatePath) && + File.Exists(_clientConfig.ServerCaCertificatePath)) + { + _caCertificates.Add(new X509Certificate2(_clientConfig.ServerCaCertificatePath)); + } + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to load certificates"); + throw; + } + } + + private X509Certificate2? LoadCertificate( + string? path, + string? password, + string? thumbprint, + string storeName, + string storeLocation) + { + // Try file-based certificate first + if (!string.IsNullOrEmpty(path) && File.Exists(path)) + { + return new X509Certificate2( + path, + password, + X509KeyStorageFlags.MachineKeySet | X509KeyStorageFlags.PersistKeySet); + } + + // Try certificate store + if (!string.IsNullOrEmpty(thumbprint)) + { + using var store = new X509Store( + Enum.Parse(storeName), + Enum.Parse(storeLocation)); + + store.Open(OpenFlags.ReadOnly); + + var certs = store.Certificates.Find( + X509FindType.FindByThumbprint, + thumbprint, + validOnly: false); + + return certs.Count > 0 ? certs[0] : null; + } + + return null; + } + + private void OnCertificateFileChanged(object sender, FileSystemEventArgs e) + { + _logger.LogInformation("Certificate file changed, reloading: {Path}", e.FullPath); + + try + { + // Small delay to ensure file is fully written + Thread.Sleep(500); + LoadCertificates(); + CertificatesReloaded?.Invoke(); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to reload certificates"); + } + } + + public X509Certificate2? GetServerCertificate() => _serverCertificate; + public X509Certificate2? GetClientCertificate() => _clientCertificate; + public X509Certificate2Collection GetCaCertificates() => _caCertificates; + + public void Dispose() + { + _fileWatcher?.Dispose(); + _serverCertificate?.Dispose(); + _clientCertificate?.Dispose(); + } +} +``` + +--- + +## TLS Connection Wrapper + +```csharp +namespace StellaOps.Router.Transport.Tls; + +/// +/// TLS-wrapped frame connection. +/// +public sealed class TlsFrameConnection : IAsyncDisposable +{ + private readonly Socket _socket; + private readonly SslStream _sslStream; + private readonly TcpFrameCodec _codec; + private readonly ILogger _logger; + private readonly SemaphoreSlim _writeLock = new(1, 1); + private readonly byte[] _readBuffer; + private readonly byte[] _writeBuffer; + private int _readBufferOffset; + private int _readBufferCount; + + public string ConnectionId { get; } + public EndPoint? RemoteEndPoint => _socket.RemoteEndPoint; + public bool IsConnected => _socket.Connected; + public string? RemoteCertificateSubject { get; private set; } + public string? RemoteCertificateThumbprint { get; private set; } + + public TlsFrameConnection( + Socket socket, + SslStream sslStream, + TcpFrameCodec codec, + ILogger logger) + { + _socket = socket; + _sslStream = sslStream; + _codec = codec; + _logger = logger; + _readBuffer = new byte[64 * 1024]; + _writeBuffer = new byte[64 * 1024]; + ConnectionId = Guid.NewGuid().ToString("N"); + + // Extract remote certificate info + if (_sslStream.RemoteCertificate != null) + { + var cert = new X509Certificate2(_sslStream.RemoteCertificate); + RemoteCertificateSubject = cert.Subject; + RemoteCertificateThumbprint = cert.Thumbprint; + } + } + + public async ValueTask SendAsync(Frame frame, CancellationToken cancellationToken) + { + await _writeLock.WaitAsync(cancellationToken); + try + { + var size = _codec.Encode(frame, _writeBuffer); + await _sslStream.WriteAsync(_writeBuffer.AsMemory(0, size), cancellationToken); + await _sslStream.FlushAsync(cancellationToken); + } + finally + { + _writeLock.Release(); + } + } + + public async ValueTask ReceiveAsync(CancellationToken cancellationToken) + { + while (true) + { + // Try to decode from existing buffer + if (_readBufferCount >= 24) + { + var span = new ReadOnlySpan(_readBuffer, _readBufferOffset, _readBufferCount); + + if (span.Length >= 8) + { + var payloadLength = BinaryPrimitives.ReadUInt32BigEndian(span[4..]); + var totalLength = 24 + (int)payloadLength; + + if (span.Length >= totalLength) + { + var frame = _codec.Decode(span[..totalLength]); + _readBufferOffset += totalLength; + _readBufferCount -= totalLength; + + if (_readBufferOffset > _readBuffer.Length / 2) + { + Buffer.BlockCopy(_readBuffer, _readBufferOffset, _readBuffer, 0, _readBufferCount); + _readBufferOffset = 0; + } + + return frame; + } + } + } + + if (_readBufferOffset + _readBufferCount >= _readBuffer.Length) + { + Buffer.BlockCopy(_readBuffer, _readBufferOffset, _readBuffer, 0, _readBufferCount); + _readBufferOffset = 0; + } + + var bytesRead = await _sslStream.ReadAsync( + _readBuffer.AsMemory(_readBufferOffset + _readBufferCount), + cancellationToken); + + if (bytesRead == 0) + { + throw new EndOfStreamException("TLS connection closed by remote"); + } + + _readBufferCount += bytesRead; + } + } + + public async IAsyncEnumerable ReceiveAllAsync( + [EnumeratorCancellation] CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + Frame frame; + try + { + frame = await ReceiveAsync(cancellationToken); + } + catch (EndOfStreamException) + { + yield break; + } + catch (OperationCanceledException) + { + yield break; + } + catch (IOException ex) when (ex.InnerException is SocketException) + { + yield break; + } + + yield return frame; + } + } + + public async ValueTask DisposeAsync() + { + _writeLock.Dispose(); + await _sslStream.DisposeAsync(); + _socket.Dispose(); + } +} +``` + +--- + +## Gateway TLS Server + +```csharp +namespace StellaOps.Router.Transport.Tls; + +/// +/// TLS-enabled transport server for the gateway. +/// +public sealed class TlsTransportServer : IHostedService +{ + private readonly TlsTransportConfig _config; + private readonly ICertificateProvider _certificateProvider; + private readonly TcpFrameCodec _codec; + private readonly IGlobalRoutingState _routingState; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private Socket? _listener; + private CancellationTokenSource? _cts; + private readonly ConcurrentDictionary _connections = new(); + + public TlsTransportServer( + IOptions config, + ICertificateProvider certificateProvider, + TcpFrameCodec codec, + IGlobalRoutingState routingState, + IPayloadSerializer serializer, + ILogger logger) + { + _config = config.Value; + _certificateProvider = certificateProvider; + _codec = codec; + _routingState = routingState; + _serializer = serializer; + _logger = logger; + } + + public async Task StartAsync(CancellationToken cancellationToken) + { + var serverCert = _certificateProvider.GetServerCertificate(); + if (serverCert == null) + { + throw new InvalidOperationException("Server certificate not configured"); + } + + _cts = new CancellationTokenSource(); + + _listener = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + _listener.SetSocketOption(SocketOptionLevel.Socket, SocketOptionName.ReuseAddress, true); + _listener.Bind(new IPEndPoint(IPAddress.Parse(_config.ListenAddress), _config.Port)); + _listener.Listen(_config.Backlog); + + _logger.LogInformation( + "TLS transport server listening on {Address}:{Port}", + _config.ListenAddress, _config.Port); + + _ = AcceptConnectionsAsync(_cts.Token); + } + + private async Task AcceptConnectionsAsync(CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + try + { + var socket = await _listener!.AcceptAsync(cancellationToken); + _logger.LogDebug("Accepted TLS connection from {RemoteEndPoint}", socket.RemoteEndPoint); + + _ = HandleConnectionAsync(socket, cancellationToken); + } + catch (OperationCanceledException) + { + break; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error accepting TLS connection"); + } + } + } + + private async Task HandleConnectionAsync(Socket socket, CancellationToken cancellationToken) + { + SslStream? sslStream = null; + + try + { + var networkStream = new NetworkStream(socket, ownsSocket: false); + sslStream = new SslStream( + networkStream, + leaveInnerStreamOpen: false, + ValidateClientCertificate); + + var serverCert = _certificateProvider.GetServerCertificate()!; + + var authOptions = new SslServerAuthenticationOptions + { + ServerCertificate = serverCert, + ClientCertificateRequired = _config.RequireClientCertificate, + EnabledSslProtocols = _config.AllowedProtocols, + CertificateRevocationCheckMode = _config.RevocationMode + }; + + await sslStream.AuthenticateAsServerAsync(authOptions, cancellationToken); + + _logger.LogDebug( + "TLS handshake complete: Protocol={Protocol}, Cipher={Cipher}", + sslStream.SslProtocol, + sslStream.CipherAlgorithm); + + var connection = new TlsFrameConnection(socket, sslStream, _codec, _logger); + + // Wait for HELLO frame + var helloFrame = await connection.ReceiveAsync(cancellationToken) + .AsTask() + .WaitAsync(TimeSpan.FromSeconds(_config.HandshakeTimeoutSeconds), cancellationToken); + + if (helloFrame.Type != FrameType.Hello) + { + _logger.LogWarning("Expected HELLO frame, got {Type}", helloFrame.Type); + return; + } + + var hello = _serializer.DeserializeHello(helloFrame.Payload); + + // Log client certificate identity + if (connection.RemoteCertificateSubject != null) + { + _logger.LogInformation( + "Microservice connected via TLS: {ServiceName}/{InstanceId}, Cert={Subject}", + hello.ServiceName, hello.InstanceId, connection.RemoteCertificateSubject); + } + + // Send HELLO response + var helloResponse = new HelloResponse + { + Accepted = true, + HeartbeatIntervalMs = _config.HeartbeatIntervalMs, + MaxPayloadSize = _config.MaxPayloadSize + }; + + var responseFrame = new Frame + { + Type = FrameType.Hello, + CorrelationId = helloFrame.CorrelationId, + Payload = _serializer.SerializeHelloResponse(helloResponse) + }; + await connection.SendAsync(responseFrame, cancellationToken); + + var msConnection = new TlsMicroserviceConnection( + connection, + hello.ServiceName, + hello.InstanceId, + hello.Endpoints, + _serializer, + _logger); + + _connections[connection.ConnectionId] = msConnection; + + _routingState.RegisterConnection(new EndpointConnection + { + ConnectionId = connection.ConnectionId, + ServiceName = hello.ServiceName, + InstanceId = hello.InstanceId, + Transport = "TLS", + State = ConnectionState.Connected, + Endpoints = hello.Endpoints, + Region = hello.Metadata?.GetValueOrDefault("region"), + LastHeartbeat = DateTimeOffset.UtcNow, + CertificateThumbprint = connection.RemoteCertificateThumbprint + }); + + await msConnection.ProcessAsync(cancellationToken); + } + catch (AuthenticationException ex) + { + _logger.LogWarning(ex, "TLS authentication failed from {RemoteEndPoint}", socket.RemoteEndPoint); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error handling TLS connection"); + } + finally + { + if (sslStream != null) + { + var conn = new TlsFrameConnection(socket, sslStream, _codec, _logger); + _connections.TryRemove(conn.ConnectionId, out _); + _routingState.RemoveConnection(conn.ConnectionId); + await sslStream.DisposeAsync(); + } + socket.Dispose(); + } + } + + private bool ValidateClientCertificate( + object sender, + X509Certificate? certificate, + X509Chain? chain, + SslPolicyErrors sslPolicyErrors) + { + if (!_config.RequireClientCertificate) + return true; + + if (certificate == null) + { + _logger.LogWarning("Client did not provide certificate"); + return false; + } + + if (sslPolicyErrors == SslPolicyErrors.None) + return true; + + if (_config.AllowUntrustedRootCertificates && + sslPolicyErrors == SslPolicyErrors.RemoteCertificateChainErrors) + { + _logger.LogWarning("Accepting client certificate with chain errors (dev mode)"); + return true; + } + + _logger.LogWarning( + "Client certificate validation failed: Errors={Errors}, Subject={Subject}", + sslPolicyErrors, + certificate.Subject); + + return false; + } + + public TlsMicroserviceConnection? GetConnection(string connectionId) + { + return _connections.TryGetValue(connectionId, out var conn) ? conn : null; + } + + public async Task StopAsync(CancellationToken cancellationToken) + { + _cts?.Cancel(); + _listener?.Close(); + + foreach (var connection in _connections.Values) + { + await connection.DisconnectAsync(); + } + + _cts?.Dispose(); + } +} + +public sealed class TlsMicroserviceConnection +{ + private readonly TlsFrameConnection _connection; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private readonly ConcurrentDictionary> _pendingRequests = new(); + + public string ServiceName { get; } + public string InstanceId { get; } + public EndpointDescriptor[] Endpoints { get; } + public DateTimeOffset LastActivity { get; private set; } + public string? CertificateThumbprint => _connection.RemoteCertificateThumbprint; + + public TlsMicroserviceConnection( + TlsFrameConnection connection, + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + IPayloadSerializer serializer, + ILogger logger) + { + _connection = connection; + ServiceName = serviceName; + InstanceId = instanceId; + Endpoints = endpoints; + _serializer = serializer; + _logger = logger; + LastActivity = DateTimeOffset.UtcNow; + } + + public async Task ProcessAsync(CancellationToken cancellationToken) + { + await foreach (var frame in _connection.ReceiveAllAsync(cancellationToken)) + { + LastActivity = DateTimeOffset.UtcNow; + + if (frame.Type == FrameType.Response && + _pendingRequests.TryRemove(frame.CorrelationId, out var tcs)) + { + tcs.TrySetResult(frame); + } + } + } + + public async Task SendRequestAsync( + RequestPayload request, + TimeSpan timeout, + CancellationToken cancellationToken) + { + var correlationId = Guid.NewGuid().ToString("N"); + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + _pendingRequests[correlationId] = tcs; + + try + { + var frame = new Frame + { + Type = FrameType.Request, + CorrelationId = correlationId, + Payload = _serializer.SerializeRequest(request) + }; + + await _connection.SendAsync(frame, cancellationToken); + + var responseFrame = await tcs.Task.WaitAsync(timeout, cancellationToken); + return _serializer.DeserializeResponse(responseFrame.Payload); + } + finally + { + _pendingRequests.TryRemove(correlationId, out _); + } + } + + public async Task DisconnectAsync() + { + foreach (var pending in _pendingRequests.Values) + { + pending.TrySetCanceled(); + } + _pendingRequests.Clear(); + await _connection.DisposeAsync(); + } +} +``` + +--- + +## Microservice TLS Client + +```csharp +namespace StellaOps.Router.Transport.Tls; + +/// +/// TLS client for microservices to connect securely to the gateway. +/// +public sealed class TlsTransportClient : ITransportServer, IAsyncDisposable +{ + private readonly TlsClientConfig _config; + private readonly ICertificateProvider _certificateProvider; + private readonly TcpFrameCodec _codec; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + private TlsFrameConnection? _connection; + private CancellationTokenSource? _cts; + private Task? _processingTask; + private int _reconnectAttempts; + + public string TransportType => "TLS"; + public bool IsConnected => _connection?.IsConnected ?? false; + + public event Func>? OnRequest; + public event Func? OnCancel; + + public TlsTransportClient( + IOptions config, + ICertificateProvider certificateProvider, + TcpFrameCodec codec, + IPayloadSerializer serializer, + ILogger logger) + { + _config = config.Value; + _certificateProvider = certificateProvider; + _codec = codec; + _serializer = serializer; + _logger = logger; + } + + public async Task ConnectAsync( + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + CancellationToken cancellationToken) + { + _cts = new CancellationTokenSource(); + await ConnectWithRetryAsync(serviceName, instanceId, endpoints, cancellationToken); + _processingTask = ProcessFramesAsync(serviceName, instanceId, endpoints, _cts.Token); + } + + private async Task ConnectWithRetryAsync( + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + try + { + var socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp); + await socket.ConnectAsync(_config.GatewayHost, _config.GatewayPort, cancellationToken); + + var networkStream = new NetworkStream(socket, ownsSocket: false); + var sslStream = new SslStream( + networkStream, + leaveInnerStreamOpen: false, + ValidateServerCertificate); + + var clientCert = _certificateProvider.GetClientCertificate(); + var clientCerts = clientCert != null + ? new X509CertificateCollection { clientCert } + : new X509CertificateCollection(); + + var authOptions = new SslClientAuthenticationOptions + { + TargetHost = _config.ExpectedServerName ?? _config.GatewayHost, + ClientCertificates = clientCerts, + EnabledSslProtocols = SslProtocols.Tls12 | SslProtocols.Tls13 + }; + + await sslStream.AuthenticateAsClientAsync(authOptions, cancellationToken); + + _logger.LogDebug( + "TLS handshake complete: Protocol={Protocol}, Server={Server}", + sslStream.SslProtocol, + _config.GatewayHost); + + _connection = new TlsFrameConnection(socket, sslStream, _codec, _logger); + + // Send HELLO + var hello = new HelloPayload + { + ServiceName = serviceName, + InstanceId = instanceId, + Endpoints = endpoints, + Metadata = new Dictionary + { + ["region"] = _config.Region ?? "default", + ["version"] = _config.ServiceVersion ?? "1.0.0" + } + }; + + var helloFrame = new Frame + { + Type = FrameType.Hello, + CorrelationId = Guid.NewGuid().ToString("N"), + Payload = _serializer.SerializeHello(hello) + }; + + await _connection.SendAsync(helloFrame, cancellationToken); + + var response = await _connection.ReceiveAsync(cancellationToken); + if (response.Type != FrameType.Hello) + { + throw new ProtocolException($"Expected HELLO response, got {response.Type}"); + } + + _reconnectAttempts = 0; + _logger.LogInformation( + "Connected to gateway via TLS at {Host}:{Port}", + _config.GatewayHost, _config.GatewayPort); + + return; + } + catch (AuthenticationException ex) + { + _logger.LogError(ex, "TLS authentication failed"); + throw; // Don't retry auth failures + } + catch (Exception ex) when (!cancellationToken.IsCancellationRequested) + { + _reconnectAttempts++; + var delay = Math.Min( + _config.InitialReconnectDelayMs * Math.Pow(2, _reconnectAttempts - 1), + _config.MaxReconnectDelayMs); + + _logger.LogWarning(ex, "TLS connection attempt {Attempt} failed, retrying in {Delay}ms", + _reconnectAttempts, delay); + + await Task.Delay((int)delay, cancellationToken); + } + } + } + + private bool ValidateServerCertificate( + object sender, + X509Certificate? certificate, + X509Chain? chain, + SslPolicyErrors sslPolicyErrors) + { + if (_config.SkipServerCertificateValidation) + { + _logger.LogWarning("Skipping server certificate validation (dev mode)"); + return true; + } + + if (sslPolicyErrors == SslPolicyErrors.None) + return true; + + _logger.LogWarning( + "Server certificate validation failed: Errors={Errors}", + sslPolicyErrors); + + return false; + } + + private async Task ProcessFramesAsync( + string serviceName, + string instanceId, + EndpointDescriptor[] endpoints, + CancellationToken cancellationToken) + { + while (!cancellationToken.IsCancellationRequested) + { + try + { + if (_connection == null || !_connection.IsConnected) + { + await ConnectWithRetryAsync(serviceName, instanceId, endpoints, cancellationToken); + } + + await foreach (var frame in _connection!.ReceiveAllAsync(cancellationToken)) + { + switch (frame.Type) + { + case FrameType.Request: + _ = HandleRequestAsync(frame, cancellationToken); + break; + + case FrameType.Cancel: + if (OnCancel != null) + await OnCancel(frame.CorrelationId, cancellationToken); + break; + + case FrameType.Heartbeat: + await HandleHeartbeatAsync(frame); + break; + } + } + } + catch (EndOfStreamException) + { + _logger.LogWarning("TLS connection closed, attempting reconnect"); + _connection = null; + } + catch (OperationCanceledException) + { + break; + } + catch (Exception ex) + { + _logger.LogError(ex, "Error processing TLS frames"); + _connection = null; + } + } + } + + private async Task HandleRequestAsync(Frame frame, CancellationToken cancellationToken) + { + if (_connection == null || OnRequest == null) return; + + try + { + var request = _serializer.DeserializeRequest(frame.Payload); + var response = await OnRequest(request, cancellationToken); + + var responseFrame = new Frame + { + Type = FrameType.Response, + CorrelationId = frame.CorrelationId, + Payload = _serializer.SerializeResponse(response), + Flags = FrameFlags.Final + }; + + await _connection.SendAsync(responseFrame, cancellationToken); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error handling TLS request"); + + var errorResponse = new ResponsePayload + { + StatusCode = 500, + Headers = new Dictionary(), + ErrorMessage = ex.Message, + IsFinalChunk = true + }; + + var errorFrame = new Frame + { + Type = FrameType.Response, + CorrelationId = frame.CorrelationId, + Payload = _serializer.SerializeResponse(errorResponse), + Flags = FrameFlags.Final | FrameFlags.Error + }; + + await _connection.SendAsync(errorFrame, cancellationToken); + } + } + + private async Task HandleHeartbeatAsync(Frame frame) + { + if (_connection == null) return; + + var pongFrame = new Frame + { + Type = FrameType.Heartbeat, + CorrelationId = frame.CorrelationId, + Payload = frame.Payload + }; + + await _connection.SendAsync(pongFrame, CancellationToken.None); + } + + public async Task DisconnectAsync() + { + _cts?.Cancel(); + if (_processingTask != null) + { + try { await _processingTask.WaitAsync(TimeSpan.FromSeconds(5)); } catch { } + } + if (_connection != null) + { + await _connection.DisposeAsync(); + } + _cts?.Dispose(); + } + + public async ValueTask DisposeAsync() => await DisconnectAsync(); +} +``` + +--- + +## Service Registration + +```csharp +namespace StellaOps.Router.Transport.Tls; + +public static class TlsTransportExtensions +{ + public static IServiceCollection AddTlsTransport( + this IServiceCollection services, + IConfiguration configuration) + { + services.Configure(configuration.GetSection("TlsTransport")); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddHostedService(sp => sp.GetRequiredService()); + + return services; + } + + public static IServiceCollection AddTlsMicroserviceTransport( + this IServiceCollection services, + IConfiguration configuration) + { + services.Configure(configuration.GetSection("TlsClient")); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + + return services; + } +} +``` + +--- + +## YAML Configuration + +```yaml +# Gateway TLS configuration +TlsTransport: + ListenAddress: "0.0.0.0" + Port: 9501 + CertificatePath: "/etc/stellaops/certs/gateway.pfx" + CertificatePassword: "${GATEWAY_CERT_PASSWORD}" + RequireClientCertificate: true + ClientCaCertificatePath: "/etc/stellaops/certs/client-ca.crt" + AllowedProtocols: "Tls12, Tls13" + RevocationMode: "Online" + +# Microservice TLS configuration +TlsClient: + GatewayHost: "gateway.internal" + GatewayPort: 9501 + ClientCertificatePath: "/etc/stellaops/certs/service.pfx" + ClientCertificatePassword: "${SERVICE_CERT_PASSWORD}" + ExpectedServerName: "gateway.stellaops.internal" +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Transport.Tls/TlsTransportConfig.cs` +2. `StellaOps.Router.Transport.Tls/ICertificateProvider.cs` +3. `StellaOps.Router.Transport.Tls/CertificateProvider.cs` +4. `StellaOps.Router.Transport.Tls/TlsFrameConnection.cs` +5. `StellaOps.Router.Transport.Tls/TlsTransportServer.cs` +6. `StellaOps.Router.Transport.Tls/TlsTransportClient.cs` +7. `StellaOps.Router.Transport.Tls/TlsTransportExtensions.cs` +8. Certificate validation tests +9. mTLS handshake tests +10. Certificate rotation tests + +--- + +## Next Step + +Proceed to [Step 16: GraphQL Handler Implementation](16-Step.md) to implement the GraphQL route handler plugin. diff --git a/docs/router/16-Step.md b/docs/router/16-Step.md new file mode 100644 index 000000000..831f7eaef --- /dev/null +++ b/docs/router/16-Step.md @@ -0,0 +1,994 @@ +# Step 16: GraphQL Handler Implementation + +**Phase 4: Handler Plugins** +**Estimated Complexity:** High +**Dependencies:** Step 10 (Microservice Handler) + +--- + +## Overview + +The GraphQL handler routes GraphQL queries, mutations, and subscriptions to appropriate microservices based on schema analysis. It supports schema stitching, query splitting, and federated execution across multiple services. + +--- + +## Goals + +1. Route GraphQL operations to appropriate backend services +2. Support schema federation/stitching across microservices +3. Handle batched queries with DataLoader patterns +4. Support subscriptions via WebSocket upgrade +5. Provide introspection proxying and schema caching + +--- + +## Core Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ GraphQL Handler │ +├──────────────────────────────────────────────────────────────────┤ +│ │ +│ HTTP Request │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ │ +│ │ Query Parser │──► Extract operation type & fields │ +│ └───────┬───────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ ┌─────────────────┐ │ +│ │ Query Planner │───►│ Schema Registry │ │ +│ └───────┬───────┘ └─────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ │ +│ │Query Executor │──► Split & dispatch to services │ +│ └───────┬───────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ │ +│ │Result Merger │──► Combine partial results │ +│ └───────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Configuration + +```csharp +namespace StellaOps.Router.Handlers.GraphQL; + +public class GraphQLHandlerConfig +{ + /// Path prefix for GraphQL endpoint. + public string Path { get; set; } = "/graphql"; + + /// Whether to enable introspection queries. + public bool EnableIntrospection { get; set; } = true; + + /// Whether to enable subscriptions. + public bool EnableSubscriptions { get; set; } = true; + + /// Maximum query depth to prevent DOS. + public int MaxQueryDepth { get; set; } = 15; + + /// Maximum query complexity score. + public int MaxQueryComplexity { get; set; } = 1000; + + /// Timeout for query execution. + public TimeSpan ExecutionTimeout { get; set; } = TimeSpan.FromSeconds(30); + + /// Cache duration for schema introspection. + public TimeSpan SchemaCacheDuration { get; set; } = TimeSpan.FromMinutes(5); + + /// Whether to enable query batching. + public bool EnableBatching { get; set; } = true; + + /// Maximum batch size. + public int MaxBatchSize { get; set; } = 10; + + /// Registered GraphQL services and their type ownership. + public Dictionary Services { get; set; } = new(); +} + +public class GraphQLServiceConfig +{ + /// Service name for routing. + public required string ServiceName { get; set; } + + /// Root types this service handles (Query, Mutation, Subscription). + public HashSet RootTypes { get; set; } = new(); + + /// Specific fields this service owns. + public Dictionary> OwnedFields { get; set; } = new(); + + /// Whether this service provides the full schema. + public bool IsSchemaProvider { get; set; } +} +``` + +--- + +## Core Types + +```csharp +namespace StellaOps.Router.Handlers.GraphQL; + +/// +/// Parsed GraphQL request. +/// +public sealed class GraphQLRequest +{ + public required string Query { get; init; } + public string? OperationName { get; init; } + public Dictionary? Variables { get; init; } + public Dictionary? Extensions { get; init; } +} + +/// +/// GraphQL response format. +/// +public sealed class GraphQLResponse +{ + public object? Data { get; set; } + public List? Errors { get; set; } + public Dictionary? Extensions { get; set; } +} + +public sealed class GraphQLError +{ + public required string Message { get; init; } + public List? Locations { get; init; } + public List? Path { get; init; } + public Dictionary? Extensions { get; init; } +} + +public sealed class GraphQLLocation +{ + public int Line { get; init; } + public int Column { get; init; } +} + +/// +/// Represents a planned query execution. +/// +public sealed class QueryPlan +{ + public GraphQLOperationType OperationType { get; init; } + public List Nodes { get; init; } = new(); +} + +public sealed class QueryPlanNode +{ + public string ServiceName { get; init; } = ""; + public string SubQuery { get; init; } = ""; + public List RequiredFields { get; init; } = new(); + public List DependsOn { get; init; } = new(); +} + +public enum GraphQLOperationType +{ + Query, + Mutation, + Subscription +} +``` + +--- + +## GraphQL Handler Implementation + +```csharp +namespace StellaOps.Router.Handlers.GraphQL; + +public sealed class GraphQLHandler : IRouteHandler +{ + public string HandlerType => "GraphQL"; + public int Priority => 100; + + private readonly GraphQLHandlerConfig _config; + private readonly IGraphQLParser _parser; + private readonly IQueryPlanner _planner; + private readonly IQueryExecutor _executor; + private readonly ISchemaRegistry _schemaRegistry; + private readonly ILogger _logger; + + public GraphQLHandler( + IOptions config, + IGraphQLParser parser, + IQueryPlanner planner, + IQueryExecutor executor, + ISchemaRegistry schemaRegistry, + ILogger logger) + { + _config = config.Value; + _parser = parser; + _planner = planner; + _executor = executor; + _schemaRegistry = schemaRegistry; + _logger = logger; + } + + public bool CanHandle(RouteMatchResult match) + { + return match.Handler == "GraphQL" || + match.Route.Path.StartsWith(_config.Path, StringComparison.OrdinalIgnoreCase); + } + + public async Task HandleAsync( + HttpContext context, + RouteMatchResult match, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + try + { + // Handle WebSocket upgrade for subscriptions + if (context.WebSockets.IsWebSocketRequest && _config.EnableSubscriptions) + { + return await HandleSubscriptionAsync(context, claims, cancellationToken); + } + + // Parse GraphQL request + var request = await ParseRequestAsync(context, cancellationToken); + + // Validate query + var validationResult = _parser.Validate( + request.Query, + _config.MaxQueryDepth, + _config.MaxQueryComplexity); + + if (!validationResult.IsValid) + { + return CreateErrorResponse(validationResult.Errors); + } + + // Parse and analyze query + var operation = _parser.Parse(request.Query, request.OperationName); + + // Check if introspection + if (operation.IsIntrospection) + { + if (!_config.EnableIntrospection) + { + return CreateErrorResponse(new[] { "Introspection is disabled" }); + } + + return await HandleIntrospectionAsync(request, cancellationToken); + } + + // Plan query execution + var plan = _planner.CreatePlan(operation, _config.Services); + + _logger.LogDebug( + "Query plan created: {NodeCount} nodes for {OperationType}", + plan.Nodes.Count, plan.OperationType); + + // Execute plan + var result = await _executor.ExecuteAsync( + plan, + request, + claims, + _config.ExecutionTimeout, + cancellationToken); + + return CreateSuccessResponse(result); + } + catch (GraphQLParseException ex) + { + return CreateErrorResponse(new[] { ex.Message }); + } + catch (Exception ex) + { + _logger.LogError(ex, "GraphQL execution error"); + return CreateErrorResponse(new[] { "Internal server error" }, 500); + } + } + + private async Task ParseRequestAsync( + HttpContext context, + CancellationToken cancellationToken) + { + if (context.Request.Method == "GET") + { + return new GraphQLRequest + { + Query = context.Request.Query["query"].ToString(), + OperationName = context.Request.Query["operationName"].ToString(), + Variables = ParseVariables(context.Request.Query["variables"].ToString()) + }; + } + + var body = await JsonSerializer.DeserializeAsync( + context.Request.Body, + cancellationToken: cancellationToken); + + return body ?? throw new GraphQLParseException("Invalid request body"); + } + + private Dictionary? ParseVariables(string? json) + { + if (string.IsNullOrEmpty(json)) + return null; + + return JsonSerializer.Deserialize>(json); + } + + private async Task HandleIntrospectionAsync( + GraphQLRequest request, + CancellationToken cancellationToken) + { + var schema = await _schemaRegistry.GetMergedSchemaAsync(cancellationToken); + var result = await _executor.ExecuteIntrospectionAsync(schema, request, cancellationToken); + return CreateSuccessResponse(result); + } + + private async Task HandleSubscriptionAsync( + HttpContext context, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + var webSocket = await context.WebSockets.AcceptWebSocketAsync("graphql-transport-ws"); + await _executor.HandleSubscriptionAsync(webSocket, claims, cancellationToken); + + return new RouteHandlerResult + { + Handled = true, + StatusCode = 101 // Switching Protocols + }; + } + + private RouteHandlerResult CreateSuccessResponse(GraphQLResponse response) + { + return new RouteHandlerResult + { + Handled = true, + StatusCode = 200, + ContentType = "application/json", + Body = JsonSerializer.SerializeToUtf8Bytes(response) + }; + } + + private RouteHandlerResult CreateErrorResponse(IEnumerable messages, int statusCode = 200) + { + var response = new GraphQLResponse + { + Errors = messages.Select(m => new GraphQLError { Message = m }).ToList() + }; + + return new RouteHandlerResult + { + Handled = true, + StatusCode = statusCode, + ContentType = "application/json", + Body = JsonSerializer.SerializeToUtf8Bytes(response) + }; + } +} +``` + +--- + +## Query Planner + +```csharp +namespace StellaOps.Router.Handlers.GraphQL; + +public interface IQueryPlanner +{ + QueryPlan CreatePlan( + ParsedOperation operation, + Dictionary services); +} + +public sealed class QueryPlanner : IQueryPlanner +{ + private readonly ILogger _logger; + + public QueryPlanner(ILogger logger) + { + _logger = logger; + } + + public QueryPlan CreatePlan( + ParsedOperation operation, + Dictionary services) + { + var plan = new QueryPlan + { + OperationType = operation.OperationType + }; + + // Group fields by owning service + var fieldsByService = new Dictionary>(); + + foreach (var field in operation.SelectionSet) + { + var service = FindOwningService(operation.OperationType, field.Name, services); + + if (!fieldsByService.ContainsKey(service)) + { + fieldsByService[service] = new List(); + } + fieldsByService[service].Add(field); + } + + // Create execution nodes + foreach (var (serviceName, fields) in fieldsByService) + { + var subQuery = BuildSubQuery(operation, fields); + + plan.Nodes.Add(new QueryPlanNode + { + ServiceName = serviceName, + SubQuery = subQuery, + RequiredFields = fields.Select(f => f.Name).ToList() + }); + } + + // For mutations, nodes must execute sequentially + if (operation.OperationType == GraphQLOperationType.Mutation) + { + for (int i = 1; i < plan.Nodes.Count; i++) + { + plan.Nodes[i].DependsOn.Add(plan.Nodes[i - 1]); + } + } + + return plan; + } + + private string FindOwningService( + GraphQLOperationType opType, + string fieldName, + Dictionary services) + { + var rootType = opType switch + { + GraphQLOperationType.Query => "Query", + GraphQLOperationType.Mutation => "Mutation", + GraphQLOperationType.Subscription => "Subscription", + _ => "Query" + }; + + foreach (var (name, config) in services) + { + if (config.OwnedFields.TryGetValue(rootType, out var fields) && + fields.Contains(fieldName)) + { + return name; + } + + if (config.RootTypes.Contains(rootType)) + { + return name; + } + } + + throw new GraphQLExecutionException($"No service found for field: {rootType}.{fieldName}"); + } + + private string BuildSubQuery(ParsedOperation operation, List fields) + { + var sb = new StringBuilder(); + + sb.Append(operation.OperationType.ToString().ToLower()); + + if (!string.IsNullOrEmpty(operation.Name)) + { + sb.Append(' ').Append(operation.Name); + } + + if (operation.Variables.Count > 0) + { + sb.Append('('); + sb.Append(string.Join(", ", operation.Variables.Select(v => $"${v.Name}: {v.Type}"))); + sb.Append(')'); + } + + sb.Append(" { "); + foreach (var field in fields) + { + AppendField(sb, field); + } + sb.Append(" }"); + + return sb.ToString(); + } + + private void AppendField(StringBuilder sb, FieldSelection field) + { + if (!string.IsNullOrEmpty(field.Alias)) + { + sb.Append(field.Alias).Append(": "); + } + + sb.Append(field.Name); + + if (field.Arguments.Count > 0) + { + sb.Append('('); + sb.Append(string.Join(", ", field.Arguments.Select(a => $"{a.Key}: {FormatValue(a.Value)}"))); + sb.Append(')'); + } + + if (field.SelectionSet.Count > 0) + { + sb.Append(" { "); + foreach (var subField in field.SelectionSet) + { + AppendField(sb, subField); + sb.Append(' '); + } + sb.Append('}'); + } + + sb.Append(' '); + } + + private string FormatValue(object? value) + { + return value switch + { + null => "null", + string s => $"\"{s}\"", + bool b => b.ToString().ToLower(), + _ => value.ToString() ?? "null" + }; + } +} +``` + +--- + +## Query Executor + +```csharp +namespace StellaOps.Router.Handlers.GraphQL; + +public interface IQueryExecutor +{ + Task ExecuteAsync( + QueryPlan plan, + GraphQLRequest request, + IReadOnlyDictionary claims, + TimeSpan timeout, + CancellationToken cancellationToken); + + Task ExecuteIntrospectionAsync( + GraphQLSchema schema, + GraphQLRequest request, + CancellationToken cancellationToken); + + Task HandleSubscriptionAsync( + WebSocket webSocket, + IReadOnlyDictionary claims, + CancellationToken cancellationToken); +} + +public sealed class QueryExecutor : IQueryExecutor +{ + private readonly ITransportClientFactory _transportFactory; + private readonly IPayloadSerializer _serializer; + private readonly ILogger _logger; + + public QueryExecutor( + ITransportClientFactory transportFactory, + IPayloadSerializer serializer, + ILogger logger) + { + _transportFactory = transportFactory; + _serializer = serializer; + _logger = logger; + } + + public async Task ExecuteAsync( + QueryPlan plan, + GraphQLRequest request, + IReadOnlyDictionary claims, + TimeSpan timeout, + CancellationToken cancellationToken) + { + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(timeout); + + var results = new ConcurrentDictionary(); + var errors = new ConcurrentBag(); + + // Execute nodes respecting dependencies + await ExecuteNodesAsync(plan.Nodes, request, claims, results, errors, cts.Token); + + // Merge results + var data = MergeResults(plan.Nodes, results); + + return new GraphQLResponse + { + Data = data, + Errors = errors.Any() ? errors.ToList() : null + }; + } + + private async Task ExecuteNodesAsync( + List nodes, + GraphQLRequest request, + IReadOnlyDictionary claims, + ConcurrentDictionary results, + ConcurrentBag errors, + CancellationToken cancellationToken) + { + // Group nodes by dependency level + var executed = new HashSet(); + + while (executed.Count < nodes.Count) + { + var ready = nodes + .Where(n => !executed.Contains(n)) + .Where(n => n.DependsOn.All(d => executed.Contains(d))) + .ToList(); + + if (ready.Count == 0) + { + throw new GraphQLExecutionException("Circular dependency in query plan"); + } + + // Execute ready nodes in parallel + await Parallel.ForEachAsync(ready, cancellationToken, async (node, ct) => + { + try + { + var result = await ExecuteNodeAsync(node, request, claims, ct); + MergeNodeResult(results, result); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error executing node for service {Service}", node.ServiceName); + errors.Add(new GraphQLError + { + Message = $"Error from {node.ServiceName}: {ex.Message}", + Path = node.RequiredFields.Cast().ToList() + }); + } + }); + + foreach (var node in ready) + { + executed.Add(node); + } + } + } + + private async Task ExecuteNodeAsync( + QueryPlanNode node, + GraphQLRequest request, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + var client = _transportFactory.GetClient(node.ServiceName); + + var payload = new RequestPayload + { + Method = "POST", + Path = "/graphql", + Headers = new Dictionary + { + ["Content-Type"] = "application/json" + }, + Claims = claims.ToDictionary(x => x.Key, x => x.Value), + Body = JsonSerializer.SerializeToUtf8Bytes(new + { + query = node.SubQuery, + variables = request.Variables, + operationName = request.OperationName + }) + }; + + var response = await client.SendRequestAsync( + node.ServiceName, + payload, + TimeSpan.FromSeconds(30), + cancellationToken); + + if (response.Body == null) + { + throw new GraphQLExecutionException($"Empty response from {node.ServiceName}"); + } + + return JsonSerializer.Deserialize(response.Body) + ?? throw new GraphQLExecutionException($"Invalid response from {node.ServiceName}"); + } + + private void MergeNodeResult(ConcurrentDictionary results, GraphQLResponse response) + { + if (response.Data is JsonElement element && element.ValueKind == JsonValueKind.Object) + { + foreach (var property in element.EnumerateObject()) + { + results[property.Name] = property.Value.Clone(); + } + } + } + + private object? MergeResults(List nodes, ConcurrentDictionary results) + { + return results.ToDictionary(x => x.Key, x => x.Value); + } + + public Task ExecuteIntrospectionAsync( + GraphQLSchema schema, + GraphQLRequest request, + CancellationToken cancellationToken) + { + // Execute introspection against merged schema + var result = schema.ExecuteIntrospection(request); + return Task.FromResult(result); + } + + public async Task HandleSubscriptionAsync( + WebSocket webSocket, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + var buffer = new byte[4096]; + + try + { + while (webSocket.State == WebSocketState.Open && !cancellationToken.IsCancellationRequested) + { + var result = await webSocket.ReceiveAsync(buffer, cancellationToken); + + if (result.MessageType == WebSocketMessageType.Close) + { + await webSocket.CloseAsync( + WebSocketCloseStatus.NormalClosure, + "Closed by client", + cancellationToken); + break; + } + + var message = Encoding.UTF8.GetString(buffer, 0, result.Count); + await HandleSubscriptionMessageAsync(webSocket, message, claims, cancellationToken); + } + } + catch (WebSocketException ex) + { + _logger.LogWarning(ex, "WebSocket error in subscription"); + } + } + + private async Task HandleSubscriptionMessageAsync( + WebSocket webSocket, + string message, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + // Implement graphql-transport-ws protocol + var msg = JsonSerializer.Deserialize(message); + + switch (msg?.Type) + { + case "connection_init": + await SendAsync(webSocket, new { type = "connection_ack" }, cancellationToken); + break; + + case "subscribe": + // Start subscription + break; + + case "complete": + // End subscription + break; + } + } + + private async Task SendAsync(WebSocket webSocket, object message, CancellationToken cancellationToken) + { + var bytes = JsonSerializer.SerializeToUtf8Bytes(message); + await webSocket.SendAsync(bytes, WebSocketMessageType.Text, true, cancellationToken); + } +} + +internal class SubscriptionMessage +{ + public string? Type { get; set; } + public string? Id { get; set; } + public GraphQLRequest? Payload { get; set; } +} +``` + +--- + +## Schema Registry + +```csharp +namespace StellaOps.Router.Handlers.GraphQL; + +public interface ISchemaRegistry +{ + Task GetMergedSchemaAsync(CancellationToken cancellationToken); + void InvalidateCache(); +} + +public sealed class SchemaRegistry : ISchemaRegistry +{ + private readonly GraphQLHandlerConfig _config; + private readonly ITransportClientFactory _transportFactory; + private readonly ILogger _logger; + private GraphQLSchema? _cachedSchema; + private DateTimeOffset _cacheExpiry; + private readonly SemaphoreSlim _lock = new(1, 1); + + public SchemaRegistry( + IOptions config, + ITransportClientFactory transportFactory, + ILogger logger) + { + _config = config.Value; + _transportFactory = transportFactory; + _logger = logger; + } + + public async Task GetMergedSchemaAsync(CancellationToken cancellationToken) + { + if (_cachedSchema != null && DateTimeOffset.UtcNow < _cacheExpiry) + { + return _cachedSchema; + } + + await _lock.WaitAsync(cancellationToken); + try + { + if (_cachedSchema != null && DateTimeOffset.UtcNow < _cacheExpiry) + { + return _cachedSchema; + } + + var schemas = new List(); + + foreach (var (name, config) in _config.Services) + { + if (config.IsSchemaProvider) + { + var schema = await FetchSchemaAsync(config.ServiceName, cancellationToken); + schemas.Add(schema); + } + } + + _cachedSchema = MergeSchemas(schemas); + _cacheExpiry = DateTimeOffset.UtcNow.Add(_config.SchemaCacheDuration); + + _logger.LogInformation("Schema cache refreshed, expires at {Expiry}", _cacheExpiry); + + return _cachedSchema; + } + finally + { + _lock.Release(); + } + } + + private async Task FetchSchemaAsync(string serviceName, CancellationToken cancellationToken) + { + var client = _transportFactory.GetClient(serviceName); + + var introspectionQuery = @" + query IntrospectionQuery { + __schema { + types { ...FullType } + queryType { name } + mutationType { name } + subscriptionType { name } + } + } + fragment FullType on __Type { + kind name description + fields(includeDeprecated: true) { + name description + args { ...InputValue } + type { ...TypeRef } + isDeprecated deprecationReason + } + } + fragment InputValue on __InputValue { name description type { ...TypeRef } } + fragment TypeRef on __Type { + kind name + ofType { kind name ofType { kind name ofType { kind name } } } + }"; + + var payload = new RequestPayload + { + Method = "POST", + Path = "/graphql", + Headers = new Dictionary { ["Content-Type"] = "application/json" }, + Claims = new Dictionary(), + Body = JsonSerializer.SerializeToUtf8Bytes(new { query = introspectionQuery }) + }; + + var response = await client.SendRequestAsync( + serviceName, + payload, + TimeSpan.FromSeconds(30), + cancellationToken); + + return Encoding.UTF8.GetString(response.Body ?? Array.Empty()); + } + + private GraphQLSchema MergeSchemas(List schemas) + { + // Merge multiple introspection results into unified schema + return new GraphQLSchema(schemas); + } + + public void InvalidateCache() + { + _cachedSchema = null; + _cacheExpiry = DateTimeOffset.MinValue; + } +} +``` + +--- + +## YAML Configuration + +```yaml +GraphQL: + Path: "/graphql" + EnableIntrospection: true + EnableSubscriptions: true + MaxQueryDepth: 15 + MaxQueryComplexity: 1000 + ExecutionTimeout: "00:00:30" + SchemaCacheDuration: "00:05:00" + EnableBatching: true + MaxBatchSize: 10 + Services: + users: + ServiceName: "user-service" + RootTypes: + - Query + - Mutation + OwnedFields: + Query: + - user + - users + - me + Mutation: + - createUser + - updateUser + IsSchemaProvider: true + billing: + ServiceName: "billing-service" + OwnedFields: + Query: + - invoices + - subscription + Mutation: + - createInvoice + IsSchemaProvider: true +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Handlers.GraphQL/GraphQLHandler.cs` +2. `StellaOps.Router.Handlers.GraphQL/GraphQLHandlerConfig.cs` +3. `StellaOps.Router.Handlers.GraphQL/IGraphQLParser.cs` +4. `StellaOps.Router.Handlers.GraphQL/IQueryPlanner.cs` +5. `StellaOps.Router.Handlers.GraphQL/QueryPlanner.cs` +6. `StellaOps.Router.Handlers.GraphQL/IQueryExecutor.cs` +7. `StellaOps.Router.Handlers.GraphQL/QueryExecutor.cs` +8. `StellaOps.Router.Handlers.GraphQL/ISchemaRegistry.cs` +9. `StellaOps.Router.Handlers.GraphQL/SchemaRegistry.cs` +10. Unit tests for query planning +11. Integration tests for federated execution +12. Subscription handling tests + +--- + +## Next Step + +Proceed to [Step 17: S3/Storage Handler Implementation](17-Step.md) to implement the storage route handler. diff --git a/docs/router/17-Step.md b/docs/router/17-Step.md new file mode 100644 index 000000000..b5f6ff1bd --- /dev/null +++ b/docs/router/17-Step.md @@ -0,0 +1,903 @@ +# Step 17: S3/Storage Handler Implementation + +**Phase 4: Handler Plugins** +**Estimated Complexity:** Medium +**Dependencies:** Step 10 (Microservice Handler) + +--- + +## Overview + +The S3/Storage handler routes file operations to object storage backends (S3, MinIO, Azure Blob, GCS). It handles presigned URL generation, multipart uploads, streaming downloads, and integrates with claim-based access control. + +--- + +## Goals + +1. Route file operations to appropriate storage backends +2. Generate presigned URLs for direct client uploads/downloads +3. Support multipart uploads for large files +4. Stream files without buffering in gateway +5. Enforce claim-based access control on storage operations + +--- + +## Core Architecture + +``` +┌────────────────────────────────────────────────────────────────┐ +│ Storage Handler │ +├────────────────────────────────────────────────────────────────┤ +│ │ +│ HTTP Request │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ ┌─────────────────────┐ │ +│ │ Path Resolver │───►│ Bucket/Key Mapping │ │ +│ └───────┬───────┘ └─────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ ┌─────────────────────┐ │ +│ │Access Control │───►│ Claim-Based Policy │ │ +│ └───────┬───────┘ └─────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────────────┐ │ +│ │ Storage Backend │ │ +│ │ ┌─────┐ ┌───────┐ ┌──────┐ ┌─────┐ │ │ +│ │ │ S3 │ │ MinIO │ │Azure │ │ GCS │ │ │ +│ │ └─────┘ └───────┘ └──────┘ └─────┘ │ │ +│ └───────────────────────────────────────────┘ │ +│ │ +└────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Configuration + +```csharp +namespace StellaOps.Router.Handlers.Storage; + +public class StorageHandlerConfig +{ + /// Path prefix for storage routes. + public string PathPrefix { get; set; } = "/files"; + + /// Default storage backend. + public string DefaultBackend { get; set; } = "s3"; + + /// Maximum upload size (bytes). + public long MaxUploadSize { get; set; } = 5L * 1024 * 1024 * 1024; // 5GB + + /// Multipart threshold (bytes). + public long MultipartThreshold { get; set; } = 100 * 1024 * 1024; // 100MB + + /// Presigned URL expiration. + public TimeSpan PresignedUrlExpiration { get; set; } = TimeSpan.FromHours(1); + + /// Whether to use presigned URLs for uploads. + public bool UsePresignedUploads { get; set; } = true; + + /// Whether to use presigned URLs for downloads. + public bool UsePresignedDownloads { get; set; } = true; + + /// Storage backends configuration. + public Dictionary Backends { get; set; } = new(); + + /// Bucket mappings (path pattern to bucket). + public List BucketMappings { get; set; } = new(); +} + +public class StorageBackendConfig +{ + public string Type { get; set; } = "S3"; // S3, Azure, GCS + public string Endpoint { get; set; } = ""; + public string Region { get; set; } = "us-east-1"; + public string AccessKey { get; set; } = ""; + public string SecretKey { get; set; } = ""; + public bool UsePathStyle { get; set; } = false; + public bool UseSsl { get; set; } = true; +} + +public class BucketMapping +{ + public string PathPattern { get; set; } = ""; + public string Bucket { get; set; } = ""; + public string? KeyPrefix { get; set; } + public string Backend { get; set; } = "default"; + public StorageAccessPolicy Policy { get; set; } = new(); +} + +public class StorageAccessPolicy +{ + public bool RequireAuthentication { get; set; } = true; + public List AllowedClaims { get; set; } = new(); + public string? OwnerClaimPath { get; set; } + public bool EnforceOwnership { get; set; } = false; +} +``` + +--- + +## Storage Handler Implementation + +```csharp +namespace StellaOps.Router.Handlers.Storage; + +public sealed class StorageHandler : IRouteHandler +{ + public string HandlerType => "Storage"; + public int Priority => 90; + + private readonly StorageHandlerConfig _config; + private readonly IStorageBackendFactory _backendFactory; + private readonly IAccessControlEvaluator _accessControl; + private readonly ILogger _logger; + + public StorageHandler( + IOptions config, + IStorageBackendFactory backendFactory, + IAccessControlEvaluator accessControl, + ILogger logger) + { + _config = config.Value; + _backendFactory = backendFactory; + _accessControl = accessControl; + _logger = logger; + } + + public bool CanHandle(RouteMatchResult match) + { + return match.Handler == "Storage" || + match.Route.Path.StartsWith(_config.PathPrefix, StringComparison.OrdinalIgnoreCase); + } + + public async Task HandleAsync( + HttpContext context, + RouteMatchResult match, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + try + { + // Resolve storage location + var location = ResolveLocation(context.Request.Path, context.Request.Query); + + // Check access + var accessResult = _accessControl.Evaluate(location, claims, context.Request.Method); + if (!accessResult.Allowed) + { + return new RouteHandlerResult + { + Handled = true, + StatusCode = 403, + Body = Encoding.UTF8.GetBytes(accessResult.Reason ?? "Access denied") + }; + } + + // Get backend + var backend = _backendFactory.GetBackend(location.Backend); + + return context.Request.Method.ToUpper() switch + { + "GET" => await HandleGetAsync(context, backend, location, cancellationToken), + "HEAD" => await HandleHeadAsync(context, backend, location, cancellationToken), + "PUT" => await HandlePutAsync(context, backend, location, claims, cancellationToken), + "POST" => await HandlePostAsync(context, backend, location, claims, cancellationToken), + "DELETE" => await HandleDeleteAsync(context, backend, location, cancellationToken), + _ => new RouteHandlerResult { Handled = true, StatusCode = 405 } + }; + } + catch (StorageNotFoundException) + { + return new RouteHandlerResult { Handled = true, StatusCode = 404 }; + } + catch (Exception ex) + { + _logger.LogError(ex, "Storage operation error"); + return new RouteHandlerResult + { + Handled = true, + StatusCode = 500, + Body = Encoding.UTF8.GetBytes("Storage operation failed") + }; + } + } + + private StorageLocation ResolveLocation(PathString path, IQueryCollection query) + { + var relativePath = path.Value?.Substring(_config.PathPrefix.Length).TrimStart('/') ?? ""; + + foreach (var mapping in _config.BucketMappings) + { + if (IsMatch(relativePath, mapping.PathPattern)) + { + var key = ExtractKey(relativePath, mapping); + return new StorageLocation + { + Backend = mapping.Backend, + Bucket = mapping.Bucket, + Key = key, + Policy = mapping.Policy + }; + } + } + + // Default: first segment is bucket, rest is key + var segments = relativePath.Split('/', 2); + return new StorageLocation + { + Backend = _config.DefaultBackend, + Bucket = segments[0], + Key = segments.Length > 1 ? segments[1] : "" + }; + } + + private bool IsMatch(string path, string pattern) + { + var regex = new Regex("^" + Regex.Escape(pattern).Replace("\\*", ".*") + "$"); + return regex.IsMatch(path); + } + + private string ExtractKey(string path, BucketMapping mapping) + { + var key = path; + if (!string.IsNullOrEmpty(mapping.KeyPrefix)) + { + key = mapping.KeyPrefix.TrimEnd('/') + "/" + key; + } + return key; + } + + private async Task HandleGetAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + CancellationToken cancellationToken) + { + // Check for presigned download + if (_config.UsePresignedDownloads && !IsRangeRequest(context.Request)) + { + var presignedUrl = await backend.GetPresignedDownloadUrlAsync( + location.Bucket, + location.Key, + _config.PresignedUrlExpiration, + cancellationToken); + + return new RouteHandlerResult + { + Handled = true, + StatusCode = 307, // Temporary Redirect + Headers = new Dictionary + { + ["Location"] = presignedUrl, + ["Cache-Control"] = "no-store" + } + }; + } + + // Stream directly + var metadata = await backend.GetObjectMetadataAsync(location.Bucket, location.Key, cancellationToken); + var stream = await backend.GetObjectStreamAsync(location.Bucket, location.Key, cancellationToken); + + context.Response.StatusCode = 200; + context.Response.ContentType = metadata.ContentType; + context.Response.ContentLength = metadata.ContentLength; + + if (!string.IsNullOrEmpty(metadata.ETag)) + { + context.Response.Headers["ETag"] = metadata.ETag; + } + + await stream.CopyToAsync(context.Response.Body, cancellationToken); + + return new RouteHandlerResult { Handled = true, StatusCode = 200 }; + } + + private bool IsRangeRequest(HttpRequest request) + { + return request.Headers.ContainsKey("Range"); + } + + private async Task HandleHeadAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + CancellationToken cancellationToken) + { + var metadata = await backend.GetObjectMetadataAsync(location.Bucket, location.Key, cancellationToken); + + return new RouteHandlerResult + { + Handled = true, + StatusCode = 200, + Headers = new Dictionary + { + ["Content-Type"] = metadata.ContentType, + ["Content-Length"] = metadata.ContentLength.ToString(), + ["ETag"] = metadata.ETag ?? "", + ["Last-Modified"] = metadata.LastModified.ToString("R") + } + }; + } + + private async Task HandlePutAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + var contentLength = context.Request.ContentLength ?? 0; + + // Validate size + if (contentLength > _config.MaxUploadSize) + { + return new RouteHandlerResult + { + Handled = true, + StatusCode = 413, + Body = Encoding.UTF8.GetBytes($"File too large. Max size: {_config.MaxUploadSize}") + }; + } + + // Use presigned upload for large files + if (_config.UsePresignedUploads && contentLength > _config.MultipartThreshold) + { + var uploadInfo = await backend.InitiateMultipartUploadAsync( + location.Bucket, + location.Key, + context.Request.ContentType ?? "application/octet-stream", + cancellationToken); + + return new RouteHandlerResult + { + Handled = true, + StatusCode = 200, + ContentType = "application/json", + Body = JsonSerializer.SerializeToUtf8Bytes(new + { + uploadId = uploadInfo.UploadId, + parts = uploadInfo.PresignedPartUrls + }) + }; + } + + // Direct upload + var contentType = context.Request.ContentType ?? "application/octet-stream"; + var metadata = new Dictionary(); + + // Add owner metadata if enforced + if (location.Policy?.EnforceOwnership == true && location.Policy.OwnerClaimPath != null) + { + if (claims.TryGetValue(location.Policy.OwnerClaimPath, out var owner)) + { + metadata["x-owner"] = owner; + } + } + + await backend.PutObjectAsync( + location.Bucket, + location.Key, + context.Request.Body, + contentLength, + contentType, + metadata, + cancellationToken); + + return new RouteHandlerResult + { + Handled = true, + StatusCode = 201, + Headers = new Dictionary + { + ["Location"] = $"{_config.PathPrefix}/{location.Bucket}/{location.Key}" + } + }; + } + + private async Task HandlePostAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + var action = context.Request.Query["action"].ToString(); + + return action switch + { + "presign" => await HandlePresignRequestAsync(context, backend, location, cancellationToken), + "complete" => await HandleCompleteMultipartAsync(context, backend, location, cancellationToken), + "abort" => await HandleAbortMultipartAsync(context, backend, location, cancellationToken), + _ => await HandlePutAsync(context, backend, location, claims, cancellationToken) + }; + } + + private async Task HandlePresignRequestAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + CancellationToken cancellationToken) + { + var method = context.Request.Query["method"].ToString().ToUpper(); + var expiration = _config.PresignedUrlExpiration; + + string presignedUrl; + if (method == "PUT") + { + var contentType = context.Request.Query["contentType"].ToString(); + presignedUrl = await backend.GetPresignedUploadUrlAsync( + location.Bucket, + location.Key, + contentType, + expiration, + cancellationToken); + } + else + { + presignedUrl = await backend.GetPresignedDownloadUrlAsync( + location.Bucket, + location.Key, + expiration, + cancellationToken); + } + + return new RouteHandlerResult + { + Handled = true, + StatusCode = 200, + ContentType = "application/json", + Body = JsonSerializer.SerializeToUtf8Bytes(new + { + url = presignedUrl, + expiresAt = DateTimeOffset.UtcNow.Add(expiration) + }) + }; + } + + private async Task HandleCompleteMultipartAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + CancellationToken cancellationToken) + { + var body = await JsonSerializer.DeserializeAsync( + context.Request.Body, + cancellationToken: cancellationToken); + + if (body == null) + { + return new RouteHandlerResult { Handled = true, StatusCode = 400 }; + } + + await backend.CompleteMultipartUploadAsync( + location.Bucket, + location.Key, + body.UploadId, + body.Parts, + cancellationToken); + + return new RouteHandlerResult { Handled = true, StatusCode = 200 }; + } + + private async Task HandleAbortMultipartAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + CancellationToken cancellationToken) + { + var uploadId = context.Request.Query["uploadId"].ToString(); + + await backend.AbortMultipartUploadAsync( + location.Bucket, + location.Key, + uploadId, + cancellationToken); + + return new RouteHandlerResult { Handled = true, StatusCode = 204 }; + } + + private async Task HandleDeleteAsync( + HttpContext context, + IStorageBackend backend, + StorageLocation location, + CancellationToken cancellationToken) + { + await backend.DeleteObjectAsync(location.Bucket, location.Key, cancellationToken); + return new RouteHandlerResult { Handled = true, StatusCode = 204 }; + } +} + +internal class CompleteMultipartRequest +{ + public string UploadId { get; set; } = ""; + public List Parts { get; set; } = new(); +} + +internal class StorageLocation +{ + public string Backend { get; set; } = ""; + public string Bucket { get; set; } = ""; + public string Key { get; set; } = ""; + public StorageAccessPolicy? Policy { get; set; } +} +``` + +--- + +## Storage Backend Interface + +```csharp +namespace StellaOps.Router.Handlers.Storage; + +public interface IStorageBackend +{ + Task GetObjectMetadataAsync( + string bucket, string key, CancellationToken cancellationToken); + + Task GetObjectStreamAsync( + string bucket, string key, CancellationToken cancellationToken); + + Task PutObjectAsync( + string bucket, string key, Stream content, long contentLength, + string contentType, Dictionary? metadata, + CancellationToken cancellationToken); + + Task DeleteObjectAsync( + string bucket, string key, CancellationToken cancellationToken); + + Task GetPresignedDownloadUrlAsync( + string bucket, string key, TimeSpan expiration, + CancellationToken cancellationToken); + + Task GetPresignedUploadUrlAsync( + string bucket, string key, string contentType, TimeSpan expiration, + CancellationToken cancellationToken); + + Task InitiateMultipartUploadAsync( + string bucket, string key, string contentType, + CancellationToken cancellationToken); + + Task CompleteMultipartUploadAsync( + string bucket, string key, string uploadId, List parts, + CancellationToken cancellationToken); + + Task AbortMultipartUploadAsync( + string bucket, string key, string uploadId, + CancellationToken cancellationToken); +} + +public class ObjectMetadata +{ + public string ContentType { get; set; } = "application/octet-stream"; + public long ContentLength { get; set; } + public string? ETag { get; set; } + public DateTimeOffset LastModified { get; set; } + public Dictionary CustomMetadata { get; set; } = new(); +} + +public class MultipartUploadInfo +{ + public string UploadId { get; set; } = ""; + public List PresignedPartUrls { get; set; } = new(); +} + +public class PresignedPartUrl +{ + public int PartNumber { get; set; } + public string Url { get; set; } = ""; +} + +public class UploadPart +{ + public int PartNumber { get; set; } + public string ETag { get; set; } = ""; +} +``` + +--- + +## S3 Backend Implementation + +```csharp +namespace StellaOps.Router.Handlers.Storage; + +public sealed class S3StorageBackend : IStorageBackend +{ + private readonly IAmazonS3 _client; + private readonly ILogger _logger; + + public S3StorageBackend(IAmazonS3 client, ILogger logger) + { + _client = client; + _logger = logger; + } + + public async Task GetObjectMetadataAsync( + string bucket, string key, CancellationToken cancellationToken) + { + var response = await _client.GetObjectMetadataAsync(bucket, key, cancellationToken); + + return new ObjectMetadata + { + ContentType = response.Headers.ContentType, + ContentLength = response.ContentLength, + ETag = response.ETag, + LastModified = response.LastModified, + CustomMetadata = response.Metadata.Keys + .ToDictionary(k => k, k => response.Metadata[k]) + }; + } + + public async Task GetObjectStreamAsync( + string bucket, string key, CancellationToken cancellationToken) + { + var response = await _client.GetObjectAsync(bucket, key, cancellationToken); + return response.ResponseStream; + } + + public async Task PutObjectAsync( + string bucket, string key, Stream content, long contentLength, + string contentType, Dictionary? metadata, + CancellationToken cancellationToken) + { + var request = new PutObjectRequest + { + BucketName = bucket, + Key = key, + InputStream = content, + ContentType = contentType + }; + + if (metadata != null) + { + foreach (var (k, v) in metadata) + { + request.Metadata.Add(k, v); + } + } + + await _client.PutObjectAsync(request, cancellationToken); + } + + public async Task DeleteObjectAsync( + string bucket, string key, CancellationToken cancellationToken) + { + await _client.DeleteObjectAsync(bucket, key, cancellationToken); + } + + public Task GetPresignedDownloadUrlAsync( + string bucket, string key, TimeSpan expiration, + CancellationToken cancellationToken) + { + var request = new GetPreSignedUrlRequest + { + BucketName = bucket, + Key = key, + Expires = DateTime.UtcNow.Add(expiration), + Verb = HttpVerb.GET + }; + + var url = _client.GetPreSignedURL(request); + return Task.FromResult(url); + } + + public Task GetPresignedUploadUrlAsync( + string bucket, string key, string contentType, TimeSpan expiration, + CancellationToken cancellationToken) + { + var request = new GetPreSignedUrlRequest + { + BucketName = bucket, + Key = key, + Expires = DateTime.UtcNow.Add(expiration), + Verb = HttpVerb.PUT, + ContentType = contentType + }; + + var url = _client.GetPreSignedURL(request); + return Task.FromResult(url); + } + + public async Task InitiateMultipartUploadAsync( + string bucket, string key, string contentType, + CancellationToken cancellationToken) + { + var initResponse = await _client.InitiateMultipartUploadAsync( + bucket, key, cancellationToken); + + // Generate presigned URLs for parts (assuming 100MB parts, 50 parts max) + var partUrls = new List(); + for (int i = 1; i <= 50; i++) + { + var url = _client.GetPreSignedURL(new GetPreSignedUrlRequest + { + BucketName = bucket, + Key = key, + Expires = DateTime.UtcNow.AddHours(24), + Verb = HttpVerb.PUT, + UploadId = initResponse.UploadId, + PartNumber = i + }); + + partUrls.Add(new PresignedPartUrl { PartNumber = i, Url = url }); + } + + return new MultipartUploadInfo + { + UploadId = initResponse.UploadId, + PresignedPartUrls = partUrls + }; + } + + public async Task CompleteMultipartUploadAsync( + string bucket, string key, string uploadId, List parts, + CancellationToken cancellationToken) + { + var request = new CompleteMultipartUploadRequest + { + BucketName = bucket, + Key = key, + UploadId = uploadId, + PartETags = parts.Select(p => new PartETag(p.PartNumber, p.ETag)).ToList() + }; + + await _client.CompleteMultipartUploadAsync(request, cancellationToken); + } + + public async Task AbortMultipartUploadAsync( + string bucket, string key, string uploadId, + CancellationToken cancellationToken) + { + await _client.AbortMultipartUploadAsync(bucket, key, uploadId, cancellationToken); + } +} +``` + +--- + +## Access Control Evaluator + +```csharp +namespace StellaOps.Router.Handlers.Storage; + +public interface IAccessControlEvaluator +{ + AccessResult Evaluate( + StorageLocation location, + IReadOnlyDictionary claims, + string httpMethod); +} + +public class AccessResult +{ + public bool Allowed { get; set; } + public string? Reason { get; set; } +} + +public sealed class ClaimBasedAccessControlEvaluator : IAccessControlEvaluator +{ + public AccessResult Evaluate( + StorageLocation location, + IReadOnlyDictionary claims, + string httpMethod) + { + var policy = location.Policy ?? new StorageAccessPolicy(); + + // Check authentication requirement + if (policy.RequireAuthentication && !claims.Any()) + { + return new AccessResult { Allowed = false, Reason = "Authentication required" }; + } + + // Check allowed claims + if (policy.AllowedClaims.Any()) + { + var hasRequiredClaim = policy.AllowedClaims.Any(c => + { + var parts = c.Split('=', 2); + if (parts.Length == 2) + { + return claims.TryGetValue(parts[0], out var value) && value == parts[1]; + } + return claims.ContainsKey(c); + }); + + if (!hasRequiredClaim) + { + return new AccessResult { Allowed = false, Reason = "Required claim not present" }; + } + } + + // Check ownership for write operations + if (policy.EnforceOwnership && IsWriteOperation(httpMethod)) + { + if (string.IsNullOrEmpty(policy.OwnerClaimPath)) + { + return new AccessResult { Allowed = false, Reason = "Owner claim path not configured" }; + } + + if (!claims.ContainsKey(policy.OwnerClaimPath)) + { + return new AccessResult { Allowed = false, Reason = "Owner claim required" }; + } + } + + return new AccessResult { Allowed = true }; + } + + private bool IsWriteOperation(string method) + { + return method.ToUpper() is "PUT" or "POST" or "DELETE" or "PATCH"; + } +} +``` + +--- + +## YAML Configuration + +```yaml +Storage: + PathPrefix: "/files" + DefaultBackend: "s3" + MaxUploadSize: 5368709120 # 5GB + MultipartThreshold: 104857600 # 100MB + PresignedUrlExpiration: "01:00:00" + UsePresignedUploads: true + UsePresignedDownloads: true + + Backends: + s3: + Type: "S3" + Endpoint: "https://s3.amazonaws.com" + Region: "us-east-1" + AccessKey: "${AWS_ACCESS_KEY}" + SecretKey: "${AWS_SECRET_KEY}" + minio: + Type: "S3" + Endpoint: "https://minio.internal:9000" + Region: "us-east-1" + AccessKey: "${MINIO_ACCESS_KEY}" + SecretKey: "${MINIO_SECRET_KEY}" + UsePathStyle: true + + BucketMappings: + - PathPattern: "uploads/*" + Bucket: "user-uploads" + KeyPrefix: "files/" + Backend: "s3" + Policy: + RequireAuthentication: true + EnforceOwnership: true + OwnerClaimPath: "sub" + + - PathPattern: "public/*" + Bucket: "public-assets" + Backend: "s3" + Policy: + RequireAuthentication: false +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Handlers.Storage/StorageHandler.cs` +2. `StellaOps.Router.Handlers.Storage/StorageHandlerConfig.cs` +3. `StellaOps.Router.Handlers.Storage/IStorageBackend.cs` +4. `StellaOps.Router.Handlers.Storage/S3StorageBackend.cs` +5. `StellaOps.Router.Handlers.Storage/IAccessControlEvaluator.cs` +6. `StellaOps.Router.Handlers.Storage/ClaimBasedAccessControlEvaluator.cs` +7. `StellaOps.Router.Handlers.Storage/StorageBackendFactory.cs` +8. Presigned URL generation tests +9. Multipart upload tests +10. Access control tests + +--- + +## Next Step + +Proceed to [Step 18: Reverse Proxy Handler Implementation](18-Step.md) to implement direct reverse proxy routing. diff --git a/docs/router/18-Step.md b/docs/router/18-Step.md new file mode 100644 index 000000000..fa40e424d --- /dev/null +++ b/docs/router/18-Step.md @@ -0,0 +1,890 @@ +# Step 18: Reverse Proxy Handler Implementation + +**Phase 4: Handler Plugins** +**Estimated Complexity:** Medium +**Dependencies:** Step 10 (Microservice Handler) + +--- + +## Overview + +The Reverse Proxy handler forwards requests to external HTTP services without using the internal transport protocol. It's used for legacy services, third-party APIs, and services that can't be modified to use the Stella transport layer. + +--- + +## Goals + +1. Forward HTTP requests to configurable upstream servers +2. Support connection pooling and HTTP/2 multiplexing +3. Handle request/response transformation +4. Support health checks and circuit breaking +5. Maintain correlation IDs for tracing + +--- + +## Core Architecture + +``` +┌────────────────────────────────────────────────────────────────┐ +│ Reverse Proxy Handler │ +├────────────────────────────────────────────────────────────────┤ +│ │ +│ Incoming Request │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ ┌─────────────────────┐ │ +│ │Path Rewriter │───►│ URL Transformation │ │ +│ └───────┬───────┘ └─────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ ┌─────────────────────┐ │ +│ │ Header Filter │───►│ Add/Remove Headers │ │ +│ └───────┬───────┘ └─────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────┐ ┌─────────────────────┐ │ +│ │ Load Balancer │───►│ Round Robin/Weighted │ │ +│ └───────┬───────┘ └─────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────────────┐ │ +│ │ HttpClient Pool │ │ +│ │ (Connection pooling, HTTP/2, retries) │ │ +│ └───────────────────────────────────────────┘ │ +│ │ +└────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Configuration + +```csharp +namespace StellaOps.Router.Handlers.ReverseProxy; + +public class ReverseProxyConfig +{ + /// Upstream definitions by name. + public Dictionary Upstreams { get; set; } = new(); + + /// Route-to-upstream mappings. + public List Routes { get; set; } = new(); + + /// Default timeout for upstream requests. + public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromSeconds(30); + + /// Whether to forward X-Forwarded-* headers. + public bool AddForwardedHeaders { get; set; } = true; + + /// Whether to preserve host header. + public bool PreserveHost { get; set; } = false; + + /// Connection pool settings. + public ConnectionPoolConfig ConnectionPool { get; set; } = new(); +} + +public class UpstreamConfig +{ + /// Upstream server addresses. + public List Servers { get; set; } = new(); + + /// Load balancing strategy. + public LoadBalanceStrategy LoadBalance { get; set; } = LoadBalanceStrategy.RoundRobin; + + /// Health check configuration. + public HealthCheckConfig? HealthCheck { get; set; } + + /// Circuit breaker configuration. + public CircuitBreakerConfig? CircuitBreaker { get; set; } + + /// Retry configuration. + public RetryConfig? Retry { get; set; } +} + +public class UpstreamServer +{ + public string Address { get; set; } = ""; + public int Weight { get; set; } = 1; + public bool Backup { get; set; } = false; +} + +public class ProxyRoute +{ + /// Path pattern to match. + public string PathPattern { get; set; } = ""; + + /// Target upstream name. + public string Upstream { get; set; } = ""; + + /// Path rewrite rule. + public PathRewriteRule? Rewrite { get; set; } + + /// Header transformations. + public HeaderTransformConfig? Headers { get; set; } + + /// Timeout override. + public TimeSpan? Timeout { get; set; } + + /// Required claims for access. + public List? RequiredClaims { get; set; } +} + +public class PathRewriteRule +{ + public string Pattern { get; set; } = ""; + public string Replacement { get; set; } = ""; +} + +public class HeaderTransformConfig +{ + public Dictionary Add { get; set; } = new(); + public List Remove { get; set; } = new(); + public Dictionary Set { get; set; } = new(); + public bool ForwardClaims { get; set; } = false; + public string ClaimsHeaderPrefix { get; set; } = "X-Claim-"; +} + +public class HealthCheckConfig +{ + public string Path { get; set; } = "/health"; + public TimeSpan Interval { get; set; } = TimeSpan.FromSeconds(10); + public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(5); + public int UnhealthyThreshold { get; set; } = 3; + public int HealthyThreshold { get; set; } = 2; +} + +public class CircuitBreakerConfig +{ + public int FailureThreshold { get; set; } = 5; + public TimeSpan SamplingDuration { get; set; } = TimeSpan.FromSeconds(30); + public TimeSpan BreakDuration { get; set; } = TimeSpan.FromSeconds(30); + public double FailureRatioThreshold { get; set; } = 0.5; +} + +public class RetryConfig +{ + public int MaxRetries { get; set; } = 3; + public TimeSpan InitialDelay { get; set; } = TimeSpan.FromMilliseconds(100); + public double BackoffMultiplier { get; set; } = 2.0; + public List RetryableStatusCodes { get; set; } = new() { 502, 503, 504 }; +} + +public class ConnectionPoolConfig +{ + public int MaxConnectionsPerServer { get; set; } = 100; + public TimeSpan ConnectionIdleTimeout { get; set; } = TimeSpan.FromMinutes(2); + public bool EnableHttp2 { get; set; } = true; +} + +public enum LoadBalanceStrategy +{ + RoundRobin, + Random, + LeastConnections, + WeightedRoundRobin, + IPHash +} +``` + +--- + +## Reverse Proxy Handler Implementation + +```csharp +namespace StellaOps.Router.Handlers.ReverseProxy; + +public sealed class ReverseProxyHandler : IRouteHandler +{ + public string HandlerType => "ReverseProxy"; + public int Priority => 50; + + private readonly ReverseProxyConfig _config; + private readonly IUpstreamManager _upstreamManager; + private readonly IHttpClientFactory _httpClientFactory; + private readonly ILogger _logger; + + public ReverseProxyHandler( + IOptions config, + IUpstreamManager upstreamManager, + IHttpClientFactory httpClientFactory, + ILogger logger) + { + _config = config.Value; + _upstreamManager = upstreamManager; + _httpClientFactory = httpClientFactory; + _logger = logger; + } + + public bool CanHandle(RouteMatchResult match) + { + if (match.Handler == "ReverseProxy") + return true; + + return _config.Routes.Any(r => IsRouteMatch(match.Route.Path, r.PathPattern)); + } + + public async Task HandleAsync( + HttpContext context, + RouteMatchResult match, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + // Find matching route + var route = _config.Routes.FirstOrDefault(r => + IsRouteMatch(context.Request.Path, r.PathPattern)); + + if (route == null) + { + return new RouteHandlerResult { Handled = false }; + } + + // Check required claims + if (route.RequiredClaims?.Any() == true) + { + if (!route.RequiredClaims.All(c => claims.ContainsKey(c))) + { + return new RouteHandlerResult + { + Handled = true, + StatusCode = 403, + Body = Encoding.UTF8.GetBytes("Forbidden") + }; + } + } + + // Get upstream server + var server = await _upstreamManager.GetServerAsync(route.Upstream, context, cancellationToken); + if (server == null) + { + _logger.LogWarning("No healthy upstream for {Upstream}", route.Upstream); + return new RouteHandlerResult + { + Handled = true, + StatusCode = 503, + Body = Encoding.UTF8.GetBytes("Service unavailable") + }; + } + + try + { + return await ForwardRequestAsync(context, route, server, claims, cancellationToken); + } + catch (Exception ex) + { + _logger.LogError(ex, "Proxy error for {Upstream}", route.Upstream); + _upstreamManager.ReportFailure(route.Upstream, server.Address); + + return new RouteHandlerResult + { + Handled = true, + StatusCode = 502, + Body = Encoding.UTF8.GetBytes("Bad gateway") + }; + } + } + + private bool IsRouteMatch(string path, string pattern) + { + if (pattern.EndsWith("*")) + { + return path.StartsWith(pattern.TrimEnd('*'), StringComparison.OrdinalIgnoreCase); + } + return string.Equals(path, pattern, StringComparison.OrdinalIgnoreCase); + } + + private async Task ForwardRequestAsync( + HttpContext context, + ProxyRoute route, + UpstreamServer server, + IReadOnlyDictionary claims, + CancellationToken cancellationToken) + { + var request = context.Request; + + // Build upstream URL + var targetUri = BuildTargetUri(server.Address, request, route.Rewrite); + + // Create HTTP request + var httpRequest = new HttpRequestMessage + { + Method = new HttpMethod(request.Method), + RequestUri = targetUri + }; + + // Copy headers + CopyRequestHeaders(request, httpRequest, route.Headers, claims); + + // Add forwarded headers + if (_config.AddForwardedHeaders) + { + AddForwardedHeaders(context, httpRequest); + } + + // Copy body for non-GET/HEAD requests + if (!HttpMethods.IsGet(request.Method) && !HttpMethods.IsHead(request.Method)) + { + httpRequest.Content = new StreamContent(request.Body); + if (request.ContentType != null) + { + httpRequest.Content.Headers.ContentType = MediaTypeHeaderValue.Parse(request.ContentType); + } + } + + // Send request + var client = _httpClientFactory.CreateClient("proxy"); + var timeout = route.Timeout ?? _config.DefaultTimeout; + + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(timeout); + + var response = await client.SendAsync(httpRequest, HttpCompletionOption.ResponseHeadersRead, cts.Token); + + // Copy response + return await BuildResponseAsync(context, response, route.Headers, cancellationToken); + } + + private Uri BuildTargetUri(string serverAddress, HttpRequest request, PathRewriteRule? rewrite) + { + var path = request.Path.Value ?? "/"; + + if (rewrite != null) + { + path = Regex.Replace(path, rewrite.Pattern, rewrite.Replacement); + } + + var query = request.QueryString.Value ?? ""; + var baseUri = new Uri(serverAddress.TrimEnd('/')); + + return new Uri(baseUri, path + query); + } + + private void CopyRequestHeaders( + HttpRequest source, + HttpRequestMessage target, + HeaderTransformConfig? transform, + IReadOnlyDictionary claims) + { + // Skip hop-by-hop headers + var skipHeaders = new HashSet(StringComparer.OrdinalIgnoreCase) + { + "Connection", "Keep-Alive", "Proxy-Authenticate", "Proxy-Authorization", + "TE", "Trailer", "Transfer-Encoding", "Upgrade", "Host" + }; + + // Headers to remove + if (transform?.Remove != null) + { + foreach (var header in transform.Remove) + { + skipHeaders.Add(header); + } + } + + foreach (var header in source.Headers) + { + if (skipHeaders.Contains(header.Key)) + continue; + + target.Headers.TryAddWithoutValidation(header.Key, header.Value.ToArray()); + } + + // Add configured headers + if (transform?.Add != null) + { + foreach (var (key, value) in transform.Add) + { + target.Headers.TryAddWithoutValidation(key, value); + } + } + + // Set configured headers (overwrite) + if (transform?.Set != null) + { + foreach (var (key, value) in transform.Set) + { + target.Headers.Remove(key); + target.Headers.TryAddWithoutValidation(key, value); + } + } + + // Forward claims as headers + if (transform?.ForwardClaims == true) + { + var prefix = transform.ClaimsHeaderPrefix ?? "X-Claim-"; + foreach (var (key, value) in claims) + { + var headerName = prefix + key.Replace('/', '-').Replace(':', '-'); + target.Headers.TryAddWithoutValidation(headerName, value); + } + } + + // Preserve or set Host + if (_config.PreserveHost) + { + target.Headers.Host = source.Host.Value; + } + } + + private void AddForwardedHeaders(HttpContext context, HttpRequestMessage request) + { + var connection = context.Connection; + var httpRequest = context.Request; + + // X-Forwarded-For + var forwardedFor = httpRequest.Headers["X-Forwarded-For"].FirstOrDefault(); + var clientIp = connection.RemoteIpAddress?.ToString(); + if (!string.IsNullOrEmpty(clientIp)) + { + forwardedFor = string.IsNullOrEmpty(forwardedFor) + ? clientIp + : $"{forwardedFor}, {clientIp}"; + } + request.Headers.TryAddWithoutValidation("X-Forwarded-For", forwardedFor); + + // X-Forwarded-Proto + request.Headers.TryAddWithoutValidation("X-Forwarded-Proto", httpRequest.Scheme); + + // X-Forwarded-Host + request.Headers.TryAddWithoutValidation("X-Forwarded-Host", httpRequest.Host.Value); + + // X-Real-IP + if (connection.RemoteIpAddress != null) + { + request.Headers.TryAddWithoutValidation("X-Real-IP", connection.RemoteIpAddress.ToString()); + } + + // X-Request-ID (correlation) + request.Headers.TryAddWithoutValidation("X-Request-ID", context.TraceIdentifier); + } + + private async Task BuildResponseAsync( + HttpContext context, + HttpResponseMessage response, + HeaderTransformConfig? transform, + CancellationToken cancellationToken) + { + var httpResponse = context.Response; + + httpResponse.StatusCode = (int)response.StatusCode; + + // Copy response headers + var skipHeaders = new HashSet(StringComparer.OrdinalIgnoreCase) + { + "Transfer-Encoding", "Connection" + }; + + foreach (var header in response.Headers) + { + if (skipHeaders.Contains(header.Key)) + continue; + + httpResponse.Headers[header.Key] = header.Value.ToArray(); + } + + foreach (var header in response.Content.Headers) + { + if (skipHeaders.Contains(header.Key)) + continue; + + httpResponse.Headers[header.Key] = header.Value.ToArray(); + } + + // Stream response body + await response.Content.CopyToAsync(httpResponse.Body, cancellationToken); + + return new RouteHandlerResult + { + Handled = true, + StatusCode = (int)response.StatusCode + }; + } +} +``` + +--- + +## Upstream Manager + +```csharp +namespace StellaOps.Router.Handlers.ReverseProxy; + +public interface IUpstreamManager +{ + Task GetServerAsync( + string upstreamName, + HttpContext context, + CancellationToken cancellationToken); + + void ReportSuccess(string upstreamName, string serverAddress); + void ReportFailure(string upstreamName, string serverAddress); +} + +public sealed class UpstreamManager : IUpstreamManager, IHostedService +{ + private readonly ReverseProxyConfig _config; + private readonly ILogger _logger; + private readonly ConcurrentDictionary _serverStates = new(); + private readonly ConcurrentDictionary _roundRobinCounters = new(); + private Timer? _healthCheckTimer; + + public UpstreamManager( + IOptions config, + ILogger logger) + { + _config = config.Value; + _logger = logger; + + InitializeServerStates(); + } + + private void InitializeServerStates() + { + foreach (var (name, upstream) in _config.Upstreams) + { + foreach (var server in upstream.Servers) + { + var key = $"{name}:{server.Address}"; + _serverStates[key] = new ServerState + { + Address = server.Address, + Weight = server.Weight, + IsHealthy = true, + IsBackup = server.Backup + }; + } + } + } + + public Task GetServerAsync( + string upstreamName, + HttpContext context, + CancellationToken cancellationToken) + { + if (!_config.Upstreams.TryGetValue(upstreamName, out var upstream)) + { + return Task.FromResult(null); + } + + var healthyServers = upstream.Servers + .Where(s => IsServerHealthy(upstreamName, s.Address) && !s.Backup) + .ToList(); + + // Fall back to backup servers if no primary available + if (healthyServers.Count == 0) + { + healthyServers = upstream.Servers + .Where(s => IsServerHealthy(upstreamName, s.Address) && s.Backup) + .ToList(); + } + + if (healthyServers.Count == 0) + { + return Task.FromResult(null); + } + + var server = upstream.LoadBalance switch + { + LoadBalanceStrategy.RoundRobin => SelectRoundRobin(upstreamName, healthyServers), + LoadBalanceStrategy.Random => SelectRandom(healthyServers), + LoadBalanceStrategy.WeightedRoundRobin => SelectWeightedRoundRobin(upstreamName, healthyServers), + LoadBalanceStrategy.LeastConnections => SelectLeastConnections(upstreamName, healthyServers), + LoadBalanceStrategy.IPHash => SelectIPHash(context, healthyServers), + _ => healthyServers[0] + }; + + return Task.FromResult(server); + } + + private bool IsServerHealthy(string upstreamName, string address) + { + var key = $"{upstreamName}:{address}"; + return _serverStates.TryGetValue(key, out var state) && state.IsHealthy; + } + + private UpstreamServer SelectRoundRobin(string upstreamName, List servers) + { + var counter = _roundRobinCounters.AddOrUpdate(upstreamName, 0, (_, c) => c + 1); + return servers[counter % servers.Count]; + } + + private UpstreamServer SelectRandom(List servers) + { + return servers[Random.Shared.Next(servers.Count)]; + } + + private UpstreamServer SelectWeightedRoundRobin(string upstreamName, List servers) + { + var totalWeight = servers.Sum(s => s.Weight); + var counter = _roundRobinCounters.AddOrUpdate(upstreamName, 0, (_, c) => c + 1); + var position = counter % totalWeight; + + var cumulative = 0; + foreach (var server in servers) + { + cumulative += server.Weight; + if (position < cumulative) + return server; + } + + return servers[^1]; + } + + private UpstreamServer SelectLeastConnections(string upstreamName, List servers) + { + return servers + .OrderBy(s => + { + var key = $"{upstreamName}:{s.Address}"; + return _serverStates.TryGetValue(key, out var state) ? state.ActiveConnections : 0; + }) + .First(); + } + + private UpstreamServer SelectIPHash(HttpContext context, List servers) + { + var ip = context.Connection.RemoteIpAddress?.ToString() ?? "127.0.0.1"; + var hash = ip.GetHashCode(); + return servers[Math.Abs(hash) % servers.Count]; + } + + public void ReportSuccess(string upstreamName, string serverAddress) + { + var key = $"{upstreamName}:{serverAddress}"; + if (_serverStates.TryGetValue(key, out var state)) + { + state.ConsecutiveFailures = 0; + state.ConsecutiveSuccesses++; + + // Check circuit breaker reset + if (!state.IsHealthy && state.ConsecutiveSuccesses >= GetHealthyThreshold(upstreamName)) + { + state.IsHealthy = true; + _logger.LogInformation("Server {Server} marked healthy", serverAddress); + } + } + } + + public void ReportFailure(string upstreamName, string serverAddress) + { + var key = $"{upstreamName}:{serverAddress}"; + if (_serverStates.TryGetValue(key, out var state)) + { + state.ConsecutiveSuccesses = 0; + state.ConsecutiveFailures++; + + // Check circuit breaker trip + if (state.IsHealthy && state.ConsecutiveFailures >= GetUnhealthyThreshold(upstreamName)) + { + state.IsHealthy = false; + _logger.LogWarning("Server {Server} marked unhealthy after {Failures} failures", + serverAddress, state.ConsecutiveFailures); + } + } + } + + private int GetUnhealthyThreshold(string upstreamName) + { + return _config.Upstreams.TryGetValue(upstreamName, out var upstream) + ? upstream.HealthCheck?.UnhealthyThreshold ?? 3 + : 3; + } + + private int GetHealthyThreshold(string upstreamName) + { + return _config.Upstreams.TryGetValue(upstreamName, out var upstream) + ? upstream.HealthCheck?.HealthyThreshold ?? 2 + : 2; + } + + public Task StartAsync(CancellationToken cancellationToken) + { + _healthCheckTimer = new Timer(PerformHealthChecks, null, TimeSpan.Zero, TimeSpan.FromSeconds(10)); + return Task.CompletedTask; + } + + private async void PerformHealthChecks(object? state) + { + foreach (var (name, upstream) in _config.Upstreams) + { + if (upstream.HealthCheck == null) + continue; + + foreach (var server in upstream.Servers) + { + await CheckServerHealthAsync(name, server, upstream.HealthCheck); + } + } + } + + private async Task CheckServerHealthAsync( + string upstreamName, + UpstreamServer server, + HealthCheckConfig config) + { + try + { + using var client = new HttpClient { Timeout = config.Timeout }; + var uri = new Uri(new Uri(server.Address), config.Path); + var response = await client.GetAsync(uri); + + if (response.IsSuccessStatusCode) + { + ReportSuccess(upstreamName, server.Address); + } + else + { + ReportFailure(upstreamName, server.Address); + } + } + catch + { + ReportFailure(upstreamName, server.Address); + } + } + + public Task StopAsync(CancellationToken cancellationToken) + { + _healthCheckTimer?.Dispose(); + return Task.CompletedTask; + } +} + +internal class ServerState +{ + public string Address { get; set; } = ""; + public int Weight { get; set; } = 1; + public bool IsHealthy { get; set; } = true; + public bool IsBackup { get; set; } + public int ConsecutiveFailures { get; set; } + public int ConsecutiveSuccesses { get; set; } + public int ActiveConnections { get; set; } +} +``` + +--- + +## Service Registration + +```csharp +namespace StellaOps.Router.Handlers.ReverseProxy; + +public static class ReverseProxyExtensions +{ + public static IServiceCollection AddReverseProxyHandler( + this IServiceCollection services, + IConfiguration configuration) + { + services.Configure( + configuration.GetSection("ReverseProxy")); + + services.AddSingleton(); + services.AddHostedService(sp => (UpstreamManager)sp.GetRequiredService()); + + services.AddHttpClient("proxy", client => + { + client.DefaultRequestVersion = HttpVersion.Version20; + client.DefaultVersionPolicy = HttpVersionPolicy.RequestVersionOrLower; + }) + .ConfigurePrimaryHttpMessageHandler(() => new SocketsHttpHandler + { + PooledConnectionLifetime = TimeSpan.FromMinutes(5), + MaxConnectionsPerServer = 100, + EnableMultipleHttp2Connections = true + }); + + services.AddSingleton(); + + return services; + } +} +``` + +--- + +## YAML Configuration + +```yaml +ReverseProxy: + DefaultTimeout: "00:00:30" + AddForwardedHeaders: true + PreserveHost: false + + ConnectionPool: + MaxConnectionsPerServer: 100 + ConnectionIdleTimeout: "00:02:00" + EnableHttp2: true + + Upstreams: + legacy-api: + LoadBalance: RoundRobin + Servers: + - Address: "http://legacy-api-1:8080" + Weight: 2 + - Address: "http://legacy-api-2:8080" + Weight: 1 + - Address: "http://legacy-api-backup:8080" + Backup: true + HealthCheck: + Path: "/health" + Interval: "00:00:10" + Timeout: "00:00:05" + UnhealthyThreshold: 3 + HealthyThreshold: 2 + CircuitBreaker: + FailureThreshold: 5 + SamplingDuration: "00:00:30" + BreakDuration: "00:00:30" + Retry: + MaxRetries: 3 + InitialDelay: "00:00:00.100" + BackoffMultiplier: 2.0 + RetryableStatusCodes: [502, 503, 504] + + external-service: + LoadBalance: LeastConnections + Servers: + - Address: "https://api.external-service.com" + + Routes: + - PathPattern: "/legacy/*" + Upstream: "legacy-api" + Rewrite: + Pattern: "^/legacy" + Replacement: "/api/v1" + Headers: + Add: + X-Proxy-Source: "stella-router" + Remove: + - "X-Internal-Token" + ForwardClaims: true + ClaimsHeaderPrefix: "X-User-" + RequiredClaims: + - "sub" + + - PathPattern: "/external/*" + Upstream: "external-service" + Timeout: "00:01:00" + Headers: + Set: + Authorization: "Bearer ${EXTERNAL_API_KEY}" +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Handlers.ReverseProxy/ReverseProxyHandler.cs` +2. `StellaOps.Router.Handlers.ReverseProxy/ReverseProxyConfig.cs` +3. `StellaOps.Router.Handlers.ReverseProxy/IUpstreamManager.cs` +4. `StellaOps.Router.Handlers.ReverseProxy/UpstreamManager.cs` +5. `StellaOps.Router.Handlers.ReverseProxy/ReverseProxyExtensions.cs` +6. Load balancing strategy tests +7. Health check tests +8. Circuit breaker tests +9. Header transformation tests + +--- + +## Next Step + +Proceed to [Step 19: Additional Handler Plugins](19-Step.md) to implement static files and WebSocket handlers. diff --git a/docs/router/19-Step.md b/docs/router/19-Step.md new file mode 100644 index 000000000..15c51f101 --- /dev/null +++ b/docs/router/19-Step.md @@ -0,0 +1,714 @@ +# Step 19: Microservice Host Builder + +**Phase 5: Microservice SDK** +**Estimated Complexity:** High +**Dependencies:** Step 14 (TCP Transport), Step 15 (TLS Transport) + +--- + +## Overview + +The Microservice Host Builder provides a fluent API for building microservices that connect to the Stella Router. It handles transport configuration, endpoint registration, graceful shutdown, and integration with ASP.NET Core's hosting infrastructure. + +--- + +## Goals + +1. Provide fluent builder API for microservice configuration +2. Support both standalone and ASP.NET Core integrated hosting +3. Handle transport lifecycle (connect, reconnect, disconnect) +4. Support multiple transport configurations +5. Enable dual-exposure mode (gateway + direct HTTP) + +--- + +## Core Architecture + +``` +┌────────────────────────────────────────────────────────────────┐ +│ Microservice Host Builder │ +├────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ StellaMicroserviceHost │ │ +│ │ ┌───────────────┐ ┌───────────────┐ ┌─────────────┐ │ │ +│ │ │Transport Layer│ │Endpoint Registry│ │ Request │ │ │ +│ │ │ (TCP/TLS/etc) │ │(Discovery/Reg) │ │ Dispatcher │ │ │ +│ │ └───────────────┘ └───────────────┘ └─────────────┘ │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Optional: ASP.NET Core Host │ │ +│ │ (Kestrel for direct HTTP access + default claims) │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +└────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Configuration + +```csharp +namespace StellaOps.Microservice; + +public class StellaMicroserviceOptions +{ + /// Service name for registration. + public required string ServiceName { get; set; } + + /// Unique instance identifier (auto-generated if not set). + public string InstanceId { get; set; } = Guid.NewGuid().ToString("N")[..8]; + + /// Service version for routing. + public string Version { get; set; } = "1.0.0"; + + /// Region for routing affinity. + public string? Region { get; set; } + + /// Tags for routing metadata. + public Dictionary Tags { get; set; } = new(); + + /// Router connection pool. + public List Routers { get; set; } = new(); + + /// Transport configuration. + public TransportConfig Transport { get; set; } = new(); + + /// Endpoint discovery configuration. + public EndpointDiscoveryConfig Discovery { get; set; } = new(); + + /// Heartbeat configuration. + public HeartbeatConfig Heartbeat { get; set; } = new(); + + /// Dual exposure mode configuration. + public DualExposureConfig? DualExposure { get; set; } + + /// Graceful shutdown timeout. + public TimeSpan ShutdownTimeout { get; set; } = TimeSpan.FromSeconds(30); +} + +public class RouterConnectionConfig +{ + public string Host { get; set; } = "localhost"; + public int Port { get; set; } = 9500; + public string Transport { get; set; } = "TCP"; // TCP, TLS, InMemory + public int Priority { get; set; } = 1; + public bool Enabled { get; set; } = true; +} + +public class TransportConfig +{ + public string Default { get; set; } = "TCP"; + public TcpClientConfig? Tcp { get; set; } + public TlsClientConfig? Tls { get; set; } + public int MaxReconnectAttempts { get; set; } = -1; // -1 = unlimited + public TimeSpan ReconnectDelay { get; set; } = TimeSpan.FromSeconds(5); +} + +public class EndpointDiscoveryConfig +{ + /// Assemblies to scan for endpoints. + public List ScanAssemblies { get; set; } = new(); + + /// Path to YAML overrides file. + public string? ConfigFilePath { get; set; } + + /// Base path prefix for all endpoints. + public string? BasePath { get; set; } + + /// Whether to auto-discover endpoints via reflection. + public bool AutoDiscover { get; set; } = true; +} + +public class HeartbeatConfig +{ + public TimeSpan Interval { get; set; } = TimeSpan.FromSeconds(10); + public TimeSpan Timeout { get; set; } = TimeSpan.FromSeconds(5); + public int MissedHeartbeatsThreshold { get; set; } = 3; +} + +public class DualExposureConfig +{ + /// Enable direct HTTP access. + public bool Enabled { get; set; } = false; + + /// HTTP port for direct access. + public int HttpPort { get; set; } = 8080; + + /// Default claims for direct access (no JWT). + public Dictionary DefaultClaims { get; set; } = new(); + + /// Whether to require JWT for direct access. + public bool RequireAuthentication { get; set; } = false; +} +``` + +--- + +## Host Builder Implementation + +```csharp +namespace StellaOps.Microservice; + +public interface IStellaMicroserviceBuilder +{ + IStellaMicroserviceBuilder ConfigureServices(Action configure); + IStellaMicroserviceBuilder ConfigureTransport(Action configure); + IStellaMicroserviceBuilder ConfigureEndpoints(Action configure); + IStellaMicroserviceBuilder AddRouter(string host, int port, string transport = "TCP"); + IStellaMicroserviceBuilder EnableDualExposure(Action? configure = null); + IStellaMicroserviceBuilder UseYamlConfig(string path); + IStellaMicroserviceHost Build(); +} + +public sealed class StellaMicroserviceBuilder : IStellaMicroserviceBuilder +{ + private readonly StellaMicroserviceOptions _options; + private readonly IServiceCollection _services; + private readonly List> _configureActions = new(); + + public StellaMicroserviceBuilder(string serviceName) + { + _options = new StellaMicroserviceOptions { ServiceName = serviceName }; + _services = new ServiceCollection(); + + // Add default services + _services.AddLogging(b => b.AddConsole()); + _services.AddSingleton(_options); + } + + public static IStellaMicroserviceBuilder Create(string serviceName) + { + return new StellaMicroserviceBuilder(serviceName); + } + + public IStellaMicroserviceBuilder ConfigureServices(Action configure) + { + _configureActions.Add(configure); + return this; + } + + public IStellaMicroserviceBuilder ConfigureTransport(Action configure) + { + configure(_options.Transport); + return this; + } + + public IStellaMicroserviceBuilder ConfigureEndpoints(Action configure) + { + configure(_options.Discovery); + return this; + } + + public IStellaMicroserviceBuilder AddRouter(string host, int port, string transport = "TCP") + { + _options.Routers.Add(new RouterConnectionConfig + { + Host = host, + Port = port, + Transport = transport, + Priority = _options.Routers.Count + 1 + }); + return this; + } + + public IStellaMicroserviceBuilder EnableDualExposure(Action? configure = null) + { + _options.DualExposure = new DualExposureConfig { Enabled = true }; + configure?.Invoke(_options.DualExposure); + return this; + } + + public IStellaMicroserviceBuilder UseYamlConfig(string path) + { + _options.Discovery.ConfigFilePath = path; + return this; + } + + public IStellaMicroserviceHost Build() + { + // Apply custom service configuration + foreach (var action in _configureActions) + { + action(_services); + } + + // Add core services + AddCoreServices(); + + // Add transport services + AddTransportServices(); + + // Add endpoint services + AddEndpointServices(); + + var serviceProvider = _services.BuildServiceProvider(); + return serviceProvider.GetRequiredService(); + } + + private void AddCoreServices() + { + _services.AddSingleton(); + _services.AddSingleton(); + _services.AddSingleton(); + _services.AddSingleton(); + } + + private void AddTransportServices() + { + _services.AddSingleton(); + + switch (_options.Transport.Default.ToUpper()) + { + case "TCP": + _services.AddSingleton(); + break; + case "TLS": + _services.AddSingleton(); + _services.AddSingleton(); + break; + case "INMEMORY": + // InMemory requires hub to be provided externally + _services.AddSingleton(); + break; + } + } + + private void AddEndpointServices() + { + _services.AddSingleton(); + + if (!string.IsNullOrEmpty(_options.Discovery.ConfigFilePath)) + { + _services.AddSingleton(); + } + } +} +``` + +--- + +## Microservice Host Implementation + +```csharp +namespace StellaOps.Microservice; + +public interface IStellaMicroserviceHost : IAsyncDisposable +{ + StellaMicroserviceOptions Options { get; } + bool IsConnected { get; } + + Task StartAsync(CancellationToken cancellationToken = default); + Task StopAsync(CancellationToken cancellationToken = default); + Task WaitForShutdownAsync(CancellationToken cancellationToken = default); +} + +public sealed class StellaMicroserviceHost : IStellaMicroserviceHost, IHostedService +{ + private readonly StellaMicroserviceOptions _options; + private readonly ITransportServer _transport; + private readonly IEndpointRegistry _endpointRegistry; + private readonly IRequestDispatcher _dispatcher; + private readonly ILogger _logger; + private readonly CancellationTokenSource _shutdownCts = new(); + private readonly TaskCompletionSource _shutdownComplete = new(); + private Timer? _heartbeatTimer; + private IHost? _httpHost; + + public StellaMicroserviceOptions Options => _options; + public bool IsConnected => _transport.IsConnected; + + public StellaMicroserviceHost( + StellaMicroserviceOptions options, + ITransportServer transport, + IEndpointRegistry endpointRegistry, + IRequestDispatcher dispatcher, + ILogger logger) + { + _options = options; + _transport = transport; + _endpointRegistry = endpointRegistry; + _dispatcher = dispatcher; + _logger = logger; + } + + public async Task StartAsync(CancellationToken cancellationToken = default) + { + _logger.LogInformation( + "Starting microservice {ServiceName}/{InstanceId}", + _options.ServiceName, _options.InstanceId); + + // Discover endpoints + var endpoints = await _endpointRegistry.DiscoverEndpointsAsync(cancellationToken); + _logger.LogInformation("Discovered {Count} endpoints", endpoints.Length); + + // Wire up request handler + _transport.OnRequest += HandleRequestAsync; + _transport.OnCancel += HandleCancelAsync; + + // Connect to router + var router = _options.Routers.OrderBy(r => r.Priority).FirstOrDefault() + ?? throw new InvalidOperationException("No routers configured"); + + await _transport.ConnectAsync( + _options.ServiceName, + _options.InstanceId, + endpoints, + cancellationToken); + + _logger.LogInformation( + "Connected to router at {Host}:{Port}", + router.Host, router.Port); + + // Start heartbeat + _heartbeatTimer = new Timer( + SendHeartbeatAsync, + null, + _options.Heartbeat.Interval, + _options.Heartbeat.Interval); + + // Start dual exposure HTTP if enabled + if (_options.DualExposure?.Enabled == true) + { + await StartHttpHostAsync(cancellationToken); + } + + _logger.LogInformation( + "Microservice {ServiceName} started successfully", + _options.ServiceName); + } + + private async Task HandleRequestAsync( + RequestPayload request, + CancellationToken cancellationToken) + { + using var activity = Activity.StartActivity("HandleRequest"); + activity?.SetTag("http.method", request.Method); + activity?.SetTag("http.path", request.Path); + + try + { + return await _dispatcher.DispatchAsync(request, cancellationToken); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error handling request {Path}", request.Path); + return new ResponsePayload + { + StatusCode = 500, + Headers = new Dictionary(), + Body = Encoding.UTF8.GetBytes($"{{\"error\": \"{ex.Message}\"}}"), + IsFinalChunk = true + }; + } + } + + private Task HandleCancelAsync(string correlationId, CancellationToken cancellationToken) + { + _logger.LogDebug("Request {CorrelationId} cancelled", correlationId); + // Propagate cancellation to active request handling + return Task.CompletedTask; + } + + private async void SendHeartbeatAsync(object? state) + { + try + { + await _transport.SendHeartbeatAsync(_shutdownCts.Token); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Failed to send heartbeat"); + } + } + + private async Task StartHttpHostAsync(CancellationToken cancellationToken) + { + var config = _options.DualExposure!; + + _httpHost = Host.CreateDefaultBuilder() + .ConfigureWebHostDefaults(web => + { + web.UseKestrel(k => k.ListenAnyIP(config.HttpPort)); + web.Configure(app => + { + app.UseRouting(); + app.UseEndpoints(endpoints => + { + endpoints.MapFallback(async context => + { + // Inject default claims for direct access + var claims = config.DefaultClaims; + + var request = new RequestPayload + { + Method = context.Request.Method, + Path = context.Request.Path + context.Request.QueryString, + Host = context.Request.Host.Value, + Headers = context.Request.Headers + .ToDictionary(h => h.Key, h => h.Value.ToString()), + Claims = claims, + ClientIp = context.Connection.RemoteIpAddress?.ToString(), + TraceId = context.TraceIdentifier + }; + + // Read body if present + if (context.Request.ContentLength > 0) + { + using var ms = new MemoryStream(); + await context.Request.Body.CopyToAsync(ms); + request = request with { Body = ms.ToArray() }; + } + + var response = await _dispatcher.DispatchAsync(request, context.RequestAborted); + + context.Response.StatusCode = response.StatusCode; + foreach (var (key, value) in response.Headers) + { + context.Response.Headers[key] = value; + } + + if (response.Body != null) + { + await context.Response.Body.WriteAsync(response.Body); + } + }); + }); + }); + }) + .Build(); + + await _httpHost.StartAsync(cancellationToken); + _logger.LogInformation( + "Direct HTTP access enabled on port {Port}", + config.HttpPort); + } + + public async Task StopAsync(CancellationToken cancellationToken = default) + { + _logger.LogInformation( + "Stopping microservice {ServiceName}", + _options.ServiceName); + + _shutdownCts.Cancel(); + _heartbeatTimer?.Dispose(); + + if (_httpHost != null) + { + await _httpHost.StopAsync(cancellationToken); + } + + await _transport.DisconnectAsync(); + + _logger.LogInformation( + "Microservice {ServiceName} stopped", + _options.ServiceName); + + _shutdownComplete.TrySetResult(); + } + + public Task WaitForShutdownAsync(CancellationToken cancellationToken = default) + { + return _shutdownComplete.Task.WaitAsync(cancellationToken); + } + + public async ValueTask DisposeAsync() + { + await StopAsync(); + _shutdownCts.Dispose(); + } + + // IHostedService implementation for ASP.NET Core integration + Task IHostedService.StartAsync(CancellationToken cancellationToken) => StartAsync(cancellationToken); + Task IHostedService.StopAsync(CancellationToken cancellationToken) => StopAsync(cancellationToken); +} +``` + +--- + +## ASP.NET Core Integration + +```csharp +namespace StellaOps.Microservice; + +public static class StellaMicroserviceExtensions +{ + /// + /// Adds Stella microservice to an existing ASP.NET Core host. + /// + public static IServiceCollection AddStellaMicroservice( + this IServiceCollection services, + Action configure) + { + var options = new StellaMicroserviceOptions { ServiceName = "unknown" }; + configure(options); + + services.AddSingleton(options); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + + // Add transport based on configuration + switch (options.Transport.Default.ToUpper()) + { + case "TCP": + services.AddSingleton(); + break; + case "TLS": + services.AddSingleton(); + services.AddSingleton(); + break; + } + + services.AddSingleton(); + services.AddHostedService(sp => (StellaMicroserviceHost)sp.GetRequiredService()); + + return services; + } + + /// + /// Configures an endpoint handler for the microservice. + /// + public static IServiceCollection AddEndpointHandler( + this IServiceCollection services) + where THandler : class, IEndpointHandler + { + services.AddScoped(); + return services; + } +} +``` + +--- + +## Usage Examples + +### Standalone Microservice + +```csharp +var host = StellaMicroserviceBuilder + .Create("billing-service") + .AddRouter("gateway.internal", 9500, "TLS") + .ConfigureTransport(t => + { + t.Tls = new TlsClientConfig + { + ClientCertificatePath = "/etc/certs/billing.pfx", + ClientCertificatePassword = Environment.GetEnvironmentVariable("CERT_PASSWORD") + }; + }) + .ConfigureEndpoints(e => + { + e.BasePath = "/billing"; + e.ScanAssemblies.Add("BillingService.Handlers"); + }) + .ConfigureServices(services => + { + services.AddScoped(); + services.AddScoped(); + }) + .Build(); + +await host.StartAsync(); +await host.WaitForShutdownAsync(); +``` + +### ASP.NET Core Integration + +```csharp +var builder = WebApplication.CreateBuilder(args); + +builder.Services.AddStellaMicroservice(options => +{ + options.ServiceName = "user-service"; + options.Region = "us-east-1"; + options.Routers.Add(new RouterConnectionConfig + { + Host = "gateway.internal", + Port = 9500 + }); + options.DualExposure = new DualExposureConfig + { + Enabled = true, + HttpPort = 8080, + DefaultClaims = new Dictionary + { + ["tier"] = "free" + } + }; +}); + +builder.Services.AddEndpointHandler(); + +var app = builder.Build(); +await app.RunAsync(); +``` + +--- + +## YAML Configuration + +```yaml +Microservice: + ServiceName: "billing-service" + Version: "1.0.0" + Region: "us-east-1" + Tags: + team: "payments" + tier: "critical" + + Routers: + - Host: "gateway-primary.internal" + Port: 9500 + Transport: "TLS" + Priority: 1 + - Host: "gateway-secondary.internal" + Port: 9500 + Transport: "TLS" + Priority: 2 + + Transport: + Default: "TLS" + Tls: + ClientCertificatePath: "/etc/certs/service.pfx" + ClientCertificatePassword: "${CERT_PASSWORD}" + + Discovery: + AutoDiscover: true + BasePath: "/billing" + ConfigFilePath: "/etc/stellaops/endpoints.yaml" + + Heartbeat: + Interval: "00:00:10" + Timeout: "00:00:05" + + DualExposure: + Enabled: true + HttpPort: 8080 + DefaultClaims: + tier: "free" + + ShutdownTimeout: "00:00:30" +``` + +--- + +## Deliverables + +1. `StellaOps.Microservice/StellaMicroserviceOptions.cs` +2. `StellaOps.Microservice/IStellaMicroserviceBuilder.cs` +3. `StellaOps.Microservice/StellaMicroserviceBuilder.cs` +4. `StellaOps.Microservice/IStellaMicroserviceHost.cs` +5. `StellaOps.Microservice/StellaMicroserviceHost.cs` +6. `StellaOps.Microservice/StellaMicroserviceExtensions.cs` +7. Builder pattern tests +8. Lifecycle tests (start/stop/reconnect) +9. Dual exposure mode tests + +--- + +## Next Step + +Proceed to [Step 20: Endpoint Discovery & Registration](20-Step.md) to implement automatic endpoint discovery. diff --git a/docs/router/20-Step.md b/docs/router/20-Step.md new file mode 100644 index 000000000..96006acda --- /dev/null +++ b/docs/router/20-Step.md @@ -0,0 +1,696 @@ +# Step 20: Endpoint Discovery & Registration + +**Phase 5: Microservice SDK** +**Estimated Complexity:** Medium +**Dependencies:** Step 19 (Microservice Host Builder) + +--- + +## Overview + +Endpoint discovery automatically finds and registers HTTP endpoints from microservice code using attributes and reflection. YAML configuration provides overrides for metadata like rate limits, authentication requirements, and versioning. + +--- + +## Goals + +1. Discover endpoints via reflection and attributes +2. Support YAML-based metadata overrides +3. Generate EndpointDescriptor for router registration +4. Support endpoint versioning and deprecation +5. Validate endpoint configurations at startup + +--- + +## Endpoint Attributes + +```csharp +namespace StellaOps.Microservice; + +/// +/// Marks a class as containing Stella endpoints. +/// +[AttributeUsage(AttributeTargets.Class)] +public sealed class StellaEndpointAttribute : Attribute +{ + public string? BasePath { get; set; } + public string? Version { get; set; } + public string[]? Tags { get; set; } +} + +/// +/// Marks a method as a Stella endpoint handler. +/// +[AttributeUsage(AttributeTargets.Method)] +public sealed class StellaRouteAttribute : Attribute +{ + public string Method { get; } + public string Path { get; } + public string? Name { get; set; } + public string? Description { get; set; } + + public StellaRouteAttribute(string method, string path) + { + Method = method; + Path = path; + } +} + +/// +/// Specifies authentication requirements for an endpoint. +/// +[AttributeUsage(AttributeTargets.Method | AttributeTargets.Class)] +public sealed class StellaAuthAttribute : Attribute +{ + public bool Required { get; set; } = true; + public string[]? RequiredClaims { get; set; } + public string? Policy { get; set; } +} + +/// +/// Specifies rate limiting for an endpoint. +/// +[AttributeUsage(AttributeTargets.Method | AttributeTargets.Class)] +public sealed class StellaRateLimitAttribute : Attribute +{ + public int RequestsPerMinute { get; set; } + public string? BucketKey { get; set; } // e.g., "sub", "ip", "path" +} + +/// +/// Specifies timeout for an endpoint. +/// +[AttributeUsage(AttributeTargets.Method | AttributeTargets.Class)] +public sealed class StellaTimeoutAttribute : Attribute +{ + public int TimeoutMs { get; } + + public StellaTimeoutAttribute(int timeoutMs) + { + TimeoutMs = timeoutMs; + } +} + +/// +/// Marks an endpoint as deprecated. +/// +[AttributeUsage(AttributeTargets.Method)] +public sealed class StellaDeprecatedAttribute : Attribute +{ + public string? Message { get; set; } + public string? AlternativeEndpoint { get; set; } + public string? SunsetDate { get; set; } +} + +/// +/// Convenience attributes for common HTTP methods. +/// +public sealed class StellaGetAttribute : StellaRouteAttribute +{ + public StellaGetAttribute(string path) : base("GET", path) { } +} + +public sealed class StellaPostAttribute : StellaRouteAttribute +{ + public StellaPostAttribute(string path) : base("POST", path) { } +} + +public sealed class StellaPutAttribute : StellaRouteAttribute +{ + public StellaPutAttribute(string path) : base("PUT", path) { } +} + +public sealed class StellaDeleteAttribute : StellaRouteAttribute +{ + public StellaDeleteAttribute(string path) : base("DELETE", path) { } +} + +public sealed class StellaPatchAttribute : StellaRouteAttribute +{ + public StellaPatchAttribute(string path) : base("PATCH", path) { } +} +``` + +--- + +## Endpoint Descriptor + +```csharp +namespace StellaOps.Microservice; + +/// +/// Describes an endpoint for router registration. +/// +public sealed class EndpointDescriptor +{ + /// HTTP method (GET, POST, etc.). + public required string Method { get; init; } + + /// Path pattern (may include parameters like {id}). + public required string Path { get; init; } + + /// Unique endpoint name. + public string? Name { get; init; } + + /// Endpoint description for documentation. + public string? Description { get; init; } + + /// API version. + public string? Version { get; init; } + + /// Tags for grouping/filtering. + public string[]? Tags { get; init; } + + /// Whether authentication is required. + public bool RequiresAuth { get; init; } = true; + + /// Required claims for access. + public string[]? RequiredClaims { get; init; } + + /// Authentication policy name. + public string? AuthPolicy { get; init; } + + /// Rate limit configuration. + public RateLimitDescriptor? RateLimit { get; init; } + + /// Request timeout in milliseconds. + public int? TimeoutMs { get; init; } + + /// Deprecation information. + public DeprecationDescriptor? Deprecation { get; init; } + + /// Custom metadata. + public Dictionary? Metadata { get; init; } +} + +public sealed class RateLimitDescriptor +{ + public int RequestsPerMinute { get; init; } + public string BucketKey { get; init; } = "sub"; +} + +public sealed class DeprecationDescriptor +{ + public string? Message { get; init; } + public string? AlternativeEndpoint { get; init; } + public DateOnly? SunsetDate { get; init; } +} +``` + +--- + +## Endpoint Discovery Interface + +```csharp +namespace StellaOps.Microservice; + +public interface IEndpointDiscovery +{ + /// + /// Discovers endpoints from configured assemblies. + /// + Task> DiscoverAsync(CancellationToken cancellationToken); +} + +public sealed class DiscoveredEndpoint +{ + public required EndpointDescriptor Descriptor { get; init; } + public required Type HandlerType { get; init; } + public required MethodInfo HandlerMethod { get; init; } +} +``` + +--- + +## Reflection-Based Discovery + +```csharp +namespace StellaOps.Microservice; + +public sealed class ReflectionEndpointDiscovery : IEndpointDiscovery +{ + private readonly EndpointDiscoveryConfig _config; + private readonly ILogger _logger; + + public ReflectionEndpointDiscovery( + StellaMicroserviceOptions options, + ILogger logger) + { + _config = options.Discovery; + _logger = logger; + } + + public Task> DiscoverAsync(CancellationToken cancellationToken) + { + var endpoints = new List(); + var assemblies = GetAssembliesToScan(); + + foreach (var assembly in assemblies) + { + foreach (var type in assembly.GetExportedTypes()) + { + var classAttr = type.GetCustomAttribute(); + if (classAttr == null) + continue; + + var classAuth = type.GetCustomAttribute(); + var classRateLimit = type.GetCustomAttribute(); + var classTimeout = type.GetCustomAttribute(); + + foreach (var method in type.GetMethods(BindingFlags.Public | BindingFlags.Instance)) + { + var routeAttr = method.GetCustomAttribute(); + if (routeAttr == null) + continue; + + var endpoint = BuildEndpoint( + type, method, classAttr, routeAttr, + classAuth, classRateLimit, classTimeout); + + endpoints.Add(endpoint); + + _logger.LogDebug( + "Discovered endpoint: {Method} {Path}", + endpoint.Descriptor.Method, endpoint.Descriptor.Path); + } + } + } + + _logger.LogInformation("Discovered {Count} endpoints", endpoints.Count); + return Task.FromResult>(endpoints); + } + + private IEnumerable GetAssembliesToScan() + { + if (_config.ScanAssemblies.Any()) + { + return _config.ScanAssemblies.Select(Assembly.Load); + } + + // Default: scan entry assembly and referenced assemblies + var entry = Assembly.GetEntryAssembly(); + if (entry == null) + return Enumerable.Empty(); + + return new[] { entry } + .Concat(entry.GetReferencedAssemblies().Select(Assembly.Load)); + } + + private DiscoveredEndpoint BuildEndpoint( + Type handlerType, + MethodInfo method, + StellaEndpointAttribute classAttr, + StellaRouteAttribute routeAttr, + StellaAuthAttribute? classAuth, + StellaRateLimitAttribute? classRateLimit, + StellaTimeoutAttribute? classTimeout) + { + // Method-level attributes override class-level + var methodAuth = method.GetCustomAttribute() ?? classAuth; + var methodRateLimit = method.GetCustomAttribute() ?? classRateLimit; + var methodTimeout = method.GetCustomAttribute() ?? classTimeout; + var deprecatedAttr = method.GetCustomAttribute(); + + // Build full path + var basePath = classAttr.BasePath?.TrimEnd('/') ?? ""; + if (!string.IsNullOrEmpty(_config.BasePath)) + { + basePath = _config.BasePath.TrimEnd('/') + basePath; + } + var fullPath = basePath + "/" + routeAttr.Path.TrimStart('/'); + + var descriptor = new EndpointDescriptor + { + Method = routeAttr.Method, + Path = fullPath, + Name = routeAttr.Name ?? $"{handlerType.Name}.{method.Name}", + Description = routeAttr.Description, + Version = classAttr.Version, + Tags = classAttr.Tags, + RequiresAuth = methodAuth?.Required ?? true, + RequiredClaims = methodAuth?.RequiredClaims, + AuthPolicy = methodAuth?.Policy, + RateLimit = methodRateLimit != null ? new RateLimitDescriptor + { + RequestsPerMinute = methodRateLimit.RequestsPerMinute, + BucketKey = methodRateLimit.BucketKey ?? "sub" + } : null, + TimeoutMs = methodTimeout?.TimeoutMs, + Deprecation = deprecatedAttr != null ? new DeprecationDescriptor + { + Message = deprecatedAttr.Message, + AlternativeEndpoint = deprecatedAttr.AlternativeEndpoint, + SunsetDate = DateOnly.TryParse(deprecatedAttr.SunsetDate, out var date) ? date : null + } : null + }; + + return new DiscoveredEndpoint + { + Descriptor = descriptor, + HandlerType = handlerType, + HandlerMethod = method + }; + } +} +``` + +--- + +## YAML Override Provider + +```csharp +namespace StellaOps.Microservice; + +public interface IEndpointOverrideProvider +{ + /// + /// Applies overrides to discovered endpoints. + /// + void ApplyOverrides(IList endpoints); +} + +public sealed class YamlEndpointOverrideProvider : IEndpointOverrideProvider +{ + private readonly EndpointDiscoveryConfig _config; + private readonly ILogger _logger; + private readonly Dictionary _overrides = new(); + + public YamlEndpointOverrideProvider( + StellaMicroserviceOptions options, + ILogger logger) + { + _config = options.Discovery; + _logger = logger; + + LoadOverrides(); + } + + private void LoadOverrides() + { + if (string.IsNullOrEmpty(_config.ConfigFilePath)) + return; + + if (!File.Exists(_config.ConfigFilePath)) + { + _logger.LogWarning("Endpoint config file not found: {Path}", _config.ConfigFilePath); + return; + } + + var yaml = File.ReadAllText(_config.ConfigFilePath); + var deserializer = new DeserializerBuilder() + .WithNamingConvention(CamelCaseNamingConvention.Instance) + .Build(); + + var config = deserializer.Deserialize(yaml); + + if (config?.Endpoints != null) + { + foreach (var (key, value) in config.Endpoints) + { + _overrides[key] = value; + } + } + + _logger.LogInformation("Loaded {Count} endpoint overrides", _overrides.Count); + } + + public void ApplyOverrides(IList endpoints) + { + foreach (var endpoint in endpoints) + { + var key = $"{endpoint.Descriptor.Method} {endpoint.Descriptor.Path}"; + + if (_overrides.TryGetValue(key, out var over) || + _overrides.TryGetValue(endpoint.Descriptor.Path, out over) || + (endpoint.Descriptor.Name != null && _overrides.TryGetValue(endpoint.Descriptor.Name, out over))) + { + ApplyOverride(endpoint, over); + } + } + } + + private void ApplyOverride(DiscoveredEndpoint endpoint, EndpointOverride over) + { + // Create new descriptor with overrides applied + var original = endpoint.Descriptor; + + var updated = new EndpointDescriptor + { + Method = original.Method, + Path = original.Path, + Name = over.Name ?? original.Name, + Description = over.Description ?? original.Description, + Version = over.Version ?? original.Version, + Tags = over.Tags ?? original.Tags, + RequiresAuth = over.RequiresAuth ?? original.RequiresAuth, + RequiredClaims = over.RequiredClaims ?? original.RequiredClaims, + AuthPolicy = over.AuthPolicy ?? original.AuthPolicy, + RateLimit = over.RateLimit != null ? new RateLimitDescriptor + { + RequestsPerMinute = over.RateLimit.RequestsPerMinute, + BucketKey = over.RateLimit.BucketKey ?? "sub" + } : original.RateLimit, + TimeoutMs = over.TimeoutMs ?? original.TimeoutMs, + Deprecation = original.Deprecation, // Keep original deprecation + Metadata = MergeMetadata(original.Metadata, over.Metadata) + }; + + // Replace descriptor (need mutable property or rebuild) + // In real implementation, use record with 'with' expression + _logger.LogDebug("Applied override to endpoint {Path}", original.Path); + } + + private Dictionary? MergeMetadata( + Dictionary? original, + Dictionary? over) + { + if (original == null && over == null) + return null; + + var result = new Dictionary(original ?? new()); + if (over != null) + { + foreach (var (key, value) in over) + { + result[key] = value; + } + } + return result; + } +} + +internal class EndpointOverrideConfig +{ + public Dictionary? Endpoints { get; set; } +} + +internal class EndpointOverride +{ + public string? Name { get; set; } + public string? Description { get; set; } + public string? Version { get; set; } + public string[]? Tags { get; set; } + public bool? RequiresAuth { get; set; } + public string[]? RequiredClaims { get; set; } + public string? AuthPolicy { get; set; } + public RateLimitOverride? RateLimit { get; set; } + public int? TimeoutMs { get; set; } + public Dictionary? Metadata { get; set; } +} + +internal class RateLimitOverride +{ + public int RequestsPerMinute { get; set; } + public string? BucketKey { get; set; } +} +``` + +--- + +## Endpoint Registry + +```csharp +namespace StellaOps.Microservice; + +public interface IEndpointRegistry +{ + Task DiscoverEndpointsAsync(CancellationToken cancellationToken); + DiscoveredEndpoint? FindEndpoint(string method, string path); +} + +public sealed class EndpointRegistry : IEndpointRegistry +{ + private readonly IEndpointDiscovery _discovery; + private readonly IEndpointOverrideProvider? _overrideProvider; + private readonly ILogger _logger; + private IReadOnlyList? _endpoints; + private readonly Dictionary _endpointLookup = new(); + + public EndpointRegistry( + IEndpointDiscovery discovery, + IEndpointOverrideProvider? overrideProvider, + ILogger logger) + { + _discovery = discovery; + _overrideProvider = overrideProvider; + _logger = logger; + } + + public async Task DiscoverEndpointsAsync(CancellationToken cancellationToken) + { + _endpoints = await _discovery.DiscoverAsync(cancellationToken); + + if (_overrideProvider != null) + { + var mutableList = _endpoints.ToList(); + _overrideProvider.ApplyOverrides(mutableList); + _endpoints = mutableList; + } + + // Build lookup table + _endpointLookup.Clear(); + foreach (var endpoint in _endpoints) + { + var key = $"{endpoint.Descriptor.Method}:{endpoint.Descriptor.Path}"; + _endpointLookup[key] = endpoint; + } + + // Validate endpoints + ValidateEndpoints(_endpoints); + + return _endpoints.Select(e => e.Descriptor).ToArray(); + } + + public DiscoveredEndpoint? FindEndpoint(string method, string path) + { + // Exact match + var key = $"{method}:{path}"; + if (_endpointLookup.TryGetValue(key, out var endpoint)) + return endpoint; + + // Pattern match for path parameters + foreach (var ep in _endpoints ?? Enumerable.Empty()) + { + if (ep.Descriptor.Method != method) + continue; + + if (IsPathMatch(path, ep.Descriptor.Path)) + return ep; + } + + return null; + } + + private bool IsPathMatch(string requestPath, string pattern) + { + var patternSegments = pattern.Split('/', StringSplitOptions.RemoveEmptyEntries); + var pathSegments = requestPath.Split('/', StringSplitOptions.RemoveEmptyEntries); + + if (patternSegments.Length != pathSegments.Length) + return false; + + for (int i = 0; i < patternSegments.Length; i++) + { + var patternSeg = patternSegments[i]; + var pathSeg = pathSegments[i]; + + // Check for path parameter + if (patternSeg.StartsWith('{') && patternSeg.EndsWith('}')) + continue; + + if (!string.Equals(patternSeg, pathSeg, StringComparison.OrdinalIgnoreCase)) + return false; + } + + return true; + } + + private void ValidateEndpoints(IReadOnlyList endpoints) + { + var duplicates = endpoints + .GroupBy(e => $"{e.Descriptor.Method}:{e.Descriptor.Path}") + .Where(g => g.Count() > 1) + .Select(g => g.Key) + .ToList(); + + if (duplicates.Any()) + { + throw new InvalidOperationException( + $"Duplicate endpoints detected: {string.Join(", ", duplicates)}"); + } + + // Validate handler method signatures + foreach (var endpoint in endpoints) + { + ValidateHandlerMethod(endpoint); + } + } + + private void ValidateHandlerMethod(DiscoveredEndpoint endpoint) + { + var method = endpoint.HandlerMethod; + var returnType = method.ReturnType; + + // Must return Task or Task where T can be serialized + if (!typeof(Task).IsAssignableFrom(returnType)) + { + throw new InvalidOperationException( + $"Handler {method.Name} must return Task or Task"); + } + } +} +``` + +--- + +## YAML Configuration Example + +```yaml +# endpoints.yaml - Endpoint overrides + +Endpoints: + # Override by path + "GET /billing/invoices": + RateLimit: + RequestsPerMinute: 100 + BucketKey: "sub" + TimeoutMs: 30000 + + # Override by name + "InvoiceHandler.GetInvoice": + RequiredClaims: + - "billing:read" + AuthPolicy: "billing-read" + + # Override by method + path + "POST /billing/invoices": + RequiredClaims: + - "billing:write" + RateLimit: + RequestsPerMinute: 10 + BucketKey: "sub" + Metadata: + audit: "required" +``` + +--- + +## Deliverables + +1. `StellaOps.Microservice/Attributes/*.cs` (all endpoint attributes) +2. `StellaOps.Microservice/EndpointDescriptor.cs` +3. `StellaOps.Microservice/IEndpointDiscovery.cs` +4. `StellaOps.Microservice/ReflectionEndpointDiscovery.cs` +5. `StellaOps.Microservice/IEndpointOverrideProvider.cs` +6. `StellaOps.Microservice/YamlEndpointOverrideProvider.cs` +7. `StellaOps.Microservice/IEndpointRegistry.cs` +8. `StellaOps.Microservice/EndpointRegistry.cs` +9. Attribute parsing tests +10. YAML override tests +11. Path matching tests + +--- + +## Next Step + +Proceed to [Step 21: Request/Response Context](21-Step.md) to implement the request handling context. diff --git a/docs/router/21-Step.md b/docs/router/21-Step.md new file mode 100644 index 000000000..fa0f73381 --- /dev/null +++ b/docs/router/21-Step.md @@ -0,0 +1,793 @@ +# Step 21: Request/Response Context + +**Phase 5: Microservice SDK** +**Estimated Complexity:** Medium +**Dependencies:** Step 20 (Endpoint Discovery) + +--- + +## Overview + +The Request/Response Context provides a clean abstraction for endpoint handlers to access request data, claims, and build responses. It hides transport details while providing easy access to parsed path parameters, query strings, headers, and the request body. + +--- + +## Goals + +1. Provide clean request context abstraction +2. Support path parameter extraction +3. Provide typed body deserialization +4. Support streaming responses +5. Enable easy response building + +--- + +## Request Context + +```csharp +namespace StellaOps.Microservice; + +/// +/// Context for handling a request in a microservice endpoint. +/// +public sealed class StellaRequestContext +{ + private readonly RequestPayload _payload; + private readonly Dictionary _pathParameters; + private readonly Lazy _query; + private readonly Lazy _headers; + + internal StellaRequestContext( + RequestPayload payload, + Dictionary pathParameters) + { + _payload = payload; + _pathParameters = pathParameters; + _query = new Lazy(() => ParseQuery(payload.Path)); + _headers = new Lazy(() => new HeaderDictionary( + payload.Headers.ToDictionary( + h => h.Key, + h => new StringValues(h.Value)))); + } + + /// HTTP method. + public string Method => _payload.Method; + + /// Request path (without query string). + public string Path => _payload.Path.Split('?')[0]; + + /// Full path including query string. + public string FullPath => _payload.Path; + + /// Host header value. + public string? Host => _payload.Host; + + /// Client IP address. + public string? ClientIp => _payload.ClientIp; + + /// Trace/correlation ID. + public string? TraceId => _payload.TraceId; + + /// Request headers. + public IHeaderDictionary Headers => _headers.Value; + + /// Query string parameters. + public IQueryCollection Query => _query.Value; + + /// Authenticated claims from JWT + hydration. + public IReadOnlyDictionary Claims => _payload.Claims; + + /// Path parameters extracted from route pattern. + public IReadOnlyDictionary PathParameters => _pathParameters; + + /// Content-Type header value. + public string? ContentType => Headers.ContentType; + + /// Content-Length header value. + public long? ContentLength => _payload.ContentLength > 0 ? _payload.ContentLength : null; + + /// Whether the request has a body. + public bool HasBody => _payload.Body != null && _payload.Body.Length > 0; + + /// Raw request body bytes. + public byte[]? RawBody => _payload.Body; + + /// + /// Gets a path parameter by name. + /// + public string? GetPathParameter(string name) + { + return _pathParameters.TryGetValue(name, out var value) ? value : null; + } + + /// + /// Gets a required path parameter, throws if missing. + /// + public string RequirePathParameter(string name) + { + return _pathParameters.TryGetValue(name, out var value) + ? value + : throw new ArgumentException($"Missing path parameter: {name}"); + } + + /// + /// Gets a query parameter by name. + /// + public string? GetQueryParameter(string name) + { + return Query.TryGetValue(name, out var values) ? values.FirstOrDefault() : null; + } + + /// + /// Gets all values for a query parameter. + /// + public string[] GetQueryParameterValues(string name) + { + return Query.TryGetValue(name, out var values) ? values.ToArray() : Array.Empty(); + } + + /// + /// Gets a header value by name. + /// + public string? GetHeader(string name) + { + return Headers.TryGetValue(name, out var values) ? values.FirstOrDefault() : null; + } + + /// + /// Gets a claim value by name. + /// + public string? GetClaim(string name) + { + return Claims.TryGetValue(name, out var value) ? value : null; + } + + /// + /// Gets a required claim, throws if missing. + /// + public string RequireClaim(string name) + { + return Claims.TryGetValue(name, out var value) + ? value + : throw new UnauthorizedAccessException($"Missing required claim: {name}"); + } + + /// + /// Reads the body as a string. + /// + public string? ReadBodyAsString(Encoding? encoding = null) + { + if (_payload.Body == null || _payload.Body.Length == 0) + return null; + + return (encoding ?? Encoding.UTF8).GetString(_payload.Body); + } + + /// + /// Deserializes the body as JSON. + /// + public T? ReadBodyAsJson(JsonSerializerOptions? options = null) + { + if (_payload.Body == null || _payload.Body.Length == 0) + return default; + + return JsonSerializer.Deserialize(_payload.Body, options ?? JsonDefaults.Options); + } + + /// + /// Deserializes the body as JSON, throwing if null or invalid. + /// + public T RequireBodyAsJson(JsonSerializerOptions? options = null) where T : class + { + var result = ReadBodyAsJson(options); + return result ?? throw new ArgumentException("Request body is required"); + } + + /// + /// Gets a body stream for reading. + /// + public Stream GetBodyStream() + { + return new MemoryStream(_payload.Body ?? Array.Empty(), writable: false); + } + + private static IQueryCollection ParseQuery(string path) + { + var queryIndex = path.IndexOf('?'); + if (queryIndex < 0) + return QueryCollection.Empty; + + var queryString = path[(queryIndex + 1)..]; + return QueryHelpers.ParseQuery(queryString); + } +} + +internal static class JsonDefaults +{ + public static readonly JsonSerializerOptions Options = new() + { + PropertyNameCaseInsensitive = true, + PropertyNamingPolicy = JsonNamingPolicy.CamelCase + }; +} +``` + +--- + +## Response Builder + +```csharp +namespace StellaOps.Microservice; + +/// +/// Builder for constructing endpoint responses. +/// +public sealed class StellaResponseBuilder +{ + private int _statusCode = 200; + private readonly Dictionary _headers = new(StringComparer.OrdinalIgnoreCase); + private byte[]? _body; + private string _contentType = "application/json"; + + /// + /// Creates a new response builder. + /// + public static StellaResponseBuilder Create() => new(); + + /// + /// Sets the status code. + /// + public StellaResponseBuilder WithStatus(int statusCode) + { + _statusCode = statusCode; + return this; + } + + /// + /// Sets a response header. + /// + public StellaResponseBuilder WithHeader(string name, string value) + { + _headers[name] = value; + return this; + } + + /// + /// Sets multiple response headers. + /// + public StellaResponseBuilder WithHeaders(IEnumerable> headers) + { + foreach (var (key, value) in headers) + { + _headers[key] = value; + } + return this; + } + + /// + /// Sets the Content-Type header. + /// + public StellaResponseBuilder WithContentType(string contentType) + { + _contentType = contentType; + return this; + } + + /// + /// Sets a JSON body. + /// + public StellaResponseBuilder WithJson(T value, JsonSerializerOptions? options = null) + { + _contentType = "application/json"; + _body = JsonSerializer.SerializeToUtf8Bytes(value, options ?? JsonDefaults.Options); + return this; + } + + /// + /// Sets a string body. + /// + public StellaResponseBuilder WithText(string text, Encoding? encoding = null) + { + if (!_headers.ContainsKey("Content-Type") && _contentType == "application/json") + { + _contentType = "text/plain"; + } + _body = (encoding ?? Encoding.UTF8).GetBytes(text); + return this; + } + + /// + /// Sets raw bytes as body. + /// + public StellaResponseBuilder WithBytes(byte[] data, string? contentType = null) + { + if (contentType != null) + { + _contentType = contentType; + } + _body = data; + return this; + } + + /// + /// Sets a stream as body. + /// + public StellaResponseBuilder WithStream(Stream stream, string? contentType = null) + { + if (contentType != null) + { + _contentType = contentType; + } + using var ms = new MemoryStream(); + stream.CopyTo(ms); + _body = ms.ToArray(); + return this; + } + + /// + /// Builds the response payload. + /// + public ResponsePayload Build() + { + _headers["Content-Type"] = _contentType; + + return new ResponsePayload + { + StatusCode = _statusCode, + Headers = new Dictionary(_headers), + Body = _body, + IsFinalChunk = true + }; + } + + // Static factory methods for common responses + + /// Creates a 200 OK response with JSON body. + public static ResponsePayload Ok(T value) => + Create().WithStatus(200).WithJson(value).Build(); + + /// Creates a 200 OK response with no body. + public static ResponsePayload Ok() => + Create().WithStatus(200).Build(); + + /// Creates a 201 Created response with JSON body. + public static ResponsePayload Created(T value, string? location = null) + { + var builder = Create().WithStatus(201).WithJson(value); + if (location != null) + { + builder.WithHeader("Location", location); + } + return builder.Build(); + } + + /// Creates a 204 No Content response. + public static ResponsePayload NoContent() => + Create().WithStatus(204).Build(); + + /// Creates a 400 Bad Request response. + public static ResponsePayload BadRequest(string message) => + Create().WithStatus(400).WithJson(new { error = message }).Build(); + + /// Creates a 400 Bad Request response with validation errors. + public static ResponsePayload BadRequest(Dictionary errors) => + Create().WithStatus(400).WithJson(new { errors }).Build(); + + /// Creates a 401 Unauthorized response. + public static ResponsePayload Unauthorized(string? message = null) => + Create().WithStatus(401).WithJson(new { error = message ?? "Unauthorized" }).Build(); + + /// Creates a 403 Forbidden response. + public static ResponsePayload Forbidden(string? message = null) => + Create().WithStatus(403).WithJson(new { error = message ?? "Forbidden" }).Build(); + + /// Creates a 404 Not Found response. + public static ResponsePayload NotFound(string? message = null) => + Create().WithStatus(404).WithJson(new { error = message ?? "Not found" }).Build(); + + /// Creates a 409 Conflict response. + public static ResponsePayload Conflict(string message) => + Create().WithStatus(409).WithJson(new { error = message }).Build(); + + /// Creates a 500 Internal Server Error response. + public static ResponsePayload InternalError(string? message = null) => + Create().WithStatus(500).WithJson(new { error = message ?? "Internal server error" }).Build(); + + /// Creates a 503 Service Unavailable response. + public static ResponsePayload ServiceUnavailable(string? message = null) => + Create().WithStatus(503).WithJson(new { error = message ?? "Service unavailable" }).Build(); + + /// Creates a redirect response. + public static ResponsePayload Redirect(string location, bool permanent = false) => + Create() + .WithStatus(permanent ? 301 : 302) + .WithHeader("Location", location) + .Build(); +} +``` + +--- + +## Endpoint Handler Interface + +```csharp +namespace StellaOps.Microservice; + +/// +/// Interface for endpoint handler classes. +/// +public interface IEndpointHandler +{ +} + +/// +/// Base class for endpoint handlers with helper methods. +/// +public abstract class EndpointHandler : IEndpointHandler +{ + /// Current request context (set by dispatcher). + public StellaRequestContext Context { get; internal set; } = null!; + + /// Creates a 200 OK response with JSON body. + protected ResponsePayload Ok(T value) => StellaResponseBuilder.Ok(value); + + /// Creates a 200 OK response with no body. + protected ResponsePayload Ok() => StellaResponseBuilder.Ok(); + + /// Creates a 201 Created response. + protected ResponsePayload Created(T value, string? location = null) => + StellaResponseBuilder.Created(value, location); + + /// Creates a 204 No Content response. + protected ResponsePayload NoContent() => StellaResponseBuilder.NoContent(); + + /// Creates a 400 Bad Request response. + protected ResponsePayload BadRequest(string message) => + StellaResponseBuilder.BadRequest(message); + + /// Creates a 401 Unauthorized response. + protected ResponsePayload Unauthorized(string? message = null) => + StellaResponseBuilder.Unauthorized(message); + + /// Creates a 403 Forbidden response. + protected ResponsePayload Forbidden(string? message = null) => + StellaResponseBuilder.Forbidden(message); + + /// Creates a 404 Not Found response. + protected ResponsePayload NotFound(string? message = null) => + StellaResponseBuilder.NotFound(message); + + /// Creates a response with custom status and body. + protected StellaResponseBuilder Response() => StellaResponseBuilder.Create(); +} +``` + +--- + +## Request Dispatcher + +```csharp +namespace StellaOps.Microservice; + +public interface IRequestDispatcher +{ + Task DispatchAsync(RequestPayload request, CancellationToken cancellationToken); +} + +public sealed class RequestDispatcher : IRequestDispatcher +{ + private readonly IEndpointRegistry _registry; + private readonly IServiceProvider _serviceProvider; + private readonly ILogger _logger; + + public RequestDispatcher( + IEndpointRegistry registry, + IServiceProvider serviceProvider, + ILogger logger) + { + _registry = registry; + _serviceProvider = serviceProvider; + _logger = logger; + } + + public async Task DispatchAsync( + RequestPayload request, + CancellationToken cancellationToken) + { + var path = request.Path.Split('?')[0]; + var endpoint = _registry.FindEndpoint(request.Method, path); + + if (endpoint == null) + { + _logger.LogDebug("No endpoint found for {Method} {Path}", request.Method, path); + return StellaResponseBuilder.NotFound($"No endpoint: {request.Method} {path}"); + } + + // Extract path parameters + var pathParams = ExtractPathParameters(path, endpoint.Descriptor.Path); + + // Create request context + var context = new StellaRequestContext(request, pathParams); + + // Create handler instance + using var scope = _serviceProvider.CreateScope(); + var handler = scope.ServiceProvider.GetService(endpoint.HandlerType); + + if (handler == null) + { + // Try to create without DI + handler = Activator.CreateInstance(endpoint.HandlerType); + } + + if (handler == null) + { + _logger.LogError("Cannot create handler {Type}", endpoint.HandlerType); + return StellaResponseBuilder.InternalError("Handler instantiation failed"); + } + + // Set context on base handler + if (handler is EndpointHandler baseHandler) + { + baseHandler.Context = context; + } + + try + { + // Invoke handler method + var result = endpoint.HandlerMethod.Invoke(handler, BuildMethodParameters( + endpoint.HandlerMethod, context, cancellationToken)); + + // Handle async methods + if (result is Task taskResponse) + { + return await taskResponse; + } + else if (result is Task task) + { + await task; + // Method returned Task without result - assume OK + return StellaResponseBuilder.Ok(); + } + else if (result is ResponsePayload response) + { + return response; + } + else if (result != null) + { + // Serialize result as JSON + return StellaResponseBuilder.Ok(result); + } + else + { + return StellaResponseBuilder.NoContent(); + } + } + catch (TargetInvocationException ex) when (ex.InnerException != null) + { + throw ex.InnerException; + } + } + + private Dictionary ExtractPathParameters(string actualPath, string pattern) + { + var result = new Dictionary(); + + var patternSegments = pattern.Split('/', StringSplitOptions.RemoveEmptyEntries); + var pathSegments = actualPath.Split('/', StringSplitOptions.RemoveEmptyEntries); + + for (int i = 0; i < patternSegments.Length && i < pathSegments.Length; i++) + { + var patternSeg = patternSegments[i]; + if (patternSeg.StartsWith('{') && patternSeg.EndsWith('}')) + { + var paramName = patternSeg[1..^1]; + result[paramName] = pathSegments[i]; + } + } + + return result; + } + + private object?[] BuildMethodParameters( + MethodInfo method, + StellaRequestContext context, + CancellationToken cancellationToken) + { + var parameters = method.GetParameters(); + var args = new object?[parameters.Length]; + + for (int i = 0; i < parameters.Length; i++) + { + var param = parameters[i]; + var paramType = param.ParameterType; + + if (paramType == typeof(StellaRequestContext)) + { + args[i] = context; + } + else if (paramType == typeof(CancellationToken)) + { + args[i] = cancellationToken; + } + else if (param.GetCustomAttribute() != null) + { + var value = context.GetPathParameter(param.Name ?? ""); + args[i] = ConvertParameter(value, paramType); + } + else if (param.GetCustomAttribute() != null) + { + var value = context.GetQueryParameter(param.Name ?? ""); + args[i] = ConvertParameter(value, paramType); + } + else if (param.GetCustomAttribute() != null) + { + var headerName = param.GetCustomAttribute()?.Name ?? param.Name; + var value = context.GetHeader(headerName ?? ""); + args[i] = ConvertParameter(value, paramType); + } + else if (param.GetCustomAttribute() != null) + { + var claimName = param.GetCustomAttribute()?.Name ?? param.Name; + var value = context.GetClaim(claimName ?? ""); + args[i] = ConvertParameter(value, paramType); + } + else if (param.GetCustomAttribute() != null || IsComplexType(paramType)) + { + // Deserialize body + args[i] = context.ReadBodyAsJson(paramType); + } + else + { + args[i] = param.HasDefaultValue ? param.DefaultValue : null; + } + } + + return args; + } + + private static object? ConvertParameter(string? value, Type targetType) + { + if (value == null) + return targetType.IsValueType ? Activator.CreateInstance(targetType) : null; + + if (targetType == typeof(string)) + return value; + + if (targetType == typeof(int) || targetType == typeof(int?)) + return int.TryParse(value, out var i) ? i : null; + + if (targetType == typeof(long) || targetType == typeof(long?)) + return long.TryParse(value, out var l) ? l : null; + + if (targetType == typeof(Guid) || targetType == typeof(Guid?)) + return Guid.TryParse(value, out var g) ? g : null; + + if (targetType == typeof(bool) || targetType == typeof(bool?)) + return bool.TryParse(value, out var b) ? b : null; + + return Convert.ChangeType(value, targetType); + } + + private static bool IsComplexType(Type type) + { + return !type.IsPrimitive && + type != typeof(string) && + type != typeof(decimal) && + type != typeof(Guid) && + type != typeof(DateTime) && + type != typeof(DateTimeOffset) && + !type.IsEnum; + } + + private object? ReadBodyAsJson(StellaRequestContext context, Type targetType) + { + if (!context.HasBody) + return null; + + var json = context.RawBody; + return JsonSerializer.Deserialize(json, targetType, JsonDefaults.Options); + } +} +``` + +--- + +## Parameter Binding Attributes + +```csharp +namespace StellaOps.Microservice; + +[AttributeUsage(AttributeTargets.Parameter)] +public sealed class FromPathAttribute : Attribute { } + +[AttributeUsage(AttributeTargets.Parameter)] +public sealed class FromQueryAttribute : Attribute { } + +[AttributeUsage(AttributeTargets.Parameter)] +public sealed class FromHeaderAttribute : Attribute +{ + public string? Name { get; set; } +} + +[AttributeUsage(AttributeTargets.Parameter)] +public sealed class FromClaimAttribute : Attribute +{ + public string? Name { get; set; } +} + +[AttributeUsage(AttributeTargets.Parameter)] +public sealed class FromBodyAttribute : Attribute { } +``` + +--- + +## Usage Example + +```csharp +[StellaEndpoint(BasePath = "/billing")] +public class InvoiceHandler : EndpointHandler +{ + private readonly InvoiceService _service; + + public InvoiceHandler(InvoiceService service) + { + _service = service; + } + + [StellaGet("invoices/{id}")] + public async Task GetInvoice( + [FromPath] Guid id, + CancellationToken cancellationToken) + { + var invoice = await _service.GetByIdAsync(id, cancellationToken); + if (invoice == null) + return NotFound($"Invoice {id} not found"); + + return Ok(invoice); + } + + [StellaPost("invoices")] + [StellaAuth(RequiredClaims = new[] { "billing:write" })] + public async Task CreateInvoice( + [FromBody] CreateInvoiceRequest request, + [FromClaim(Name = "sub")] string userId, + CancellationToken cancellationToken) + { + var invoice = await _service.CreateAsync(request, userId, cancellationToken); + return Created(invoice, $"/billing/invoices/{invoice.Id}"); + } + + [StellaGet("invoices")] + public async Task ListInvoices( + StellaRequestContext context, + CancellationToken cancellationToken) + { + var page = int.Parse(context.GetQueryParameter("page") ?? "1"); + var pageSize = int.Parse(context.GetQueryParameter("pageSize") ?? "20"); + + var invoices = await _service.ListAsync(page, pageSize, cancellationToken); + return Ok(invoices); + } +} +``` + +--- + +## Deliverables + +1. `StellaOps.Microservice/StellaRequestContext.cs` +2. `StellaOps.Microservice/StellaResponseBuilder.cs` +3. `StellaOps.Microservice/IEndpointHandler.cs` +4. `StellaOps.Microservice/EndpointHandler.cs` +5. `StellaOps.Microservice/IRequestDispatcher.cs` +6. `StellaOps.Microservice/RequestDispatcher.cs` +7. `StellaOps.Microservice/ParameterBindingAttributes.cs` +8. Parameter binding tests +9. Response builder tests +10. Dispatcher routing tests + +--- + +## Next Step + +Proceed to [Step 22: Logging & Tracing](22-Step.md) to implement structured logging and distributed tracing. diff --git a/docs/router/22-Step.md b/docs/router/22-Step.md new file mode 100644 index 000000000..b198d25d3 --- /dev/null +++ b/docs/router/22-Step.md @@ -0,0 +1,698 @@ +# Step 22: Logging & Tracing + +**Phase 6: Observability & Resilience** +**Estimated Complexity:** Medium +**Dependencies:** Step 19 (Microservice Host Builder) + +--- + +## Overview + +Structured logging and distributed tracing provide observability across the gateway and microservices. Correlation IDs flow from HTTP requests through the transport layer to microservice handlers, enabling end-to-end request tracking. + +--- + +## Goals + +1. Implement structured logging with consistent context +2. Propagate correlation IDs across all layers +3. Integrate with OpenTelemetry for distributed tracing +4. Support log level configuration per component +5. Provide sensitive data filtering + +--- + +## Correlation Context + +```csharp +namespace StellaOps.Router.Common; + +/// +/// Provides correlation context for request tracking. +/// +public static class CorrelationContext +{ + private static readonly AsyncLocal _current = new(); + + public static CorrelationData Current => _current.Value ?? CorrelationData.Empty; + + public static IDisposable BeginScope(CorrelationData data) + { + var previous = _current.Value; + _current.Value = data; + return new CorrelationScope(previous); + } + + public static IDisposable BeginScope(string correlationId, string? serviceName = null) + { + return BeginScope(new CorrelationData + { + CorrelationId = correlationId, + ServiceName = serviceName ?? Current.ServiceName, + ParentId = Current.CorrelationId + }); + } + + private sealed class CorrelationScope : IDisposable + { + private readonly CorrelationData? _previous; + + public CorrelationScope(CorrelationData? previous) + { + _previous = previous; + } + + public void Dispose() + { + _current.Value = _previous; + } + } +} + +public sealed class CorrelationData +{ + public static readonly CorrelationData Empty = new(); + + public string CorrelationId { get; init; } = ""; + public string? ParentId { get; init; } + public string? ServiceName { get; init; } + public string? InstanceId { get; init; } + public string? Method { get; init; } + public string? Path { get; init; } + public string? UserId { get; init; } + public Dictionary Extra { get; init; } = new(); +} +``` + +--- + +## Structured Log Enricher + +```csharp +namespace StellaOps.Router.Common; + +/// +/// Enriches log entries with correlation context. +/// +public sealed class CorrelationLogEnricher : ILoggerProvider +{ + private readonly ILoggerProvider _inner; + + public CorrelationLogEnricher(ILoggerProvider inner) + { + _inner = inner; + } + + public ILogger CreateLogger(string categoryName) + { + return new CorrelationLogger(_inner.CreateLogger(categoryName)); + } + + public void Dispose() => _inner.Dispose(); + + private sealed class CorrelationLogger : ILogger + { + private readonly ILogger _inner; + + public CorrelationLogger(ILogger inner) + { + _inner = inner; + } + + public IDisposable? BeginScope(TState state) where TState : notnull + { + return _inner.BeginScope(state); + } + + public bool IsEnabled(LogLevel logLevel) => _inner.IsEnabled(logLevel); + + public void Log( + LogLevel logLevel, + EventId eventId, + TState state, + Exception? exception, + Func formatter) + { + var correlation = CorrelationContext.Current; + + // Create enriched state + using var scope = _inner.BeginScope(new Dictionary + { + ["CorrelationId"] = correlation.CorrelationId, + ["ServiceName"] = correlation.ServiceName, + ["InstanceId"] = correlation.InstanceId, + ["Method"] = correlation.Method, + ["Path"] = correlation.Path, + ["UserId"] = correlation.UserId + }); + + _inner.Log(logLevel, eventId, state, exception, formatter); + } + } +} +``` + +--- + +## Gateway Request Logging + +```csharp +namespace StellaOps.Router.Gateway; + +/// +/// Middleware for request/response logging with correlation. +/// +public sealed class RequestLoggingMiddleware +{ + private readonly RequestDelegate _next; + private readonly ILogger _logger; + private readonly RequestLoggingConfig _config; + + public RequestLoggingMiddleware( + RequestDelegate next, + ILogger logger, + IOptions config) + { + _next = next; + _logger = logger; + _config = config.Value; + } + + public async Task InvokeAsync(HttpContext context) + { + var correlationId = context.Request.Headers["X-Correlation-ID"].FirstOrDefault() + ?? context.TraceIdentifier; + + // Set correlation context + using var scope = CorrelationContext.BeginScope(new CorrelationData + { + CorrelationId = correlationId, + ServiceName = "gateway", + Method = context.Request.Method, + Path = context.Request.Path + }); + + var sw = Stopwatch.StartNew(); + + try + { + // Log request + if (_config.LogRequests) + { + LogRequest(context, correlationId); + } + + await _next(context); + + sw.Stop(); + + // Log response + if (_config.LogResponses) + { + LogResponse(context, correlationId, sw.ElapsedMilliseconds); + } + } + catch (Exception ex) + { + sw.Stop(); + LogError(context, correlationId, sw.ElapsedMilliseconds, ex); + throw; + } + } + + private void LogRequest(HttpContext context, string correlationId) + { + var request = context.Request; + + _logger.LogInformation( + "HTTP {Method} {Path} started | CorrelationId={CorrelationId} ClientIP={ClientIP} UserAgent={UserAgent}", + request.Method, + request.Path + request.QueryString, + correlationId, + context.Connection.RemoteIpAddress, + SanitizeHeader(request.Headers.UserAgent)); + } + + private void LogResponse(HttpContext context, string correlationId, long elapsedMs) + { + var level = context.Response.StatusCode >= 500 ? LogLevel.Error + : context.Response.StatusCode >= 400 ? LogLevel.Warning + : LogLevel.Information; + + _logger.Log( + level, + "HTTP {Method} {Path} completed {StatusCode} in {ElapsedMs}ms | CorrelationId={CorrelationId}", + context.Request.Method, + context.Request.Path, + context.Response.StatusCode, + elapsedMs, + correlationId); + } + + private void LogError(HttpContext context, string correlationId, long elapsedMs, Exception ex) + { + _logger.LogError( + ex, + "HTTP {Method} {Path} failed after {ElapsedMs}ms | CorrelationId={CorrelationId}", + context.Request.Method, + context.Request.Path, + elapsedMs, + correlationId); + } + + private static string SanitizeHeader(StringValues value) + { + var str = value.ToString(); + return str.Length > 200 ? str[..200] + "..." : str; + } +} + +public class RequestLoggingConfig +{ + public bool LogRequests { get; set; } = true; + public bool LogResponses { get; set; } = true; + public bool LogHeaders { get; set; } = false; + public bool LogBody { get; set; } = false; + public int MaxBodyLogLength { get; set; } = 1000; + public HashSet SensitiveHeaders { get; set; } = new(StringComparer.OrdinalIgnoreCase) + { + "Authorization", "Cookie", "X-API-Key" + }; +} +``` + +--- + +## OpenTelemetry Integration + +```csharp +namespace StellaOps.Router.Common; + +/// +/// Configures OpenTelemetry tracing for the router. +/// +public static class OpenTelemetryExtensions +{ + public static IServiceCollection AddStellaTracing( + this IServiceCollection services, + IConfiguration configuration) + { + var config = configuration.GetSection("Tracing").Get() + ?? new TracingConfig(); + + services.AddOpenTelemetry() + .WithTracing(builder => + { + builder + .SetResourceBuilder(ResourceBuilder.CreateDefault() + .AddService(config.ServiceName)) + .AddSource(StellaActivitySource.Name) + .AddAspNetCoreInstrumentation(options => + { + options.Filter = ctx => + !ctx.Request.Path.StartsWithSegments("/health"); + options.RecordException = true; + }) + .AddHttpClientInstrumentation(); + + // Add exporter based on config + switch (config.Exporter.ToLower()) + { + case "jaeger": + builder.AddJaegerExporter(o => + { + o.AgentHost = config.JaegerHost; + o.AgentPort = config.JaegerPort; + }); + break; + + case "otlp": + builder.AddOtlpExporter(o => + { + o.Endpoint = new Uri(config.OtlpEndpoint); + }); + break; + + case "console": + builder.AddConsoleExporter(); + break; + } + }); + + return services; + } +} + +public static class StellaActivitySource +{ + public const string Name = "StellaOps.Router"; + + private static readonly ActivitySource _source = new(Name); + + public static Activity? StartActivity(string name, ActivityKind kind = ActivityKind.Internal) + { + return _source.StartActivity(name, kind); + } + + public static Activity? StartRequestActivity(string method, string path) + { + var activity = _source.StartActivity("HandleRequest", ActivityKind.Server); + activity?.SetTag("http.method", method); + activity?.SetTag("http.route", path); + return activity; + } + + public static Activity? StartTransportActivity(string transport, string serviceName) + { + var activity = _source.StartActivity("Transport", ActivityKind.Client); + activity?.SetTag("transport.type", transport); + activity?.SetTag("service.name", serviceName); + return activity; + } +} + +public class TracingConfig +{ + public string ServiceName { get; set; } = "stella-router"; + public string Exporter { get; set; } = "console"; + public string JaegerHost { get; set; } = "localhost"; + public int JaegerPort { get; set; } = 6831; + public string OtlpEndpoint { get; set; } = "http://localhost:4317"; + public double SampleRate { get; set; } = 1.0; +} +``` + +--- + +## Transport Trace Propagation + +```csharp +namespace StellaOps.Router.Transport; + +/// +/// Propagates trace context through the transport layer. +/// +public sealed class TracePropagator +{ + /// + /// Injects trace context into request payload. + /// + public void InjectContext(RequestPayload payload) + { + var activity = Activity.Current; + if (activity == null) + return; + + var headers = new Dictionary(payload.Headers); + + // Inject W3C Trace Context + headers["traceparent"] = $"00-{activity.TraceId}-{activity.SpanId}-{(activity.Recorded ? "01" : "00")}"; + + if (!string.IsNullOrEmpty(activity.TraceStateString)) + { + headers["tracestate"] = activity.TraceStateString; + } + + // Create new payload with updated headers + // (In real implementation, use record with 'with' expression) + } + + /// + /// Extracts trace context from request payload. + /// + public ActivityContext? ExtractContext(RequestPayload payload) + { + if (!payload.Headers.TryGetValue("traceparent", out var traceparent)) + return null; + + if (ActivityContext.TryParse(traceparent, payload.Headers.GetValueOrDefault("tracestate"), out var ctx)) + { + return ctx; + } + + return null; + } +} +``` + +--- + +## Microservice Logging + +```csharp +namespace StellaOps.Microservice; + +/// +/// Request logging for microservice handlers. +/// +public sealed class HandlerLoggingDecorator : IRequestDispatcher +{ + private readonly IRequestDispatcher _inner; + private readonly ILogger _logger; + private readonly TracePropagator _propagator; + + public HandlerLoggingDecorator( + IRequestDispatcher inner, + ILogger logger, + TracePropagator propagator) + { + _inner = inner; + _logger = logger; + _propagator = propagator; + } + + public async Task DispatchAsync( + RequestPayload request, + CancellationToken cancellationToken) + { + // Extract and restore trace context + var parentContext = _propagator.ExtractContext(request); + using var activity = StellaActivitySource.StartActivity( + "HandleRequest", + ActivityKind.Server, + parentContext ?? default); + + activity?.SetTag("http.method", request.Method); + activity?.SetTag("http.route", request.Path); + + // Set correlation context + var correlationId = request.TraceId ?? activity?.TraceId.ToString() ?? Guid.NewGuid().ToString("N"); + using var scope = CorrelationContext.BeginScope(new CorrelationData + { + CorrelationId = correlationId, + Method = request.Method, + Path = request.Path, + UserId = request.Claims.GetValueOrDefault("sub") + }); + + var sw = Stopwatch.StartNew(); + + try + { + _logger.LogDebug( + "Handling {Method} {Path} | CorrelationId={CorrelationId}", + request.Method, request.Path, correlationId); + + var response = await _inner.DispatchAsync(request, cancellationToken); + + sw.Stop(); + activity?.SetTag("http.status_code", response.StatusCode); + + var level = response.StatusCode >= 500 ? LogLevel.Error + : response.StatusCode >= 400 ? LogLevel.Warning + : LogLevel.Debug; + + _logger.Log( + level, + "Completed {Method} {Path} with {StatusCode} in {ElapsedMs}ms | CorrelationId={CorrelationId}", + request.Method, request.Path, response.StatusCode, sw.ElapsedMilliseconds, correlationId); + + return response; + } + catch (Exception ex) + { + sw.Stop(); + activity?.SetStatus(ActivityStatusCode.Error, ex.Message); + + _logger.LogError( + ex, + "Failed {Method} {Path} after {ElapsedMs}ms | CorrelationId={CorrelationId}", + request.Method, request.Path, sw.ElapsedMilliseconds, correlationId); + + throw; + } + } +} +``` + +--- + +## Sensitive Data Filtering + +```csharp +namespace StellaOps.Router.Common; + +/// +/// Filters sensitive data from logs. +/// +public sealed class SensitiveDataFilter +{ + private readonly HashSet _sensitiveFields; + private readonly Regex _cardNumberRegex; + private readonly Regex _ssnRegex; + + public SensitiveDataFilter(IOptions config) + { + var cfg = config.Value; + _sensitiveFields = new HashSet(cfg.SensitiveFields, StringComparer.OrdinalIgnoreCase); + _cardNumberRegex = new Regex(@"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b"); + _ssnRegex = new Regex(@"\b\d{3}-\d{2}-\d{4}\b"); + } + + public string Filter(string input) + { + var result = input; + + // Mask card numbers + result = _cardNumberRegex.Replace(result, m => + m.Value[..4] + "****" + m.Value[^4..]); + + // Mask SSNs + result = _ssnRegex.Replace(result, "***-**-****"); + + return result; + } + + public Dictionary FilterHeaders(IReadOnlyDictionary headers) + { + return headers.ToDictionary( + h => h.Key, + h => _sensitiveFields.Contains(h.Key) ? "[REDACTED]" : h.Value); + } + + public object FilterObject(object obj) + { + // Deep filter for JSON objects + var json = JsonSerializer.Serialize(obj); + var filtered = FilterJsonProperties(json); + return JsonSerializer.Deserialize(filtered)!; + } + + private string FilterJsonProperties(string json) + { + var doc = JsonDocument.Parse(json); + using var stream = new MemoryStream(); + using var writer = new Utf8JsonWriter(stream); + + FilterElement(doc.RootElement, writer); + + writer.Flush(); + return Encoding.UTF8.GetString(stream.ToArray()); + } + + private void FilterElement(JsonElement element, Utf8JsonWriter writer) + { + switch (element.ValueKind) + { + case JsonValueKind.Object: + writer.WriteStartObject(); + foreach (var property in element.EnumerateObject()) + { + writer.WritePropertyName(property.Name); + if (_sensitiveFields.Contains(property.Name)) + { + writer.WriteStringValue("[REDACTED]"); + } + else + { + FilterElement(property.Value, writer); + } + } + writer.WriteEndObject(); + break; + + case JsonValueKind.Array: + writer.WriteStartArray(); + foreach (var item in element.EnumerateArray()) + { + FilterElement(item, writer); + } + writer.WriteEndArray(); + break; + + default: + element.WriteTo(writer); + break; + } + } +} + +public class SensitiveDataConfig +{ + public HashSet SensitiveFields { get; set; } = new(StringComparer.OrdinalIgnoreCase) + { + "password", "secret", "token", "apiKey", "api_key", + "authorization", "creditCard", "credit_card", "ssn", + "socialSecurityNumber", "social_security_number" + }; +} +``` + +--- + +## YAML Configuration + +```yaml +Logging: + LogLevel: + Default: "Information" + "StellaOps.Router": "Debug" + "Microsoft.AspNetCore": "Warning" + +RequestLogging: + LogRequests: true + LogResponses: true + LogHeaders: false + LogBody: false + MaxBodyLogLength: 1000 + SensitiveHeaders: + - Authorization + - Cookie + - X-API-Key + +Tracing: + ServiceName: "stella-router" + Exporter: "otlp" + OtlpEndpoint: "http://otel-collector:4317" + SampleRate: 1.0 + +SensitiveData: + SensitiveFields: + - password + - secret + - token + - apiKey + - creditCard + - ssn +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Common/CorrelationContext.cs` +2. `StellaOps.Router.Common/CorrelationLogEnricher.cs` +3. `StellaOps.Router.Gateway/RequestLoggingMiddleware.cs` +4. `StellaOps.Router.Common/OpenTelemetryExtensions.cs` +5. `StellaOps.Router.Common/StellaActivitySource.cs` +6. `StellaOps.Router.Transport/TracePropagator.cs` +7. `StellaOps.Microservice/HandlerLoggingDecorator.cs` +8. `StellaOps.Router.Common/SensitiveDataFilter.cs` +9. Correlation propagation tests +10. Trace context tests + +--- + +## Next Step + +Proceed to [Step 23: Metrics & Health Checks](23-Step.md) to implement observability metrics. diff --git a/docs/router/23-Step.md b/docs/router/23-Step.md new file mode 100644 index 000000000..552259c8f --- /dev/null +++ b/docs/router/23-Step.md @@ -0,0 +1,769 @@ +# Step 23: Metrics & Health Checks + +**Phase 6: Observability & Resilience** +**Estimated Complexity:** Medium +**Dependencies:** Step 22 (Logging & Tracing) + +--- + +## Overview + +Metrics and health checks provide operational visibility into the router and microservices. Prometheus-compatible metrics expose request rates, latencies, error rates, and connection pool status. Health checks enable load balancers and orchestrators to route traffic appropriately. + +--- + +## Goals + +1. Expose Prometheus-compatible metrics +2. Track request/response metrics per endpoint +3. Monitor transport layer health +4. Provide liveness and readiness probes +5. Support custom health check integrations + +--- + +## Metrics Configuration + +```csharp +namespace StellaOps.Router.Common; + +public class MetricsConfig +{ + /// Whether to enable metrics collection. + public bool Enabled { get; set; } = true; + + /// Path for metrics endpoint. + public string Path { get; set; } = "/metrics"; + + /// Histogram buckets for request duration. + public double[] DurationBuckets { get; set; } = new[] + { + 0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 10.0 + }; + + /// Labels to include in metrics. + public HashSet IncludeLabels { get; set; } = new() + { + "method", "path", "status_code", "service" + }; + + /// Whether to include path in labels (may cause high cardinality). + public bool IncludePathLabel { get; set; } = false; + + /// Maximum unique path labels before aggregating. + public int MaxPathCardinality { get; set; } = 100; +} +``` + +--- + +## Core Metrics + +```csharp +namespace StellaOps.Router.Common; + +/// +/// Central metrics registry for Stella Router. +/// +public sealed class StellaMetrics +{ + // Request metrics + public static readonly Counter RequestsTotal = Meter.CreateCounter( + "stella_requests_total", + description: "Total number of requests processed"); + + public static readonly Histogram RequestDuration = Meter.CreateHistogram( + "stella_request_duration_seconds", + unit: "s", + description: "Request processing duration in seconds"); + + public static readonly Counter RequestErrors = Meter.CreateCounter( + "stella_request_errors_total", + description: "Total number of request errors"); + + // Transport metrics + public static readonly UpDownCounter ActiveConnections = Meter.CreateUpDownCounter( + "stella_active_connections", + description: "Number of active transport connections"); + + public static readonly Counter ConnectionsTotal = Meter.CreateCounter( + "stella_connections_total", + description: "Total number of transport connections"); + + public static readonly Counter FramesSent = Meter.CreateCounter( + "stella_frames_sent_total", + description: "Total number of frames sent"); + + public static readonly Counter FramesReceived = Meter.CreateCounter( + "stella_frames_received_total", + description: "Total number of frames received"); + + public static readonly Counter BytesSent = Meter.CreateCounter( + "stella_bytes_sent_total", + unit: "By", + description: "Total bytes sent"); + + public static readonly Counter BytesReceived = Meter.CreateCounter( + "stella_bytes_received_total", + unit: "By", + description: "Total bytes received"); + + // Rate limiting metrics + public static readonly Counter RateLimitHits = Meter.CreateCounter( + "stella_rate_limit_hits_total", + description: "Number of requests that hit rate limits"); + + public static readonly Gauge RateLimitBuckets = Meter.CreateGauge( + "stella_rate_limit_buckets", + description: "Number of active rate limit buckets"); + + // Auth metrics + public static readonly Counter AuthSuccesses = Meter.CreateCounter( + "stella_auth_success_total", + description: "Number of successful authentications"); + + public static readonly Counter AuthFailures = Meter.CreateCounter( + "stella_auth_failures_total", + description: "Number of failed authentications"); + + // Circuit breaker metrics + public static readonly Gauge CircuitBreakerState = Meter.CreateGauge( + "stella_circuit_breaker_state", + description: "Circuit breaker state (0=closed, 1=half-open, 2=open)"); + + private static readonly Meter Meter = new("StellaOps.Router", "1.0.0"); +} +``` + +--- + +## Request Metrics Middleware + +```csharp +namespace StellaOps.Router.Gateway; + +/// +/// Middleware to collect request metrics. +/// +public sealed class MetricsMiddleware +{ + private readonly RequestDelegate _next; + private readonly MetricsConfig _config; + private readonly PathNormalizer _pathNormalizer; + + public MetricsMiddleware( + RequestDelegate next, + IOptions config) + { + _next = next; + _config = config.Value; + _pathNormalizer = new PathNormalizer(_config.MaxPathCardinality); + } + + public async Task InvokeAsync(HttpContext context) + { + if (!_config.Enabled) + { + await _next(context); + return; + } + + var sw = Stopwatch.StartNew(); + var method = context.Request.Method; + var path = _config.IncludePathLabel + ? _pathNormalizer.Normalize(context.Request.Path) + : "aggregated"; + + try + { + await _next(context); + } + finally + { + sw.Stop(); + + var tags = new TagList + { + { "method", method }, + { "status_code", context.Response.StatusCode.ToString() } + }; + + if (_config.IncludePathLabel) + { + tags.Add("path", path); + } + + StellaMetrics.RequestsTotal.Add(1, tags); + StellaMetrics.RequestDuration.Record(sw.Elapsed.TotalSeconds, tags); + + if (context.Response.StatusCode >= 400) + { + StellaMetrics.RequestErrors.Add(1, tags); + } + } + } +} + +/// +/// Normalizes paths to prevent high cardinality. +/// +internal sealed class PathNormalizer +{ + private readonly int _maxCardinality; + private readonly ConcurrentDictionary _pathCache = new(); + private int _uniquePaths; + + public PathNormalizer(int maxCardinality) + { + _maxCardinality = maxCardinality; + } + + public string Normalize(string path) + { + if (_pathCache.TryGetValue(path, out var normalized)) + return normalized; + + // Replace path parameters with placeholders + var segments = path.Split('/'); + for (int i = 0; i < segments.Length; i++) + { + if (Guid.TryParse(segments[i], out _) || + int.TryParse(segments[i], out _) || + segments[i].Length > 20) + { + segments[i] = "{id}"; + } + } + + normalized = string.Join("/", segments); + + if (Interlocked.Increment(ref _uniquePaths) <= _maxCardinality) + { + _pathCache[path] = normalized; + } + else + { + normalized = "other"; + } + + return normalized; + } +} +``` + +--- + +## Transport Metrics + +```csharp +namespace StellaOps.Router.Transport; + +/// +/// Collects metrics for transport layer operations. +/// +public sealed class TransportMetricsCollector +{ + public void RecordConnectionOpened(string transport, string serviceName) + { + var tags = new TagList + { + { "transport", transport }, + { "service", serviceName } + }; + + StellaMetrics.ConnectionsTotal.Add(1, tags); + StellaMetrics.ActiveConnections.Add(1, tags); + } + + public void RecordConnectionClosed(string transport, string serviceName) + { + var tags = new TagList + { + { "transport", transport }, + { "service", serviceName } + }; + + StellaMetrics.ActiveConnections.Add(-1, tags); + } + + public void RecordFrameSent(string transport, FrameType type, int bytes) + { + var tags = new TagList + { + { "transport", transport }, + { "frame_type", type.ToString() } + }; + + StellaMetrics.FramesSent.Add(1, tags); + StellaMetrics.BytesSent.Add(bytes, new TagList { { "transport", transport } }); + } + + public void RecordFrameReceived(string transport, FrameType type, int bytes) + { + var tags = new TagList + { + { "transport", transport }, + { "frame_type", type.ToString() } + }; + + StellaMetrics.FramesReceived.Add(1, tags); + StellaMetrics.BytesReceived.Add(bytes, new TagList { { "transport", transport } }); + } +} +``` + +--- + +## Health Check System + +```csharp +namespace StellaOps.Router.Common; + +/// +/// Health check result. +/// +public sealed class HealthCheckResult +{ + public HealthStatus Status { get; init; } + public string? Description { get; init; } + public TimeSpan Duration { get; init; } + public IReadOnlyDictionary? Data { get; init; } + public Exception? Exception { get; init; } +} + +public enum HealthStatus +{ + Healthy, + Degraded, + Unhealthy +} + +/// +/// Health check interface. +/// +public interface IHealthCheck +{ + string Name { get; } + Task CheckAsync(CancellationToken cancellationToken); +} + +/// +/// Aggregates multiple health checks. +/// +public sealed class HealthCheckService +{ + private readonly IEnumerable _checks; + private readonly ILogger _logger; + + public HealthCheckService( + IEnumerable checks, + ILogger logger) + { + _checks = checks; + _logger = logger; + } + + public async Task CheckHealthAsync(CancellationToken cancellationToken) + { + var results = new Dictionary(); + var overallStatus = HealthStatus.Healthy; + + foreach (var check in _checks) + { + var sw = Stopwatch.StartNew(); + + try + { + var result = await check.CheckAsync(cancellationToken); + result = result with { Duration = sw.Elapsed }; + results[check.Name] = result; + + if (result.Status > overallStatus) + { + overallStatus = result.Status; + } + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Health check {Name} failed", check.Name); + results[check.Name] = new HealthCheckResult + { + Status = HealthStatus.Unhealthy, + Description = ex.Message, + Duration = sw.Elapsed, + Exception = ex + }; + overallStatus = HealthStatus.Unhealthy; + } + } + + return new HealthReport + { + Status = overallStatus, + Checks = results, + TotalDuration = results.Values.Sum(r => r.Duration.TotalMilliseconds) + }; + } +} + +public sealed class HealthReport +{ + public HealthStatus Status { get; init; } + public IReadOnlyDictionary Checks { get; init; } = new Dictionary(); + public double TotalDuration { get; init; } +} +``` + +--- + +## Built-in Health Checks + +```csharp +namespace StellaOps.Router.Gateway; + +/// +/// Checks that at least one transport connection is active. +/// +public sealed class TransportHealthCheck : IHealthCheck +{ + private readonly IGlobalRoutingState _routingState; + + public string Name => "transport"; + + public TransportHealthCheck(IGlobalRoutingState routingState) + { + _routingState = routingState; + } + + public Task CheckAsync(CancellationToken cancellationToken) + { + var connections = _routingState.GetAllConnections(); + var activeCount = connections.Count(c => c.State == ConnectionState.Connected); + + if (activeCount == 0) + { + return Task.FromResult(new HealthCheckResult + { + Status = HealthStatus.Unhealthy, + Description = "No active transport connections", + Data = new Dictionary { ["connections"] = 0 } + }); + } + + return Task.FromResult(new HealthCheckResult + { + Status = HealthStatus.Healthy, + Description = $"{activeCount} active connections", + Data = new Dictionary { ["connections"] = activeCount } + }); + } +} + +/// +/// Checks Authority service connectivity. +/// +public sealed class AuthorityHealthCheck : IHealthCheck +{ + private readonly IAuthorityClient _authority; + private readonly TimeSpan _timeout; + + public string Name => "authority"; + + public AuthorityHealthCheck( + IAuthorityClient authority, + IOptions config) + { + _authority = authority; + _timeout = config.Value.HealthCheckTimeout; + } + + public async Task CheckAsync(CancellationToken cancellationToken) + { + try + { + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(_timeout); + + var isHealthy = await _authority.CheckHealthAsync(cts.Token); + + return new HealthCheckResult + { + Status = isHealthy ? HealthStatus.Healthy : HealthStatus.Degraded, + Description = isHealthy ? "Authority is responsive" : "Authority returned unhealthy" + }; + } + catch (Exception ex) + { + return new HealthCheckResult + { + Status = HealthStatus.Degraded, // Degraded, not unhealthy - gateway can still work + Description = $"Authority unreachable: {ex.Message}", + Exception = ex + }; + } + } +} + +/// +/// Checks rate limiter backend connectivity. +/// +public sealed class RateLimiterHealthCheck : IHealthCheck +{ + private readonly IRateLimiter _rateLimiter; + + public string Name => "rate_limiter"; + + public RateLimiterHealthCheck(IRateLimiter rateLimiter) + { + _rateLimiter = rateLimiter; + } + + public async Task CheckAsync(CancellationToken cancellationToken) + { + try + { + // Try a simple operation + await _rateLimiter.CheckLimitAsync( + new RateLimitContext { Key = "__health_check__", Tier = RateLimitTier.Free }, + cancellationToken); + + return new HealthCheckResult + { + Status = HealthStatus.Healthy, + Description = "Rate limiter is responsive" + }; + } + catch (Exception ex) + { + return new HealthCheckResult + { + Status = HealthStatus.Degraded, + Description = $"Rate limiter error: {ex.Message}", + Exception = ex + }; + } + } +} +``` + +--- + +## Health Endpoints + +```csharp +namespace StellaOps.Router.Gateway; + +/// +/// Health check endpoints. +/// +public static class HealthEndpoints +{ + public static IEndpointRouteBuilder MapHealthEndpoints( + this IEndpointRouteBuilder endpoints, + string basePath = "/health") + { + endpoints.MapGet(basePath + "/live", LivenessCheck); + endpoints.MapGet(basePath + "/ready", ReadinessCheck); + endpoints.MapGet(basePath, DetailedHealthCheck); + + return endpoints; + } + + /// + /// Liveness probe - is the process running? + /// + private static IResult LivenessCheck() + { + return Results.Ok(new { status = "alive" }); + } + + /// + /// Readiness probe - can the service accept traffic? + /// + private static async Task ReadinessCheck( + HealthCheckService healthService, + CancellationToken cancellationToken) + { + var report = await healthService.CheckHealthAsync(cancellationToken); + + return report.Status == HealthStatus.Unhealthy + ? Results.Json(new + { + status = "not_ready", + checks = report.Checks.ToDictionary(c => c.Key, c => c.Value.Status.ToString()) + }, statusCode: 503) + : Results.Ok(new { status = "ready" }); + } + + /// + /// Detailed health report. + /// + private static async Task DetailedHealthCheck( + HealthCheckService healthService, + CancellationToken cancellationToken) + { + var report = await healthService.CheckHealthAsync(cancellationToken); + + var response = new + { + status = report.Status.ToString().ToLower(), + totalDuration = $"{report.TotalDuration:F2}ms", + checks = report.Checks.ToDictionary(c => c.Key, c => new + { + status = c.Value.Status.ToString().ToLower(), + description = c.Value.Description, + duration = $"{c.Value.Duration.TotalMilliseconds:F2}ms", + data = c.Value.Data + }) + }; + + var statusCode = report.Status switch + { + HealthStatus.Healthy => 200, + HealthStatus.Degraded => 200, // Still return 200 for degraded + HealthStatus.Unhealthy => 503, + _ => 200 + }; + + return Results.Json(response, statusCode: statusCode); + } +} +``` + +--- + +## Prometheus Metrics Endpoint + +```csharp +namespace StellaOps.Router.Gateway; + +/// +/// Exposes metrics in Prometheus format. +/// +public sealed class PrometheusMetricsEndpoint +{ + public static void Map(IEndpointRouteBuilder endpoints, string path = "/metrics") + { + endpoints.MapGet(path, async (HttpContext context) => + { + var exporter = context.RequestServices.GetRequiredService(); + var metrics = await exporter.ExportAsync(); + + context.Response.ContentType = "text/plain; version=0.0.4"; + await context.Response.WriteAsync(metrics); + }); + } +} + +public sealed class PrometheusExporter +{ + private readonly MeterProvider _meterProvider; + + public PrometheusExporter(MeterProvider meterProvider) + { + _meterProvider = meterProvider; + } + + public Task ExportAsync() + { + // Use OpenTelemetry's Prometheus exporter + // This is a simplified example + var sb = new StringBuilder(); + + // Export would iterate over all registered metrics + // Real implementation uses OpenTelemetry.Exporter.Prometheus + + return Task.FromResult(sb.ToString()); + } +} +``` + +--- + +## Service Registration + +```csharp +namespace StellaOps.Router.Gateway; + +public static class MetricsExtensions +{ + public static IServiceCollection AddStellaMetrics( + this IServiceCollection services, + IConfiguration configuration) + { + services.Configure(configuration.GetSection("Metrics")); + + services.AddOpenTelemetry() + .WithMetrics(builder => + { + builder + .AddMeter("StellaOps.Router") + .AddAspNetCoreInstrumentation() + .AddPrometheusExporter(); + }); + + return services; + } + + public static IServiceCollection AddStellaHealthChecks( + this IServiceCollection services) + { + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + + return services; + } +} +``` + +--- + +## YAML Configuration + +```yaml +Metrics: + Enabled: true + Path: "/metrics" + IncludePathLabel: false + MaxPathCardinality: 100 + DurationBuckets: + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.1 + - 0.25 + - 0.5 + - 1 + - 2.5 + - 5 + - 10 + +HealthChecks: + Enabled: true + Path: "/health" + CacheDuration: "00:00:05" +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Common/StellaMetrics.cs` +2. `StellaOps.Router.Gateway/MetricsMiddleware.cs` +3. `StellaOps.Router.Transport/TransportMetricsCollector.cs` +4. `StellaOps.Router.Common/HealthCheckService.cs` +5. `StellaOps.Router.Gateway/TransportHealthCheck.cs` +6. `StellaOps.Router.Gateway/AuthorityHealthCheck.cs` +7. `StellaOps.Router.Gateway/HealthEndpoints.cs` +8. `StellaOps.Router.Gateway/PrometheusMetricsEndpoint.cs` +9. Metrics collection tests +10. Health check tests + +--- + +## Next Step + +Proceed to [Step 24: Circuit Breaker & Retry Policies](24-Step.md) to implement resilience patterns. diff --git a/docs/router/24-Step.md b/docs/router/24-Step.md new file mode 100644 index 000000000..c092f2d52 --- /dev/null +++ b/docs/router/24-Step.md @@ -0,0 +1,856 @@ +# Step 24: Circuit Breaker & Retry Policies + +**Phase 6: Observability & Resilience** +**Estimated Complexity:** High +**Dependencies:** Step 23 (Metrics & Health Checks) + +--- + +## Overview + +Circuit breakers and retry policies protect the system from cascading failures and transient errors. The circuit breaker prevents requests to failing services, while retry policies automatically retry failed requests with exponential backoff. + +--- + +## Goals + +1. Implement circuit breaker pattern for service protection +2. Support configurable retry policies +3. Enable per-service and per-endpoint policies +4. Integrate with metrics for observability +5. Provide graceful degradation strategies + +--- + +## Circuit Breaker Configuration + +```csharp +namespace StellaOps.Router.Resilience; + +public class CircuitBreakerConfig +{ + /// Number of failures before opening circuit. + public int FailureThreshold { get; set; } = 5; + + /// Time window for counting failures. + public TimeSpan SamplingDuration { get; set; } = TimeSpan.FromSeconds(30); + + /// How long to stay open before testing. + public TimeSpan BreakDuration { get; set; } = TimeSpan.FromSeconds(30); + + /// Minimum throughput before circuit can trip. + public int MinimumThroughput { get; set; } = 10; + + /// Failure ratio to trip circuit (0.0 to 1.0). + public double FailureRatioThreshold { get; set; } = 0.5; + + /// HTTP status codes considered failures. + public HashSet FailureStatusCodes { get; set; } = new() + { + 500, 502, 503, 504 + }; + + /// Exception types considered failures. + public HashSet FailureExceptions { get; set; } = new() + { + typeof(TimeoutException), + typeof(TaskCanceledException), + typeof(HttpRequestException) + }; +} +``` + +--- + +## Circuit Breaker Implementation + +```csharp +namespace StellaOps.Router.Resilience; + +public enum CircuitState +{ + Closed = 0, // Normal operation + Open = 2, // Blocking requests + HalfOpen = 1 // Testing with limited requests +} + +/// +/// Circuit breaker for a single service or endpoint. +/// +public sealed class CircuitBreaker +{ + private readonly CircuitBreakerConfig _config; + private readonly ILogger _logger; + private readonly SlidingWindow _window; + private CircuitState _state = CircuitState.Closed; + private DateTimeOffset _openedAt; + private readonly SemaphoreSlim _halfOpenLock = new(1, 1); + + public string Name { get; } + public CircuitState State => _state; + public DateTimeOffset LastStateChange { get; private set; } + + public CircuitBreaker( + string name, + CircuitBreakerConfig config, + ILogger logger) + { + Name = name; + _config = config; + _logger = logger; + _window = new SlidingWindow(config.SamplingDuration); + LastStateChange = DateTimeOffset.UtcNow; + } + + /// + /// Checks if request is allowed through the circuit. + /// + public async Task AllowRequestAsync(CancellationToken cancellationToken) + { + switch (_state) + { + case CircuitState.Closed: + return true; + + case CircuitState.Open: + if (DateTimeOffset.UtcNow - _openedAt >= _config.BreakDuration) + { + await TryTransitionToHalfOpenAsync(); + } + return _state == CircuitState.HalfOpen; + + case CircuitState.HalfOpen: + // Only allow one request at a time in half-open + return await _halfOpenLock.WaitAsync(0, cancellationToken); + + default: + return false; + } + } + + /// + /// Records a successful request. + /// + public void RecordSuccess() + { + _window.RecordSuccess(); + + if (_state == CircuitState.HalfOpen) + { + TransitionToClosed(); + _halfOpenLock.Release(); + } + } + + /// + /// Records a failed request. + /// + public void RecordFailure() + { + _window.RecordFailure(); + + if (_state == CircuitState.HalfOpen) + { + TransitionToOpen(); + _halfOpenLock.Release(); + } + else if (_state == CircuitState.Closed) + { + CheckThreshold(); + } + } + + private void CheckThreshold() + { + var stats = _window.GetStats(); + + if (stats.TotalRequests < _config.MinimumThroughput) + return; + + var failureRatio = (double)stats.Failures / stats.TotalRequests; + + if (failureRatio >= _config.FailureRatioThreshold || + stats.Failures >= _config.FailureThreshold) + { + TransitionToOpen(); + } + } + + private void TransitionToOpen() + { + _state = CircuitState.Open; + _openedAt = DateTimeOffset.UtcNow; + LastStateChange = _openedAt; + + _logger.LogWarning( + "Circuit {Name} opened. Failures: {Failures}, Ratio: {Ratio:P2}", + Name, _window.GetStats().Failures, + (double)_window.GetStats().Failures / Math.Max(1, _window.GetStats().TotalRequests)); + + StellaMetrics.CircuitBreakerState.Record((int)CircuitState.Open, + new TagList { { "circuit", Name } }); + } + + private async Task TryTransitionToHalfOpenAsync() + { + if (_state != CircuitState.Open) + return; + + if (await _halfOpenLock.WaitAsync(0)) + { + _state = CircuitState.HalfOpen; + LastStateChange = DateTimeOffset.UtcNow; + _window.Reset(); + + _logger.LogInformation("Circuit {Name} transitioning to half-open", Name); + + StellaMetrics.CircuitBreakerState.Record((int)CircuitState.HalfOpen, + new TagList { { "circuit", Name } }); + } + } + + private void TransitionToClosed() + { + _state = CircuitState.Closed; + LastStateChange = DateTimeOffset.UtcNow; + _window.Reset(); + + _logger.LogInformation("Circuit {Name} closed", Name); + + StellaMetrics.CircuitBreakerState.Record((int)CircuitState.Closed, + new TagList { { "circuit", Name } }); + } +} + +/// +/// Sliding window for tracking success/failure counts. +/// +internal sealed class SlidingWindow +{ + private readonly TimeSpan _duration; + private readonly ConcurrentQueue<(DateTimeOffset Time, bool Success)> _events = new(); + + public SlidingWindow(TimeSpan duration) + { + _duration = duration; + } + + public void RecordSuccess() + { + _events.Enqueue((DateTimeOffset.UtcNow, true)); + Cleanup(); + } + + public void RecordFailure() + { + _events.Enqueue((DateTimeOffset.UtcNow, false)); + Cleanup(); + } + + public WindowStats GetStats() + { + Cleanup(); + + var successes = 0; + var failures = 0; + + foreach (var evt in _events) + { + if (evt.Success) + successes++; + else + failures++; + } + + return new WindowStats(successes, failures); + } + + public void Reset() + { + _events.Clear(); + } + + private void Cleanup() + { + var cutoff = DateTimeOffset.UtcNow - _duration; + + while (_events.TryPeek(out var evt) && evt.Time < cutoff) + { + _events.TryDequeue(out _); + } + } +} + +internal readonly record struct WindowStats(int Successes, int Failures) +{ + public int TotalRequests => Successes + Failures; +} +``` + +--- + +## Retry Policy Configuration + +```csharp +namespace StellaOps.Router.Resilience; + +public class RetryPolicyConfig +{ + /// Maximum number of retries. + public int MaxRetries { get; set; } = 3; + + /// Initial delay before first retry. + public TimeSpan InitialDelay { get; set; } = TimeSpan.FromMilliseconds(100); + + /// Maximum delay between retries. + public TimeSpan MaxDelay { get; set; } = TimeSpan.FromSeconds(10); + + /// Backoff multiplier for exponential delay. + public double BackoffMultiplier { get; set; } = 2.0; + + /// Whether to add jitter to delays. + public bool UseJitter { get; set; } = true; + + /// Maximum jitter to add (percentage of delay). + public double MaxJitterPercent { get; set; } = 0.25; + + /// HTTP status codes that trigger retry. + public HashSet RetryableStatusCodes { get; set; } = new() + { + 408, 429, 500, 502, 503, 504 + }; + + /// Exception types that trigger retry. + public HashSet RetryableExceptions { get; set; } = new() + { + typeof(TimeoutException), + typeof(HttpRequestException), + typeof(IOException) + }; +} +``` + +--- + +## Retry Policy Implementation + +```csharp +namespace StellaOps.Router.Resilience; + +/// +/// Executes operations with retry logic. +/// +public sealed class RetryPolicy +{ + private readonly RetryPolicyConfig _config; + private readonly ILogger _logger; + + public RetryPolicy(RetryPolicyConfig config, ILogger logger) + { + _config = config; + _logger = logger; + } + + /// + /// Executes an operation with retry logic. + /// + public async Task ExecuteAsync( + Func> operation, + Func shouldRetry, + CancellationToken cancellationToken) + { + var attempt = 0; + var totalDelay = TimeSpan.Zero; + + while (true) + { + try + { + attempt++; + var result = await operation(cancellationToken); + + if (shouldRetry(result) && attempt <= _config.MaxRetries) + { + var delay = CalculateDelay(attempt); + totalDelay += delay; + + _logger.LogDebug( + "Retrying operation (attempt {Attempt}/{MaxRetries}) after {Delay}ms", + attempt, _config.MaxRetries, delay.TotalMilliseconds); + + await Task.Delay(delay, cancellationToken); + continue; + } + + if (attempt > 1) + { + _logger.LogDebug( + "Operation succeeded after {Attempts} attempts, total delay: {TotalDelay}ms", + attempt, totalDelay.TotalMilliseconds); + } + + return result; + } + catch (Exception ex) when (ShouldRetry(ex) && attempt <= _config.MaxRetries) + { + var delay = CalculateDelay(attempt); + totalDelay += delay; + + _logger.LogWarning( + ex, + "Operation failed (attempt {Attempt}/{MaxRetries}), retrying after {Delay}ms", + attempt, _config.MaxRetries, delay.TotalMilliseconds); + + await Task.Delay(delay, cancellationToken); + } + } + } + + /// + /// Executes an operation with retry logic (response payload variant). + /// + public Task ExecuteAsync( + Func> operation, + CancellationToken cancellationToken) + { + return ExecuteAsync( + operation, + response => _config.RetryableStatusCodes.Contains(response.StatusCode), + cancellationToken); + } + + private bool ShouldRetry(Exception ex) + { + var exType = ex.GetType(); + return _config.RetryableExceptions.Any(t => t.IsAssignableFrom(exType)); + } + + private TimeSpan CalculateDelay(int attempt) + { + // Exponential backoff + var delay = TimeSpan.FromMilliseconds( + _config.InitialDelay.TotalMilliseconds * Math.Pow(_config.BackoffMultiplier, attempt - 1)); + + // Cap at max delay + if (delay > _config.MaxDelay) + { + delay = _config.MaxDelay; + } + + // Add jitter + if (_config.UseJitter) + { + var jitter = delay.TotalMilliseconds * _config.MaxJitterPercent * Random.Shared.NextDouble(); + delay = TimeSpan.FromMilliseconds(delay.TotalMilliseconds + jitter); + } + + return delay; + } +} +``` + +--- + +## Resilience Policy Executor + +```csharp +namespace StellaOps.Router.Resilience; + +/// +/// Combines circuit breaker and retry policies. +/// +public interface IResiliencePolicy +{ + Task ExecuteAsync( + string serviceName, + Func> operation, + CancellationToken cancellationToken); +} + +public sealed class ResiliencePolicy : IResiliencePolicy +{ + private readonly ICircuitBreakerRegistry _circuitBreakers; + private readonly RetryPolicy _retryPolicy; + private readonly ResilienceConfig _config; + private readonly ILogger _logger; + + public ResiliencePolicy( + ICircuitBreakerRegistry circuitBreakers, + RetryPolicy retryPolicy, + IOptions config, + ILogger logger) + { + _circuitBreakers = circuitBreakers; + _retryPolicy = retryPolicy; + _config = config.Value; + _logger = logger; + } + + public async Task ExecuteAsync( + string serviceName, + Func> operation, + CancellationToken cancellationToken) + { + var circuitBreaker = _circuitBreakers.GetOrCreate(serviceName); + + // Check circuit breaker + if (!await circuitBreaker.AllowRequestAsync(cancellationToken)) + { + _logger.LogWarning("Circuit breaker {Name} is open, rejecting request", serviceName); + + return _config.FallbackResponse ?? new ResponsePayload + { + StatusCode = 503, + Headers = new Dictionary + { + ["X-Circuit-Breaker"] = "open", + ["Retry-After"] = "30" + }, + Body = Encoding.UTF8.GetBytes(JsonSerializer.Serialize(new + { + error = "Service temporarily unavailable", + service = serviceName + })), + IsFinalChunk = true + }; + } + + try + { + // Execute with retry + var response = await _retryPolicy.ExecuteAsync(operation, cancellationToken); + + // Record result + if (IsSuccess(response)) + { + circuitBreaker.RecordSuccess(); + } + else if (IsFailure(response)) + { + circuitBreaker.RecordFailure(); + } + + return response; + } + catch (Exception) + { + circuitBreaker.RecordFailure(); + throw; + } + } + + private bool IsSuccess(ResponsePayload response) + { + return response.StatusCode >= 200 && response.StatusCode < 400; + } + + private bool IsFailure(ResponsePayload response) + { + return _config.CircuitBreaker.FailureStatusCodes.Contains(response.StatusCode); + } +} + +public class ResilienceConfig +{ + public CircuitBreakerConfig CircuitBreaker { get; set; } = new(); + public RetryPolicyConfig Retry { get; set; } = new(); + public ResponsePayload? FallbackResponse { get; set; } +} +``` + +--- + +## Circuit Breaker Registry + +```csharp +namespace StellaOps.Router.Resilience; + +/// +/// Registry of circuit breakers per service. +/// +public interface ICircuitBreakerRegistry +{ + CircuitBreaker GetOrCreate(string name); + IReadOnlyDictionary GetAll(); + void Reset(string name); + void ResetAll(); +} + +public sealed class CircuitBreakerRegistry : ICircuitBreakerRegistry +{ + private readonly ConcurrentDictionary _breakers = new(); + private readonly CircuitBreakerConfig _config; + private readonly ILoggerFactory _loggerFactory; + + public CircuitBreakerRegistry( + IOptions config, + ILoggerFactory loggerFactory) + { + _config = config.Value; + _loggerFactory = loggerFactory; + } + + public CircuitBreaker GetOrCreate(string name) + { + return _breakers.GetOrAdd(name, n => + { + var logger = _loggerFactory.CreateLogger(); + return new CircuitBreaker(n, _config, logger); + }); + } + + public IReadOnlyDictionary GetAll() + { + return _breakers; + } + + public void Reset(string name) + { + if (_breakers.TryRemove(name, out _)) + { + // Will be recreated fresh on next request + } + } + + public void ResetAll() + { + _breakers.Clear(); + } +} +``` + +--- + +## Bulkhead Pattern + +```csharp +namespace StellaOps.Router.Resilience; + +/// +/// Bulkhead pattern - limits concurrent requests to a service. +/// +public sealed class Bulkhead +{ + private readonly SemaphoreSlim _semaphore; + private readonly BulkheadConfig _config; + private readonly string _name; + private int _queuedRequests; + + public string Name => _name; + public int ActiveRequests => _config.MaxConcurrency - _semaphore.CurrentCount; + public int QueuedRequests => _queuedRequests; + + public Bulkhead(string name, BulkheadConfig config) + { + _name = name; + _config = config; + _semaphore = new SemaphoreSlim(config.MaxConcurrency, config.MaxConcurrency); + } + + /// + /// Acquires a slot in the bulkhead. + /// + public async Task AcquireAsync(CancellationToken cancellationToken) + { + var queued = Interlocked.Increment(ref _queuedRequests); + + if (queued > _config.MaxQueueSize) + { + Interlocked.Decrement(ref _queuedRequests); + return null; // Reject immediately + } + + try + { + var acquired = await _semaphore.WaitAsync(_config.QueueTimeout, cancellationToken); + Interlocked.Decrement(ref _queuedRequests); + + if (!acquired) + { + return null; + } + + return new BulkheadLease(_semaphore); + } + catch + { + Interlocked.Decrement(ref _queuedRequests); + throw; + } + } + + private sealed class BulkheadLease : IDisposable + { + private readonly SemaphoreSlim _semaphore; + private bool _disposed; + + public BulkheadLease(SemaphoreSlim semaphore) + { + _semaphore = semaphore; + } + + public void Dispose() + { + if (!_disposed) + { + _semaphore.Release(); + _disposed = true; + } + } + } +} + +public class BulkheadConfig +{ + public int MaxConcurrency { get; set; } = 100; + public int MaxQueueSize { get; set; } = 50; + public TimeSpan QueueTimeout { get; set; } = TimeSpan.FromSeconds(10); +} +``` + +--- + +## Resilience Middleware + +```csharp +namespace StellaOps.Router.Gateway; + +/// +/// Middleware that applies resilience policies to requests. +/// +public sealed class ResilienceMiddleware +{ + private readonly RequestDelegate _next; + private readonly IResiliencePolicy _policy; + + public ResilienceMiddleware(RequestDelegate next, IResiliencePolicy policy) + { + _next = next; + _policy = policy; + } + + public async Task InvokeAsync(HttpContext context) + { + // Get target service from route data + var serviceName = context.GetRouteValue("service")?.ToString(); + + if (string.IsNullOrEmpty(serviceName)) + { + await _next(context); + return; + } + + try + { + await _next(context); + } + catch (Exception ex) when (IsTransientException(ex)) + { + // Convert to 503 with retry information + context.Response.StatusCode = 503; + context.Response.Headers["Retry-After"] = "30"; + await context.Response.WriteAsJsonAsync(new + { + error = "Service temporarily unavailable", + retryAfter = 30 + }); + } + } + + private bool IsTransientException(Exception ex) + { + return ex is TimeoutException or + HttpRequestException or + TaskCanceledException; + } +} +``` + +--- + +## Service Registration + +```csharp +namespace StellaOps.Router.Resilience; + +public static class ResilienceExtensions +{ + public static IServiceCollection AddStellaResilience( + this IServiceCollection services, + IConfiguration configuration) + { + services.Configure(configuration.GetSection("Resilience")); + services.Configure(configuration.GetSection("Resilience:CircuitBreaker")); + services.Configure(configuration.GetSection("Resilience:Retry")); + services.Configure(configuration.GetSection("Resilience:Bulkhead")); + + services.AddSingleton(); + services.AddSingleton(); + services.AddSingleton(); + + return services; + } +} +``` + +--- + +## YAML Configuration + +```yaml +Resilience: + CircuitBreaker: + FailureThreshold: 5 + SamplingDuration: "00:00:30" + BreakDuration: "00:00:30" + MinimumThroughput: 10 + FailureRatioThreshold: 0.5 + FailureStatusCodes: + - 500 + - 502 + - 503 + - 504 + + Retry: + MaxRetries: 3 + InitialDelay: "00:00:00.100" + MaxDelay: "00:00:10" + BackoffMultiplier: 2.0 + UseJitter: true + MaxJitterPercent: 0.25 + RetryableStatusCodes: + - 408 + - 429 + - 502 + - 503 + - 504 + + Bulkhead: + MaxConcurrency: 100 + MaxQueueSize: 50 + QueueTimeout: "00:00:10" +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Resilience/CircuitBreaker.cs` +2. `StellaOps.Router.Resilience/CircuitBreakerConfig.cs` +3. `StellaOps.Router.Resilience/ICircuitBreakerRegistry.cs` +4. `StellaOps.Router.Resilience/CircuitBreakerRegistry.cs` +5. `StellaOps.Router.Resilience/RetryPolicy.cs` +6. `StellaOps.Router.Resilience/RetryPolicyConfig.cs` +7. `StellaOps.Router.Resilience/IResiliencePolicy.cs` +8. `StellaOps.Router.Resilience/ResiliencePolicy.cs` +9. `StellaOps.Router.Resilience/Bulkhead.cs` +10. `StellaOps.Router.Gateway/ResilienceMiddleware.cs` +11. Circuit breaker state transition tests +12. Retry policy tests +13. Bulkhead tests + +--- + +## Next Step + +Proceed to [Step 25: Configuration Hot-Reload](25-Step.md) to implement dynamic configuration updates. diff --git a/docs/router/25-Step.md b/docs/router/25-Step.md new file mode 100644 index 000000000..d509c8c08 --- /dev/null +++ b/docs/router/25-Step.md @@ -0,0 +1,754 @@ +# Step 25: Configuration Hot-Reload + +**Phase 7: Testing & Documentation** +**Estimated Complexity:** Medium +**Dependencies:** All previous configuration steps + +--- + +## Overview + +Configuration hot-reload enables dynamic updates to router and microservice configuration without restarts. This includes route definitions, rate limits, circuit breaker settings, and JWKS rotation. + +--- + +## Goals + +1. Support YAML configuration hot-reload +2. Implement file watcher for configuration changes +3. Provide atomic configuration updates +4. Support validation before applying changes +5. Enable rollback on invalid configuration + +--- + +## Configuration Watcher + +```csharp +namespace StellaOps.Router.Configuration; + +/// +/// Watches configuration files for changes and triggers reloads. +/// +public sealed class ConfigurationWatcher : IHostedService, IDisposable +{ + private readonly IConfiguration _configuration; + private readonly IOptionsMonitor _routerConfig; + private readonly ILogger _logger; + private readonly List _watchers = new(); + private readonly Subject _changes = new(); + private readonly TimeSpan _debounceInterval = TimeSpan.FromMilliseconds(500); + private readonly ConcurrentDictionary _lastChange = new(); + + public IObservable Changes => _changes; + + public ConfigurationWatcher( + IConfiguration configuration, + IOptionsMonitor routerConfig, + ILogger logger) + { + _configuration = configuration; + _routerConfig = routerConfig; + _logger = logger; + } + + public Task StartAsync(CancellationToken cancellationToken) + { + // Watch all YAML configuration files + var configPaths = GetConfigurationFilePaths(); + + foreach (var path in configPaths) + { + if (!File.Exists(path)) + continue; + + var directory = Path.GetDirectoryName(path)!; + var fileName = Path.GetFileName(path); + + var watcher = new FileSystemWatcher(directory) + { + Filter = fileName, + NotifyFilter = NotifyFilters.LastWrite | NotifyFilters.Size, + EnableRaisingEvents = true + }; + + watcher.Changed += OnConfigurationFileChanged; + _watchers.Add(watcher); + + _logger.LogInformation("Watching configuration file: {Path}", path); + } + + // Also subscribe to IOptionsMonitor for programmatic changes + _routerConfig.OnChange(config => + { + _changes.OnNext(new ConfigurationChange + { + Section = "Router", + ChangeType = ChangeType.Modified, + Timestamp = DateTimeOffset.UtcNow + }); + }); + + return Task.CompletedTask; + } + + private void OnConfigurationFileChanged(object sender, FileSystemEventArgs e) + { + // Debounce rapid changes + var now = DateTimeOffset.UtcNow; + + if (_lastChange.TryGetValue(e.FullPath, out var lastChange) && + now - lastChange < _debounceInterval) + { + return; + } + + _lastChange[e.FullPath] = now; + + _logger.LogInformation("Configuration file changed: {Path}", e.FullPath); + + // Delay to allow file writes to complete + Task.Delay(100).ContinueWith(_ => + { + try + { + // Validate configuration before notifying + if (ValidateConfiguration(e.FullPath)) + { + _changes.OnNext(new ConfigurationChange + { + Section = DetermineSectionFromPath(e.FullPath), + ChangeType = ChangeType.Modified, + FilePath = e.FullPath, + Timestamp = now + }); + } + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to process configuration change for {Path}", e.FullPath); + } + }); + } + + private bool ValidateConfiguration(string path) + { + try + { + var yaml = File.ReadAllText(path); + var deserializer = new DeserializerBuilder() + .WithNamingConvention(CamelCaseNamingConvention.Instance) + .Build(); + + // Try to deserialize to validate YAML syntax + var doc = deserializer.Deserialize>(yaml); + return doc != null; + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Invalid configuration file: {Path}", path); + return false; + } + } + + private string DetermineSectionFromPath(string path) + { + var fileName = Path.GetFileNameWithoutExtension(path).ToLower(); + + return fileName switch + { + "router" => "Router", + "routes" => "Routes", + "ratelimits" => "RateLimits", + "endpoints" => "Endpoints", + _ => "Unknown" + }; + } + + private IEnumerable GetConfigurationFilePaths() + { + // Get paths from configuration providers + var paths = new List(); + + if (_configuration is IConfigurationRoot root) + { + foreach (var provider in root.Providers) + { + if (provider is FileConfigurationProvider fileProvider) + { + var source = fileProvider.Source; + if (source.FileProvider?.GetFileInfo(source.Path ?? "") is { Exists: true } fileInfo) + { + paths.Add(fileInfo.PhysicalPath ?? ""); + } + } + } + } + + return paths.Where(p => !string.IsNullOrEmpty(p)); + } + + public Task StopAsync(CancellationToken cancellationToken) + { + foreach (var watcher in _watchers) + { + watcher.EnableRaisingEvents = false; + } + return Task.CompletedTask; + } + + public void Dispose() + { + foreach (var watcher in _watchers) + { + watcher.Dispose(); + } + _changes.Dispose(); + } +} + +public sealed class ConfigurationChange +{ + public string Section { get; init; } = ""; + public ChangeType ChangeType { get; init; } + public string? FilePath { get; init; } + public DateTimeOffset Timestamp { get; init; } +} + +public enum ChangeType +{ + Added, + Modified, + Removed +} +``` + +--- + +## Route Configuration Reloader + +```csharp +namespace StellaOps.Router.Configuration; + +/// +/// Handles hot-reload of route configurations. +/// +public sealed class RouteConfigurationReloader : IHostedService +{ + private readonly ConfigurationWatcher _watcher; + private readonly IRouteRegistry _routeRegistry; + private readonly ILogger _logger; + private IDisposable? _subscription; + + public RouteConfigurationReloader( + ConfigurationWatcher watcher, + IRouteRegistry routeRegistry, + ILogger logger) + { + _watcher = watcher; + _routeRegistry = routeRegistry; + _logger = logger; + } + + public Task StartAsync(CancellationToken cancellationToken) + { + _subscription = _watcher.Changes + .Where(c => c.Section == "Routes") + .Subscribe(OnRoutesChanged); + + return Task.CompletedTask; + } + + private void OnRoutesChanged(ConfigurationChange change) + { + _logger.LogInformation("Reloading routes from {Path}", change.FilePath); + + try + { + _routeRegistry.Reload(); + _logger.LogInformation("Routes reloaded successfully"); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to reload routes, keeping previous configuration"); + } + } + + public Task StopAsync(CancellationToken cancellationToken) + { + _subscription?.Dispose(); + return Task.CompletedTask; + } +} +``` + +--- + +## Rate Limit Configuration Reloader + +```csharp +namespace StellaOps.Router.Configuration; + +/// +/// Handles hot-reload of rate limit configurations. +/// +public sealed class RateLimitConfigurationReloader : IHostedService +{ + private readonly ConfigurationWatcher _watcher; + private readonly IRateLimiter _rateLimiter; + private readonly IOptionsMonitor _config; + private readonly ILogger _logger; + private IDisposable? _subscription; + + public RateLimitConfigurationReloader( + ConfigurationWatcher watcher, + IRateLimiter rateLimiter, + IOptionsMonitor config, + ILogger logger) + { + _watcher = watcher; + _rateLimiter = rateLimiter; + _config = config; + _logger = logger; + } + + public Task StartAsync(CancellationToken cancellationToken) + { + _subscription = _watcher.Changes + .Where(c => c.Section == "RateLimits") + .Subscribe(OnRateLimitsChanged); + + _config.OnChange(OnRateLimitConfigChanged); + + return Task.CompletedTask; + } + + private void OnRateLimitsChanged(ConfigurationChange change) + { + _logger.LogInformation("Rate limit configuration changed, applying updates"); + ApplyRateLimitChanges(); + } + + private void OnRateLimitConfigChanged(RateLimitConfig config) + { + _logger.LogInformation("Rate limit options changed, applying updates"); + ApplyRateLimitChanges(); + } + + private void ApplyRateLimitChanges() + { + try + { + // Rate limiter will pick up new config from IOptionsMonitor + // Clear any cached tier information + if (_rateLimiter is ICacheableRateLimiter cacheable) + { + cacheable.ClearCache(); + } + + _logger.LogInformation("Rate limit configuration applied successfully"); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to apply rate limit changes"); + } + } + + public Task StopAsync(CancellationToken cancellationToken) + { + _subscription?.Dispose(); + return Task.CompletedTask; + } +} + +public interface ICacheableRateLimiter +{ + void ClearCache(); +} +``` + +--- + +## JWKS Hot-Reload + +```csharp +namespace StellaOps.Router.Configuration; + +/// +/// Handles JWKS rotation and cache refresh. +/// +public sealed class JwksReloader : IHostedService +{ + private readonly IJwksCache _jwksCache; + private readonly JwtAuthenticationConfig _config; + private readonly ILogger _logger; + private Timer? _refreshTimer; + + public JwksReloader( + IJwksCache jwksCache, + IOptions config, + ILogger logger) + { + _jwksCache = jwksCache; + _config = config.Value; + _logger = logger; + } + + public Task StartAsync(CancellationToken cancellationToken) + { + // Periodic refresh of JWKS + var interval = _config.JwksRefreshInterval; + + _refreshTimer = new Timer( + RefreshJwks, + null, + interval, + interval); + + _logger.LogInformation( + "JWKS refresh scheduled every {Interval}", + interval); + + return Task.CompletedTask; + } + + private async void RefreshJwks(object? state) + { + try + { + _logger.LogDebug("Refreshing JWKS cache"); + + await _jwksCache.RefreshAsync(CancellationToken.None); + + _logger.LogDebug("JWKS cache refreshed successfully"); + } + catch (Exception ex) + { + _logger.LogWarning(ex, "Failed to refresh JWKS cache, will retry"); + } + } + + public Task StopAsync(CancellationToken cancellationToken) + { + _refreshTimer?.Dispose(); + return Task.CompletedTask; + } +} +``` + +--- + +## Configuration Validation + +```csharp +namespace StellaOps.Router.Configuration; + +/// +/// Validates configuration before applying changes. +/// +public interface IConfigurationValidator +{ + ValidationResult Validate(T config) where T : class; +} + +public sealed class ConfigurationValidator : IConfigurationValidator +{ + private readonly ILogger _logger; + + public ConfigurationValidator(ILogger logger) + { + _logger = logger; + } + + public ValidationResult Validate(T config) where T : class + { + var errors = new List(); + + // Use data annotations validation + var context = new ValidationContext(config); + var results = new List(); + + if (!Validator.TryValidateObject(config, context, results, validateAllProperties: true)) + { + errors.AddRange(results.Select(r => r.ErrorMessage ?? "Unknown validation error")); + } + + // Type-specific validation + errors.AddRange(config switch + { + RouterConfig router => ValidateRouterConfig(router), + RateLimitConfig rateLimit => ValidateRateLimitConfig(rateLimit), + _ => Enumerable.Empty() + }); + + if (errors.Any()) + { + _logger.LogWarning( + "Configuration validation failed: {Errors}", + string.Join(", ", errors)); + } + + return new ValidationResult + { + IsValid = !errors.Any(), + Errors = errors + }; + } + + private IEnumerable ValidateRouterConfig(RouterConfig config) + { + if (config.MaxPayloadSize <= 0) + yield return "MaxPayloadSize must be positive"; + + if (config.RequestTimeout <= TimeSpan.Zero) + yield return "RequestTimeout must be positive"; + } + + private IEnumerable ValidateRateLimitConfig(RateLimitConfig config) + { + foreach (var (tier, limits) in config.Tiers) + { + if (limits.RequestsPerMinute <= 0) + yield return $"Tier {tier}: RequestsPerMinute must be positive"; + } + } +} + +public sealed class ValidationResult +{ + public bool IsValid { get; init; } + public IReadOnlyList Errors { get; init; } = Array.Empty(); +} +``` + +--- + +## Atomic Configuration Update + +```csharp +namespace StellaOps.Router.Configuration; + +/// +/// Provides atomic configuration updates with rollback support. +/// +public sealed class AtomicConfigurationUpdater +{ + private readonly IConfigurationValidator _validator; + private readonly ILogger _logger; + private readonly ReaderWriterLockSlim _lock = new(); + + public AtomicConfigurationUpdater( + IConfigurationValidator validator, + ILogger logger) + { + _validator = validator; + _logger = logger; + } + + /// + /// Atomically updates configuration with validation and rollback. + /// + public async Task UpdateAsync( + T currentConfig, + T newConfig, + Func applyAction, + Func? rollbackAction = null) + where T : class + { + // Validate new configuration + var validation = _validator.Validate(newConfig); + if (!validation.IsValid) + { + _logger.LogWarning( + "Configuration update rejected: {Errors}", + string.Join(", ", validation.Errors)); + return false; + } + + _lock.EnterWriteLock(); + try + { + // Store current config for rollback + var backup = currentConfig; + + try + { + await applyAction(newConfig); + _logger.LogInformation("Configuration updated successfully"); + return true; + } + catch (Exception ex) + { + _logger.LogError(ex, "Configuration update failed, rolling back"); + + if (rollbackAction != null) + { + try + { + await rollbackAction(backup); + _logger.LogInformation("Configuration rolled back successfully"); + } + catch (Exception rollbackEx) + { + _logger.LogError(rollbackEx, "Rollback failed!"); + } + } + + return false; + } + } + finally + { + _lock.ExitWriteLock(); + } + } +} +``` + +--- + +## Configuration API Endpoints + +```csharp +namespace StellaOps.Router.Gateway; + +/// +/// API endpoints for configuration management. +/// +public static class ConfigurationEndpoints +{ + public static IEndpointRouteBuilder MapConfigurationEndpoints( + this IEndpointRouteBuilder endpoints, + string basePath = "/api/config") + { + var group = endpoints.MapGroup(basePath) + .RequireAuthorization("admin"); + + group.MapGet("/", GetConfiguration); + group.MapGet("/{section}", GetConfigurationSection); + group.MapPost("/reload", ReloadConfiguration); + group.MapPost("/validate", ValidateConfiguration); + + return endpoints; + } + + private static async Task GetConfiguration( + IConfiguration configuration) + { + var sections = new Dictionary(); + + foreach (var child in configuration.GetChildren()) + { + sections[child.Key] = GetSectionValue(child); + } + + return Results.Ok(sections); + } + + private static object GetSectionValue(IConfigurationSection section) + { + var children = section.GetChildren().ToList(); + + if (!children.Any()) + { + return section.Value ?? ""; + } + + if (children.All(c => int.TryParse(c.Key, out _))) + { + // Array + return children.Select(c => GetSectionValue(c)).ToList(); + } + + // Object + return children.ToDictionary(c => c.Key, c => GetSectionValue(c)); + } + + private static IResult GetConfigurationSection( + string section, + IConfiguration configuration) + { + var configSection = configuration.GetSection(section); + + if (!configSection.Exists()) + { + return Results.NotFound(new { error = $"Section '{section}' not found" }); + } + + return Results.Ok(GetSectionValue(configSection)); + } + + private static async Task ReloadConfiguration( + ConfigurationWatcher watcher, + ILogger logger) + { + logger.LogInformation("Manual configuration reload triggered"); + + // Trigger reload notification + // In practice, would re-read configuration files + + return Results.Ok(new { message = "Configuration reload triggered" }); + } + + private static async Task ValidateConfiguration( + HttpRequest request, + IConfigurationValidator validator) + { + var body = await request.ReadFromJsonAsync>(); + + if (body == null) + { + return Results.BadRequest(new { error = "Invalid request body" }); + } + + // Basic syntax validation + return Results.Ok(new { valid = true }); + } +} +``` + +--- + +## YAML Configuration + +```yaml +Configuration: + # Enable hot-reload + HotReload: + Enabled: true + DebounceInterval: "00:00:00.500" + ValidateBeforeApply: true + + # Files to watch + WatchPaths: + - "/etc/stellaops/router.yaml" + - "/etc/stellaops/routes.yaml" + - "/etc/stellaops/ratelimits.yaml" + + # JWKS refresh settings + Jwks: + RefreshInterval: "00:05:00" + RefreshOnError: true + MaxRetries: 3 +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Configuration/ConfigurationWatcher.cs` +2. `StellaOps.Router.Configuration/RouteConfigurationReloader.cs` +3. `StellaOps.Router.Configuration/RateLimitConfigurationReloader.cs` +4. `StellaOps.Router.Configuration/JwksReloader.cs` +5. `StellaOps.Router.Configuration/IConfigurationValidator.cs` +6. `StellaOps.Router.Configuration/ConfigurationValidator.cs` +7. `StellaOps.Router.Configuration/AtomicConfigurationUpdater.cs` +8. `StellaOps.Router.Gateway/ConfigurationEndpoints.cs` +9. Configuration reload tests +10. Validation tests + +--- + +## Next Step + +Proceed to [Step 26: End-to-End Testing](26-Step.md) to implement comprehensive integration tests. diff --git a/docs/router/26-Step.md b/docs/router/26-Step.md new file mode 100644 index 000000000..4f266b7dc --- /dev/null +++ b/docs/router/26-Step.md @@ -0,0 +1,683 @@ +# Step 26: End-to-End Testing + +**Phase 7: Testing & Documentation** +**Estimated Complexity:** High +**Dependencies:** All implementation steps + +--- + +## Overview + +End-to-end testing validates the complete request flow from HTTP client through the gateway, transport layer, microservice, and back. Tests cover all handlers, authentication, rate limiting, streaming, and failure scenarios. + +--- + +## Goals + +1. Validate complete request/response flow +2. Test all route handlers +3. Verify authentication and authorization +4. Test rate limiting behavior +5. Validate streaming and large payloads +6. Test failure scenarios and resilience + +--- + +## Test Infrastructure + +```csharp +namespace StellaOps.Router.Tests; + +/// +/// End-to-end test fixture providing gateway and microservice hosts. +/// +public sealed class EndToEndTestFixture : IAsyncLifetime +{ + private IHost? _gatewayHost; + private IHost? _microserviceHost; + private InMemoryTransportHub? _transportHub; + + public HttpClient GatewayClient { get; private set; } = null!; + public string GatewayBaseUrl { get; private set; } = null!; + + public async Task InitializeAsync() + { + // Shared transport hub for InMemory testing + _transportHub = new InMemoryTransportHub( + NullLoggerFactory.Instance.CreateLogger()); + + // Start gateway + _gatewayHost = await CreateGatewayHostAsync(); + await _gatewayHost.StartAsync(); + + GatewayBaseUrl = "http://localhost:5000"; + GatewayClient = new HttpClient { BaseAddress = new Uri(GatewayBaseUrl) }; + + // Start test microservice + _microserviceHost = await CreateMicroserviceHostAsync(); + await _microserviceHost.StartAsync(); + + // Wait for connection + await Task.Delay(500); + } + + private async Task CreateGatewayHostAsync() + { + return Host.CreateDefaultBuilder() + .ConfigureWebHostDefaults(web => + { + web.UseUrls("http://localhost:5000"); + web.ConfigureServices((context, services) => + { + services.AddSingleton(_transportHub!); + services.AddStellaGateway(context.Configuration); + services.AddInMemoryTransport(); + + // Use in-memory rate limiter + services.AddSingleton(); + + // Mock Authority + services.AddSingleton(); + }); + + web.Configure(app => + { + app.UseRouting(); + app.UseStellaGateway(); + app.UseEndpoints(endpoints => + { + endpoints.MapStellaRoutes(); + }); + }); + }) + .Build(); + } + + private async Task CreateMicroserviceHostAsync() + { + var host = StellaMicroserviceBuilder + .Create("test-service") + .ConfigureServices(services => + { + services.AddSingleton(_transportHub!); + services.AddScoped(); + }) + .ConfigureTransport(t => t.Default = "InMemory") + .ConfigureEndpoints(e => + { + e.AutoDiscover = true; + e.BasePath = "/api"; + }) + .Build(); + + return (IHost)host; + } + + public async Task DisposeAsync() + { + GatewayClient.Dispose(); + + if (_microserviceHost != null) + { + await _microserviceHost.StopAsync(); + _microserviceHost.Dispose(); + } + + if (_gatewayHost != null) + { + await _gatewayHost.StopAsync(); + _gatewayHost.Dispose(); + } + + _transportHub?.Dispose(); + } +} +``` + +--- + +## Test Endpoint Handler + +```csharp +namespace StellaOps.Router.Tests; + +[StellaEndpoint(BasePath = "/test")] +public class TestEndpointHandler : EndpointHandler +{ + [StellaGet("echo")] + public ResponsePayload Echo() + { + return Ok(new + { + method = Context.Method, + path = Context.Path, + query = Context.Query.ToDictionary(q => q.Key, q => q.Value.ToString()), + headers = Context.Headers.ToDictionary(h => h.Key, h => h.Value.ToString()), + claims = Context.Claims + }); + } + + [StellaPost("echo")] + public async Task EchoBody() + { + var body = Context.ReadBodyAsString(); + return Ok(new { body }); + } + + [StellaGet("items/{id}")] + public ResponsePayload GetItem([FromPath] string id) + { + return Ok(new { id }); + } + + [StellaGet("slow")] + public async Task SlowEndpoint(CancellationToken cancellationToken) + { + await Task.Delay(5000, cancellationToken); + return Ok(new { completed = true }); + } + + [StellaGet("error")] + public ResponsePayload ThrowError() + { + throw new InvalidOperationException("Test error"); + } + + [StellaGet("status/{code}")] + public ResponsePayload ReturnStatus([FromPath] int code) + { + return Response().WithStatus(code).WithJson(new { statusCode = code }).Build(); + } + + [StellaGet("protected")] + [StellaAuth(RequiredClaims = new[] { "admin" })] + public ResponsePayload ProtectedEndpoint() + { + return Ok(new { message = "Access granted" }); + } + + [StellaPost("upload")] + public ResponsePayload HandleUpload() + { + var size = Context.ContentLength ?? Context.RawBody?.Length ?? 0; + return Ok(new { bytesReceived = size }); + } + + [StellaGet("stream")] + public ResponsePayload StreamResponse() + { + var data = new byte[1024 * 1024]; // 1MB + Random.Shared.NextBytes(data); + return Response() + .WithBytes(data, "application/octet-stream") + .Build(); + } +} +``` + +--- + +## Basic Request/Response Tests + +```csharp +namespace StellaOps.Router.Tests; + +public class BasicRequestResponseTests : IClassFixture +{ + private readonly EndToEndTestFixture _fixture; + + public BasicRequestResponseTests(EndToEndTestFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task Get_Echo_ReturnsRequestDetails() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/test/echo"); + var content = await response.Content.ReadFromJsonAsync(); + + // Assert + Assert.True(response.IsSuccessStatusCode); + Assert.Equal("GET", content?.Method); + Assert.Equal("/api/test/echo", content?.Path); + } + + [Fact] + public async Task Post_Echo_ReturnsBody() + { + // Arrange + var client = _fixture.GatewayClient; + var body = new StringContent("{\"test\": true}", Encoding.UTF8, "application/json"); + + // Act + var response = await client.PostAsync("/api/test/echo", body); + var content = await response.Content.ReadFromJsonAsync(); + + // Assert + Assert.True(response.IsSuccessStatusCode); + Assert.Contains("test", content?.Body); + } + + [Fact] + public async Task Get_WithPathParameter_ExtractsParameter() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/test/items/12345"); + var content = await response.Content.ReadFromJsonAsync(); + + // Assert + Assert.True(response.IsSuccessStatusCode); + Assert.Equal("12345", content?.Id); + } + + [Fact] + public async Task Get_NonExistentPath_Returns404() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/nonexistent"); + + // Assert + Assert.Equal(HttpStatusCode.NotFound, response.StatusCode); + } + + private record EchoResponse( + string Method, + string Path, + Dictionary Query, + Dictionary Claims); + + private record EchoBodyResponse(string Body); + private record ItemResponse(string Id); +} +``` + +--- + +## Authentication Tests + +```csharp +namespace StellaOps.Router.Tests; + +public class AuthenticationTests : IClassFixture +{ + private readonly EndToEndTestFixture _fixture; + + public AuthenticationTests(EndToEndTestFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task Protected_WithoutToken_Returns401() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/test/protected"); + + // Assert + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task Protected_WithValidToken_Returns200() + { + // Arrange + var client = _fixture.GatewayClient; + var token = CreateTestToken(new Dictionary { ["admin"] = "true" }); + client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token); + + // Act + var response = await client.GetAsync("/api/test/protected"); + + // Assert + Assert.True(response.IsSuccessStatusCode); + } + + [Fact] + public async Task Protected_WithInvalidToken_Returns401() + { + // Arrange + var client = _fixture.GatewayClient; + client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", "invalid-token"); + + // Act + var response = await client.GetAsync("/api/test/protected"); + + // Assert + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task Protected_WithMissingClaim_Returns403() + { + // Arrange + var client = _fixture.GatewayClient; + var token = CreateTestToken(new Dictionary { ["user"] = "true" }); // No admin claim + client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token); + + // Act + var response = await client.GetAsync("/api/test/protected"); + + // Assert + Assert.Equal(HttpStatusCode.Forbidden, response.StatusCode); + } + + private string CreateTestToken(Dictionary claims) + { + // Create a test JWT (would use test key in real implementation) + var handler = new JwtSecurityTokenHandler(); + var key = new SymmetricSecurityKey(Encoding.UTF8.GetBytes("test-key-for-testing-only-12345")); + var creds = new SigningCredentials(key, SecurityAlgorithms.HmacSha256); + + var claimsList = claims.Select(c => new Claim(c.Key, c.Value)).ToList(); + claimsList.Add(new Claim("sub", "test-user")); + + var token = new JwtSecurityToken( + issuer: "test", + audience: "test", + claims: claimsList, + expires: DateTime.UtcNow.AddHours(1), + signingCredentials: creds); + + return handler.WriteToken(token); + } +} +``` + +--- + +## Rate Limiting Tests + +```csharp +namespace StellaOps.Router.Tests; + +public class RateLimitingTests : IClassFixture +{ + private readonly EndToEndTestFixture _fixture; + + public RateLimitingTests(EndToEndTestFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task RateLimit_ExceedingLimit_Returns429() + { + // Arrange + var client = _fixture.GatewayClient; + var tasks = new List>(); + + // Act - Send 100 requests quickly + for (int i = 0; i < 100; i++) + { + tasks.Add(client.GetAsync("/api/test/echo")); + } + + var responses = await Task.WhenAll(tasks); + + // Assert - Some should be rate limited + var rateLimited = responses.Count(r => r.StatusCode == HttpStatusCode.TooManyRequests); + Assert.True(rateLimited > 0, "Expected some requests to be rate limited"); + } + + [Fact] + public async Task RateLimit_Headers_ArePresent() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/test/echo"); + + // Assert + Assert.True(response.Headers.Contains("X-RateLimit-Limit")); + Assert.True(response.Headers.Contains("X-RateLimit-Remaining")); + } + + [Fact] + public async Task RateLimit_PerUser_IsolatesUsers() + { + // Arrange + var client1 = new HttpClient { BaseAddress = new Uri(_fixture.GatewayBaseUrl) }; + var client2 = new HttpClient { BaseAddress = new Uri(_fixture.GatewayBaseUrl) }; + + client1.DefaultRequestHeaders.Add("X-API-Key", "user1-key"); + client2.DefaultRequestHeaders.Add("X-API-Key", "user2-key"); + + // Act - Exhaust rate limit for user1 + for (int i = 0; i < 50; i++) + { + await client1.GetAsync("/api/test/echo"); + } + + // User2 should still have quota + var response = await client2.GetAsync("/api/test/echo"); + + // Assert + Assert.True(response.IsSuccessStatusCode); + } +} +``` + +--- + +## Timeout and Cancellation Tests + +```csharp +namespace StellaOps.Router.Tests; + +public class TimeoutAndCancellationTests : IClassFixture +{ + private readonly EndToEndTestFixture _fixture; + + public TimeoutAndCancellationTests(EndToEndTestFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task Slow_Request_TimesOut() + { + // Arrange + var client = new HttpClient + { + BaseAddress = new Uri(_fixture.GatewayBaseUrl), + Timeout = TimeSpan.FromSeconds(1) + }; + + // Act & Assert + await Assert.ThrowsAsync( + () => client.GetAsync("/api/test/slow")); + } + + [Fact] + public async Task Cancelled_Request_PropagatesCancellation() + { + // Arrange + var client = _fixture.GatewayClient; + using var cts = new CancellationTokenSource(); + + // Act + var task = client.GetAsync("/api/test/slow", cts.Token); + await Task.Delay(100); + cts.Cancel(); + + // Assert + await Assert.ThrowsAsync(() => task); + } +} +``` + +--- + +## Streaming and Large Payload Tests + +```csharp +namespace StellaOps.Router.Tests; + +public class StreamingTests : IClassFixture +{ + private readonly EndToEndTestFixture _fixture; + + public StreamingTests(EndToEndTestFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task LargeUpload_Succeeds() + { + // Arrange + var client = _fixture.GatewayClient; + var data = new byte[1024 * 1024]; // 1MB + Random.Shared.NextBytes(data); + var content = new ByteArrayContent(data); + content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream"); + + // Act + var response = await client.PostAsync("/api/test/upload", content); + var result = await response.Content.ReadFromJsonAsync(); + + // Assert + Assert.True(response.IsSuccessStatusCode); + Assert.Equal(data.Length, result?.BytesReceived); + } + + [Fact] + public async Task LargeDownload_Succeeds() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/test/stream"); + var data = await response.Content.ReadAsByteArrayAsync(); + + // Assert + Assert.True(response.IsSuccessStatusCode); + Assert.Equal(1024 * 1024, data.Length); + } + + private record UploadResponse(long BytesReceived); +} +``` + +--- + +## Error Handling Tests + +```csharp +namespace StellaOps.Router.Tests; + +public class ErrorHandlingTests : IClassFixture +{ + private readonly EndToEndTestFixture _fixture; + + public ErrorHandlingTests(EndToEndTestFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task Handler_Exception_Returns500() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/test/error"); + + // Assert + Assert.Equal(HttpStatusCode.InternalServerError, response.StatusCode); + } + + [Fact] + public async Task Custom_StatusCode_IsPreserved() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/test/status/418"); + + // Assert + Assert.Equal((HttpStatusCode)418, response.StatusCode); + } + + [Fact] + public async Task Error_Response_HasCorrectFormat() + { + // Arrange + var client = _fixture.GatewayClient; + + // Act + var response = await client.GetAsync("/api/nonexistent"); + var content = await response.Content.ReadFromJsonAsync(); + + // Assert + Assert.NotNull(content?.Error); + } + + private record ErrorResponse(string Error); +} +``` + +--- + +## YAML Configuration + +```yaml +# Test configuration +Router: + Transports: + - Type: InMemory + Enabled: true + +RateLimiting: + Enabled: true + DefaultTier: free + Tiers: + free: + RequestsPerMinute: 60 + authenticated: + RequestsPerMinute: 600 + +Authentication: + Enabled: true + AllowAnonymous: false + TestMode: true +``` + +--- + +## Deliverables + +1. `StellaOps.Router.Tests/EndToEndTestFixture.cs` +2. `StellaOps.Router.Tests/TestEndpointHandler.cs` +3. `StellaOps.Router.Tests/BasicRequestResponseTests.cs` +4. `StellaOps.Router.Tests/AuthenticationTests.cs` +5. `StellaOps.Router.Tests/RateLimitingTests.cs` +6. `StellaOps.Router.Tests/TimeoutAndCancellationTests.cs` +7. `StellaOps.Router.Tests/StreamingTests.cs` +8. `StellaOps.Router.Tests/ErrorHandlingTests.cs` +9. Mock implementations for Authority, Rate Limiter +10. CI integration configuration + +--- + +## Next Step + +Proceed to [Step 27: Reference Example & Migration Skeleton](27-Step.md) to create example implementations. diff --git a/docs/router/27-Step.md b/docs/router/27-Step.md new file mode 100644 index 000000000..8806556a1 --- /dev/null +++ b/docs/router/27-Step.md @@ -0,0 +1,1524 @@ +# Step 27: Reference Example & Migration Skeleton + +## Overview + +This step provides a complete reference implementation demonstrating all router features, along with migration tooling and patterns for gradually transitioning existing monolithic services to the Stella Router microservice architecture. + +## Goals + +1. Create a fully-functional reference microservice with all features +2. Provide migration skeleton for existing ASP.NET Core services +3. Document step-by-step migration patterns +4. Provide code scaffolding tools for new services +5. Create compatibility shims for gradual adoption + +## Reference Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Reference Example Layout │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ src/ │ +│ └── Examples/ │ +│ └── StellaOps.Router.Examples/ │ +│ ├── ReferenceService/ # Complete reference impl │ +│ │ ├── Handlers/ # Endpoint handlers │ +│ │ ├── Services/ # Business logic │ +│ │ ├── Models/ # Domain models │ +│ │ └── Program.cs # Host configuration │ +│ │ │ +│ ├── MigrationTemplates/ # Migration scaffolds │ +│ │ ├── BasicMigration/ # Minimal migration │ +│ │ ├── DualModeMigration/ # Parallel run mode │ +│ │ └── GradualMigration/ # Incremental endpoint migration │ +│ │ │ +│ └── Scaffolding/ # Code generation tools │ +│ ├── Templates/ # T4/Scriban templates │ +│ └── Generator/ # CLI scaffolding tool │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Reference Service Implementation + +### Complete Reference Microservice + +```csharp +// ReferenceService/Program.cs +using StellaOps.Router.Microservice; +using StellaOps.Router.Examples.ReferenceService; + +var builder = StellaMicroservice.CreateBuilder(args); + +// Configure all features +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); + +// Add handlers +builder.AddHandler(); +builder.AddHandler(); +builder.AddHandler(); +builder.AddHandler(); +builder.AddHandler(); + +// Configure transports +builder.UseTransport("tcp", options => +{ + options.Host = "0.0.0.0"; + options.Port = 9100; +}); + +// Enable dual exposure for direct HTTP access +builder.EnableDualExposure(http => +{ + http.Port = 8080; + http.DefaultClaims = new Dictionary + { + ["role"] = "internal-service", + ["permissions"] = new[] { "read", "write" } + }; +}); + +// Configure resilience +builder.ConfigureResilience(resilience => +{ + resilience.CircuitBreaker.FailureThreshold = 5; + resilience.CircuitBreaker.RecoveryTimeout = TimeSpan.FromSeconds(30); + resilience.Retry.MaxAttempts = 3; + resilience.Retry.BaseDelay = TimeSpan.FromMilliseconds(100); +}); + +// Configure observability +builder.ConfigureObservability(obs => +{ + obs.EnableMetrics = true; + obs.EnableTracing = true; + obs.ServiceName = "reference-service"; + obs.OtlpEndpoint = "http://otel-collector:4317"; +}); + +var host = builder.Build(); +await host.RunAsync(); +``` + +### User Management Handler + +```csharp +// ReferenceService/Handlers/UserHandler.cs +namespace StellaOps.Router.Examples.ReferenceService.Handlers; + +[StellaEndpoint(ServiceName = "users", Version = "v1")] +public class UserHandler : EndpointHandler +{ + private readonly IUserRepository _repository; + private readonly ILogger _logger; + + public UserHandler(IUserRepository repository, ILogger logger) + { + _repository = repository; + _logger = logger; + } + + [StellaRoute("GET", "/users")] + [StellaAuth(RequiredClaims = ["user:list"])] + [StellaRateLimit(RequestsPerMinute = 100)] + public async Task ListUsers( + StellaRequestContext context, + [FromQuery("page")] int page = 1, + [FromQuery("size")] int pageSize = 20, + [FromQuery("search")] string? search = null) + { + _logger.LogInformation("Listing users: page={Page}, size={Size}, search={Search}", + page, pageSize, search); + + var (users, total) = await _repository.ListAsync(page, pageSize, search); + + return Ok(new UserListResponse + { + Users = users.Select(u => u.ToDto()).ToList(), + Page = page, + PageSize = pageSize, + TotalCount = total, + TotalPages = (int)Math.Ceiling((double)total / pageSize) + }); + } + + [StellaRoute("GET", "/users/{id}")] + [StellaAuth(RequiredClaims = ["user:read"])] + [StellaCache(Duration = 60, VaryByPath = true)] + public async Task GetUser( + StellaRequestContext context, + [FromPath("id")] Guid id) + { + var user = await _repository.GetByIdAsync(id); + + if (user == null) + { + return NotFound(new ErrorResponse("USER_NOT_FOUND", $"User {id} not found")); + } + + // Enrich response with claims-based data + var canViewPrivate = context.HasClaim("user:admin") || + context.GetClaim("user_id") == id; + + return Ok(user.ToDto(includePrivate: canViewPrivate)); + } + + [StellaRoute("POST", "/users")] + [StellaAuth(RequiredClaims = ["user:create"])] + [StellaRateLimit(RequestsPerMinute = 10)] + public async Task CreateUser( + StellaRequestContext context, + [FromBody] CreateUserRequest request) + { + // Validate request + var validation = ValidateCreateRequest(request); + if (!validation.IsValid) + { + return BadRequest(new ValidationErrorResponse(validation.Errors)); + } + + // Check for duplicate email + var existing = await _repository.GetByEmailAsync(request.Email); + if (existing != null) + { + return Conflict(new ErrorResponse("EMAIL_EXISTS", "Email already registered")); + } + + var user = new User + { + Id = Guid.NewGuid(), + Email = request.Email, + Name = request.Name, + Role = request.Role ?? "user", + CreatedAt = DateTime.UtcNow, + CreatedBy = context.GetClaim("user_id") + }; + + await _repository.CreateAsync(user); + + _logger.LogInformation("User created: {UserId} by {CreatedBy}", user.Id, user.CreatedBy); + + return Created($"/users/{user.Id}", user.ToDto()); + } + + [StellaRoute("PUT", "/users/{id}")] + [StellaAuth(RequiredClaims = ["user:update"])] + public async Task UpdateUser( + StellaRequestContext context, + [FromPath("id")] Guid id, + [FromBody] UpdateUserRequest request) + { + var user = await _repository.GetByIdAsync(id); + if (user == null) + { + return NotFound(new ErrorResponse("USER_NOT_FOUND", $"User {id} not found")); + } + + // Check authorization - users can update themselves, admins can update anyone + var currentUserId = context.GetClaim("user_id"); + var isAdmin = context.HasClaim("user:admin"); + + if (currentUserId != id && !isAdmin) + { + return Forbidden(new ErrorResponse("FORBIDDEN", "Cannot update other users")); + } + + // Apply updates + if (request.Name != null) user.Name = request.Name; + if (request.Email != null) user.Email = request.Email; + if (request.Role != null && isAdmin) user.Role = request.Role; + + user.UpdatedAt = DateTime.UtcNow; + user.UpdatedBy = currentUserId; + + await _repository.UpdateAsync(user); + + return Ok(user.ToDto()); + } + + [StellaRoute("DELETE", "/users/{id}")] + [StellaAuth(RequiredClaims = ["user:admin"])] + public async Task DeleteUser( + StellaRequestContext context, + [FromPath("id")] Guid id) + { + var user = await _repository.GetByIdAsync(id); + if (user == null) + { + return NotFound(new ErrorResponse("USER_NOT_FOUND", $"User {id} not found")); + } + + // Soft delete + user.DeletedAt = DateTime.UtcNow; + user.DeletedBy = context.GetClaim("user_id"); + + await _repository.UpdateAsync(user); + + _logger.LogInformation("User deleted: {UserId} by {DeletedBy}", id, user.DeletedBy); + + return NoContent(); + } + + private ValidationResult ValidateCreateRequest(CreateUserRequest request) + { + var errors = new List(); + + if (string.IsNullOrWhiteSpace(request.Email)) + errors.Add(new("email", "Email is required")); + else if (!IsValidEmail(request.Email)) + errors.Add(new("email", "Invalid email format")); + + if (string.IsNullOrWhiteSpace(request.Name)) + errors.Add(new("name", "Name is required")); + else if (request.Name.Length < 2 || request.Name.Length > 100) + errors.Add(new("name", "Name must be 2-100 characters")); + + return new ValidationResult(errors); + } + + private static bool IsValidEmail(string email) => + System.Text.RegularExpressions.Regex.IsMatch(email, + @"^[^@\s]+@[^@\s]+\.[^@\s]+$"); +} +``` + +### Product Catalog Handler + +```csharp +// ReferenceService/Handlers/ProductHandler.cs +namespace StellaOps.Router.Examples.ReferenceService.Handlers; + +[StellaEndpoint(ServiceName = "products", Version = "v1")] +public class ProductHandler : EndpointHandler +{ + private readonly IProductCatalog _catalog; + private readonly ILogger _logger; + + public ProductHandler(IProductCatalog catalog, ILogger logger) + { + _catalog = catalog; + _logger = logger; + } + + [StellaRoute("GET", "/products")] + [StellaRateLimit(RequestsPerMinute = 200)] + [StellaCache(Duration = 300, VaryByQuery = ["category", "brand"])] + public async Task ListProducts( + StellaRequestContext context, + [FromQuery("category")] string? category = null, + [FromQuery("brand")] string? brand = null, + [FromQuery("minPrice")] decimal? minPrice = null, + [FromQuery("maxPrice")] decimal? maxPrice = null, + [FromQuery("page")] int page = 1, + [FromQuery("size")] int pageSize = 20, + [FromQuery("sort")] string sort = "name") + { + var filter = new ProductFilter + { + Category = category, + Brand = brand, + MinPrice = minPrice, + MaxPrice = maxPrice + }; + + var sortOptions = ParseSortOptions(sort); + var (products, total) = await _catalog.SearchAsync(filter, sortOptions, page, pageSize); + + return Ok(new ProductListResponse + { + Products = products.Select(p => p.ToDto()).ToList(), + Page = page, + PageSize = pageSize, + TotalCount = total, + Facets = await _catalog.GetFacetsAsync(filter) + }); + } + + [StellaRoute("GET", "/products/{id}")] + [StellaCache(Duration = 600, VaryByPath = true)] + public async Task GetProduct( + StellaRequestContext context, + [FromPath("id")] string id) + { + var product = await _catalog.GetByIdAsync(id); + + if (product == null) + { + return NotFound(new ErrorResponse("PRODUCT_NOT_FOUND", $"Product {id} not found")); + } + + // Track view for analytics (fire-and-forget) + _ = _catalog.RecordViewAsync(id, context.CorrelationId); + + return Ok(product.ToDetailDto()); + } + + [StellaRoute("POST", "/products")] + [StellaAuth(RequiredClaims = ["product:create"])] + [StellaRateLimit(RequestsPerMinute = 30)] + public async Task CreateProduct( + StellaRequestContext context, + [FromBody] CreateProductRequest request) + { + var product = new Product + { + Id = GenerateProductId(request.Category), + Name = request.Name, + Description = request.Description, + Category = request.Category, + Brand = request.Brand, + Price = request.Price, + Sku = request.Sku, + Inventory = request.InitialInventory, + CreatedAt = DateTime.UtcNow + }; + + await _catalog.CreateAsync(product); + + return Created($"/products/{product.Id}", product.ToDto()); + } + + [StellaRoute("PUT", "/products/{id}/inventory")] + [StellaAuth(RequiredClaims = ["product:inventory"])] + public async Task UpdateInventory( + StellaRequestContext context, + [FromPath("id")] string id, + [FromBody] UpdateInventoryRequest request) + { + var product = await _catalog.GetByIdAsync(id); + if (product == null) + { + return NotFound(new ErrorResponse("PRODUCT_NOT_FOUND", $"Product {id} not found")); + } + + var newQuantity = request.Operation switch + { + "set" => request.Quantity, + "add" => product.Inventory + request.Quantity, + "subtract" => product.Inventory - request.Quantity, + _ => throw new ArgumentException($"Invalid operation: {request.Operation}") + }; + + if (newQuantity < 0) + { + return BadRequest(new ErrorResponse("INSUFFICIENT_INVENTORY", + $"Cannot reduce inventory below 0. Current: {product.Inventory}")); + } + + product.Inventory = newQuantity; + product.UpdatedAt = DateTime.UtcNow; + + await _catalog.UpdateAsync(product); + + return Ok(new { ProductId = id, NewInventory = newQuantity }); + } + + private static string GenerateProductId(string category) + { + var prefix = category.ToUpperInvariant()[..3]; + var suffix = Guid.NewGuid().ToString("N")[..8].ToUpperInvariant(); + return $"{prefix}-{suffix}"; + } + + private static SortOptions ParseSortOptions(string sort) + { + var descending = sort.StartsWith('-'); + var field = descending ? sort[1..] : sort; + return new SortOptions(field, descending); + } +} +``` + +### Order Processing Handler + +```csharp +// ReferenceService/Handlers/OrderHandler.cs +namespace StellaOps.Router.Examples.ReferenceService.Handlers; + +[StellaEndpoint(ServiceName = "orders", Version = "v1")] +public class OrderHandler : EndpointHandler +{ + private readonly IOrderService _orderService; + private readonly IProductCatalog _catalog; + private readonly INotificationService _notifications; + private readonly ILogger _logger; + + public OrderHandler( + IOrderService orderService, + IProductCatalog catalog, + INotificationService notifications, + ILogger logger) + { + _orderService = orderService; + _catalog = catalog; + _notifications = notifications; + _logger = logger; + } + + [StellaRoute("POST", "/orders")] + [StellaAuth(RequiredClaims = ["order:create"])] + [StellaRateLimit(RequestsPerMinute = 20, Key = "user_id")] + public async Task CreateOrder( + StellaRequestContext context, + [FromBody] CreateOrderRequest request) + { + var userId = context.GetClaim("user_id"); + + // Validate products and calculate total + var lineItems = new List(); + decimal total = 0; + + foreach (var item in request.Items) + { + var product = await _catalog.GetByIdAsync(item.ProductId); + if (product == null) + { + return BadRequest(new ErrorResponse("INVALID_PRODUCT", + $"Product {item.ProductId} not found")); + } + + if (product.Inventory < item.Quantity) + { + return BadRequest(new ErrorResponse("INSUFFICIENT_INVENTORY", + $"Product {item.ProductId} has only {product.Inventory} units available")); + } + + lineItems.Add(new OrderLineItem + { + ProductId = product.Id, + ProductName = product.Name, + UnitPrice = product.Price, + Quantity = item.Quantity, + Subtotal = product.Price * item.Quantity + }); + + total += product.Price * item.Quantity; + } + + // Create order + var order = new Order + { + Id = Guid.NewGuid(), + UserId = userId, + Items = lineItems, + Subtotal = total, + Tax = total * 0.08m, // 8% tax + Total = total * 1.08m, + Status = OrderStatus.Pending, + ShippingAddress = request.ShippingAddress, + CreatedAt = DateTime.UtcNow + }; + + // Reserve inventory + foreach (var item in request.Items) + { + await _catalog.ReserveInventoryAsync(item.ProductId, item.Quantity, order.Id); + } + + await _orderService.CreateAsync(order); + + // Send notification (fire-and-forget) + _ = _notifications.SendOrderConfirmationAsync(userId, order); + + _logger.LogInformation("Order created: {OrderId} for user {UserId}, total: {Total}", + order.Id, userId, order.Total); + + return Created($"/orders/{order.Id}", order.ToDto()); + } + + [StellaRoute("GET", "/orders")] + [StellaAuth(RequiredClaims = ["order:list"])] + public async Task ListOrders( + StellaRequestContext context, + [FromQuery("status")] string? status = null, + [FromQuery("page")] int page = 1, + [FromQuery("size")] int pageSize = 20) + { + var userId = context.GetClaim("user_id"); + var isAdmin = context.HasClaim("order:admin"); + + // Non-admins can only see their own orders + Guid? filterUserId = isAdmin ? null : userId; + OrderStatus? filterStatus = status != null ? Enum.Parse(status, true) : null; + + var (orders, total) = await _orderService.ListAsync(filterUserId, filterStatus, page, pageSize); + + return Ok(new OrderListResponse + { + Orders = orders.Select(o => o.ToSummaryDto()).ToList(), + Page = page, + PageSize = pageSize, + TotalCount = total + }); + } + + [StellaRoute("GET", "/orders/{id}")] + [StellaAuth(RequiredClaims = ["order:read"])] + public async Task GetOrder( + StellaRequestContext context, + [FromPath("id")] Guid id) + { + var order = await _orderService.GetByIdAsync(id); + if (order == null) + { + return NotFound(new ErrorResponse("ORDER_NOT_FOUND", $"Order {id} not found")); + } + + // Check authorization + var userId = context.GetClaim("user_id"); + var isAdmin = context.HasClaim("order:admin"); + + if (order.UserId != userId && !isAdmin) + { + return Forbidden(new ErrorResponse("FORBIDDEN", "Cannot access this order")); + } + + return Ok(order.ToDetailDto()); + } + + [StellaRoute("POST", "/orders/{id}/cancel")] + [StellaAuth(RequiredClaims = ["order:cancel"])] + public async Task CancelOrder( + StellaRequestContext context, + [FromPath("id")] Guid id, + [FromBody] CancelOrderRequest request) + { + var order = await _orderService.GetByIdAsync(id); + if (order == null) + { + return NotFound(new ErrorResponse("ORDER_NOT_FOUND", $"Order {id} not found")); + } + + // Check authorization + var userId = context.GetClaim("user_id"); + var isAdmin = context.HasClaim("order:admin"); + + if (order.UserId != userId && !isAdmin) + { + return Forbidden(new ErrorResponse("FORBIDDEN", "Cannot cancel this order")); + } + + // Check if cancellable + if (order.Status is OrderStatus.Shipped or OrderStatus.Delivered or OrderStatus.Cancelled) + { + return BadRequest(new ErrorResponse("NOT_CANCELLABLE", + $"Order with status {order.Status} cannot be cancelled")); + } + + // Release inventory + foreach (var item in order.Items) + { + await _catalog.ReleaseInventoryAsync(item.ProductId, item.Quantity, order.Id); + } + + order.Status = OrderStatus.Cancelled; + order.CancelledAt = DateTime.UtcNow; + order.CancellationReason = request.Reason; + + await _orderService.UpdateAsync(order); + + // Send notification + _ = _notifications.SendOrderCancellationAsync(order.UserId, order); + + return Ok(order.ToDto()); + } +} +``` + +### Admin Handler + +```csharp +// ReferenceService/Handlers/AdminHandler.cs +namespace StellaOps.Router.Examples.ReferenceService.Handlers; + +[StellaEndpoint(ServiceName = "admin", Version = "v1")] +[StellaAuth(RequiredClaims = ["admin:access"])] +public class AdminHandler : EndpointHandler +{ + private readonly IUserRepository _users; + private readonly IOrderService _orders; + private readonly IProductCatalog _products; + + public AdminHandler( + IUserRepository users, + IOrderService orders, + IProductCatalog products) + { + _users = users; + _orders = orders; + _products = products; + } + + [StellaRoute("GET", "/admin/stats")] + [StellaCache(Duration = 60)] + public async Task GetStats(StellaRequestContext context) + { + var userCount = await _users.CountAsync(); + var orderStats = await _orders.GetStatsAsync(); + var productStats = await _products.GetStatsAsync(); + + return Ok(new AdminStatsResponse + { + Users = new UserStats + { + Total = userCount, + ActiveLast30Days = await _users.CountActiveAsync(TimeSpan.FromDays(30)) + }, + Orders = orderStats, + Products = productStats, + GeneratedAt = DateTime.UtcNow + }); + } + + [StellaRoute("POST", "/admin/users/{id}/impersonate")] + [StellaAuth(RequiredClaims = ["admin:impersonate"])] + public async Task ImpersonateUser( + StellaRequestContext context, + [FromPath("id")] Guid id) + { + var user = await _users.GetByIdAsync(id); + if (user == null) + { + return NotFound(new ErrorResponse("USER_NOT_FOUND", $"User {id} not found")); + } + + var adminId = context.GetClaim("user_id"); + + // Generate impersonation token (normally would call Authority) + var impersonationClaims = new Dictionary + { + ["user_id"] = id, + ["impersonated_by"] = adminId, + ["impersonation_started"] = DateTime.UtcNow.ToString("O") + }; + + return Ok(new ImpersonationResponse + { + UserId = id, + ImpersonatedBy = adminId, + Message = "Impersonation token generated - integrate with Authority for actual token" + }); + } + + [StellaRoute("GET", "/admin/audit-log")] + [StellaAuth(RequiredClaims = ["admin:audit"])] + public async Task GetAuditLog( + StellaRequestContext context, + [FromQuery("startDate")] DateTime? startDate = null, + [FromQuery("endDate")] DateTime? endDate = null, + [FromQuery("action")] string? action = null, + [FromQuery("userId")] Guid? userId = null, + [FromQuery("page")] int page = 1, + [FromQuery("size")] int pageSize = 50) + { + // This would integrate with actual audit logging system + return Ok(new AuditLogResponse + { + Entries = new List(), + Page = page, + PageSize = pageSize, + TotalCount = 0 + }); + } +} +``` + +## Migration Templates + +### Basic Migration Template + +```csharp +// MigrationTemplates/BasicMigration/Program.cs +// Minimal migration from ASP.NET Core controller to Stella Router + +/* + * BEFORE: Traditional ASP.NET Core Controller + * + * [ApiController] + * [Route("api/[controller]")] + * public class UsersController : ControllerBase + * { + * [HttpGet("{id}")] + * public async Task GetUser(Guid id) + * { + * var user = await _repository.GetByIdAsync(id); + * return user == null ? NotFound() : Ok(user); + * } + * } + */ + +// AFTER: Stella Router Handler +using StellaOps.Router.Microservice; + +var builder = StellaMicroservice.CreateBuilder(args); +builder.AddHandler(); +builder.UseTransport("tcp", o => o.Port = 9100); + +var host = builder.Build(); +await host.RunAsync(); + +[StellaEndpoint(ServiceName = "users")] +public class MigratedUserHandler : EndpointHandler +{ + private readonly IUserRepository _repository; + + public MigratedUserHandler(IUserRepository repository) + { + _repository = repository; + } + + // Route pattern: GET /api/users/{id} → GET /users/{id} + [StellaRoute("GET", "/users/{id}")] + public async Task GetUser( + StellaRequestContext context, + [FromPath("id")] Guid id) + { + var user = await _repository.GetByIdAsync(id); + return user == null ? NotFound() : Ok(user); + } +} +``` + +### Dual Mode Migration Template + +```csharp +// MigrationTemplates/DualModeMigration/Program.cs +// Run both old ASP.NET Core endpoints and new Stella endpoints simultaneously + +using StellaOps.Router.Microservice; +using StellaOps.Router.Compatibility; + +var builder = WebApplication.CreateBuilder(args); + +// Configure both ASP.NET Core and Stella +builder.Services.AddControllers(); +builder.Services.AddStellaMicroservice(stella => +{ + stella.AddHandler(); + stella.UseTransport("tcp", o => o.Port = 9100); +}); + +var app = builder.Build(); + +// Mount old controllers at legacy path +app.MapControllers(); // /api/v1/users - old implementation + +// Mount Stella handlers +app.UseStellaMicroservice(); // /users - new implementation + +// Compatibility middleware: route based on header or path version +app.UseMiddleware(); + +await app.RunAsync(); + +// Version routing middleware for gradual migration +public class VersionRoutingMiddleware +{ + private readonly RequestDelegate _next; + + public VersionRoutingMiddleware(RequestDelegate next) + { + _next = next; + } + + public async Task InvokeAsync(HttpContext context) + { + // Check for explicit version header + if (context.Request.Headers.TryGetValue("X-API-Version", out var version)) + { + if (version == "2") + { + // Rewrite path to Stella endpoints + context.Request.Path = context.Request.Path.Value? + .Replace("/api/v1/", "/") + .Replace("/api/", "/"); + } + } + + await _next(context); + } +} +``` + +### Gradual Migration Template + +```csharp +// MigrationTemplates/GradualMigration/MigrationConfig.cs +// Configure which endpoints use old vs new implementation + +namespace StellaOps.Router.Migration; + +public class MigrationConfig +{ + public Dictionary Endpoints { get; set; } = new(); + + public enum EndpointMigrationState + { + Legacy, // Use old ASP.NET Core implementation + Shadow, // Call both, compare results, return legacy + Canary, // Route X% to new implementation + Migrated // Use new Stella implementation + } +} + +// MigrationTemplates/GradualMigration/MigrationMiddleware.cs +public class MigrationMiddleware +{ + private readonly RequestDelegate _next; + private readonly MigrationConfig _config; + private readonly IStellaDispatcher _stellaDispatcher; + private readonly ILogger _logger; + + public MigrationMiddleware( + RequestDelegate next, + MigrationConfig config, + IStellaDispatcher stellaDispatcher, + ILogger logger) + { + _next = next; + _config = config; + _stellaDispatcher = stellaDispatcher; + _logger = logger; + } + + public async Task InvokeAsync(HttpContext context) + { + var path = context.Request.Path.Value ?? ""; + var state = GetMigrationState(path); + + switch (state) + { + case MigrationConfig.EndpointMigrationState.Legacy: + await _next(context); + break; + + case MigrationConfig.EndpointMigrationState.Shadow: + await ExecuteShadowMode(context); + break; + + case MigrationConfig.EndpointMigrationState.Canary: + await ExecuteCanaryMode(context); + break; + + case MigrationConfig.EndpointMigrationState.Migrated: + await ExecuteMigratedMode(context); + break; + } + } + + private async Task ExecuteShadowMode(HttpContext context) + { + // Execute both implementations + var legacyTask = ExecuteLegacyAsync(context); + var stellaTask = ExecuteStellaAsync(context); + + await Task.WhenAll(legacyTask, stellaTask); + + var legacyResult = await legacyTask; + var stellaResult = await stellaTask; + + // Compare results for monitoring + if (!ResultsMatch(legacyResult, stellaResult)) + { + _logger.LogWarning( + "Shadow mode mismatch for {Path}: Legacy={LegacyStatus}, Stella={StellaStatus}", + context.Request.Path, + legacyResult.StatusCode, + stellaResult.StatusCode); + } + + // Return legacy result + await WriteResponse(context, legacyResult); + } + + private async Task ExecuteCanaryMode(HttpContext context) + { + var path = context.Request.Path.Value ?? ""; + var canaryPercentage = GetCanaryPercentage(path); + + // Determine which implementation to use based on request hash + var requestHash = ComputeRequestHash(context); + var useStella = (requestHash % 100) < canaryPercentage; + + if (useStella) + { + await ExecuteMigratedMode(context); + } + else + { + await _next(context); + } + } + + private async Task ExecuteMigratedMode(HttpContext context) + { + var stellaResult = await ExecuteStellaAsync(context); + await WriteResponse(context, stellaResult); + } + + private MigrationConfig.EndpointMigrationState GetMigrationState(string path) + { + foreach (var (pattern, state) in _config.Endpoints) + { + if (PathMatchesPattern(path, pattern)) + { + return state; + } + } + return MigrationConfig.EndpointMigrationState.Legacy; + } + + private async Task ExecuteLegacyAsync(HttpContext context) + { + // Capture legacy response + var originalBody = context.Response.Body; + using var memoryStream = new MemoryStream(); + context.Response.Body = memoryStream; + + try + { + await _next(context); + + memoryStream.Position = 0; + var body = await new StreamReader(memoryStream).ReadToEndAsync(); + + return new MigrationResult + { + StatusCode = context.Response.StatusCode, + Body = body, + Headers = context.Response.Headers.ToDictionary(h => h.Key, h => h.Value.ToString()) + }; + } + finally + { + context.Response.Body = originalBody; + } + } + + private async Task ExecuteStellaAsync(HttpContext context) + { + var request = await ConvertToStellaRequest(context); + var response = await _stellaDispatcher.DispatchAsync(request); + + return new MigrationResult + { + StatusCode = response.StatusCode, + Body = response.GetBodyAsString(), + Headers = response.Headers.ToDictionary() + }; + } + + private static bool PathMatchesPattern(string path, string pattern) + { + // Simple glob matching + var regex = "^" + Regex.Escape(pattern) + .Replace("\\*\\*", ".*") + .Replace("\\*", "[^/]*") + "$"; + return Regex.IsMatch(path, regex); + } + + private static bool ResultsMatch(MigrationResult legacy, MigrationResult stella) + { + return legacy.StatusCode == stella.StatusCode; + // Could add body comparison with normalization + } + + private static int ComputeRequestHash(HttpContext context) + { + var key = context.Request.Path + context.Request.QueryString; + return Math.Abs(key.GetHashCode()) % 100; + } + + private int GetCanaryPercentage(string path) => 10; // Default 10% + + private static async Task WriteResponse(HttpContext context, MigrationResult result) + { + context.Response.StatusCode = result.StatusCode; + foreach (var (key, value) in result.Headers) + { + context.Response.Headers[key] = value; + } + await context.Response.WriteAsync(result.Body); + } + + private static async Task ConvertToStellaRequest(HttpContext context) + { + using var reader = new StreamReader(context.Request.Body); + var body = await reader.ReadToEndAsync(); + + return new StellaRequest + { + Method = context.Request.Method, + Path = context.Request.Path.Value ?? "/", + Headers = context.Request.Headers.ToDictionary(h => h.Key, h => h.Value.ToString()), + Body = Encoding.UTF8.GetBytes(body) + }; + } +} + +public class MigrationResult +{ + public int StatusCode { get; set; } + public string Body { get; set; } = ""; + public Dictionary Headers { get; set; } = new(); +} +``` + +## Code Scaffolding Tool + +### CLI Scaffolding Generator + +```csharp +// Scaffolding/Generator/StellaScaffold.cs +namespace StellaOps.Router.Scaffolding; + +public class StellaScaffold +{ + private readonly ITemplateEngine _templateEngine; + + public StellaScaffold(ITemplateEngine templateEngine) + { + _templateEngine = templateEngine; + } + + public async Task GenerateServiceAsync(ServiceScaffoldOptions options) + { + var context = new ScaffoldContext + { + ServiceName = options.ServiceName, + Namespace = options.Namespace ?? $"StellaOps.{options.ServiceName}", + OutputPath = options.OutputPath, + Features = options.Features + }; + + // Generate project structure + await GenerateProjectFileAsync(context); + await GenerateProgramAsync(context); + + // Generate handlers + foreach (var handler in options.Handlers) + { + await GenerateHandlerAsync(context, handler); + } + + // Generate models + foreach (var model in options.Models) + { + await GenerateModelAsync(context, model); + } + + // Generate configuration + await GenerateConfigurationAsync(context); + + // Generate tests if requested + if (options.Features.Contains("tests")) + { + await GenerateTestsAsync(context, options); + } + } + + private async Task GenerateProjectFileAsync(ScaffoldContext context) + { + var template = await _templateEngine.LoadAsync("project.csproj.scriban"); + var content = await template.RenderAsync(context); + + var path = Path.Combine(context.OutputPath, $"{context.ServiceName}.csproj"); + await File.WriteAllTextAsync(path, content); + } + + private async Task GenerateProgramAsync(ScaffoldContext context) + { + var template = await _templateEngine.LoadAsync("Program.cs.scriban"); + var content = await template.RenderAsync(context); + + var path = Path.Combine(context.OutputPath, "Program.cs"); + await File.WriteAllTextAsync(path, content); + } + + private async Task GenerateHandlerAsync(ScaffoldContext context, HandlerDefinition handler) + { + var template = await _templateEngine.LoadAsync("Handler.cs.scriban"); + var handlerContext = new HandlerScaffoldContext + { + Base = context, + Handler = handler + }; + + var content = await template.RenderAsync(handlerContext); + + var dir = Path.Combine(context.OutputPath, "Handlers"); + Directory.CreateDirectory(dir); + + var path = Path.Combine(dir, $"{handler.Name}Handler.cs"); + await File.WriteAllTextAsync(path, content); + } + + private async Task GenerateModelAsync(ScaffoldContext context, ModelDefinition model) + { + var template = await _templateEngine.LoadAsync("Model.cs.scriban"); + var modelContext = new ModelScaffoldContext + { + Base = context, + Model = model + }; + + var content = await template.RenderAsync(modelContext); + + var dir = Path.Combine(context.OutputPath, "Models"); + Directory.CreateDirectory(dir); + + var path = Path.Combine(dir, $"{model.Name}.cs"); + await File.WriteAllTextAsync(path, content); + } + + private async Task GenerateConfigurationAsync(ScaffoldContext context) + { + // Generate appsettings.json + var appSettings = new + { + Stella = new + { + Transport = new + { + Type = "tcp", + Host = "0.0.0.0", + Port = 9100 + }, + DualExposure = new + { + Enabled = true, + Port = 8080 + } + }, + Logging = new + { + LogLevel = new { Default = "Information" } + } + }; + + var json = JsonSerializer.Serialize(appSettings, new JsonSerializerOptions + { + WriteIndented = true + }); + + var path = Path.Combine(context.OutputPath, "appsettings.json"); + await File.WriteAllTextAsync(path, json); + } + + private async Task GenerateTestsAsync(ScaffoldContext context, ServiceScaffoldOptions options) + { + var testContext = new TestScaffoldContext + { + Base = context, + Handlers = options.Handlers + }; + + var testDir = Path.Combine(context.OutputPath, "..", $"{context.ServiceName}.Tests"); + Directory.CreateDirectory(testDir); + + // Generate test project + var testProjectTemplate = await _templateEngine.LoadAsync("test-project.csproj.scriban"); + var testProjectContent = await testProjectTemplate.RenderAsync(testContext); + await File.WriteAllTextAsync( + Path.Combine(testDir, $"{context.ServiceName}.Tests.csproj"), + testProjectContent); + + // Generate test fixtures + var fixtureTemplate = await _templateEngine.LoadAsync("TestFixture.cs.scriban"); + var fixtureContent = await fixtureTemplate.RenderAsync(testContext); + await File.WriteAllTextAsync( + Path.Combine(testDir, "TestFixture.cs"), + fixtureContent); + + // Generate handler tests + foreach (var handler in options.Handlers) + { + var handlerTestTemplate = await _templateEngine.LoadAsync("HandlerTests.cs.scriban"); + var handlerTestContext = new { Base = testContext, Handler = handler }; + var handlerTestContent = await handlerTestTemplate.RenderAsync(handlerTestContext); + await File.WriteAllTextAsync( + Path.Combine(testDir, $"{handler.Name}HandlerTests.cs"), + handlerTestContent); + } + } +} + +public class ServiceScaffoldOptions +{ + public string ServiceName { get; set; } = ""; + public string? Namespace { get; set; } + public string OutputPath { get; set; } = ""; + public List Features { get; set; } = new(); + public List Handlers { get; set; } = new(); + public List Models { get; set; } = new(); +} + +public class HandlerDefinition +{ + public string Name { get; set; } = ""; + public string ResourceName { get; set; } = ""; + public List Endpoints { get; set; } = new(); +} + +public class EndpointDefinition +{ + public string Method { get; set; } = "GET"; + public string Path { get; set; } = ""; + public string Name { get; set; } = ""; + public List RequiredClaims { get; set; } = new(); + public int? RateLimit { get; set; } +} + +public class ModelDefinition +{ + public string Name { get; set; } = ""; + public List Properties { get; set; } = new(); +} + +public class PropertyDefinition +{ + public string Name { get; set; } = ""; + public string Type { get; set; } = "string"; + public bool Required { get; set; } +} +``` + +### Scriban Templates + +``` +{{# Templates/Handler.cs.scriban }} +// Auto-generated by StellaScaffold +using StellaOps.Router.Microservice; + +namespace {{ base.namespace }}.Handlers; + +[StellaEndpoint(ServiceName = "{{ handler.resource_name }}")] +public class {{ handler.name }}Handler : EndpointHandler +{ + private readonly ILogger<{{ handler.name }}Handler> _logger; + + public {{ handler.name }}Handler(ILogger<{{ handler.name }}Handler> logger) + { + _logger = logger; + } + +{{~ for endpoint in handler.endpoints ~}} + [StellaRoute("{{ endpoint.method }}", "{{ endpoint.path }}")] +{{~ if endpoint.required_claims.size > 0 ~}} + [StellaAuth(RequiredClaims = [{{ endpoint.required_claims | array.join '", "' | prepend '"' | append '"' }}])] +{{~ end ~}} +{{~ if endpoint.rate_limit ~}} + [StellaRateLimit(RequestsPerMinute = {{ endpoint.rate_limit }})] +{{~ end ~}} + public async Task {{ endpoint.name }}(StellaRequestContext context) + { + _logger.LogInformation("{{ endpoint.name }} called"); + + // TODO: Implement {{ endpoint.name }} + throw new NotImplementedException(); + } + +{{~ end ~}} +} +``` + +## CLI Commands + +```bash +# Generate a new microservice +stella-scaffold new MyService \ + --namespace StellaOps.MyService \ + --handler User:users \ + --handler Product:products \ + --features tests,docker + +# Generate a single handler +stella-scaffold handler Order \ + --resource orders \ + --endpoint "GET /orders" \ + --endpoint "POST /orders" \ + --endpoint "GET /orders/{id}" + +# Generate migration shim +stella-scaffold migrate \ + --source Controllers/UsersController.cs \ + --output Handlers/UserHandler.cs +``` + +## YAML Configuration + +```yaml +# config/scaffold-config.yaml +scaffolding: + defaultNamespace: "StellaOps.Services" + defaultFeatures: + - tests + - docker + - healthchecks + + templates: + path: "./templates" + customTemplates: + - name: "enterprise-handler" + file: "custom/EnterpriseHandler.cs.scriban" + + conventions: + handlerSuffix: "Handler" + modelSuffix: "" + testSuffix: "Tests" + +migration: + defaultMode: "gradual" + shadowModeEnabled: true + canaryPercentage: 10 + + pathMappings: + "/api/v1/": "/" + "/api/": "/" + + endpoints: + "/users/**": "shadow" + "/products/**": "canary" + "/orders/**": "legacy" +``` + +## Testing + +```csharp +[Fact] +public async Task ReferenceService_HandlesCompleteWorkflow() +{ + // Arrange + using var fixture = new ReferenceServiceFixture(); + await fixture.StartAsync(); + + var client = fixture.CreateClient(); + + // Create user + var createUserResponse = await client.PostAsync("/users", new CreateUserRequest + { + Email = "test@example.com", + Name = "Test User" + }); + + Assert.Equal(201, createUserResponse.StatusCode); + var user = await createUserResponse.ReadAsAsync(); + + // Create product + var createProductResponse = await client.PostAsync("/products", new CreateProductRequest + { + Name = "Test Product", + Price = 99.99m, + Category = "TEST", + InitialInventory = 100 + }); + + Assert.Equal(201, createProductResponse.StatusCode); + var product = await createProductResponse.ReadAsAsync(); + + // Create order + var createOrderResponse = await client.PostAsync("/orders", new CreateOrderRequest + { + Items = new[] + { + new OrderItemRequest { ProductId = product.Id, Quantity = 2 } + }, + ShippingAddress = new AddressDto { Street = "123 Main St", City = "Test City" } + }); + + Assert.Equal(201, createOrderResponse.StatusCode); + var order = await createOrderResponse.ReadAsAsync(); + + Assert.Equal(user.Id, order.UserId); + Assert.Equal(199.98m * 1.08m, order.Total, 2); // 2 * 99.99 * 1.08 tax +} + +[Fact] +public async Task MigrationMiddleware_ShadowMode_ExecutesBothImplementations() +{ + // Arrange + var config = new MigrationConfig + { + Endpoints = new Dictionary + { + ["/users/**"] = MigrationConfig.EndpointMigrationState.Shadow + } + }; + + using var fixture = new MigrationTestFixture(config); + await fixture.StartAsync(); + + // Act + var response = await fixture.Client.GetAsync("/users/123"); + + // Assert + Assert.Equal(200, response.StatusCode); + Assert.True(fixture.LegacyWasCalled); + Assert.True(fixture.StellaWasCalled); +} + +[Fact] +public async Task Scaffolding_GeneratesValidService() +{ + // Arrange + var options = new ServiceScaffoldOptions + { + ServiceName = "TestService", + Namespace = "Test.Service", + OutputPath = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString()), + Features = new List { "tests" }, + Handlers = new List + { + new() + { + Name = "User", + ResourceName = "users", + Endpoints = new List + { + new() { Method = "GET", Path = "/users", Name = "ListUsers" }, + new() { Method = "GET", Path = "/users/{id}", Name = "GetUser" } + } + } + } + }; + + var scaffold = new StellaScaffold(new ScribanTemplateEngine()); + + try + { + // Act + await scaffold.GenerateServiceAsync(options); + + // Assert + Assert.True(File.Exists(Path.Combine(options.OutputPath, "TestService.csproj"))); + Assert.True(File.Exists(Path.Combine(options.OutputPath, "Program.cs"))); + Assert.True(File.Exists(Path.Combine(options.OutputPath, "Handlers", "UserHandler.cs"))); + + // Verify generated code compiles (optional) + var compilation = await CompileGeneratedCodeAsync(options.OutputPath); + Assert.Empty(compilation.Errors); + } + finally + { + Directory.Delete(options.OutputPath, recursive: true); + } +} +``` + +## Deliverables + +| Artifact | Path | +|----------|------| +| Reference Service | `src/Examples/StellaOps.Router.Examples/ReferenceService/` | +| Migration Templates | `src/Examples/StellaOps.Router.Examples/MigrationTemplates/` | +| Scaffolding Tool | `src/Examples/StellaOps.Router.Examples/Scaffolding/` | +| Scriban Templates | `src/Examples/StellaOps.Router.Examples/Scaffolding/Templates/` | +| Tests | `src/Examples/StellaOps.Router.Examples.Tests/` | + +## Next Step + +[Step 28: Agent Process Guidelines →](28-Step.md) diff --git a/docs/router/28-Step.md b/docs/router/28-Step.md new file mode 100644 index 000000000..91ea72113 --- /dev/null +++ b/docs/router/28-Step.md @@ -0,0 +1,755 @@ +# Step 28: Agent Process Guidelines + +## Overview + +This document provides comprehensive guidelines for AI agents (Claude, Copilot, etc.) implementing the Stella Router. It establishes conventions, patterns, and decision frameworks to ensure consistent, high-quality implementations across all phases. + +## Goals + +1. Define clear coding standards and patterns for Router implementation +2. Establish decision frameworks for common scenarios +3. Provide checklists for implementation quality +4. Document testing requirements and coverage expectations +5. Define commit and PR conventions + +## Implementation Standards + +### Code Organization + +``` +src/Router/ +├── StellaOps.Router.Core/ # Core abstractions and contracts +│ ├── Abstractions/ # Interfaces +│ ├── Configuration/ # Config models +│ ├── Extensions/ # Extension methods +│ └── Primitives/ # Value types +│ +├── StellaOps.Router.Gateway/ # Gateway implementation +│ ├── Routing/ # Route matching +│ ├── Handlers/ # Route handlers +│ ├── Pipeline/ # Request pipeline +│ └── Middleware/ # Gateway middleware +│ +├── StellaOps.Router.Transport/ # Transport implementations +│ ├── InMemory/ # In-process transport +│ ├── Tcp/ # TCP transport +│ └── Tls/ # TLS transport +│ +├── StellaOps.Router.Microservice/ # Microservice SDK +│ ├── Hosting/ # Host builder +│ ├── Endpoints/ # Endpoint handling +│ └── Context/ # Request context +│ +├── StellaOps.Router.Security/ # Security components +│ ├── Jwt/ # JWT validation +│ ├── Claims/ # Claim hydration +│ └── RateLimiting/ # Rate limiting +│ +└── StellaOps.Router.Observability/ # Observability + ├── Logging/ # Structured logging + ├── Metrics/ # Prometheus metrics + └── Tracing/ # OpenTelemetry tracing +``` + +### Naming Conventions + +| Element | Convention | Example | +|---------|------------|---------| +| Interfaces | `I` prefix, noun/adjective | `IRouteHandler`, `IConnectable` | +| Classes | PascalCase, noun | `JwtValidator`, `RouteTable` | +| Async methods | `Async` suffix | `ValidateTokenAsync`, `SendAsync` | +| Config classes | `Options` or `Configuration` suffix | `JwtValidationOptions` | +| Event handlers | `On` prefix | `OnConnectionEstablished` | +| Factory methods | `Create` prefix | `CreateHandler`, `CreateConnection` | +| Boolean properties | `Is`/`Has`/`Can` prefix | `IsValid`, `HasExpired`, `CanRetry` | + +### File Structure + +```csharp +// File: StellaOps.Router.Core/Abstractions/IRouteHandler.cs + +// 1. License header (if required) +// 2. Using statements (sorted: System, Microsoft, Third-party, Internal) +using System; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using StellaOps.Router.Core.Configuration; + +// 3. Namespace (one per file, matches folder structure) +namespace StellaOps.Router.Core.Abstractions; + +// 4. XML documentation +/// +/// Handles requests for a specific route type. +/// +/// +/// Implementations must be thread-safe and support concurrent request handling. +/// +public interface IRouteHandler +{ + // 5. Interface members (properties, then methods) + + /// + /// Gets the handler type identifier. + /// + string HandlerType { get; } + + /// + /// Determines if this handler can process the given route. + /// + bool CanHandle(RouteConfiguration route); + + /// + /// Processes an incoming request. + /// + Task HandleAsync( + RequestPayload request, + RouteConfiguration route, + CancellationToken cancellationToken = default); +} +``` + +### Error Handling Patterns + +```csharp +// Pattern 1: Result types for expected failures +public readonly struct Result +{ + public T? Value { get; } + public Error? Error { get; } + public bool IsSuccess => Error == null; + + private Result(T? value, Error? error) + { + Value = value; + Error = error; + } + + public static Result Success(T value) => new(value, null); + public static Result Failure(Error error) => new(default, error); + + public Result Map(Func map) => + IsSuccess ? Result.Success(map(Value!)) : Result.Failure(Error!); + + public async Task> MapAsync(Func> map) => + IsSuccess ? Result.Success(await map(Value!)) : Result.Failure(Error!); +} + +public record Error(string Code, string Message, Exception? Inner = null); + +// Usage +public async Task> ValidateTokenAsync(string token) +{ + try + { + var claims = await _validator.ValidateAsync(token); + return Result.Success(claims); + } + catch (SecurityTokenExpiredException ex) + { + return Result.Failure(new Error("TOKEN_EXPIRED", "JWT has expired", ex)); + } + catch (SecurityTokenInvalidSignatureException ex) + { + return Result.Failure(new Error("INVALID_SIGNATURE", "JWT signature invalid", ex)); + } +} + +// Pattern 2: Exceptions for unexpected failures +public class RouterException : Exception +{ + public string ErrorCode { get; } + public int StatusCode { get; } + + public RouterException(string errorCode, string message, int statusCode = 500) + : base(message) + { + ErrorCode = errorCode; + StatusCode = statusCode; + } +} + +public class ConfigurationException : RouterException +{ + public ConfigurationException(string message) + : base("CONFIG_ERROR", message, 500) { } +} + +public class TransportException : RouterException +{ + public TransportException(string message, Exception? inner = null) + : base("TRANSPORT_ERROR", message, 503) { } +} +``` + +### Async Patterns + +```csharp +// Pattern 1: CancellationToken propagation +public async Task HandleAsync( + RequestPayload request, + CancellationToken cancellationToken = default) +{ + // Always check at start of long operations + cancellationToken.ThrowIfCancellationRequested(); + + // Propagate to all async calls + var validated = await _validator.ValidateAsync(request, cancellationToken); + var enriched = await _enricher.EnrichAsync(validated, cancellationToken); + var response = await _handler.ProcessAsync(enriched, cancellationToken); + + return response; +} + +// Pattern 2: Timeout handling +public async Task WithTimeoutAsync( + Func> operation, + TimeSpan timeout, + CancellationToken cancellationToken = default) +{ + using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); + cts.CancelAfter(timeout); + + try + { + return await operation(cts.Token); + } + catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested) + { + throw new TimeoutException($"Operation timed out after {timeout}"); + } +} + +// Pattern 3: Fire-and-forget with logging +public void FireAndForget(Func operation, ILogger logger, string operationName) +{ + _ = Task.Run(async () => + { + try + { + await operation(); + } + catch (Exception ex) + { + logger.LogError(ex, "Fire-and-forget operation {Operation} failed", operationName); + } + }); +} +``` + +### Dependency Injection Patterns + +```csharp +// Pattern 1: Constructor injection with validation +public class JwtValidator : IJwtValidator +{ + private readonly JwtValidationOptions _options; + private readonly IKeyProvider _keyProvider; + private readonly ILogger _logger; + + public JwtValidator( + IOptions options, + IKeyProvider keyProvider, + ILogger logger) + { + _options = options?.Value ?? throw new ArgumentNullException(nameof(options)); + _keyProvider = keyProvider ?? throw new ArgumentNullException(nameof(keyProvider)); + _logger = logger ?? throw new ArgumentNullException(nameof(logger)); + + ValidateOptions(_options); + } + + private static void ValidateOptions(JwtValidationOptions options) + { + if (string.IsNullOrEmpty(options.Issuer)) + throw new ConfigurationException("JWT issuer is required"); + if (options.ClockSkew < TimeSpan.Zero) + throw new ConfigurationException("Clock skew cannot be negative"); + } +} + +// Pattern 2: Factory registration for complex objects +public static class ServiceCollectionExtensions +{ + public static IServiceCollection AddStellaRouter( + this IServiceCollection services, + Action configure) + { + services.Configure(configure); + + // Core services + services.AddSingleton(); + services.AddSingleton(); + + // Keyed services for handlers + services.AddKeyedSingleton("microservice"); + services.AddKeyedSingleton("graphql"); + services.AddKeyedSingleton("proxy"); + + // Factory for route handler resolution + services.AddSingleton(sp => new RouteHandlerFactory( + sp.GetServices().ToDictionary(h => h.HandlerType))); + + return services; + } +} + +// Pattern 3: Scoped services for request context +public static class RequestScopeExtensions +{ + public static IServiceCollection AddRequestScope(this IServiceCollection services) + { + services.AddScoped(); + services.AddScoped(sp => sp.GetRequiredService().User); + services.AddScoped(sp => sp.GetRequiredService().CorrelationId); + + return services; + } +} +``` + +## Decision Framework + +### When to Create New Types vs. Reuse + +| Scenario | Decision | Rationale | +|----------|----------|-----------| +| Similar data, different context | Create new type | Type safety, clear intent | +| Same data, same context | Reuse type | DRY, reduce cognitive load | +| Third-party type | Create wrapper | Abstraction, testability | +| Config vs. runtime | Separate types | Immutability guarantees | + +```csharp +// Example: Separate types for config vs runtime +public record RouteConfiguration( + string Path, + string Method, + string HandlerType, + Dictionary Metadata); + +public class CompiledRoute +{ + public RouteConfiguration Config { get; } + public Regex PathPattern { get; } + public IRouteHandler Handler { get; } + // Runtime-computed fields +} +``` + +### When to Use Interfaces vs. Abstract Classes + +| Use Interface | Use Abstract Class | +|---------------|-------------------| +| Multiple inheritance needed | Shared implementation | +| Contract-only definition | Template method pattern | +| Third-party implementation | Internal hierarchy only | +| Mocking/testing priority | Code reuse priority | + +### Logging Level Guidelines + +| Level | When to Use | Example | +|-------|-------------|---------| +| `Trace` | Internal flow details | `"Route matching attempt for {Path}"` | +| `Debug` | Diagnostic information | `"Cache hit for key {Key}"` | +| `Information` | Significant events | `"Request completed: {Method} {Path} → {Status}"` | +| `Warning` | Recoverable issues | `"Rate limit approaching: {Current}/{Max}"` | +| `Error` | Failures requiring attention | `"Failed to connect to Authority: {Error}"` | +| `Critical` | System-wide failures | `"Configuration invalid, router cannot start"` | + +```csharp +// Structured logging patterns +_logger.LogInformation( + "Request processed: {Method} {Path} → {StatusCode} in {ElapsedMs}ms", + request.Method, + request.Path, + response.StatusCode, + stopwatch.ElapsedMilliseconds); + +// Use LoggerMessage for high-performance paths +private static readonly Action LogRequestComplete = + LoggerMessage.Define( + LogLevel.Information, + new EventId(1001, "RequestComplete"), + "Request processed: {Method} {Path} → {StatusCode} in {ElapsedMs}ms"); + +// Usage +LogRequestComplete(_logger, method, path, statusCode, elapsed, null); +``` + +## Implementation Checklists + +### Before Starting a Component + +- [ ] Read the step documentation thoroughly +- [ ] Understand dependencies on previous steps +- [ ] Review related existing code patterns +- [ ] Identify configuration requirements +- [ ] Plan test coverage strategy + +### During Implementation + +- [ ] Follow naming conventions +- [ ] Add XML documentation to public APIs +- [ ] Implement `IDisposable`/`IAsyncDisposable` where needed +- [ ] Add structured logging at appropriate levels +- [ ] Handle cancellation tokens throughout +- [ ] Use result types for expected failures +- [ ] Validate all configuration at startup + +### Before Marking Complete + +- [ ] All public types have XML documentation +- [ ] Unit tests achieve >80% coverage +- [ ] Integration tests cover happy path + error cases +- [ ] No compiler warnings +- [ ] Code passes all linting rules +- [ ] Configuration is validated +- [ ] README/documentation updated if needed + +### Pull Request Checklist + +- [ ] PR title follows convention: `feat(router): description` +- [ ] Description explains what and why +- [ ] All tests pass +- [ ] No unrelated changes +- [ ] Breaking changes documented +- [ ] Reviewable size (<500 lines preferred) + +## Testing Requirements + +### Unit Test Coverage Targets + +| Component Type | Target Coverage | +|---------------|-----------------| +| Core logic | 90% | +| Handlers | 85% | +| Middleware | 80% | +| Configuration | 75% | +| Extensions | 70% | + +### Test Structure + +```csharp +// Test file naming: {ClassName}Tests.cs +// Test method naming: {Method}_{Scenario}_{ExpectedResult} + +public class JwtValidatorTests +{ + private readonly JwtValidator _sut; // System Under Test + private readonly Mock _keyProviderMock; + private readonly Mock> _loggerMock; + + public JwtValidatorTests() + { + _keyProviderMock = new Mock(); + _loggerMock = new Mock>(); + + var options = Options.Create(new JwtValidationOptions + { + Issuer = "https://auth.example.com", + Audience = "stella-router" + }); + + _sut = new JwtValidator(options, _keyProviderMock.Object, _loggerMock.Object); + } + + [Fact] + public async Task ValidateAsync_ValidToken_ReturnsSuccessWithClaims() + { + // Arrange + var token = GenerateValidToken(); + _keyProviderMock + .Setup(x => x.GetSigningKeyAsync(It.IsAny())) + .ReturnsAsync(TestKeys.ValidKey); + + // Act + var result = await _sut.ValidateAsync(token); + + // Assert + Assert.True(result.IsSuccess); + Assert.NotNull(result.Value); + Assert.Equal("test-user", result.Value.Subject); + } + + [Fact] + public async Task ValidateAsync_ExpiredToken_ReturnsFailure() + { + // Arrange + var token = GenerateExpiredToken(); + + // Act + var result = await _sut.ValidateAsync(token); + + // Assert + Assert.False(result.IsSuccess); + Assert.Equal("TOKEN_EXPIRED", result.Error!.Code); + } + + [Theory] + [InlineData(null)] + [InlineData("")] + [InlineData(" ")] + public async Task ValidateAsync_NullOrEmptyToken_ReturnsFailure(string? token) + { + // Act + var result = await _sut.ValidateAsync(token!); + + // Assert + Assert.False(result.IsSuccess); + Assert.Equal("INVALID_TOKEN", result.Error!.Code); + } +} +``` + +### Integration Test Patterns + +```csharp +public class RouterIntegrationTests : IClassFixture +{ + private readonly RouterTestFixture _fixture; + + public RouterIntegrationTests(RouterTestFixture fixture) + { + _fixture = fixture; + } + + [Fact] + public async Task EndToEnd_AuthenticatedRequest_ReturnsSuccess() + { + // Arrange + var client = _fixture.CreateAuthenticatedClient(claims: new() + { + ["sub"] = "test-user", + ["role"] = "admin" + }); + + // Act + var response = await client.GetAsync("/api/users/123"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + var user = await response.Content.ReadFromJsonAsync(); + Assert.NotNull(user); + Assert.Equal("123", user.Id); + } +} + +// Test fixture +public class RouterTestFixture : IAsyncLifetime +{ + private IHost? _gatewayHost; + private IHost? _microserviceHost; + + public async Task InitializeAsync() + { + // Start microservice + _microserviceHost = await CreateMicroserviceHost(); + await _microserviceHost.StartAsync(); + + // Start gateway + _gatewayHost = await CreateGatewayHost(); + await _gatewayHost.StartAsync(); + } + + public async Task DisposeAsync() + { + if (_gatewayHost != null) + await _gatewayHost.StopAsync(); + if (_microserviceHost != null) + await _microserviceHost.StopAsync(); + + _gatewayHost?.Dispose(); + _microserviceHost?.Dispose(); + } + + public HttpClient CreateAuthenticatedClient(Dictionary claims) + { + var token = GenerateTestToken(claims); + var client = new HttpClient + { + BaseAddress = new Uri("http://localhost:5000") + }; + client.DefaultRequestHeaders.Authorization = + new AuthenticationHeaderValue("Bearer", token); + return client; + } +} +``` + +## Git and PR Conventions + +### Branch Naming + +``` +feat/router-- +fix/router- +refactor/router- +test/router- +docs/router- +``` + +### Commit Messages + +``` +(): + +[optional body] + +[optional footer] +``` + +Types: `feat`, `fix`, `refactor`, `test`, `docs`, `chore` + +Examples: +``` +feat(router): implement JWT validation with per-endpoint keys + +- Add JwtValidator with configurable key sources +- Support RS256 and ES256 algorithms +- Add JWKS endpoint caching with TTL + +Closes #123 +``` + +### PR Template + +```markdown +## Summary +Brief description of what this PR does. + +## Changes +- Change 1 +- Change 2 +- Change 3 + +## Testing +- [ ] Unit tests added/updated +- [ ] Integration tests added/updated +- [ ] Manual testing performed + +## Checklist +- [ ] Code follows project conventions +- [ ] Documentation updated +- [ ] No breaking changes (or documented if any) +- [ ] All tests pass +``` + +## Common Pitfalls to Avoid + +### Performance + +```csharp +// ❌ BAD: Allocating in hot path +public bool MatchRoute(string path) +{ + var parts = path.Split('/'); // Allocation + // ... +} + +// ✅ GOOD: Use Span for parsing +public bool MatchRoute(ReadOnlySpan path) +{ + // Zero-allocation parsing + foreach (var segment in path.Split('/')) + { + // ... + } +} + +// ❌ BAD: Synchronous I/O blocking async context +public async Task ProcessAsync() +{ + var config = File.ReadAllText("config.json"); // Blocking! +} + +// ✅ GOOD: Async all the way +public async Task ProcessAsync() +{ + var config = await File.ReadAllTextAsync("config.json"); +} +``` + +### Thread Safety + +```csharp +// ❌ BAD: Non-thread-safe collection +private readonly Dictionary _routes = new(); + +public void AddRoute(string key, Route route) +{ + _routes[key] = route; // Not thread-safe! +} + +// ✅ GOOD: Thread-safe collection +private readonly ConcurrentDictionary _routes = new(); + +public void AddRoute(string key, Route route) +{ + _routes[key] = route; // Thread-safe +} + +// ✅ GOOD: Immutable update +private ImmutableDictionary _routes = + ImmutableDictionary.Empty; + +public void AddRoute(string key, Route route) +{ + ImmutableInterlocked.AddOrUpdate(ref _routes, key, route, (_, _) => route); +} +``` + +### Resource Management + +```csharp +// ❌ BAD: Not disposing resources +public async Task SendAsync(byte[] data) +{ + var client = new TcpClient(); + await client.ConnectAsync("host", 9100); + await client.GetStream().WriteAsync(data); + // client never disposed! +} + +// ✅ GOOD: Proper disposal +public async Task SendAsync(byte[] data) +{ + using var client = new TcpClient(); + await client.ConnectAsync("host", 9100); + await using var stream = client.GetStream(); + await stream.WriteAsync(data); +} + +// ✅ GOOD: Connection pooling +public class ConnectionPool : IDisposable +{ + private readonly Channel _pool; + + public async Task RentAsync() + { + if (_pool.Reader.TryRead(out var client)) + return client; + return await CreateNewConnectionAsync(); + } + + public void Return(TcpClient client) + { + if (!_pool.Writer.TryWrite(client)) + client.Dispose(); + } +} +``` + +## Deliverables + +| Artifact | Purpose | +|----------|---------| +| This document | Agent implementation guidelines | +| Code templates | Consistent starting points | +| Checklists | Quality gates | +| Test patterns | Consistent testing approach | + +## Next Step + +[Step 29: Integration Testing & CI →](29-Step.md) diff --git a/docs/router/29-Step.md b/docs/router/29-Step.md new file mode 100644 index 000000000..e6b5f7e2c --- /dev/null +++ b/docs/router/29-Step.md @@ -0,0 +1,1684 @@ +# Step 29: Integration Testing & CI + +## Overview + +This final step establishes the comprehensive integration testing framework and CI/CD pipeline for the Stella Router. It ensures all components work together correctly in realistic deployment scenarios and provides automated quality gates for every change. + +## Goals + +1. Create integration test suites covering all component interactions +2. Implement performance benchmarks with regression detection +3. Configure CI/CD pipelines for automated testing +4. Establish deployment validation tests +5. Create chaos testing for resilience verification + +## Integration Test Architecture + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ Integration Test Layers │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ E2E Tests │ │ +│ │ Full deployment simulation with external services │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ Component Integration Tests │ │ +│ │ Gateway + Transport + Microservice + Security │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ Contract Tests │ │ +│ │ API contracts, Protocol compatibility │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ Unit Tests │ │ +│ │ Individual component tests (covered in previous steps) │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +## Component Integration Tests + +### Test Infrastructure + +```csharp +// Tests/Integration/StellaOps.Router.Integration.Tests/Infrastructure/IntegrationTestBase.cs +namespace StellaOps.Router.Integration.Tests.Infrastructure; + +public abstract class IntegrationTestBase : IAsyncLifetime +{ + protected IHost GatewayHost { get; private set; } = null!; + protected IHost MicroserviceHost { get; private set; } = null!; + protected HttpClient HttpClient { get; private set; } = null!; + protected ITransportClient TransportClient { get; private set; } = null!; + + protected virtual int GatewayHttpPort => 15000 + Random.Shared.Next(1000); + protected virtual int GatewayTransportPort => 19000 + Random.Shared.Next(1000); + protected virtual int MicroservicePort => 19500 + Random.Shared.Next(1000); + + protected virtual void ConfigureGateway(WebApplicationBuilder builder) { } + protected virtual void ConfigureMicroservice(StellaMicroserviceBuilder builder) { } + + public async Task InitializeAsync() + { + // Start microservice first + MicroserviceHost = await CreateMicroserviceHostAsync(); + await MicroserviceHost.StartAsync(); + + // Then start gateway + GatewayHost = await CreateGatewayHostAsync(); + await GatewayHost.StartAsync(); + + // Create clients + HttpClient = new HttpClient + { + BaseAddress = new Uri($"http://localhost:{GatewayHttpPort}") + }; + + TransportClient = GatewayHost.Services.GetRequiredService(); + + // Wait for services to be ready + await WaitForReadyAsync(); + } + + public async Task DisposeAsync() + { + HttpClient?.Dispose(); + + if (GatewayHost != null) + { + await GatewayHost.StopAsync(); + GatewayHost.Dispose(); + } + + if (MicroserviceHost != null) + { + await MicroserviceHost.StopAsync(); + MicroserviceHost.Dispose(); + } + } + + private async Task CreateGatewayHostAsync() + { + var builder = WebApplication.CreateBuilder(); + + builder.WebHost.UseUrls($"http://localhost:{GatewayHttpPort}"); + + builder.Services.AddStellaRouter(options => + { + options.Routes = GetRouteConfiguration(); + options.Transport.DefaultPort = MicroservicePort; + }); + + builder.Services.AddStellaJwtValidation(options => + { + options.Issuer = "https://test.auth.local"; + options.Audience = "stella-router-test"; + options.SigningKey = TestKeys.SymmetricKey; + }); + + builder.Services.AddStellaRateLimiting(options => + { + options.DefaultLimits = new RateLimitConfiguration + { + RequestsPerMinute = 1000, + WindowSize = TimeSpan.FromMinutes(1) + }; + }); + + ConfigureGateway(builder); + + var app = builder.Build(); + + app.UseRouting(); + app.UseStellaRouter(); + app.MapStellaHealthChecks(); + + return app; + } + + private async Task CreateMicroserviceHostAsync() + { + var builder = StellaMicroservice.CreateBuilder(); + + builder.UseTransport("tcp", options => + { + options.Host = "127.0.0.1"; + options.Port = MicroservicePort; + }); + + builder.AddHandler(); + builder.AddHandler(); + builder.AddHandler(); + + ConfigureMicroservice(builder); + + return builder.Build(); + } + + protected virtual List GetRouteConfiguration() + { + return new List + { + new() + { + Path = "/users/**", + Method = "*", + Handler = "microservice", + Target = "users-service", + RequiredClaims = new[] { "user:read" } + }, + new() + { + Path = "/products/**", + Method = "*", + Handler = "microservice", + Target = "products-service" + }, + new() + { + Path = "/orders/**", + Method = "*", + Handler = "microservice", + Target = "orders-service", + RequiredClaims = new[] { "order:access" }, + RateLimitKey = "user_id", + RateLimitRequests = 100 + } + }; + } + + private async Task WaitForReadyAsync() + { + var timeout = TimeSpan.FromSeconds(30); + var sw = Stopwatch.StartNew(); + + while (sw.Elapsed < timeout) + { + try + { + var response = await HttpClient.GetAsync("/health/ready"); + if (response.IsSuccessStatusCode) + return; + } + catch + { + // Keep trying + } + + await Task.Delay(100); + } + + throw new TimeoutException("Services failed to become ready"); + } + + protected HttpClient CreateAuthenticatedClient(Dictionary? claims = null) + { + var token = TestTokenGenerator.Generate(claims ?? new Dictionary + { + ["sub"] = "test-user", + ["user:read"] = true, + ["user:write"] = true + }); + + var client = new HttpClient + { + BaseAddress = new Uri($"http://localhost:{GatewayHttpPort}") + }; + client.DefaultRequestHeaders.Authorization = + new AuthenticationHeaderValue("Bearer", token); + + return client; + } +} +``` + +### Gateway + Microservice Integration Tests + +```csharp +// Tests/Integration/StellaOps.Router.Integration.Tests/GatewayMicroserviceTests.cs +namespace StellaOps.Router.Integration.Tests; + +public class GatewayMicroserviceTests : IntegrationTestBase +{ + [Fact] + public async Task Request_FlowsThroughGatewayToMicroservice() + { + // Arrange + using var client = CreateAuthenticatedClient(); + + // Act + var response = await client.GetAsync("/users/123"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + var user = await response.Content.ReadFromJsonAsync(); + Assert.NotNull(user); + Assert.Equal("123", user.Id); + } + + [Fact] + public async Task Claims_ArePropagatedToMicroservice() + { + // Arrange + var claims = new Dictionary + { + ["sub"] = "claim-test-user", + ["user:read"] = true, + ["custom_claim"] = "custom_value", + ["role"] = "admin" + }; + + using var client = CreateAuthenticatedClient(claims); + + // Act + var response = await client.GetAsync("/users/me/claims"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + var receivedClaims = await response.Content + .ReadFromJsonAsync>(); + + Assert.Equal("claim-test-user", receivedClaims!["sub"].ToString()); + Assert.Equal("custom_value", receivedClaims["custom_claim"].ToString()); + Assert.Equal("admin", receivedClaims["role"].ToString()); + } + + [Fact] + public async Task LargePayload_HandledCorrectly() + { + // Arrange + using var client = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "test-user", + ["user:write"] = true + }); + + var largeData = new string('x', 1_000_000); // 1MB payload + + // Act + var response = await client.PostAsJsonAsync("/users/data", new { Data = largeData }); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + var result = await response.Content.ReadFromJsonAsync(); + Assert.Equal(1_000_000, result!.ReceivedLength); + } + + [Fact] + public async Task ConcurrentRequests_HandledCorrectly() + { + // Arrange + using var client = CreateAuthenticatedClient(); + var requestCount = 100; + + // Act + var tasks = Enumerable.Range(0, requestCount) + .Select(i => client.GetAsync($"/users/{i}")); + + var responses = await Task.WhenAll(tasks); + + // Assert + Assert.All(responses, r => Assert.Equal(HttpStatusCode.OK, r.StatusCode)); + } + + [Fact] + public async Task MicroserviceTimeout_ReturnsGatewayTimeout() + { + // Arrange + using var client = CreateAuthenticatedClient(); + + // Act - endpoint that simulates slow response + var response = await client.GetAsync("/users/slow?delay=35000"); // 35 seconds + + // Assert - should timeout at 30 seconds + Assert.Equal(HttpStatusCode.GatewayTimeout, response.StatusCode); + } + + [Fact] + public async Task MicroserviceError_ReturnsBadGateway() + { + // Arrange + using var client = CreateAuthenticatedClient(); + + // Act - endpoint that throws exception + var response = await client.GetAsync("/users/error"); + + // Assert + Assert.Equal(HttpStatusCode.BadGateway, response.StatusCode); + } +} +``` + +### Security Integration Tests + +```csharp +// Tests/Integration/StellaOps.Router.Integration.Tests/SecurityIntegrationTests.cs +namespace StellaOps.Router.Integration.Tests; + +public class SecurityIntegrationTests : IntegrationTestBase +{ + [Fact] + public async Task NoToken_ReturnsUnauthorized() + { + // Arrange - no auth header + + // Act + var response = await HttpClient.GetAsync("/users/123"); + + // Assert + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task InvalidToken_ReturnsUnauthorized() + { + // Arrange + HttpClient.DefaultRequestHeaders.Authorization = + new AuthenticationHeaderValue("Bearer", "invalid.token.here"); + + // Act + var response = await HttpClient.GetAsync("/users/123"); + + // Assert + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task ExpiredToken_ReturnsUnauthorized() + { + // Arrange + var expiredToken = TestTokenGenerator.GenerateExpired(); + HttpClient.DefaultRequestHeaders.Authorization = + new AuthenticationHeaderValue("Bearer", expiredToken); + + // Act + var response = await HttpClient.GetAsync("/users/123"); + + // Assert + Assert.Equal(HttpStatusCode.Unauthorized, response.StatusCode); + } + + [Fact] + public async Task MissingRequiredClaim_ReturnsForbidden() + { + // Arrange - token without required 'user:read' claim + var token = TestTokenGenerator.Generate(new Dictionary + { + ["sub"] = "test-user" + // Missing user:read claim + }); + + HttpClient.DefaultRequestHeaders.Authorization = + new AuthenticationHeaderValue("Bearer", token); + + // Act + var response = await HttpClient.GetAsync("/users/123"); + + // Assert + Assert.Equal(HttpStatusCode.Forbidden, response.StatusCode); + } + + [Fact] + public async Task ValidTokenWithClaims_ReturnsSuccess() + { + // Arrange + using var client = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "test-user", + ["user:read"] = true + }); + + // Act + var response = await client.GetAsync("/users/123"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + } + + [Fact] + public async Task ClaimHydration_EnrichesTokenClaims() + { + // Arrange + using var client = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "hydration-test-user", + ["user:read"] = true + }); + + // Act + var response = await client.GetAsync("/users/me/enriched-claims"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + var claims = await response.Content.ReadFromJsonAsync>(); + + // Hydrated claims should be present + Assert.True(claims!.ContainsKey("org_id")); + Assert.True(claims.ContainsKey("permissions")); + Assert.True(claims.ContainsKey("feature_flags")); + } +} +``` + +### Rate Limiting Integration Tests + +```csharp +// Tests/Integration/StellaOps.Router.Integration.Tests/RateLimitingIntegrationTests.cs +namespace StellaOps.Router.Integration.Tests; + +public class RateLimitingIntegrationTests : IntegrationTestBase +{ + protected override void ConfigureGateway(WebApplicationBuilder builder) + { + builder.Services.Configure(options => + { + options.DefaultLimits = new RateLimitConfiguration + { + RequestsPerMinute = 10, + WindowSize = TimeSpan.FromSeconds(10) + }; + }); + } + + [Fact] + public async Task ExceedingRateLimit_ReturnsTooManyRequests() + { + // Arrange + using var client = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "rate-limit-test-user", + ["order:access"] = true, + ["user_id"] = "rl-user-1" + }); + + // Act - send requests beyond the limit + var responses = new List(); + for (int i = 0; i < 15; i++) + { + responses.Add(await client.GetAsync("/orders")); + } + + // Assert - first 10 should succeed, rest should be rate limited + var successCount = responses.Count(r => r.StatusCode == HttpStatusCode.OK); + var rateLimitedCount = responses.Count(r => r.StatusCode == HttpStatusCode.TooManyRequests); + + Assert.Equal(10, successCount); + Assert.Equal(5, rateLimitedCount); + } + + [Fact] + public async Task RateLimitHeaders_AreReturned() + { + // Arrange + using var client = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "header-test-user", + ["order:access"] = true, + ["user_id"] = "rl-user-2" + }); + + // Act + var response = await client.GetAsync("/orders"); + + // Assert + Assert.True(response.Headers.Contains("X-RateLimit-Limit")); + Assert.True(response.Headers.Contains("X-RateLimit-Remaining")); + Assert.True(response.Headers.Contains("X-RateLimit-Reset")); + + var limit = int.Parse(response.Headers.GetValues("X-RateLimit-Limit").First()); + var remaining = int.Parse(response.Headers.GetValues("X-RateLimit-Remaining").First()); + + Assert.Equal(10, limit); + Assert.Equal(9, remaining); + } + + [Fact] + public async Task DifferentUsers_HaveIndependentLimits() + { + // Arrange + using var client1 = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "user-1", + ["order:access"] = true, + ["user_id"] = "independent-user-1" + }); + + using var client2 = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "user-2", + ["order:access"] = true, + ["user_id"] = "independent-user-2" + }); + + // Act - exhaust limit for user 1 + for (int i = 0; i < 12; i++) + { + await client1.GetAsync("/orders"); + } + + // User 2 should still have their full limit + var response = await client2.GetAsync("/orders"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + var remaining = int.Parse(response.Headers.GetValues("X-RateLimit-Remaining").First()); + Assert.Equal(9, remaining); + } + + [Fact] + public async Task RateLimit_ResetsAfterWindow() + { + // Arrange + using var client = CreateAuthenticatedClient(new Dictionary + { + ["sub"] = "reset-test-user", + ["order:access"] = true, + ["user_id"] = "rl-reset-user" + }); + + // Act - exhaust limit + for (int i = 0; i < 12; i++) + { + await client.GetAsync("/orders"); + } + + // Wait for window to reset + await Task.Delay(TimeSpan.FromSeconds(11)); + + // Should be able to make requests again + var response = await client.GetAsync("/orders"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + } +} +``` + +### Transport Integration Tests + +```csharp +// Tests/Integration/StellaOps.Router.Integration.Tests/TransportIntegrationTests.cs +namespace StellaOps.Router.Integration.Tests; + +public class TransportIntegrationTests : IntegrationTestBase +{ + [Fact] + public async Task TcpTransport_HandlesConnectionDrop() + { + // Arrange + using var client = CreateAuthenticatedClient(); + + // First request establishes connection + await client.GetAsync("/users/123"); + + // Simulate connection drop by restarting microservice + await MicroserviceHost.StopAsync(); + await MicroserviceHost.StartAsync(); + await Task.Delay(1000); // Wait for reconnection + + // Act - should auto-reconnect + var response = await client.GetAsync("/users/456"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + } + + [Fact] + public async Task Heartbeat_KeepsConnectionAlive() + { + // Arrange + using var client = CreateAuthenticatedClient(); + + // Establish connection + await client.GetAsync("/users/123"); + + // Wait longer than idle timeout but within heartbeat interval + await Task.Delay(TimeSpan.FromSeconds(45)); + + // Act - connection should still be alive + var response = await client.GetAsync("/users/456"); + + // Assert + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + } + + [Fact] + public async Task ConnectionPooling_ReusesConnections() + { + // Arrange + using var client = CreateAuthenticatedClient(); + var connectionCountBefore = GetActiveConnectionCount(); + + // Act - make many requests + var tasks = Enumerable.Range(0, 50) + .Select(_ => client.GetAsync("/products/1")); + + await Task.WhenAll(tasks); + + var connectionCountAfter = GetActiveConnectionCount(); + + // Assert - should use pooled connections, not 50 new ones + Assert.True(connectionCountAfter - connectionCountBefore < 10); + } + + private int GetActiveConnectionCount() + { + var metrics = GatewayHost.Services.GetRequiredService(); + return metrics.ActiveConnections; + } +} +``` + +## Performance Benchmarks + +### Benchmark Framework + +```csharp +// Tests/Benchmarks/StellaOps.Router.Benchmarks/RouterBenchmarks.cs +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Running; + +namespace StellaOps.Router.Benchmarks; + +[MemoryDiagnoser] +[ThreadingDiagnoser] +public class RouterBenchmarks +{ + private HttpClient _client = null!; + private IHost _gatewayHost = null!; + private IHost _microserviceHost = null!; + private string _validToken = null!; + + [GlobalSetup] + public async Task Setup() + { + _microserviceHost = await CreateMicroserviceHostAsync(); + await _microserviceHost.StartAsync(); + + _gatewayHost = await CreateGatewayHostAsync(); + await _gatewayHost.StartAsync(); + + _client = new HttpClient + { + BaseAddress = new Uri("http://localhost:5000") + }; + + _validToken = TestTokenGenerator.Generate(new Dictionary + { + ["sub"] = "bench-user", + ["user:read"] = true + }); + + _client.DefaultRequestHeaders.Authorization = + new AuthenticationHeaderValue("Bearer", _validToken); + + // Warmup + await _client.GetAsync("/users/1"); + } + + [GlobalCleanup] + public async Task Cleanup() + { + _client.Dispose(); + await _gatewayHost.StopAsync(); + await _microserviceHost.StopAsync(); + _gatewayHost.Dispose(); + _microserviceHost.Dispose(); + } + + [Benchmark(Baseline = true)] + public async Task SimpleGetRequest() + { + var response = await _client.GetAsync("/users/123"); + response.EnsureSuccessStatusCode(); + } + + [Benchmark] + public async Task GetRequestWithQueryParams() + { + var response = await _client.GetAsync("/users?page=1&size=20&search=test"); + response.EnsureSuccessStatusCode(); + } + + [Benchmark] + public async Task PostRequestWithSmallBody() + { + var content = new StringContent( + """{"name": "Test User", "email": "test@example.com"}""", + Encoding.UTF8, + "application/json"); + + var response = await _client.PostAsync("/users", content); + response.EnsureSuccessStatusCode(); + } + + [Benchmark] + public async Task PostRequestWithLargeBody() + { + var data = new string('x', 100_000); + var content = new StringContent( + $$"""{"data": "{{data}}"}""", + Encoding.UTF8, + "application/json"); + + var response = await _client.PostAsync("/users/data", content); + response.EnsureSuccessStatusCode(); + } + + [Benchmark] + [Arguments(10)] + [Arguments(50)] + [Arguments(100)] + public async Task ConcurrentRequests(int concurrency) + { + var tasks = Enumerable.Range(0, concurrency) + .Select(_ => _client.GetAsync("/users/123")); + + var responses = await Task.WhenAll(tasks); + + foreach (var response in responses) + { + response.EnsureSuccessStatusCode(); + } + } +} + +[MemoryDiagnoser] +public class JwtValidationBenchmarks +{ + private IJwtValidator _validator = null!; + private string _validToken = null!; + private string _tokenWithManyClaims = null!; + + [GlobalSetup] + public void Setup() + { + var options = Options.Create(new JwtValidationOptions + { + Issuer = "https://auth.example.com", + Audience = "stella-router", + SigningKey = TestKeys.SymmetricKey + }); + + _validator = new JwtValidator(options, new InMemoryKeyProvider(), + NullLogger.Instance); + + _validToken = TestTokenGenerator.Generate(new Dictionary + { + ["sub"] = "test-user" + }); + + _tokenWithManyClaims = TestTokenGenerator.Generate( + Enumerable.Range(0, 50) + .ToDictionary(i => $"claim_{i}", i => (object)$"value_{i}")); + } + + [Benchmark(Baseline = true)] + public async Task ValidateSimpleToken() + { + await _validator.ValidateAsync(_validToken); + } + + [Benchmark] + public async Task ValidateTokenWithManyClaims() + { + await _validator.ValidateAsync(_tokenWithManyClaims); + } +} + +[MemoryDiagnoser] +public class SerializationBenchmarks +{ + private readonly IPayloadSerializer _messagePackSerializer = new MessagePackPayloadSerializer(); + private readonly IPayloadSerializer _jsonSerializer = new JsonPayloadSerializer(); + private RequestPayload _smallPayload = null!; + private RequestPayload _largePayload = null!; + + [GlobalSetup] + public void Setup() + { + _smallPayload = new RequestPayload + { + Method = "GET", + Path = "/users/123", + Headers = new Dictionary + { + ["Content-Type"] = "application/json", + ["Authorization"] = "Bearer token" + }, + Body = Array.Empty() + }; + + _largePayload = new RequestPayload + { + Method = "POST", + Path = "/users", + Headers = new Dictionary + { + ["Content-Type"] = "application/json" + }, + Body = new byte[100_000] + }; + } + + [Benchmark] + public byte[] MessagePack_SerializeSmall() + { + return _messagePackSerializer.Serialize(_smallPayload); + } + + [Benchmark] + public byte[] Json_SerializeSmall() + { + return _jsonSerializer.Serialize(_smallPayload); + } + + [Benchmark] + public byte[] MessagePack_SerializeLarge() + { + return _messagePackSerializer.Serialize(_largePayload); + } + + [Benchmark] + public byte[] Json_SerializeLarge() + { + return _jsonSerializer.Serialize(_largePayload); + } +} +``` + +### Performance Regression Detection + +```csharp +// Tests/Benchmarks/StellaOps.Router.Benchmarks/RegressionDetector.cs +namespace StellaOps.Router.Benchmarks; + +public class RegressionDetector +{ + private readonly string _baselinePath; + private readonly double _regressionThreshold; + + public RegressionDetector(string baselinePath, double regressionThreshold = 0.10) + { + _baselinePath = baselinePath; + _regressionThreshold = regressionThreshold; + } + + public RegressionReport Compare(BenchmarkReport current) + { + var baseline = LoadBaseline(); + var regressions = new List(); + + foreach (var currentResult in current.Results) + { + if (baseline.TryGetValue(currentResult.Name, out var baselineResult)) + { + var percentChange = (currentResult.MeanNs - baselineResult.MeanNs) + / baselineResult.MeanNs; + + if (percentChange > _regressionThreshold) + { + regressions.Add(new BenchmarkRegression + { + BenchmarkName = currentResult.Name, + BaselineMeanNs = baselineResult.MeanNs, + CurrentMeanNs = currentResult.MeanNs, + PercentChange = percentChange * 100 + }); + } + } + } + + return new RegressionReport + { + Regressions = regressions, + HasRegressions = regressions.Count > 0 + }; + } + + private Dictionary LoadBaseline() + { + if (!File.Exists(_baselinePath)) + return new Dictionary(); + + var json = File.ReadAllText(_baselinePath); + return JsonSerializer.Deserialize>(json) + ?? new Dictionary(); + } + + public void SaveBaseline(BenchmarkReport report) + { + var baseline = report.Results.ToDictionary(r => r.Name); + var json = JsonSerializer.Serialize(baseline, new JsonSerializerOptions + { + WriteIndented = true + }); + File.WriteAllText(_baselinePath, json); + } +} + +public record BenchmarkResult(string Name, double MeanNs, double StdDevNs, long AllocatedBytes); +public record BenchmarkRegression +{ + public string BenchmarkName { get; init; } = ""; + public double BaselineMeanNs { get; init; } + public double CurrentMeanNs { get; init; } + public double PercentChange { get; init; } +} + +public record RegressionReport +{ + public List Regressions { get; init; } = new(); + public bool HasRegressions { get; init; } +} +``` + +## Chaos Testing + +### Chaos Test Framework + +```csharp +// Tests/Chaos/StellaOps.Router.Chaos.Tests/ChaosTestBase.cs +namespace StellaOps.Router.Chaos.Tests; + +public abstract class ChaosTestBase : IntegrationTestBase +{ + protected IChaosMonkey ChaosMonkey { get; private set; } = null!; + + public override async Task InitializeAsync() + { + await base.InitializeAsync(); + + ChaosMonkey = GatewayHost.Services.GetRequiredService(); + } + + protected override void ConfigureGateway(WebApplicationBuilder builder) + { + builder.Services.AddStellaChaos(options => + { + options.Enabled = true; + }); + } +} + +public interface IChaosMonkey +{ + void InjectLatency(string target, TimeSpan delay, double probability = 1.0); + void InjectError(string target, Exception error, double probability = 1.0); + void InjectConnectionDrop(string target, double probability = 1.0); + void DisableTarget(string target); + void EnableTarget(string target); + void Reset(); +} + +public class ChaosMonkey : IChaosMonkey +{ + private readonly ConcurrentDictionary _rules = new(); + + public void InjectLatency(string target, TimeSpan delay, double probability = 1.0) + { + _rules[target] = new ChaosRule + { + Type = ChaosType.Latency, + Delay = delay, + Probability = probability + }; + } + + public void InjectError(string target, Exception error, double probability = 1.0) + { + _rules[target] = new ChaosRule + { + Type = ChaosType.Error, + Error = error, + Probability = probability + }; + } + + public void InjectConnectionDrop(string target, double probability = 1.0) + { + _rules[target] = new ChaosRule + { + Type = ChaosType.ConnectionDrop, + Probability = probability + }; + } + + public void DisableTarget(string target) + { + _rules[target] = new ChaosRule + { + Type = ChaosType.Disabled, + Probability = 1.0 + }; + } + + public void EnableTarget(string target) + { + _rules.TryRemove(target, out _); + } + + public void Reset() + { + _rules.Clear(); + } + + public async Task ApplyAsync(string target, CancellationToken cancellationToken) + { + if (!_rules.TryGetValue(target, out var rule)) + return; + + if (Random.Shared.NextDouble() > rule.Probability) + return; + + switch (rule.Type) + { + case ChaosType.Latency: + await Task.Delay(rule.Delay, cancellationToken); + break; + case ChaosType.Error: + throw rule.Error!; + case ChaosType.ConnectionDrop: + throw new IOException("Connection forcibly closed by chaos monkey"); + case ChaosType.Disabled: + throw new ServiceUnavailableException($"Target {target} is disabled"); + } + } +} + +public enum ChaosType { Latency, Error, ConnectionDrop, Disabled } + +public class ChaosRule +{ + public ChaosType Type { get; init; } + public TimeSpan Delay { get; init; } + public Exception? Error { get; init; } + public double Probability { get; init; } +} +``` + +### Chaos Test Scenarios + +```csharp +// Tests/Chaos/StellaOps.Router.Chaos.Tests/ResilienceChaosTests.cs +namespace StellaOps.Router.Chaos.Tests; + +public class ResilienceChaosTests : ChaosTestBase +{ + [Fact] + public async Task CircuitBreaker_OpensOnRepeatedFailures() + { + // Arrange + using var client = CreateAuthenticatedClient(); + ChaosMonkey.InjectError("users-service", new Exception("Service unavailable")); + + // Act - trigger circuit breaker + var responses = new List(); + for (int i = 0; i < 10; i++) + { + responses.Add(await client.GetAsync("/users/123")); + } + + // Assert - circuit should be open now + var lastResponse = responses.Last(); + Assert.Equal(HttpStatusCode.ServiceUnavailable, lastResponse.StatusCode); + + // Verify circuit breaker is open by checking fast fail + var sw = Stopwatch.StartNew(); + var fastFailResponse = await client.GetAsync("/users/456"); + sw.Stop(); + + Assert.True(sw.ElapsedMilliseconds < 100); // Should fail fast + Assert.Equal(HttpStatusCode.ServiceUnavailable, fastFailResponse.StatusCode); + } + + [Fact] + public async Task CircuitBreaker_HalfOpensAndRecovers() + { + // Arrange + using var client = CreateAuthenticatedClient(); + ChaosMonkey.InjectError("users-service", new Exception("Service unavailable")); + + // Trip the circuit breaker + for (int i = 0; i < 10; i++) + { + await client.GetAsync("/users/123"); + } + + // Wait for half-open state + await Task.Delay(TimeSpan.FromSeconds(31)); + + // Remove chaos + ChaosMonkey.Reset(); + + // Act - next request should test the circuit + var response = await client.GetAsync("/users/789"); + + // Assert - should succeed and close circuit + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + } + + [Fact] + public async Task Retry_SucceedsAfterTransientFailure() + { + // Arrange + using var client = CreateAuthenticatedClient(); + var failureCount = 0; + + // Inject 2 failures then succeed + ChaosMonkey.InjectError("users-service", + new TransientException("Temporary failure"), + probability: 0.66); // ~66% chance of failure (2/3 requests fail) + + // Act + var response = await client.GetAsync("/users/123"); + + // Assert - should eventually succeed due to retry + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + } + + [Fact] + public async Task HighLatency_TriggersTimeout() + { + // Arrange + using var client = CreateAuthenticatedClient(); + ChaosMonkey.InjectLatency("users-service", TimeSpan.FromSeconds(35)); + + // Act + var sw = Stopwatch.StartNew(); + var response = await client.GetAsync("/users/123"); + sw.Stop(); + + // Assert + Assert.Equal(HttpStatusCode.GatewayTimeout, response.StatusCode); + Assert.True(sw.ElapsedMilliseconds >= 30000 && sw.ElapsedMilliseconds < 35000); + } + + [Fact] + public async Task PartialOutage_RoutesToHealthyInstances() + { + // This test requires multiple microservice instances + // Arrange + ChaosMonkey.DisableTarget("users-service-1"); + // users-service-2 and users-service-3 remain healthy + + using var client = CreateAuthenticatedClient(); + + // Act + var tasks = Enumerable.Range(0, 30) + .Select(_ => client.GetAsync("/users/123")); + + var responses = await Task.WhenAll(tasks); + + // Assert - all should succeed via healthy instances + Assert.All(responses, r => Assert.Equal(HttpStatusCode.OK, r.StatusCode)); + } + + [Fact] + public async Task GracefulDegradation_WhenAuthorityUnavailable() + { + // Arrange + ChaosMonkey.DisableTarget("authority-service"); + + // Use endpoint that allows degraded mode + using var client = CreateAuthenticatedClient(); + + // Act + var response = await client.GetAsync("/products/123"); // No auth required + + // Assert - should succeed with default/limited functionality + Assert.Equal(HttpStatusCode.OK, response.StatusCode); + + // But authenticated endpoint should fail gracefully + var authResponse = await client.GetAsync("/users/123"); + Assert.Equal(HttpStatusCode.ServiceUnavailable, authResponse.StatusCode); + } +} +``` + +## CI/CD Pipeline Configuration + +### GitHub Actions Workflow + +```yaml +# .github/workflows/router-ci.yml +name: Router CI + +on: + push: + branches: [main] + paths: + - 'src/Router/**' + - 'tests/Router/**' + pull_request: + branches: [main] + paths: + - 'src/Router/**' + - 'tests/Router/**' + +env: + DOTNET_VERSION: '10.0.x' + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Restore dependencies + run: dotnet restore src/Router/StellaOps.Router.sln + + - name: Build + run: dotnet build src/Router/StellaOps.Router.sln --configuration Release --no-restore + + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: router-build + path: src/Router/**/bin/Release/ + + unit-tests: + needs: build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Run unit tests + run: | + dotnet test src/Router/StellaOps.Router.sln \ + --configuration Release \ + --filter "Category!=Integration&Category!=Chaos&Category!=Benchmark" \ + --logger "trx;LogFileName=unit-tests.trx" \ + --collect:"XPlat Code Coverage" + + - name: Upload test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: unit-test-results + path: '**/TestResults/**/*.trx' + + - name: Upload coverage + uses: codecov/codecov-action@v4 + with: + files: '**/coverage.cobertura.xml' + flags: unit-tests + + integration-tests: + needs: build + runs-on: ubuntu-latest + services: + redis: + image: redis:7 + ports: + - 6379:6379 + steps: + - uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Run integration tests + run: | + dotnet test src/Router/StellaOps.Router.sln \ + --configuration Release \ + --filter "Category=Integration" \ + --logger "trx;LogFileName=integration-tests.trx" \ + --collect:"XPlat Code Coverage" + env: + REDIS_CONNECTION: localhost:6379 + + - name: Upload test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: integration-test-results + path: '**/TestResults/**/*.trx' + + chaos-tests: + needs: integration-tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Run chaos tests + run: | + dotnet test tests/Router/StellaOps.Router.Chaos.Tests \ + --configuration Release \ + --logger "trx;LogFileName=chaos-tests.trx" + + - name: Upload test results + uses: actions/upload-artifact@v4 + if: always() + with: + name: chaos-test-results + path: '**/TestResults/**/*.trx' + + benchmarks: + needs: build + runs-on: ubuntu-latest + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + steps: + - uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Download baseline + uses: actions/cache@v4 + with: + path: benchmarks/baseline.json + key: benchmark-baseline-${{ github.sha }} + restore-keys: | + benchmark-baseline- + + - name: Run benchmarks + run: | + dotnet run --project tests/Benchmarks/StellaOps.Router.Benchmarks \ + --configuration Release \ + -- --exporters json + + - name: Check for regressions + run: | + dotnet run --project tests/Benchmarks/StellaOps.Router.Benchmarks.Analyzer \ + -- check \ + --baseline benchmarks/baseline.json \ + --current BenchmarkDotNet.Artifacts/results/*.json \ + --threshold 0.10 + + - name: Update baseline + if: success() + run: | + cp BenchmarkDotNet.Artifacts/results/*.json benchmarks/baseline.json + + - name: Upload benchmark results + uses: actions/upload-artifact@v4 + with: + name: benchmark-results + path: BenchmarkDotNet.Artifacts/ + + security-scan: + needs: build + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run security scan + uses: snyk/actions/dotnet@master + with: + args: --file=src/Router/StellaOps.Router.sln + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + + publish: + needs: [unit-tests, integration-tests, chaos-tests, security-scan] + runs-on: ubuntu-latest + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + steps: + - uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Pack NuGet packages + run: | + dotnet pack src/Router/StellaOps.Router.Core \ + --configuration Release \ + --output ./packages + + dotnet pack src/Router/StellaOps.Router.Gateway \ + --configuration Release \ + --output ./packages + + dotnet pack src/Router/StellaOps.Router.Microservice \ + --configuration Release \ + --output ./packages + + - name: Push to NuGet + run: | + dotnet nuget push ./packages/*.nupkg \ + --source https://api.nuget.org/v3/index.json \ + --api-key ${{ secrets.NUGET_API_KEY }} +``` + +### Gitea Actions Workflow + +```yaml +# .gitea/workflows/router-ci.yml +name: Router CI + +on: + push: + branches: [main] + paths: + - 'src/Router/**' + - 'tests/Router/**' + pull_request: + branches: [main] + +jobs: + build-and-test: + runs-on: ubuntu-latest + container: + image: mcr.microsoft.com/dotnet/sdk:10.0 + steps: + - uses: actions/checkout@v4 + + - name: Restore + run: dotnet restore src/Router/StellaOps.Router.sln + + - name: Build + run: dotnet build src/Router/StellaOps.Router.sln -c Release --no-restore + + - name: Test + run: | + dotnet test src/Router/StellaOps.Router.sln \ + -c Release \ + --no-build \ + --logger "trx" \ + --collect:"XPlat Code Coverage" + + - name: Publish coverage + run: | + dotnet tool install -g dotnet-reportgenerator-globaltool + reportgenerator \ + -reports:**/coverage.cobertura.xml \ + -targetdir:coverage \ + -reporttypes:Html + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: coverage/ +``` + +## Test Data Generation + +```csharp +// Tests/Common/StellaOps.Router.Testing/TestDataGenerators.cs +namespace StellaOps.Router.Testing; + +public static class TestTokenGenerator +{ + private static readonly byte[] Key = Encoding.UTF8.GetBytes( + "this-is-a-test-key-for-jwt-tokens-32-bytes!"); + + public static string Generate(Dictionary? claims = null) + { + var tokenHandler = new JwtSecurityTokenHandler(); + var tokenDescriptor = new SecurityTokenDescriptor + { + Issuer = "https://test.auth.local", + Audience = "stella-router-test", + Expires = DateTime.UtcNow.AddHours(1), + SigningCredentials = new SigningCredentials( + new SymmetricSecurityKey(Key), + SecurityAlgorithms.HmacSha256Signature), + Claims = claims ?? new Dictionary() + }; + + var token = tokenHandler.CreateToken(tokenDescriptor); + return tokenHandler.WriteToken(token); + } + + public static string GenerateExpired() + { + var tokenHandler = new JwtSecurityTokenHandler(); + var tokenDescriptor = new SecurityTokenDescriptor + { + Issuer = "https://test.auth.local", + Audience = "stella-router-test", + Expires = DateTime.UtcNow.AddHours(-1), // Expired + SigningCredentials = new SigningCredentials( + new SymmetricSecurityKey(Key), + SecurityAlgorithms.HmacSha256Signature) + }; + + var token = tokenHandler.CreateToken(tokenDescriptor); + return tokenHandler.WriteToken(token); + } +} + +public class TestUserFactory +{ + private int _counter = 0; + + public TestUserDto Create(Action? configure = null) + { + var user = new TestUserDto + { + Id = Guid.NewGuid().ToString(), + Name = $"Test User {Interlocked.Increment(ref _counter)}", + Email = $"user{_counter}@test.local", + CreatedAt = DateTime.UtcNow + }; + + configure?.Invoke(user); + return user; + } + + public IEnumerable CreateMany(int count) + { + return Enumerable.Range(0, count).Select(_ => Create()); + } +} +``` + +## YAML Configuration + +```yaml +# config/test-config.yaml +testing: + integration: + gatewayPort: 15000 + microservicePort: 19000 + timeout: 30s + retryAttempts: 3 + + chaos: + enabled: true + defaultLatency: 100ms + defaultFailureProbability: 0.1 + circuitBreakerThreshold: 5 + circuitBreakerTimeout: 30s + + benchmarks: + warmupIterations: 3 + targetIterations: 10 + regressionThreshold: 0.10 + + coverage: + minimumCoverage: 80 + excludePatterns: + - "**/Generated/**" + - "**/Migrations/**" +``` + +## Deliverables + +| Artifact | Path | +|----------|------| +| Integration Test Base | `tests/Router/StellaOps.Router.Integration.Tests/` | +| Chaos Test Framework | `tests/Router/StellaOps.Router.Chaos.Tests/` | +| Benchmarks | `tests/Router/StellaOps.Router.Benchmarks/` | +| CI Workflows | `.gitea/workflows/router-ci.yml` | +| Test Utilities | `tests/Router/StellaOps.Router.Testing/` | + +## Implementation Complete + +Congratulations! You have completed all 29 steps of the Stella Router implementation plan. The router now includes: + +**Phase 1-2: Core Infrastructure** +- Route configuration and matching +- Route table with concurrent updates +- Request pipeline and middleware +- JWT authentication with per-endpoint keys +- Claim hydration system +- Tiered rate limiting + +**Phase 3-4: Transport Layer** +- Request/response serialization (MessagePack) +- Frame-based protocol +- InMemory, TCP, and TLS transports +- Connection pooling and management + +**Phase 5: Route Handlers** +- Microservice handler +- GraphQL handler +- S3/Storage handler +- Reverse proxy handler + +**Phase 6: Microservice SDK** +- Host builder with fluent API +- Endpoint discovery and registration +- Request/response context +- Dual exposure mode + +**Phase 7: Observability & Resilience** +- Structured logging with correlation +- OpenTelemetry tracing +- Prometheus metrics +- Health checks +- Circuit breaker and retry policies +- Configuration hot-reload + +**Phase 8: Quality & Deployment** +- Reference implementation and examples +- Migration tooling +- Agent guidelines +- Integration testing and CI/CD + +[← Back to Specs](Specs.md) | [Start Implementation →](01-Step.md) diff --git a/docs/router/SPRINT_7000_0001_0001_router_skeleton.md b/docs/router/SPRINT_7000_0001_0001_router_skeleton.md index 44a7a25c4..829e8ee13 100644 --- a/docs/router/SPRINT_7000_0001_0001_router_skeleton.md +++ b/docs/router/SPRINT_7000_0001_0001_router_skeleton.md @@ -1,41 +1,121 @@ -# Sprint 7000·0001·0001 · Router Skeleton +# Sprint 7000-0001-0001 · Router Foundation · Project Skeleton ## Topic & Scope -- Stand up the dedicated StellaOps Router repo skeleton under `docs/router` as per `specs.md` / `01-Step.md`. -- Produce the empty solution structure, projects, references, and placeholder docs ready for future transport/SDK work. -- Enforce .NET 10 (`net10.0`) across all new projects; ignore prior net8 defaults. -- **Working directory:** `docs/router`. + +Phase 1 of Router implementation: establish the project skeleton with all required directories, solution files, and empty stubs. This sprint creates the structural foundation that all subsequent router sprints depend on. + +**Goal:** Get a clean, compiling skeleton in place that matches the spec and folder conventions, with zero real logic and minimal dependencies. + +**Working directories:** +- `src/__Libraries/StellaOps.Router.Common/` +- `src/__Libraries/StellaOps.Router.Config/` +- `src/__Libraries/StellaOps.Microservice/` +- `src/__Libraries/StellaOps.Microservice.SourceGen/` +- `src/Gateway/StellaOps.Gateway.WebService/` +- `tests/StellaOps.Router.Common.Tests/` +- `tests/StellaOps.Gateway.WebService.Tests/` +- `tests/StellaOps.Microservice.Tests/` + +**Isolation strategy:** Router uses a separate `StellaOps.Router.sln` solution file to enable fully independent building and testing. This prevents any impact on the main `StellaOps.sln` until the migration phase. ## Dependencies & Concurrency -- Depends on `docs/router/specs.md` remaining the authoritative requirements source. -- No upstream sprint blockers; this spin-off is self-contained. -- Can run in parallel with other repo work because it writes only under `docs/router`. + +- **Upstream:** None. This is the first router sprint. +- **Downstream:** All other router sprints depend on this skeleton. +- **Parallel work:** None possible until this sprint completes. +- **Cross-module impact:** None. All work is in new directories. ## Documentation Prerequisites -- `docs/router/specs.md` -- `docs/router/implplan.md` -- `docs/router/01-Step.md` + +- `docs/router/specs.md` (canonical specification - READ FIRST) +- `docs/router/implplan.md` (implementation plan overview) +- `docs/router/01-Step.md` (detailed task breakdown for this sprint) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Invariants (from specs.md) + +Before coding, acknowledge these non-negotiables: +- Method + Path identity for endpoints +- Strict semver for versions +- Region from `GatewayNodeConfig.Region` (no host/header derivation) +- No HTTP transport for microservice-to-router communications +- Single connection carrying HELLO + HEARTBEAT + REQUEST/RESPONSE + CANCEL +- Router treats body as opaque bytes/streams +- `RequiringClaims` replaces any form of `AllowedRoles` ## Delivery Tracker -| # | Task ID | Status | Key dependency / next step | Owners | Task Definition | -| --- | --- | --- | --- | --- | --- | -| 1 | ROUTER-SKEL-SETUP | TODO | Read specs + step docs | Skeleton Agent | Create repo folders (`src/`, `src/__Libraries/`, `tests/`, `docs/router`) & add `README.md` pointer. | -| 2 | ROUTER-SKEL-SOLUTION | TODO | Task 1 | Skeleton Agent | Generate `StellaOps.Router.sln`, add Gateway + library + test projects targeting `net10.0`. | -| 3 | ROUTER-SKEL-REFS | TODO | Task 2 | Skeleton Agent | Wire project references per plan (Gateway→Common+Config, etc.). | -| 4 | ROUTER-SKEL-BUILDPROPS | TODO | Task 2 | Infra Agent | Add repo-level `Directory.Build.props` pinning `net10.0`, nullable, implicit usings. | -| 5 | ROUTER-SKEL-STUBS | TODO | Tasks 2-4 | Common/Microservice Agents | Add placeholder types/extension methods per `01-Step.md` (no logic). | -| 6 | ROUTER-SKEL-TESTS | TODO | Task 5 | QA Agent | Create dummy `[Fact]` tests in each test project so `dotnet test` passes. | -| 7 | ROUTER-SKEL-CI | TODO | Tasks 2-6 | Infra Agent | Configure CI pipeline running `dotnet restore/build/test` on solution. | + +| # | Task ID | Status | Description | Working Directory | +|---|---------|--------|-------------|-------------------| +| 1 | SKEL-001 | TODO | Create directory structure (`src/__Libraries/`, `src/Gateway/`, `tests/`) | repo root | +| 2 | SKEL-002 | TODO | Create `StellaOps.Router.sln` solution file at repo root | repo root | +| 3 | SKEL-003 | TODO | Create `StellaOps.Router.Common` classlib project | `src/__Libraries/StellaOps.Router.Common/` | +| 4 | SKEL-004 | TODO | Create `StellaOps.Router.Config` classlib project | `src/__Libraries/StellaOps.Router.Config/` | +| 5 | SKEL-005 | TODO | Create `StellaOps.Microservice` classlib project | `src/__Libraries/StellaOps.Microservice/` | +| 6 | SKEL-006 | TODO | Create `StellaOps.Microservice.SourceGen` classlib stub | `src/__Libraries/StellaOps.Microservice.SourceGen/` | +| 7 | SKEL-007 | TODO | Create `StellaOps.Gateway.WebService` webapi project | `src/Gateway/StellaOps.Gateway.WebService/` | +| 8 | SKEL-008 | TODO | Create xunit test projects for Common, Gateway, Microservice | `tests/` | +| 9 | SKEL-009 | TODO | Wire project references per dependency graph | all projects | +| 10 | SKEL-010 | TODO | Add `Directory.Build.props` with common settings (net10.0, nullable, LangVersion) | repo root (router scope) | +| 11 | SKEL-011 | TODO | Stub empty placeholder types in each project (no logic) | all projects | +| 12 | SKEL-012 | TODO | Add dummy smoke tests so CI passes | `tests/` | +| 13 | SKEL-013 | TODO | Verify `dotnet build StellaOps.Router.sln` succeeds | repo root | +| 14 | SKEL-014 | TODO | Verify `dotnet test StellaOps.Router.sln` passes | repo root | +| 15 | SKEL-015 | TODO | Update `docs/router/README.md` with solution overview | `docs/router/` | + +## Project Reference Graph + +``` +StellaOps.Gateway.WebService + ├── StellaOps.Router.Common + └── StellaOps.Router.Config + └── StellaOps.Router.Common + +StellaOps.Microservice + └── StellaOps.Router.Common + +StellaOps.Microservice.SourceGen + (no references yet - stub only) + +Test projects reference their corresponding main projects. +``` + +## Stub Types to Create + +### StellaOps.Router.Common +- Enums: `TransportType`, `FrameType`, `InstanceHealthStatus` +- Models: `ClaimRequirement`, `EndpointDescriptor`, `InstanceDescriptor`, `ConnectionState`, `Frame` +- Interfaces: `IGlobalRoutingState`, `IRoutingPlugin`, `ITransportServer`, `ITransportClient` + +### StellaOps.Router.Config +- `RouterConfig`, `ServiceConfig`, `PayloadLimits` (property-only classes) + +### StellaOps.Microservice +- `StellaMicroserviceOptions`, `RouterEndpointConfig` +- `ServiceCollectionExtensions.AddStellaMicroservice()` (empty body) + +### StellaOps.Gateway.WebService +- `GatewayNodeConfig` with Region, NodeId, Environment +- Minimal `Program.cs` that builds and runs (no logic) + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] `dotnet build StellaOps.Router.sln` succeeds with zero warnings +2. [ ] `dotnet test StellaOps.Router.sln` passes (even with dummy tests) +3. [ ] All project names match spec: `StellaOps.Gateway.WebService`, `StellaOps.Router.Common`, `StellaOps.Router.Config`, `StellaOps.Microservice` +4. [ ] No real business logic exists (no transport logic, no routing decisions, no YAML parsing) +5. [ ] `docs/router/README.md` exists and points to `specs.md` ## Execution Log + | Date (UTC) | Update | Owner | -| --- | --- | --- | -| 2025-12-02 | Created sprint skeleton per router spin-off instructions. | Planning | +|------------|--------|-------| +| | | | ## Decisions & Risks -- Use .NET 10 baseline even though other modules still target net8; future agents must not downgrade frameworks. -- Scope intentionally limited to `docs/router` to avoid cross-repo conflicts; any shared assets must be duplicated or referenced via documentation until later alignment. -- Risk: missing AGENTS.md for this folder—future sprint should establish one if work extends beyond skeleton. -## Next Checkpoints -- 2025-12-04: Verify solution + CI scaffold committed and passing. +- Router uses a separate solution file (`StellaOps.Router.sln`) to enable isolated development. This will be merged into main `StellaOps.sln` during the migration phase. +- Target framework is `net10.0` to match the rest of StellaOps. +- `StellaOps.Microservice.SourceGen` is created as a plain classlib for now; it will be converted to a Source Generator project in a later sprint. diff --git a/docs/router/SPRINT_7000_0001_0002_router_common.md b/docs/router/SPRINT_7000_0001_0002_router_common.md new file mode 100644 index 000000000..87eacfa5a --- /dev/null +++ b/docs/router/SPRINT_7000_0001_0002_router_common.md @@ -0,0 +1,157 @@ +# Sprint 7000-0001-0002 · Router Foundation · Common Library Models + +## Topic & Scope + +Phase 2 of Router implementation: implement the shared core model in `StellaOps.Router.Common`. This sprint makes Common the single, stable contract layer that Gateway, Microservice SDK, and transports all depend on. + +**Goal:** Lock down the domain vocabulary. Implement all data types and interfaces with **no behavior** - just shapes that match `specs.md`. + +**Working directory:** `src/__Libraries/StellaOps.Router.Common/` + +**Key principle:** Changes to `StellaOps.Router.Common` after this sprint must be rare and reviewed. Everything else depends on it. + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0001_0001 (skeleton must be complete) +- **Downstream:** All other router sprints depend on these contracts +- **Parallel work:** None possible until this sprint completes +- **Cross-module impact:** None. All work is in `StellaOps.Router.Common` + +## Documentation Prerequisites + +- `docs/router/specs.md` (canonical specification - READ FIRST, sections 2-13) +- `docs/router/02-Step.md` (detailed task breakdown for this sprint) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | CMN-001 | TODO | Create `/Enums/TransportType.cs` with `[Udp, Tcp, Certificate, RabbitMq]` | No HTTP type per spec | +| 2 | CMN-002 | TODO | Create `/Enums/FrameType.cs` with Hello, Heartbeat, EndpointsUpdate, Request, RequestStreamData, Response, ResponseStreamData, Cancel | | +| 3 | CMN-003 | TODO | Create `/Enums/InstanceHealthStatus.cs` with Unknown, Healthy, Degraded, Draining, Unhealthy | | +| 4 | CMN-010 | TODO | Create `/Models/ClaimRequirement.cs` with Type (required) and Value (optional) | Replaces AllowedRoles | +| 5 | CMN-011 | TODO | Create `/Models/EndpointDescriptor.cs` with ServiceName, Version, Method, Path, DefaultTimeout, SupportsStreaming, RequiringClaims | | +| 6 | CMN-012 | TODO | Create `/Models/InstanceDescriptor.cs` with InstanceId, ServiceName, Version, Region | | +| 7 | CMN-013 | TODO | Create `/Models/ConnectionState.cs` with ConnectionId, Instance, Status, LastHeartbeatUtc, AveragePingMs, TransportType, Endpoints | | +| 8 | CMN-014 | TODO | Create `/Models/RoutingContext.cs` matching spec (neutral context, no ASP.NET dependency) | | +| 9 | CMN-015 | TODO | Create `/Models/RoutingDecision.cs` with Endpoint, Connection, TransportType, EffectiveTimeout | | +| 10 | CMN-016 | TODO | Create `/Models/PayloadLimits.cs` with MaxRequestBytesPerCall, MaxRequestBytesPerConnection, MaxAggregateInflightBytes | | +| 11 | CMN-020 | TODO | Create `/Models/Frame.cs` with Type, CorrelationId, Payload | | +| 12 | CMN-021 | TODO | Create `/Models/HelloPayload.cs` with InstanceDescriptor and list of EndpointDescriptors | | +| 13 | CMN-022 | TODO | Create `/Models/HeartbeatPayload.cs` with InstanceId, Status, metrics | | +| 14 | CMN-023 | TODO | Create `/Models/CancelPayload.cs` with Reason | | +| 15 | CMN-030 | TODO | Create `/Abstractions/IGlobalRoutingState.cs` interface | | +| 16 | CMN-031 | TODO | Create `/Abstractions/IRoutingPlugin.cs` interface | | +| 17 | CMN-032 | TODO | Create `/Abstractions/ITransportServer.cs` interface | | +| 18 | CMN-033 | TODO | Create `/Abstractions/ITransportClient.cs` interface | | +| 19 | CMN-034 | TODO | Create `/Abstractions/IRegionProvider.cs` interface (optional, if spec requires) | | +| 20 | CMN-040 | TODO | Write shape tests for EndpointDescriptor, ConnectionState | | +| 21 | CMN-041 | TODO | Write enum completeness tests for FrameType | | +| 22 | CMN-042 | TODO | Verify Common compiles with zero warnings (nullable enabled) | | +| 23 | CMN-043 | TODO | Verify Common only references BCL (no ASP.NET, no serializers) | | + +## File Layout + +``` +/src/__Libraries/StellaOps.Router.Common/ + /Enums/ + TransportType.cs + FrameType.cs + InstanceHealthStatus.cs + /Models/ + ClaimRequirement.cs + EndpointDescriptor.cs + InstanceDescriptor.cs + ConnectionState.cs + RoutingContext.cs + RoutingDecision.cs + PayloadLimits.cs + Frame.cs + HelloPayload.cs + HeartbeatPayload.cs + CancelPayload.cs + /Abstractions/ + IGlobalRoutingState.cs + IRoutingPlugin.cs + ITransportClient.cs + ITransportServer.cs + IRegionProvider.cs +``` + +## Interface Signatures (from specs.md) + +### IGlobalRoutingState +```csharp +public interface IGlobalRoutingState +{ + EndpointDescriptor? ResolveEndpoint(string method, string path); + IReadOnlyList GetConnectionsFor( + string serviceName, string version, string method, string path); +} +``` + +### IRoutingPlugin +```csharp +public interface IRoutingPlugin +{ + Task ChooseInstanceAsync( + RoutingContext context, CancellationToken cancellationToken); +} +``` + +### ITransportServer +```csharp +public interface ITransportServer +{ + Task StartAsync(CancellationToken cancellationToken); + Task StopAsync(CancellationToken cancellationToken); +} +``` + +### ITransportClient +```csharp +public interface ITransportClient +{ + Task SendRequestAsync( + ConnectionState connection, Frame requestFrame, + TimeSpan timeout, CancellationToken cancellationToken); + Task SendCancelAsync( + ConnectionState connection, Guid correlationId, string? reason = null); + Task SendStreamingAsync( + ConnectionState connection, Frame requestHeader, Stream requestBody, + Func readResponseBody, PayloadLimits limits, + CancellationToken cancellationToken); +} +``` + +## Design Constraints + +1. **No behavior:** Only shapes - no LINQ-heavy methods, no routing algorithms, no network code +2. **No serialization:** No JSON/MessagePack references; Common only defines shapes +3. **Immutability preferred:** Use `init` properties for descriptors; `ConnectionState` health fields may be mutable +4. **BCL only:** No ASP.NET or third-party package dependencies +5. **Nullable enabled:** All code must compile with zero nullable warnings + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] All types from `specs.md` Common section exist with matching names and properties +2. [ ] Common compiles with zero warnings +3. [ ] Common only references BCL (verify no package references in .csproj) +4. [ ] No behavior/logic in any type (pure DTOs and interfaces) +5. [ ] `StellaOps.Router.Common.Tests` runs and passes +6. [ ] `docs/router/specs.md` is updated if any discrepancy found (or code matches spec) + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- `RoutingContext` uses a neutral model (not ASP.NET `HttpContext`) to keep Common free of web dependencies. Gateway will adapt from `HttpContext` to this neutral model. +- `ConnectionState.Endpoints` uses `(string Method, string Path)` tuple as key for dictionary lookups. +- Frame payloads are `byte[]` - serialization happens at the transport layer, not in Common. diff --git a/docs/router/SPRINT_7000_0002_0001_inmemory_transport.md b/docs/router/SPRINT_7000_0002_0001_inmemory_transport.md new file mode 100644 index 000000000..be25880db --- /dev/null +++ b/docs/router/SPRINT_7000_0002_0001_inmemory_transport.md @@ -0,0 +1,121 @@ +# Sprint 7000-0002-0001 · Router Transport · InMemory Plugin + +## Topic & Scope + +Build a fake "in-memory" transport plugin for development and testing. This transport proves the HELLO/HEARTBEAT/REQUEST/RESPONSE/CANCEL semantics and routing logic **without** dealing with sockets and RabbitMQ yet. + +**Goal:** Enable unit and integration testing of the router and SDK by providing an in-process transport where frames are passed via channels/queues in memory. + +**Working directory:** `src/__Libraries/StellaOps.Router.Transport.InMemory/` + +**Key principle:** This plugin will never ship to production; it's only for dev tests and CI. It must fully implement all transport abstractions so that switching to real transports later requires zero changes to Gateway or Microservice SDK code. + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0001_0002 (Common models must be complete) +- **Downstream:** SDK and Gateway sprints depend on this for testing +- **Parallel work:** Can run in parallel with CMN-040/041/042/043 test tasks if Common models are done +- **Cross-module impact:** None. Creates new directory only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (sections 5, 10 - Transport and Cancellation requirements) +- `docs/router/03-Step.md` (detailed task breakdown) +- `docs/router/implplan.md` (phase 3 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | MEM-001 | TODO | Create `StellaOps.Router.Transport.InMemory` classlib project | Add to StellaOps.Router.sln | +| 2 | MEM-002 | TODO | Add project reference to `StellaOps.Router.Common` | | +| 3 | MEM-010 | TODO | Implement `InMemoryTransportServer` : `ITransportServer` | Gateway side | +| 4 | MEM-011 | TODO | Implement `InMemoryTransportClient` : `ITransportClient` | Microservice side | +| 5 | MEM-012 | TODO | Create shared `InMemoryConnectionRegistry` (concurrent dictionary keyed by ConnectionId) | Thread-safe | +| 6 | MEM-013 | TODO | Create `InMemoryChannel` for bidirectional frame passing | Use System.Threading.Channels | +| 7 | MEM-020 | TODO | Implement HELLO frame handling (client → server) | | +| 8 | MEM-021 | TODO | Implement HEARTBEAT frame handling (client → server) | | +| 9 | MEM-022 | TODO | Implement REQUEST frame handling (server → client) | | +| 10 | MEM-023 | TODO | Implement RESPONSE frame handling (client → server) | | +| 11 | MEM-024 | TODO | Implement CANCEL frame handling (bidirectional) | | +| 12 | MEM-025 | TODO | Implement REQUEST_STREAM_DATA / RESPONSE_STREAM_DATA frame handling | For streaming support | +| 13 | MEM-030 | TODO | Create `InMemoryTransportOptions` for configuration | Timeouts, buffer sizes | +| 14 | MEM-031 | TODO | Create DI registration extension `AddInMemoryTransport()` | | +| 15 | MEM-040 | TODO | Write integration tests for HELLO/HEARTBEAT flow | | +| 16 | MEM-041 | TODO | Write integration tests for REQUEST/RESPONSE flow | | +| 17 | MEM-042 | TODO | Write integration tests for CANCEL flow | | +| 18 | MEM-043 | TODO | Write integration tests for streaming flow | | +| 19 | MEM-050 | TODO | Create test project `StellaOps.Router.Transport.InMemory.Tests` | | + +## Architecture + +``` +┌──────────────────────┐ InMemoryConnectionRegistry ┌──────────────────────┐ +│ Gateway │ (ConcurrentDictionary) ────►│ (InMemoryTransport │ +│ Server) │ │ Client) │ +└──────────────────────┘ └──────────────────────┘ + │ │ + │ Channel ToMicroservice ─────────────────────────────────────►│ + │◄─────────────────────────────────────────────── Channel ToGateway + │ │ +``` + +## InMemoryChannel Design + +```csharp +internal sealed class InMemoryChannel +{ + public string ConnectionId { get; } + public Channel ToMicroservice { get; } // Gateway writes, SDK reads + public Channel ToGateway { get; } // SDK writes, Gateway reads + public InstanceDescriptor? Instance { get; set; } + public CancellationTokenSource LifetimeToken { get; } +} +``` + +## Frame Flow Examples + +### HELLO Flow +1. Microservice SDK calls `InMemoryTransportClient.ConnectAsync()` +2. Client creates `InMemoryChannel`, registers in `InMemoryConnectionRegistry` +3. Client sends HELLO frame via `ToGateway` channel +4. Server reads from `ToGateway`, processes HELLO, updates `ConnectionState` + +### REQUEST/RESPONSE Flow +1. Gateway receives HTTP request +2. Gateway sends REQUEST frame via `ToMicroservice` channel +3. SDK reads from `ToMicroservice`, invokes handler +4. SDK sends RESPONSE frame via `ToGateway` channel +5. Gateway reads from `ToGateway`, returns HTTP response + +### CANCEL Flow +1. HTTP client disconnects (or timeout) +2. Gateway sends CANCEL frame via `ToMicroservice` channel +3. SDK reads CANCEL, cancels handler's CancellationToken +4. SDK optionally sends partial RESPONSE or no response + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] `InMemoryTransportServer` fully implements `ITransportServer` +2. [ ] `InMemoryTransportClient` fully implements `ITransportClient` +3. [ ] All frame types (HELLO, HEARTBEAT, REQUEST, RESPONSE, STREAM_DATA, CANCEL) are handled +4. [ ] Thread-safe concurrent access to `InMemoryConnectionRegistry` +5. [ ] All integration tests pass +6. [ ] No external dependencies (only BCL + Router.Common) + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Uses `System.Threading.Channels` for async frame passing (unbounded by default, can add backpressure later) +- InMemory transport simulates latency only if explicitly configured (default: instant) +- Connection lifetime is tied to `CancellationTokenSource`; disposing triggers cleanup +- This transport is explicitly excluded from production deployments via conditional compilation or package separation diff --git a/docs/router/SPRINT_7000_0003_0001_microservice_sdk_core.md b/docs/router/SPRINT_7000_0003_0001_microservice_sdk_core.md new file mode 100644 index 000000000..2be4e4254 --- /dev/null +++ b/docs/router/SPRINT_7000_0003_0001_microservice_sdk_core.md @@ -0,0 +1,135 @@ +# Sprint 7000-0003-0001 · Microservice SDK · Core Infrastructure + +## Topic & Scope + +Implement the core infrastructure of the Microservice SDK: options, endpoint discovery, and router connection management. After this sprint, a microservice can connect to a router and send HELLO with its endpoint list. + +**Goal:** "Connect and say HELLO" - microservice connects to router(s) and registers its identity and endpoints. + +**Working directory:** `src/__Libraries/StellaOps.Microservice/` + +**Parallel track:** This sprint can run in parallel with Gateway sprints (7000-0004-*) once the InMemory transport is complete. + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0001_0002 (Common), SPRINT_7000_0002_0001 (InMemory transport) +- **Downstream:** SPRINT_7000_0003_0002 (request handling) +- **Parallel work:** Can run in parallel with Gateway core sprint +- **Cross-module impact:** None. All work in `src/__Libraries/StellaOps.Microservice/` + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 7 - Microservice SDK requirements) +- `docs/router/04-Step.md` (detailed task breakdown) +- `docs/router/implplan.md` (phase 4 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | SDK-001 | TODO | Implement `StellaMicroserviceOptions` with all required properties | ServiceName, Version, Region, InstanceId, Routers, ConfigFilePath | +| 2 | SDK-002 | TODO | Implement `RouterEndpointConfig` (host, port, transport type) | | +| 3 | SDK-003 | TODO | Validate that Routers list is mandatory (throw if empty) | Per spec | +| 4 | SDK-010 | TODO | Create `[StellaEndpoint]` attribute for endpoint declaration | Method, Path, SupportsStreaming, Timeout | +| 5 | SDK-011 | TODO | Implement runtime reflection endpoint discovery | Scan assemblies for `[StellaEndpoint]` | +| 6 | SDK-012 | TODO | Build in-memory `EndpointDescriptor` list from discovered endpoints | | +| 7 | SDK-013 | TODO | Create `IEndpointDiscoveryProvider` abstraction | For source-gen vs reflection swap | +| 8 | SDK-020 | TODO | Implement `IRouterConnectionManager` interface | | +| 9 | SDK-021 | TODO | Implement `RouterConnectionManager` with connection pool | One connection per router endpoint | +| 10 | SDK-022 | TODO | Implement connection lifecycle (connect, reconnect on failure) | Exponential backoff | +| 11 | SDK-023 | TODO | Implement HELLO frame construction from options + endpoints | | +| 12 | SDK-024 | TODO | Send HELLO on connection establishment | | +| 13 | SDK-025 | TODO | Implement HEARTBEAT sending on timer | Configurable interval | +| 14 | SDK-030 | TODO | Implement `AddStellaMicroservice(IServiceCollection, Action)` | Full DI registration | +| 15 | SDK-031 | TODO | Register `IHostedService` for connection management | Start/stop with host | +| 16 | SDK-032 | TODO | Create `MicroserviceHostedService` that starts connections on app startup | | +| 17 | SDK-040 | TODO | Write unit tests for endpoint discovery | | +| 18 | SDK-041 | TODO | Write integration tests with InMemory transport | Connect, HELLO, HEARTBEAT | + +## Endpoint Discovery + +### Attribute-Based Declaration +```csharp +[StellaEndpoint("POST", "/billing/invoices")] +public sealed class CreateInvoiceEndpoint : IStellaEndpoint +{ + public Task HandleAsync(CreateInvoiceRequest request, CancellationToken ct); +} +``` + +### Discovery Flow +1. On startup, scan loaded assemblies for types with `[StellaEndpoint]` +2. For each type, verify it implements a handler interface +3. Build `EndpointDescriptor` from attribute + defaults +4. Store in `IEndpointRegistry` for lookup and HELLO construction + +### Handler Interface Detection +```csharp +// Typed with request +typeof(IStellaEndpoint) + +// Typed without request +typeof(IStellaEndpoint) + +// Raw handler +typeof(IRawStellaEndpoint) +``` + +## Connection Lifecycle + +``` +┌─────────────┐ Connect ┌─────────────┐ HELLO ┌─────────────┐ +│ Disconnected│────────────────►│ Connected │───────────────►│ Registered │ +└─────────────┘ └─────────────┘ └─────────────┘ + ▲ │ │ + │ │ Error │ Heartbeat timer + │ ▼ ▼ + │ ┌─────────────┐ ┌─────────────┐ + └────────────────────────│ Reconnect │◄───────────────│ Heartbeat │ + Backoff │ (backoff) │ Error │ Active │ + └─────────────┘ └─────────────┘ +``` + +## StellaMicroserviceOptions + +```csharp +public sealed class StellaMicroserviceOptions +{ + public string ServiceName { get; set; } = string.Empty; + public string Version { get; set; } = string.Empty; // Strict semver + public string Region { get; set; } = string.Empty; + public string InstanceId { get; set; } = string.Empty; // Auto-generate if empty + public IList Routers { get; set; } = new List(); + public string? ConfigFilePath { get; set; } // Optional YAML overrides + public TimeSpan HeartbeatInterval { get; set; } = TimeSpan.FromSeconds(10); + public TimeSpan ReconnectBackoffMax { get; set; } = TimeSpan.FromMinutes(1); +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] `StellaMicroserviceOptions` fully implemented with validation +2. [ ] Endpoint discovery works via reflection +3. [ ] Connection manager connects to configured routers +4. [ ] HELLO frame sent on connection with full endpoint list +5. [ ] HEARTBEAT sent periodically on timer +6. [ ] Reconnection with backoff on connection failure +7. [ ] Integration tests pass with InMemory transport +8. [ ] `AddStellaMicroservice()` registers all services correctly + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Endpoint discovery defaults to reflection; source generation comes in a later sprint +- InstanceId auto-generates using `Guid.NewGuid().ToString("N")` if not provided +- Version validation enforces strict semver format +- Routers list cannot be empty - throws `InvalidOperationException` on startup +- YAML config file is optional at this stage (Sprint 7000-0007-0002) diff --git a/docs/router/SPRINT_7000_0003_0002_microservice_sdk_handlers.md b/docs/router/SPRINT_7000_0003_0002_microservice_sdk_handlers.md new file mode 100644 index 000000000..41b80ee2c --- /dev/null +++ b/docs/router/SPRINT_7000_0003_0002_microservice_sdk_handlers.md @@ -0,0 +1,173 @@ +# Sprint 7000-0003-0002 · Microservice SDK · Request Handling + +## Topic & Scope + +Implement request handling in the Microservice SDK: receiving REQUEST frames, dispatching to handlers, and sending RESPONSE frames. Supports both typed and raw handler patterns. + +**Goal:** Complete the request/response flow - microservice receives requests from router and returns responses. + +**Working directory:** `src/__Libraries/StellaOps.Microservice/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0003_0001 (SDK core with connection + HELLO) +- **Downstream:** SPRINT_7000_0005_0003 (cancellation), SPRINT_7000_0005_0004 (streaming) +- **Parallel work:** Can run in parallel with Gateway middleware sprint +- **Cross-module impact:** None. All work in `src/__Libraries/StellaOps.Microservice/` + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 7.2, 7.4, 7.5 - Endpoint definition, Connection behavior, Request handling) +- `docs/router/04-Step.md` (detailed task breakdown - request handling section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | HDL-001 | TODO | Define `IRawStellaEndpoint` interface | Takes RawRequestContext, returns RawResponse | +| 2 | HDL-002 | TODO | Define `IStellaEndpoint` interface | Typed request/response | +| 3 | HDL-003 | TODO | Define `IStellaEndpoint` interface | No request body | +| 4 | HDL-010 | TODO | Implement `RawRequestContext` | Method, Path, Headers, Body stream, CancellationToken | +| 5 | HDL-011 | TODO | Implement `RawResponse` | StatusCode, Headers, Body stream | +| 6 | HDL-012 | TODO | Implement `IHeaderCollection` abstraction | Key-value header access | +| 7 | HDL-020 | TODO | Create `IEndpointRegistry` for handler lookup | (Method, Path) → handler instance | +| 8 | HDL-021 | TODO | Implement path template matching (ASP.NET-style routes) | Handles `{id}` parameters | +| 9 | HDL-022 | TODO | Implement path matching rules (case sensitivity, trailing slash) | Per spec | +| 10 | HDL-030 | TODO | Create `TypedEndpointAdapter` to wrap typed handlers as raw | IStellaEndpoint → IRawStellaEndpoint | +| 11 | HDL-031 | TODO | Implement request deserialization in adapter | JSON by default | +| 12 | HDL-032 | TODO | Implement response serialization in adapter | JSON by default | +| 13 | HDL-040 | TODO | Implement `RequestDispatcher` | Frame → RawRequestContext → Handler → RawResponse → Frame | +| 14 | HDL-041 | TODO | Implement frame-to-context conversion | REQUEST frame → RawRequestContext | +| 15 | HDL-042 | TODO | Implement response-to-frame conversion | RawResponse → RESPONSE frame | +| 16 | HDL-043 | TODO | Wire dispatcher into connection read loop | Process REQUEST frames | +| 17 | HDL-050 | TODO | Implement `IServiceProvider` integration for handler instantiation | DI support | +| 18 | HDL-051 | TODO | Implement handler scoping (per-request scope) | IServiceScope per request | +| 19 | HDL-060 | TODO | Write unit tests for path matching | Various patterns | +| 20 | HDL-061 | TODO | Write unit tests for typed adapter | Serialization round-trip | +| 21 | HDL-062 | TODO | Write integration tests for full REQUEST/RESPONSE flow | With InMemory transport | + +## Handler Interfaces + +### Raw Handler +```csharp +public interface IRawStellaEndpoint +{ + Task HandleAsync(RawRequestContext context, CancellationToken cancellationToken); +} +``` + +### Typed Handlers +```csharp +public interface IStellaEndpoint +{ + Task HandleAsync(TRequest request, CancellationToken cancellationToken); +} + +public interface IStellaEndpoint +{ + Task HandleAsync(CancellationToken cancellationToken); +} +``` + +## RawRequestContext + +```csharp +public sealed class RawRequestContext +{ + public string Method { get; init; } = string.Empty; + public string Path { get; init; } = string.Empty; + public IReadOnlyDictionary PathParameters { get; init; } + = new Dictionary(); + public IHeaderCollection Headers { get; init; } = default!; + public Stream Body { get; init; } = Stream.Null; + public CancellationToken CancellationToken { get; init; } +} +``` + +## RawResponse + +```csharp +public sealed class RawResponse +{ + public int StatusCode { get; init; } = 200; + public IHeaderCollection Headers { get; init; } = default!; + public Stream Body { get; init; } = Stream.Null; + + public static RawResponse Ok(Stream body) => new() { StatusCode = 200, Body = body }; + public static RawResponse NotFound() => new() { StatusCode = 404 }; + public static RawResponse Error(int statusCode, string message) => ...; +} +``` + +## Path Template Matching + +Must use same rules as router (ASP.NET-style): +- `{id}` matches any segment, value captured in PathParameters +- `{id:int}` constraint support (optional for v1) +- Case sensitivity: configurable, default case-insensitive +- Trailing slash: configurable, default treats `/foo` and `/foo/` as equivalent + +## Request Flow + +``` +┌─────────────────┐ ┌────────────────────┐ ┌───────────────────┐ +│ REQUEST Frame │────►│ RequestDispatcher │────►│ IEndpointRegistry │ +│ (from Router) │ │ │ │ (Method, Path) │ +└─────────────────┘ └────────────────────┘ └───────────────────┘ + │ │ + │ ▼ + │ ┌───────────────────┐ + │ │ Handler Instance │ + │ │ (from DI scope) │ + │ └───────────────────┘ + │ │ + │◄─────────────────────────┘ + ▼ + ┌────────────────────┐ + │ RawRequestContext │ + └────────────────────┘ + │ + ▼ + ┌────────────────────┐ + │ Handler.HandleAsync│ + └────────────────────┘ + │ + ▼ + ┌────────────────────┐ + │ RawResponse │ + └────────────────────┘ + │ + ▼ + ┌────────────────────┐ + │ RESPONSE Frame │ + │ (to Router) │ + └────────────────────┘ +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] All handler interfaces defined and documented +2. [ ] `RawRequestContext` and `RawResponse` implemented +3. [ ] Path template matching works for common patterns +4. [ ] Typed handlers wrapped correctly via `TypedEndpointAdapter` +5. [ ] `RequestDispatcher` processes REQUEST frames end-to-end +6. [ ] DI integration works (handlers resolved from service provider) +7. [ ] Integration tests pass with InMemory transport +8. [ ] Body treated as opaque bytes (no interpretation at SDK level for raw handlers) + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Typed handlers use JSON serialization by default; configurable via options +- Path matching is case-insensitive by default (matches ASP.NET Core default) +- Each request gets its own DI scope for handler resolution +- Body stream may be buffered or streaming depending on endpoint configuration (streaming support comes in later sprint) +- Handler exceptions are caught and converted to 500 responses with error details (configurable) diff --git a/docs/router/SPRINT_7000_0004_0001_gateway_core.md b/docs/router/SPRINT_7000_0004_0001_gateway_core.md new file mode 100644 index 000000000..623b233f7 --- /dev/null +++ b/docs/router/SPRINT_7000_0004_0001_gateway_core.md @@ -0,0 +1,135 @@ +# Sprint 7000-0004-0001 · Gateway · Core Infrastructure + +## Topic & Scope + +Implement the core infrastructure of the Gateway: node configuration, global routing state, and basic routing plugin. This sprint creates the foundation for HTTP → transport → microservice routing. + +**Goal:** Gateway can maintain routing state from connected microservices and select instances for routing decisions. + +**Working directory:** `src/Gateway/StellaOps.Gateway.WebService/` + +**Parallel track:** This sprint can run in parallel with Microservice SDK sprints (7000-0003-*) once the InMemory transport is complete. + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0001_0002 (Common), SPRINT_7000_0002_0001 (InMemory transport) +- **Downstream:** SPRINT_7000_0004_0002 (middleware), SPRINT_7000_0004_0003 (connection handling) +- **Parallel work:** Can run in parallel with SDK core sprint +- **Cross-module impact:** None. All work in `src/Gateway/StellaOps.Gateway.WebService/` + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 6 - Gateway requirements) +- `docs/router/05-Step.md` (detailed task breakdown) +- `docs/router/implplan.md` (phase 5 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | GW-001 | TODO | Implement `GatewayNodeConfig` | Region, NodeId, Environment | +| 2 | GW-002 | TODO | Bind `GatewayNodeConfig` from configuration | appsettings.json section | +| 3 | GW-003 | TODO | Validate GatewayNodeConfig on startup | Region required | +| 4 | GW-010 | TODO | Implement `IGlobalRoutingState` as `InMemoryRoutingState` | Thread-safe implementation | +| 5 | GW-011 | TODO | Implement `ConnectionState` storage | ConcurrentDictionary by ConnectionId | +| 6 | GW-012 | TODO | Implement endpoint-to-connections index | (Method, Path) → List | +| 7 | GW-013 | TODO | Implement `ResolveEndpoint(method, path)` | Path template matching | +| 8 | GW-014 | TODO | Implement `GetConnectionsFor(serviceName, version, method, path)` | Filter by criteria | +| 9 | GW-020 | TODO | Create `IRoutingPlugin` implementation `DefaultRoutingPlugin` | Basic instance selection | +| 10 | GW-021 | TODO | Implement version filtering (strict semver equality) | Per spec | +| 11 | GW-022 | TODO | Implement health filtering (Healthy or Degraded only) | Per spec | +| 12 | GW-023 | TODO | Implement region preference (gateway region first) | Use GatewayNodeConfig.Region | +| 13 | GW-024 | TODO | Implement basic tie-breaking (any healthy instance) | Full algorithm in later sprint | +| 14 | GW-030 | TODO | Create `RoutingOptions` for configurable behavior | Default version, neighbor regions | +| 15 | GW-031 | TODO | Register routing services in DI | IGlobalRoutingState, IRoutingPlugin | +| 16 | GW-040 | TODO | Write unit tests for InMemoryRoutingState | | +| 17 | GW-041 | TODO | Write unit tests for DefaultRoutingPlugin | Version, health, region filtering | + +## GatewayNodeConfig + +```csharp +public sealed class GatewayNodeConfig +{ + public string Region { get; set; } = string.Empty; // Required, e.g. "eu1" + public string NodeId { get; set; } = string.Empty; // e.g. "gw-eu1-01" + public string Environment { get; set; } = string.Empty; // e.g. "prod" + public IList NeighborRegions { get; set; } = []; // Fallback regions +} +``` + +**Configuration binding:** +```json +{ + "GatewayNode": { + "Region": "eu1", + "NodeId": "gw-eu1-01", + "Environment": "prod", + "NeighborRegions": ["eu2", "us1"] + } +} +``` + +## InMemoryRoutingState + +```csharp +internal sealed class InMemoryRoutingState : IGlobalRoutingState +{ + private readonly ConcurrentDictionary _connections = new(); + private readonly ConcurrentDictionary<(string Method, string Path), List> _endpointIndex = new(); + + public void AddConnection(ConnectionState connection) { ... } + public void RemoveConnection(string connectionId) { ... } + public void UpdateConnection(string connectionId, Action update) { ... } + + public EndpointDescriptor? ResolveEndpoint(string method, string path) { ... } + public IReadOnlyList GetConnectionsFor( + string serviceName, string version, string method, string path) { ... } +} +``` + +## Routing Algorithm (Phase 1 - Basic) + +``` +1. Filter by ServiceName (exact match) +2. Filter by Version (strict semver equality) +3. Filter by Health (Healthy or Degraded only) +4. If any remain, pick one (random for now) +5. If none, return null (503 Service Unavailable) +``` + +**Note:** Full routing algorithm (region preference, ping-based selection, fallback) is implemented in SPRINT_7000_0005_0002. + +## Region Derivation + +Per spec section 2: +> Routing decisions MUST use `GatewayNodeConfig.Region` as the node's region; the router MUST NOT derive region from HTTP headers or URL host names. + +This is enforced by: +1. GatewayNodeConfig is bound from static configuration only +2. No code path reads region from HttpContext +3. Tests verify region is never extracted from Host header + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] `GatewayNodeConfig` loads and validates from configuration +2. [ ] `InMemoryRoutingState` stores and indexes connections correctly +3. [ ] `ResolveEndpoint` performs path template matching +4. [ ] `DefaultRoutingPlugin` filters by version, health, region +5. [ ] All services registered in DI container +6. [ ] Unit tests pass for routing state and plugin + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Routing state is in-memory only; no persistence or distribution (single gateway node for v1) +- Path template matching reuses logic from SDK (shared in Common or duplicated) +- DefaultRoutingPlugin is intentionally simple; full algorithm comes in SPRINT_7000_0005_0002 +- Region validation: startup fails fast if Region is empty diff --git a/docs/router/SPRINT_7000_0004_0002_gateway_middleware.md b/docs/router/SPRINT_7000_0004_0002_gateway_middleware.md new file mode 100644 index 000000000..23735a007 --- /dev/null +++ b/docs/router/SPRINT_7000_0004_0002_gateway_middleware.md @@ -0,0 +1,172 @@ +# Sprint 7000-0004-0002 · Gateway · HTTP Middleware Pipeline + +## Topic & Scope + +Implement the HTTP middleware pipeline for the Gateway: endpoint resolution, authorization, routing decision, and transport dispatch. After this sprint, HTTP requests flow through the gateway to microservices via the InMemory transport. + +**Goal:** Complete HTTP → transport → microservice → HTTP flow for basic buffered requests. + +**Working directory:** `src/Gateway/StellaOps.Gateway.WebService/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0004_0001 (Gateway core) +- **Downstream:** SPRINT_7000_0004_0003 (connection handling) +- **Parallel work:** Can run in parallel with SDK request handling sprint +- **Cross-module impact:** None. All work in `src/Gateway/StellaOps.Gateway.WebService/` + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 6.1 - HTTP ingress pipeline) +- `docs/router/05-Step.md` (middleware section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | MID-001 | TODO | Create `EndpointResolutionMiddleware` | (Method, Path) → EndpointDescriptor | +| 2 | MID-002 | TODO | Store resolved endpoint in `HttpContext.Items` | For downstream middleware | +| 3 | MID-003 | TODO | Return 404 if endpoint not found | | +| 4 | MID-010 | TODO | Create `AuthorizationMiddleware` stub | Checks authenticated only (full claims later) | +| 5 | MID-011 | TODO | Wire ASP.NET Core authentication | Standard middleware order | +| 6 | MID-012 | TODO | Return 401/403 for unauthorized requests | | +| 7 | MID-020 | TODO | Create `RoutingDecisionMiddleware` | Calls IRoutingPlugin.ChooseInstanceAsync | +| 8 | MID-021 | TODO | Store RoutingDecision in `HttpContext.Items` | | +| 9 | MID-022 | TODO | Return 503 if no instance available | | +| 10 | MID-023 | TODO | Return 504 if routing times out | | +| 11 | MID-030 | TODO | Create `TransportDispatchMiddleware` | Dispatches to selected transport | +| 12 | MID-031 | TODO | Implement buffered request dispatch | Read entire body, send REQUEST frame | +| 13 | MID-032 | TODO | Implement buffered response handling | Read RESPONSE frame, write to HTTP | +| 14 | MID-033 | TODO | Map transport errors to HTTP status codes | | +| 15 | MID-040 | TODO | Create `GlobalErrorHandlerMiddleware` | Catches unhandled exceptions | +| 16 | MID-041 | TODO | Implement structured error responses | JSON error envelope | +| 17 | MID-050 | TODO | Create `RequestLoggingMiddleware` | Correlation ID, service, endpoint, region, instance | +| 18 | MID-051 | TODO | Wire forwarded headers middleware | For reverse proxy support | +| 19 | MID-060 | TODO | Configure middleware pipeline in Program.cs | Correct order | +| 20 | MID-070 | TODO | Write integration tests for full HTTP→transport flow | With InMemory transport + SDK | +| 21 | MID-071 | TODO | Write tests for error scenarios (404, 503, etc.) | | + +## Middleware Pipeline Order + +```csharp +app.UseForwardedHeaders(); // Reverse proxy support +app.UseMiddleware(); +app.UseMiddleware(); +app.UseAuthentication(); // ASP.NET Core auth +app.UseMiddleware(); +app.UseMiddleware(); +app.UseMiddleware(); +app.UseMiddleware(); +``` + +## EndpointResolutionMiddleware + +```csharp +public class EndpointResolutionMiddleware +{ + public async Task InvokeAsync(HttpContext context, IGlobalRoutingState routingState) + { + var method = context.Request.Method; + var path = context.Request.Path.Value ?? "/"; + + var endpoint = routingState.ResolveEndpoint(method, path); + if (endpoint == null) + { + context.Response.StatusCode = 404; + await context.Response.WriteAsJsonAsync(new { error = "Endpoint not found" }); + return; + } + + context.Items["ResolvedEndpoint"] = endpoint; + await _next(context); + } +} +``` + +## TransportDispatchMiddleware (Buffered Mode) + +```csharp +public class TransportDispatchMiddleware +{ + public async Task InvokeAsync(HttpContext context, ITransportClient transport) + { + var decision = (RoutingDecision)context.Items["RoutingDecision"]!; + var endpoint = (EndpointDescriptor)context.Items["ResolvedEndpoint"]!; + + // Build REQUEST frame + using var bodyStream = new MemoryStream(); + await context.Request.Body.CopyToAsync(bodyStream); + var requestFrame = new Frame + { + Type = FrameType.Request, + CorrelationId = Guid.NewGuid(), + Payload = BuildRequestPayload(context, bodyStream.ToArray()) + }; + + // Send and await response + using var cts = CancellationTokenSource.CreateLinkedTokenSource( + context.RequestAborted); + cts.CancelAfter(decision.EffectiveTimeout); + + var responseFrame = await transport.SendRequestAsync( + decision.Connection, + requestFrame, + decision.EffectiveTimeout, + cts.Token); + + // Write response to HTTP + await WriteHttpResponse(context, responseFrame); + } +} +``` + +## Error Mapping + +| Transport/Routing Error | HTTP Status | +|------------------------|-------------| +| Endpoint not found | 404 Not Found | +| No healthy instance | 503 Service Unavailable | +| Timeout | 504 Gateway Timeout | +| Microservice error (5xx) | Pass through status | +| Transport connection lost | 502 Bad Gateway | +| Payload too large | 413 Payload Too Large | +| Unauthorized | 401 Unauthorized | +| Forbidden (claims) | 403 Forbidden | + +## HttpContext.Items Keys + +```csharp +public static class ContextKeys +{ + public const string ResolvedEndpoint = "ResolvedEndpoint"; + public const string RoutingDecision = "RoutingDecision"; + public const string CorrelationId = "CorrelationId"; +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] All middleware classes implemented +2. [ ] Pipeline configured in correct order +3. [ ] EndpointResolutionMiddleware resolves (Method, Path) → endpoint +4. [ ] AuthorizationMiddleware checks authentication (claims in later sprint) +5. [ ] RoutingDecisionMiddleware selects instance via IRoutingPlugin +6. [ ] TransportDispatchMiddleware sends/receives frames (buffered mode) +7. [ ] Error responses use consistent JSON envelope +8. [ ] Integration tests pass with InMemory transport + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Authorization middleware is a stub that only checks `User.Identity?.IsAuthenticated`; full RequiringClaims enforcement comes in SPRINT_7000_0008_0001 +- Streaming support is not implemented in this sprint; TransportDispatchMiddleware only handles buffered mode +- Correlation ID is generated per request and logged throughout +- Request body is fully read into memory for buffered mode; streaming in SPRINT_7000_0005_0004 diff --git a/docs/router/SPRINT_7000_0004_0003_gateway_connections.md b/docs/router/SPRINT_7000_0004_0003_gateway_connections.md new file mode 100644 index 000000000..c0c9e0877 --- /dev/null +++ b/docs/router/SPRINT_7000_0004_0003_gateway_connections.md @@ -0,0 +1,218 @@ +# Sprint 7000-0004-0003 · Gateway · Connection Handling + +## Topic & Scope + +Implement connection handling in the Gateway: processing HELLO frames from microservices, maintaining connection state, and updating the global routing state. After this sprint, microservices can register with the gateway and be routed to. + +**Goal:** Gateway receives HELLO from microservices and maintains live routing state. Combined with previous sprints, this enables full end-to-end HTTP → microservice routing. + +**Working directory:** `src/Gateway/StellaOps.Gateway.WebService/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0004_0002 (middleware), SPRINT_7000_0003_0001 (SDK core with HELLO) +- **Downstream:** SPRINT_7000_0005_0001 (heartbeat/health) +- **Parallel work:** Should coordinate with SDK team for HELLO frame format agreement +- **Cross-module impact:** None. All work in Gateway. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 6.2 - Per-connection state and routing view) +- `docs/router/05-Step.md` (connection handling section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | CON-001 | TODO | Create `IConnectionHandler` interface | Processes frames per connection | +| 2 | CON-002 | TODO | Implement `ConnectionHandler` | Frame type dispatch | +| 3 | CON-010 | TODO | Implement HELLO frame processing | Parse HelloPayload, create ConnectionState | +| 4 | CON-011 | TODO | Validate HELLO payload | ServiceName, Version, InstanceId required | +| 5 | CON-012 | TODO | Register connection in IGlobalRoutingState | AddConnection | +| 6 | CON-013 | TODO | Build endpoint index from HELLO | (Method, Path) → ConnectionId | +| 7 | CON-020 | TODO | Create `TransportServerHost` hosted service | Starts ITransportServer | +| 8 | CON-021 | TODO | Wire transport server to connection handler | Frame routing | +| 9 | CON-022 | TODO | Handle new connections (InMemory: channel registration) | | +| 10 | CON-030 | TODO | Implement connection cleanup on disconnect | RemoveConnection from routing state | +| 11 | CON-031 | TODO | Clean up endpoint index on disconnect | Remove all endpoints for connection | +| 12 | CON-032 | TODO | Log connection lifecycle events | Connect, HELLO, disconnect | +| 13 | CON-040 | TODO | Implement connection ID generation | Unique per connection | +| 14 | CON-041 | TODO | Store connection metadata | Transport type, connect time | +| 15 | CON-050 | TODO | Write integration tests for HELLO flow | SDK → Gateway registration | +| 16 | CON-051 | TODO | Write tests for connection cleanup | | +| 17 | CON-052 | TODO | Write tests for multiple connections from same service | Different instances | + +## Connection Lifecycle + +``` +┌─────────────────┐ +│ New Connection │ (Transport layer signals new connection) +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Awaiting HELLO │ (Connection exists but not registered for routing) +└────────┬────────┘ + │ HELLO frame received + ▼ +┌─────────────────┐ +│ Validate HELLO │ (Check ServiceName, Version, endpoints) +└────────┬────────┘ + │ Valid + ▼ +┌─────────────────┐ +│ Create │ +│ ConnectionState │ (InstanceDescriptor, endpoints, health = Unknown) +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Register in │ (Add to IGlobalRoutingState, index endpoints) +│ RoutingState │ +└────────┬────────┘ + │ + ▼ +┌─────────────────┐ +│ Registered │ (Connection can receive routed requests) +└────────┬────────┘ + │ Disconnect or error + ▼ +┌─────────────────┐ +│ Cleanup State │ (Remove from routing state, clean endpoint index) +└─────────────────┘ +``` + +## HELLO Processing + +```csharp +internal sealed class ConnectionHandler : IConnectionHandler +{ + public async Task HandleFrameAsync(string connectionId, Frame frame) + { + switch (frame.Type) + { + case FrameType.Hello: + await ProcessHelloAsync(connectionId, frame); + break; + case FrameType.Heartbeat: + await ProcessHeartbeatAsync(connectionId, frame); + break; + case FrameType.Response: + case FrameType.ResponseStreamData: + await ProcessResponseAsync(connectionId, frame); + break; + default: + _logger.LogWarning("Unknown frame type {Type} from {ConnectionId}", + frame.Type, connectionId); + break; + } + } + + private async Task ProcessHelloAsync(string connectionId, Frame frame) + { + var payload = DeserializeHelloPayload(frame.Payload); + + // Validate + if (string.IsNullOrEmpty(payload.Instance.ServiceName)) + throw new InvalidHelloException("ServiceName required"); + if (string.IsNullOrEmpty(payload.Instance.Version)) + throw new InvalidHelloException("Version required"); + + // Build ConnectionState + var connection = new ConnectionState + { + ConnectionId = connectionId, + Instance = payload.Instance, + Status = InstanceHealthStatus.Unknown, + LastHeartbeatUtc = DateTime.UtcNow, + TransportType = _currentTransportType, + Endpoints = payload.Endpoints.ToDictionary( + e => (e.Method, e.Path), + e => e) + }; + + // Register + _routingState.AddConnection(connection); + _logger.LogInformation( + "Registered {ServiceName} v{Version} instance {InstanceId} from {Region}", + payload.Instance.ServiceName, + payload.Instance.Version, + payload.Instance.InstanceId, + payload.Instance.Region); + } +} +``` + +## TransportServerHost + +```csharp +internal sealed class TransportServerHost : IHostedService +{ + private readonly ITransportServer _server; + private readonly IConnectionHandler _handler; + + public async Task StartAsync(CancellationToken cancellationToken) + { + _server.OnConnection += HandleNewConnection; + _server.OnFrame += HandleFrame; + _server.OnDisconnect += HandleDisconnect; + + await _server.StartAsync(cancellationToken); + } + + private void HandleNewConnection(string connectionId) + { + _logger.LogInformation("New connection: {ConnectionId}", connectionId); + } + + private async Task HandleFrame(string connectionId, Frame frame) + { + await _handler.HandleFrameAsync(connectionId, frame); + } + + private void HandleDisconnect(string connectionId) + { + _routingState.RemoveConnection(connectionId); + _logger.LogInformation("Connection closed: {ConnectionId}", connectionId); + } +} +``` + +## Multiple Instances + +The gateway must handle multiple instances of the same service: +- Same ServiceName + Version from different InstanceIds +- Each instance has its own ConnectionState +- Routing algorithm selects among available instances + +``` +Service: billing v1.0.0 +├── Instance: billing-01 (Region: eu1) → Connection abc123 +├── Instance: billing-02 (Region: eu1) → Connection def456 +└── Instance: billing-03 (Region: us1) → Connection ghi789 +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] HELLO frames processed correctly +2. [ ] ConnectionState created and stored +3. [ ] Endpoint index updated for routing lookups +4. [ ] Connection cleanup removes all state +5. [ ] TransportServerHost starts/stops with application +6. [ ] Integration tests: SDK registers, Gateway routes, SDK handles request + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Initial health status is `Unknown` until first heartbeat +- Connection ID format: GUID for InMemory, transport-specific for real transports +- HELLO validation failure disconnects the client (logs error) +- Duplicate HELLO from same connection replaces existing state (re-registration) diff --git a/docs/router/SPRINT_7000_0005_0001_heartbeat_health.md b/docs/router/SPRINT_7000_0005_0001_heartbeat_health.md new file mode 100644 index 000000000..b41a48329 --- /dev/null +++ b/docs/router/SPRINT_7000_0005_0001_heartbeat_health.md @@ -0,0 +1,205 @@ +# Sprint 7000-0005-0001 · Protocol Features · Heartbeat & Health + +## Topic & Scope + +Implement heartbeat processing and health tracking. Microservices send HEARTBEAT frames periodically; the gateway updates health status and marks stale instances as unhealthy. + +**Goal:** Gateway maintains accurate health status for all connected instances, enabling health-aware routing. + +**Working directories:** +- `src/__Libraries/StellaOps.Microservice/` (heartbeat sending) +- `src/Gateway/StellaOps.Gateway.WebService/` (heartbeat processing) +- `src/__Libraries/StellaOps.Router.Common/` (if payload changes needed) + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0004_0003 (Gateway connection handling), SPRINT_7000_0003_0001 (SDK core) +- **Downstream:** SPRINT_7000_0005_0002 (routing algorithm uses health) +- **Parallel work:** None. Sequential after connection handling. +- **Cross-module impact:** SDK and Gateway both modified. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 8 - Control/health/ping requirements) +- `docs/router/06-Step.md` (heartbeat section) +- `docs/router/implplan.md` (phase 6 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Working Directory | +|---|---------|--------|-------------|-------------------| +| 1 | HB-001 | TODO | Implement HeartbeatPayload serialization | Common | +| 2 | HB-002 | TODO | Add InstanceHealthStatus to HeartbeatPayload | Common | +| 3 | HB-003 | TODO | Add optional metrics to HeartbeatPayload (inflight count, error rate) | Common | +| 4 | HB-010 | TODO | Implement heartbeat sending timer in SDK | Microservice | +| 5 | HB-011 | TODO | Report current health status in heartbeat | Microservice | +| 6 | HB-012 | TODO | Report optional metrics in heartbeat | Microservice | +| 7 | HB-013 | TODO | Make heartbeat interval configurable | Microservice | +| 8 | HB-020 | TODO | Implement HEARTBEAT frame processing in Gateway | Gateway | +| 9 | HB-021 | TODO | Update LastHeartbeatUtc on heartbeat | Gateway | +| 10 | HB-022 | TODO | Update InstanceHealthStatus from payload | Gateway | +| 11 | HB-023 | TODO | Update optional metrics from payload | Gateway | +| 12 | HB-030 | TODO | Create HealthMonitorService hosted service | Gateway | +| 13 | HB-031 | TODO | Implement stale heartbeat detection | Configurable threshold | +| 14 | HB-032 | TODO | Mark instances Unhealthy when heartbeat stale | Gateway | +| 15 | HB-033 | TODO | Implement Draining status support | For graceful shutdown | +| 16 | HB-040 | TODO | Create HealthOptions for thresholds | StaleThreshold, DegradedThreshold | +| 17 | HB-041 | TODO | Bind HealthOptions from configuration | Gateway | +| 18 | HB-050 | TODO | Implement ping latency measurement (request/response timing) | Gateway | +| 19 | HB-051 | TODO | Update AveragePingMs from timing | Exponential moving average | +| 20 | HB-060 | TODO | Write integration tests for heartbeat flow | | +| 21 | HB-061 | TODO | Write tests for health status transitions | | +| 22 | HB-062 | TODO | Write tests for stale detection | | + +## HeartbeatPayload + +```csharp +public sealed class HeartbeatPayload +{ + public string InstanceId { get; init; } = string.Empty; + public InstanceHealthStatus Status { get; init; } + public int? InflightRequestCount { get; init; } + public double? ErrorRatePercent { get; init; } + public DateTimeOffset Timestamp { get; init; } +} +``` + +## Health Status Transitions + +``` + ┌─────────┐ + First │ Unknown │ + Heartbeat └────┬────┘ + │ Status from payload + ▼ + ┌─────────┐ + ◄────────────────│ Healthy │◄───────────────┐ + │ Degraded └────┬────┘ Healthy │ + │ in payload │ │ + ▼ │ Stale threshold │ + ┌──────────┐ │ exceeded │ + │ Degraded │ ▼ │ + └────┬─────┘ ┌───────────┐ │ + │ │ Unhealthy │───────────────┘ + │ Stale └───────────┘ Heartbeat + │ threshold received + ▼ + ┌───────────┐ + │ Unhealthy │ + └───────────┘ +``` + +**Special case: Draining** +- Microservice explicitly sets status to `Draining` +- Router stops sending new requests but allows in-flight to complete +- Used for graceful shutdown + +## HealthMonitorService + +```csharp +internal sealed class HealthMonitorService : BackgroundService +{ + private readonly IGlobalRoutingState _routingState; + private readonly IOptions _options; + + protected override async Task ExecuteAsync(CancellationToken stoppingToken) + { + var interval = TimeSpan.FromSeconds(5); // Check frequency + + while (!stoppingToken.IsCancellationRequested) + { + CheckStaleConnections(); + await Task.Delay(interval, stoppingToken); + } + } + + private void CheckStaleConnections() + { + var threshold = _options.Value.StaleThreshold; + var now = DateTime.UtcNow; + + foreach (var connection in _routingState.GetAllConnections()) + { + var age = now - connection.LastHeartbeatUtc; + if (age > threshold && connection.Status != InstanceHealthStatus.Unhealthy) + { + _routingState.UpdateConnection(connection.ConnectionId, + c => c.Status = InstanceHealthStatus.Unhealthy); + _logger.LogWarning( + "Instance {InstanceId} marked Unhealthy: no heartbeat for {Age}", + connection.Instance.InstanceId, age); + } + } + } +} +``` + +## HealthOptions + +```csharp +public sealed class HealthOptions +{ + public TimeSpan StaleThreshold { get; set; } = TimeSpan.FromSeconds(30); + public TimeSpan DegradedThreshold { get; set; } = TimeSpan.FromSeconds(15); + public int PingHistorySize { get; set; } = 10; // For moving average +} +``` + +## Ping Latency Measurement + +Measure round-trip time for REQUEST/RESPONSE: +1. Record timestamp when REQUEST frame sent +2. Record timestamp when RESPONSE frame received +3. Calculate RTT = response_time - request_time +4. Update exponential moving average: `avg = 0.8 * avg + 0.2 * rtt` + +```csharp +internal sealed class PingTracker +{ + private readonly ConcurrentDictionary _pendingRequests = new(); + private double _averagePingMs; + + public void RecordRequestSent(Guid correlationId) + { + _pendingRequests[correlationId] = Stopwatch.GetTimestamp(); + } + + public void RecordResponseReceived(Guid correlationId) + { + if (_pendingRequests.TryRemove(correlationId, out var startTicks)) + { + var elapsed = Stopwatch.GetElapsedTime(startTicks); + var rtt = elapsed.TotalMilliseconds; + _averagePingMs = 0.8 * _averagePingMs + 0.2 * rtt; + } + } + + public double AveragePingMs => _averagePingMs; +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] SDK sends HEARTBEAT frames on timer +2. [ ] Gateway processes HEARTBEAT and updates ConnectionState +3. [ ] HealthMonitorService marks stale instances Unhealthy +4. [ ] Draining status stops new requests +5. [ ] Ping latency measured and stored +6. [ ] Health thresholds configurable +7. [ ] Integration tests pass + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Heartbeat interval default: 10 seconds (configurable) +- Stale threshold default: 30 seconds (3 missed heartbeats) +- Ping measurement uses REQUEST/RESPONSE timing, not separate PING frames +- Health status changes are logged for observability diff --git a/docs/router/SPRINT_7000_0005_0002_routing_algorithm.md b/docs/router/SPRINT_7000_0005_0002_routing_algorithm.md new file mode 100644 index 000000000..c51dfc574 --- /dev/null +++ b/docs/router/SPRINT_7000_0005_0002_routing_algorithm.md @@ -0,0 +1,217 @@ +# Sprint 7000-0005-0002 · Protocol Features · Full Routing Algorithm + +## Topic & Scope + +Implement the complete routing algorithm as specified: region preference, ping-based selection, heartbeat recency, and fallback logic. + +**Goal:** Routes prefer closest healthy instances with lowest latency, falling back through region tiers when necessary. + +**Working directory:** `src/Gateway/StellaOps.Gateway.WebService/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0005_0001 (heartbeat/health provides the metrics) +- **Downstream:** SPRINT_7000_0005_0003 (cancellation), SPRINT_7000_0006_* (real transports) +- **Parallel work:** None. Sequential. +- **Cross-module impact:** Gateway only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 4 - Routing algorithm / instance selection) +- `docs/router/06-Step.md` (routing algorithm section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | RTG-001 | TODO | Implement full filter chain in DefaultRoutingPlugin | | +| 2 | RTG-002 | TODO | Filter by ServiceName (exact match) | | +| 3 | RTG-003 | TODO | Filter by Version (strict semver equality) | | +| 4 | RTG-004 | TODO | Filter by Health (Healthy or Degraded only) | | +| 5 | RTG-010 | TODO | Implement region tier logic | | +| 6 | RTG-011 | TODO | Tier 0: Same region as gateway | GatewayNodeConfig.Region | +| 7 | RTG-012 | TODO | Tier 1: Configured neighbor regions | NeighborRegions | +| 8 | RTG-013 | TODO | Tier 2: All other regions | Fallback | +| 9 | RTG-020 | TODO | Implement instance scoring within tier | | +| 10 | RTG-021 | TODO | Primary sort: lower AveragePingMs | | +| 11 | RTG-022 | TODO | Secondary sort: more recent LastHeartbeatUtc | | +| 12 | RTG-023 | TODO | Tie-breaker: random or round-robin | Configurable | +| 13 | RTG-030 | TODO | Implement fallback decision order | | +| 14 | RTG-031 | TODO | Fallback 1: Greater ping (latency) | | +| 15 | RTG-032 | TODO | Fallback 2: Greater heartbeat age | | +| 16 | RTG-033 | TODO | Fallback 3: Less preferred region tier | | +| 17 | RTG-040 | TODO | Create RoutingOptions for algorithm tuning | | +| 18 | RTG-041 | TODO | Add default version configuration | Per service | +| 19 | RTG-042 | TODO | Add health status acceptance set | | +| 20 | RTG-050 | TODO | Write unit tests for each filter | | +| 21 | RTG-051 | TODO | Write unit tests for region tier logic | | +| 22 | RTG-052 | TODO | Write unit tests for scoring and tie-breaking | | +| 23 | RTG-053 | TODO | Write integration tests for routing decisions | | + +## Routing Algorithm + +``` +Input: (ServiceName, Version, Method, Path) +Output: ConnectionState or null + +1. Get all connections from IGlobalRoutingState.GetConnectionsFor(...) + +2. Filter by ServiceName + - connections.Where(c => c.Instance.ServiceName == serviceName) + +3. Filter by Version (strict semver equality) + - connections.Where(c => c.Instance.Version == version) + - If version not specified, use DefaultVersion from config + +4. Filter by Health + - connections.Where(c => c.Status in {Healthy, Degraded}) + - Exclude Unknown, Draining, Unhealthy + +5. Group by Region Tier + - Tier 0: c.Instance.Region == GatewayNodeConfig.Region + - Tier 1: c.Instance.Region in GatewayNodeConfig.NeighborRegions + - Tier 2: All others + +6. For each tier (0, 1, 2), if any candidates exist: + a. Sort by AveragePingMs (ascending) + b. For ties, sort by LastHeartbeatUtc (descending = more recent first) + c. For remaining ties, apply tie-breaker (random or round-robin) + d. Return first candidate + +7. If no candidates in any tier, return null (503) +``` + +## Implementation + +```csharp +public class DefaultRoutingPlugin : IRoutingPlugin +{ + public async Task ChooseInstanceAsync( + RoutingContext context, CancellationToken cancellationToken) + { + var endpoint = context.Endpoint; + var gatewayRegion = context.GatewayRegion; + + // Get all matching connections + var connections = _routingState.GetConnectionsFor( + endpoint.ServiceName, + endpoint.Version, + endpoint.Method, + endpoint.Path); + + // Filter by health + var healthy = connections + .Where(c => c.Status is InstanceHealthStatus.Healthy + or InstanceHealthStatus.Degraded) + .ToList(); + + if (healthy.Count == 0) + return null; + + // Group by region tier + var tier0 = healthy.Where(c => c.Instance.Region == gatewayRegion).ToList(); + var tier1 = healthy.Where(c => + _options.NeighborRegions.Contains(c.Instance.Region)).ToList(); + var tier2 = healthy.Except(tier0).Except(tier1).ToList(); + + // Select from best tier + var selected = SelectFromTier(tier0) + ?? SelectFromTier(tier1) + ?? SelectFromTier(tier2); + + if (selected == null) + return null; + + return new RoutingDecision + { + Endpoint = endpoint, + Connection = selected, + TransportType = selected.TransportType, + EffectiveTimeout = endpoint.DefaultTimeout + }; + } + + private ConnectionState? SelectFromTier(List tier) + { + if (tier.Count == 0) + return null; + + // Sort by ping (asc), then heartbeat (desc) + var sorted = tier + .OrderBy(c => c.AveragePingMs) + .ThenByDescending(c => c.LastHeartbeatUtc) + .ToList(); + + // Tie-breaker for same ping and heartbeat + var best = sorted.First(); + var tied = sorted.TakeWhile(c => + Math.Abs(c.AveragePingMs - best.AveragePingMs) < 0.1 + && c.LastHeartbeatUtc == best.LastHeartbeatUtc).ToList(); + + if (tied.Count == 1) + return tied[0]; + + // Round-robin or random for ties + return _options.TieBreaker == TieBreakerMode.Random + ? tied[Random.Shared.Next(tied.Count)] + : tied[_roundRobinCounter++ % tied.Count]; + } +} +``` + +## RoutingOptions + +```csharp +public sealed class RoutingOptions +{ + public Dictionary DefaultVersions { get; set; } = new(); + public HashSet AcceptableStatuses { get; set; } + = new() { InstanceHealthStatus.Healthy, InstanceHealthStatus.Degraded }; + public TieBreakerMode TieBreaker { get; set; } = TieBreakerMode.RoundRobin; +} + +public enum TieBreakerMode +{ + Random, + RoundRobin +} +``` + +## Spec Compliance Verification + +From specs.md section 4: +> * Region: +> * Prefer instances whose `Region == GatewayNodeConfig.Region`. +> * If none, fall back to configured neighbor regions. +> * If none, fall back to all other regions. +> * Within a chosen region tier: +> * Prefer lower `AveragePingMs`. +> * If several are tied, prefer more recent `LastHeartbeatUtc`. +> * If still tied, use a balancing strategy (e.g. random or round-robin). + +Implementation must match exactly. + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] Full filter chain implemented (service, version, health) +2. [ ] Region tier logic works (same region → neighbors → others) +3. [ ] Scoring within tier (ping, heartbeat, tie-breaker) +4. [ ] RoutingOptions configurable +5. [ ] All unit tests pass +6. [ ] Integration tests verify routing decisions + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Ping tolerance for "ties": 0.1ms difference considered equal +- Round-robin counter is per-endpoint to avoid hot instances +- DefaultVersion lookup is per-service from configuration +- Degraded instances are routed to (may want to prefer Healthy first) diff --git a/docs/router/SPRINT_7000_0005_0003_cancellation.md b/docs/router/SPRINT_7000_0005_0003_cancellation.md new file mode 100644 index 000000000..c4b9887da --- /dev/null +++ b/docs/router/SPRINT_7000_0005_0003_cancellation.md @@ -0,0 +1,230 @@ +# Sprint 7000-0005-0003 · Protocol Features · Cancellation Semantics + +## Topic & Scope + +Implement cancellation semantics on both gateway and microservice sides. When HTTP clients disconnect, timeouts occur, or payload limits are breached, CANCEL frames are sent to stop in-flight work. + +**Goal:** Clean cancellation propagation from HTTP client through gateway to microservice handlers. + +**Working directories:** +- `src/Gateway/StellaOps.Gateway.WebService/` (send CANCEL) +- `src/__Libraries/StellaOps.Microservice/` (receive CANCEL, cancel handler) +- `src/__Libraries/StellaOps.Router.Common/` (CancelPayload) + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0005_0002 (routing algorithm complete) +- **Downstream:** SPRINT_7000_0005_0004 (streaming uses cancellation) +- **Parallel work:** None. Sequential. +- **Cross-module impact:** SDK and Gateway both modified. + +## Documentation Prerequisites + +- `docs/router/specs.md` (sections 7.6, 10 - Cancellation requirements) +- `docs/router/07-Step.md` (cancellation section) +- `docs/router/implplan.md` (phase 7 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Working Directory | +|---|---------|--------|-------------|-------------------| +| 1 | CAN-001 | TODO | Define CancelPayload with Reason code | Common | +| 2 | CAN-002 | TODO | Define cancel reason constants | ClientDisconnected, Timeout, PayloadLimitExceeded, Shutdown | +| 3 | CAN-010 | TODO | Implement CANCEL frame sending in gateway | Gateway | +| 4 | CAN-011 | TODO | Wire HttpContext.RequestAborted to CANCEL | Gateway | +| 5 | CAN-012 | TODO | Implement timeout-triggered CANCEL | Gateway | +| 6 | CAN-013 | TODO | Implement payload-limit-triggered CANCEL | Gateway | +| 7 | CAN-014 | TODO | Implement shutdown-triggered CANCEL for in-flight | Gateway | +| 8 | CAN-020 | TODO | Stop forwarding REQUEST_STREAM_DATA after CANCEL | Gateway | +| 9 | CAN-021 | TODO | Ignore late RESPONSE frames for cancelled requests | Gateway | +| 10 | CAN-022 | TODO | Log cancelled requests with reason | Gateway | +| 11 | CAN-030 | TODO | Implement inflight request tracking in SDK | Microservice | +| 12 | CAN-031 | TODO | Create ConcurrentDictionary | Microservice | +| 13 | CAN-032 | TODO | Add handler task to tracking map | Microservice | +| 14 | CAN-033 | TODO | Implement CANCEL frame processing | Microservice | +| 15 | CAN-034 | TODO | Call cts.Cancel() on CANCEL frame | Microservice | +| 16 | CAN-035 | TODO | Remove from tracking when handler completes | Microservice | +| 17 | CAN-040 | TODO | Implement connection-close cancellation | Microservice | +| 18 | CAN-041 | TODO | Cancel all inflight on connection loss | Microservice | +| 19 | CAN-050 | TODO | Pass CancellationToken to handler interfaces | Microservice | +| 20 | CAN-051 | TODO | Document cancellation best practices for handlers | Docs | +| 21 | CAN-060 | TODO | Write integration tests: client disconnect → handler cancelled | | +| 22 | CAN-061 | TODO | Write integration tests: timeout → handler cancelled | | +| 23 | CAN-062 | TODO | Write tests: late response ignored | | + +## CancelPayload + +```csharp +public sealed class CancelPayload +{ + public string Reason { get; init; } = string.Empty; +} + +public static class CancelReasons +{ + public const string ClientDisconnected = "ClientDisconnected"; + public const string Timeout = "Timeout"; + public const string PayloadLimitExceeded = "PayloadLimitExceeded"; + public const string Shutdown = "Shutdown"; +} +``` + +## Gateway-Side: Sending CANCEL + +### On Client Disconnect +```csharp +// In TransportDispatchMiddleware +context.RequestAborted.Register(async () => +{ + await transport.SendCancelAsync( + connection, + correlationId, + CancelReasons.ClientDisconnected); +}); +``` + +### On Timeout +```csharp +using var cts = CancellationTokenSource.CreateLinkedTokenSource(context.RequestAborted); +cts.CancelAfter(decision.EffectiveTimeout); + +try +{ + var response = await transport.SendRequestAsync(..., cts.Token); +} +catch (OperationCanceledException) when (cts.IsCancellationRequested) +{ + if (!context.RequestAborted.IsCancellationRequested) + { + // Timeout, not client disconnect + await transport.SendCancelAsync(connection, correlationId, CancelReasons.Timeout); + context.Response.StatusCode = 504; + return; + } +} +``` + +### Late Response Handling +```csharp +private readonly ConcurrentDictionary _cancelledRequests = new(); + +public void MarkCancelled(Guid correlationId) +{ + _cancelledRequests[correlationId] = true; +} + +public bool IsCancelled(Guid correlationId) +{ + return _cancelledRequests.ContainsKey(correlationId); +} + +// When response arrives +if (IsCancelled(frame.CorrelationId)) +{ + _logger.LogDebug("Ignoring late response for cancelled {CorrelationId}", frame.CorrelationId); + return; // Discard +} +``` + +## Microservice-Side: Receiving CANCEL + +### Inflight Tracking +```csharp +internal sealed class InflightRequestTracker +{ + private readonly ConcurrentDictionary _inflight = new(); + + public CancellationToken Track(Guid correlationId, Task handlerTask) + { + var cts = new CancellationTokenSource(); + _inflight[correlationId] = new InflightRequest(cts, handlerTask); + return cts.Token; + } + + public void Cancel(Guid correlationId, string reason) + { + if (_inflight.TryGetValue(correlationId, out var request)) + { + request.Cts.Cancel(); + _logger.LogInformation("Cancelled {CorrelationId}: {Reason}", correlationId, reason); + } + } + + public void Complete(Guid correlationId) + { + if (_inflight.TryRemove(correlationId, out var request)) + { + request.Cts.Dispose(); + } + } + + public void CancelAll(string reason) + { + foreach (var kvp in _inflight) + { + kvp.Value.Cts.Cancel(); + } + _inflight.Clear(); + } +} +``` + +### Connection-Close Handling +```csharp +// When connection closes unexpectedly +_inflightTracker.CancelAll("ConnectionClosed"); +``` + +## Handler Cancellation Guidelines + +Handlers MUST: +1. Accept `CancellationToken` parameter +2. Pass token to all async I/O operations +3. Check `token.IsCancellationRequested` in loops +4. Stop work promptly when cancelled + +```csharp +public class ProcessDataEndpoint : IStellaEndpoint +{ + public async Task HandleAsync(DataRequest request, CancellationToken ct) + { + // Pass token to I/O + var data = await _database.QueryAsync(request.Id, ct); + + // Check in loops + foreach (var item in data) + { + ct.ThrowIfCancellationRequested(); + await ProcessItemAsync(item, ct); + } + + return new DataResponse { ... }; + } +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] CANCEL frames sent on client disconnect +2. [ ] CANCEL frames sent on timeout +3. [ ] SDK tracks inflight requests with CTS +4. [ ] SDK cancels handlers on CANCEL frame +5. [ ] Connection close cancels all inflight +6. [ ] Late responses are ignored/logged +7. [ ] Integration tests verify cancellation flow + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Cancellation is cooperative; handlers must honor the token +- CTS disposal happens on completion to avoid leaks +- Late response cleanup: entries expire after 60 seconds +- Shutdown CANCEL is best-effort (connections may close first) diff --git a/docs/router/SPRINT_7000_0005_0004_streaming.md b/docs/router/SPRINT_7000_0005_0004_streaming.md new file mode 100644 index 000000000..9324fc867 --- /dev/null +++ b/docs/router/SPRINT_7000_0005_0004_streaming.md @@ -0,0 +1,215 @@ +# Sprint 7000-0005-0004 · Protocol Features · Streaming Support + +## Topic & Scope + +Implement streaming request/response support. Large payloads stream through the gateway as `REQUEST_STREAM_DATA` and `RESPONSE_STREAM_DATA` frames rather than being fully buffered. + +**Goal:** Enable large file uploads/downloads without memory exhaustion at gateway. + +**Working directories:** +- `src/Gateway/StellaOps.Gateway.WebService/` (streaming dispatch) +- `src/__Libraries/StellaOps.Microservice/` (streaming handlers) +- `src/__Libraries/StellaOps.Router.Transport.InMemory/` (streaming frames) + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0005_0003 (cancellation - streaming needs cancel support) +- **Downstream:** SPRINT_7000_0005_0005 (payload limits) +- **Parallel work:** None. Sequential. +- **Cross-module impact:** SDK, Gateway, InMemory transport all modified. + +## Documentation Prerequisites + +- `docs/router/specs.md` (sections 5.4, 6.3, 7.5 - Streaming requirements) +- `docs/router/08-Step.md` (streaming section) +- `docs/router/implplan.md` (phase 8 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Working Directory | +|---|---------|--------|-------------|-------------------| +| 1 | STR-001 | TODO | Add SupportsStreaming flag to EndpointDescriptor | Common | +| 2 | STR-002 | TODO | Add streaming attribute support to [StellaEndpoint] | Common | +| 3 | STR-010 | TODO | Implement REQUEST_STREAM_DATA frame handling in transport | InMemory | +| 4 | STR-011 | TODO | Implement RESPONSE_STREAM_DATA frame handling in transport | InMemory | +| 5 | STR-012 | TODO | Implement end-of-stream signaling | InMemory | +| 6 | STR-020 | TODO | Implement streaming request dispatch in gateway | Gateway | +| 7 | STR-021 | TODO | Pipe HTTP body stream → REQUEST_STREAM_DATA frames | Gateway | +| 8 | STR-022 | TODO | Implement chunking for stream data | Configurable chunk size | +| 9 | STR-023 | TODO | Honor cancellation during streaming | Gateway | +| 10 | STR-030 | TODO | Implement streaming response handling in gateway | Gateway | +| 11 | STR-031 | TODO | Pipe RESPONSE_STREAM_DATA frames → HTTP response | Gateway | +| 12 | STR-032 | TODO | Set chunked transfer encoding | Gateway | +| 13 | STR-040 | TODO | Implement streaming body in RawRequestContext | Microservice | +| 14 | STR-041 | TODO | Expose Body as async-readable stream | Microservice | +| 15 | STR-042 | TODO | Implement backpressure (slow consumer) | Microservice | +| 16 | STR-050 | TODO | Implement streaming response writing | Microservice | +| 17 | STR-051 | TODO | Expose WriteBodyAsync for streaming output | Microservice | +| 18 | STR-052 | TODO | Chunk output into RESPONSE_STREAM_DATA frames | Microservice | +| 19 | STR-060 | TODO | Implement IRawStellaEndpoint streaming pattern | Microservice | +| 20 | STR-061 | TODO | Document streaming handler guidelines | Docs | +| 21 | STR-070 | TODO | Write integration tests for upload streaming | | +| 22 | STR-071 | TODO | Write integration tests for download streaming | | +| 23 | STR-072 | TODO | Write tests for cancellation during streaming | | + +## Streaming Frame Protocol + +### Request Streaming +``` +Gateway → Microservice: +1. REQUEST frame (headers, method, path, CorrelationId) +2. REQUEST_STREAM_DATA frame (chunk 1) +3. REQUEST_STREAM_DATA frame (chunk 2) +... +N. REQUEST_STREAM_DATA frame (final chunk, EndOfStream=true) +``` + +### Response Streaming +``` +Microservice → Gateway: +1. RESPONSE frame (status code, headers, CorrelationId) +2. RESPONSE_STREAM_DATA frame (chunk 1) +3. RESPONSE_STREAM_DATA frame (chunk 2) +... +N. RESPONSE_STREAM_DATA frame (final chunk, EndOfStream=true) +``` + +## StreamDataPayload + +```csharp +public sealed class StreamDataPayload +{ + public Guid CorrelationId { get; init; } + public byte[] Data { get; init; } = Array.Empty(); + public bool EndOfStream { get; init; } + public int SequenceNumber { get; init; } +} +``` + +## Gateway Streaming Dispatch + +```csharp +// In TransportDispatchMiddleware +if (endpoint.SupportsStreaming) +{ + await DispatchStreamingAsync(context, transport, decision, cancellationToken); +} +else +{ + await DispatchBufferedAsync(context, transport, decision, cancellationToken); +} + +private async Task DispatchStreamingAsync(...) +{ + // Send REQUEST header + var requestFrame = BuildRequestHeaderFrame(context); + await transport.SendFrameAsync(connection, requestFrame, ct); + + // Stream body chunks + var buffer = new byte[_options.StreamChunkSize]; + int bytesRead; + int sequence = 0; + + while ((bytesRead = await context.Request.Body.ReadAsync(buffer, ct)) > 0) + { + var streamFrame = new Frame + { + Type = FrameType.RequestStreamData, + CorrelationId = requestFrame.CorrelationId, + Payload = SerializeStreamData(buffer[..bytesRead], sequence++, endOfStream: false) + }; + await transport.SendFrameAsync(connection, streamFrame, ct); + } + + // Send end-of-stream + var endFrame = new Frame + { + Type = FrameType.RequestStreamData, + CorrelationId = requestFrame.CorrelationId, + Payload = SerializeStreamData(Array.Empty(), sequence, endOfStream: true) + }; + await transport.SendFrameAsync(connection, endFrame, ct); + + // Receive response (streaming or buffered) + await ReceiveResponseAsync(context, transport, connection, requestFrame.CorrelationId, ct); +} +``` + +## Microservice Streaming Handler + +```csharp +[StellaEndpoint("POST", "/files/upload", SupportsStreaming = true)] +public class FileUploadEndpoint : IRawStellaEndpoint +{ + public async Task HandleAsync(RawRequestContext context, CancellationToken ct) + { + // Body is a stream that reads from REQUEST_STREAM_DATA frames + var tempPath = Path.GetTempFileName(); + + await using var fileStream = File.Create(tempPath); + await context.Body.CopyToAsync(fileStream, ct); + + return RawResponse.Ok($"Uploaded {fileStream.Length} bytes"); + } +} + +[StellaEndpoint("GET", "/files/{id}/download", SupportsStreaming = true)] +public class FileDownloadEndpoint : IRawStellaEndpoint +{ + public async Task HandleAsync(RawRequestContext context, CancellationToken ct) + { + var fileId = context.PathParameters["id"]; + var filePath = _storage.GetPath(fileId); + + // Return streaming response + return new RawResponse + { + StatusCode = 200, + Body = File.OpenRead(filePath), // Stream, not buffered + Headers = new HeaderCollection + { + ["Content-Type"] = "application/octet-stream" + } + }; + } +} +``` + +## StreamingOptions + +```csharp +public sealed class StreamingOptions +{ + public int ChunkSize { get; set; } = 64 * 1024; // 64KB default + public int MaxConcurrentStreams { get; set; } = 100; + public TimeSpan StreamIdleTimeout { get; set; } = TimeSpan.FromMinutes(5); +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] REQUEST_STREAM_DATA frames implemented in transport +2. [ ] RESPONSE_STREAM_DATA frames implemented in transport +3. [ ] Gateway streams request body to microservice +4. [ ] Gateway streams response body to HTTP client +5. [ ] SDK exposes streaming Body in RawRequestContext +6. [ ] SDK can write streaming response +7. [ ] Cancellation works during streaming +8. [ ] Integration tests for upload and download streaming + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Default chunk size: 64KB (tunable) +- End-of-stream is explicit frame, not connection close +- Backpressure via channel capacity (bounded channels) +- Idle timeout cancels stuck streams +- Typed handlers don't support streaming (use IRawStellaEndpoint) diff --git a/docs/router/SPRINT_7000_0005_0005_payload_limits.md b/docs/router/SPRINT_7000_0005_0005_payload_limits.md new file mode 100644 index 000000000..dca9e5f20 --- /dev/null +++ b/docs/router/SPRINT_7000_0005_0005_payload_limits.md @@ -0,0 +1,231 @@ +# Sprint 7000-0005-0005 · Protocol Features · Payload Limits + +## Topic & Scope + +Implement payload size limits to protect the gateway from memory exhaustion. Enforce limits per-request, per-connection, and aggregate across all connections. + +**Goal:** Gateway rejects oversized payloads early and cancels streams that exceed limits mid-flight. + +**Working directory:** `src/Gateway/StellaOps.Gateway.WebService/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0005_0004 (streaming - limits apply to streams) +- **Downstream:** SPRINT_7000_0006_* (real transports) +- **Parallel work:** None. Sequential. +- **Cross-module impact:** Gateway only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 6.5 - Payload and memory protection) +- `docs/router/08-Step.md` (payload limits section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | LIM-001 | TODO | Implement PayloadLimitsMiddleware | Before dispatch | +| 2 | LIM-002 | TODO | Check Content-Length header against MaxRequestBytesPerCall | | +| 3 | LIM-003 | TODO | Return 413 for oversized Content-Length | Early rejection | +| 4 | LIM-010 | TODO | Implement per-request byte counter | | +| 5 | LIM-011 | TODO | Track bytes read during streaming | | +| 6 | LIM-012 | TODO | Abort when MaxRequestBytesPerCall exceeded mid-stream | | +| 7 | LIM-013 | TODO | Send CANCEL frame on limit breach | | +| 8 | LIM-020 | TODO | Implement per-connection byte counter | | +| 9 | LIM-021 | TODO | Track total inflight bytes per connection | | +| 10 | LIM-022 | TODO | Throttle/reject when MaxRequestBytesPerConnection exceeded | | +| 11 | LIM-030 | TODO | Implement aggregate byte counter | | +| 12 | LIM-031 | TODO | Track total inflight bytes across all connections | | +| 13 | LIM-032 | TODO | Throttle/reject when MaxAggregateInflightBytes exceeded | | +| 14 | LIM-033 | TODO | Return 503 for aggregate limit | Service overloaded | +| 15 | LIM-040 | TODO | Implement ByteCountingStream wrapper | Counts bytes as they flow | +| 16 | LIM-041 | TODO | Wire counting stream into dispatch | | +| 17 | LIM-050 | TODO | Create PayloadLimitOptions | All three limits | +| 18 | LIM-051 | TODO | Bind PayloadLimitOptions from configuration | | +| 19 | LIM-060 | TODO | Log limit breaches with request details | | +| 20 | LIM-061 | TODO | Add metrics for payload tracking | Prometheus/OpenTelemetry | +| 21 | LIM-070 | TODO | Write tests for early rejection (Content-Length) | | +| 22 | LIM-071 | TODO | Write tests for mid-stream cancellation | | +| 23 | LIM-072 | TODO | Write tests for connection limit | | +| 24 | LIM-073 | TODO | Write tests for aggregate limit | | + +## PayloadLimits + +```csharp +public sealed class PayloadLimits +{ + public long MaxRequestBytesPerCall { get; set; } = 10 * 1024 * 1024; // 10 MB + public long MaxRequestBytesPerConnection { get; set; } = 100 * 1024 * 1024; // 100 MB + public long MaxAggregateInflightBytes { get; set; } = 1024 * 1024 * 1024; // 1 GB +} +``` + +## PayloadLimitsMiddleware + +```csharp +public class PayloadLimitsMiddleware +{ + public async Task InvokeAsync(HttpContext context, IPayloadTracker tracker) + { + // Early rejection for known Content-Length + if (context.Request.ContentLength.HasValue) + { + if (context.Request.ContentLength > _limits.MaxRequestBytesPerCall) + { + _logger.LogWarning("Request rejected: Content-Length {Length} exceeds limit {Limit}", + context.Request.ContentLength, _limits.MaxRequestBytesPerCall); + context.Response.StatusCode = 413; // Payload Too Large + await context.Response.WriteAsJsonAsync(new + { + error = "Payload Too Large", + maxBytes = _limits.MaxRequestBytesPerCall + }); + return; + } + } + + // Check aggregate capacity + if (!tracker.TryReserve(context.Request.ContentLength ?? 0)) + { + context.Response.StatusCode = 503; // Service Unavailable + await context.Response.WriteAsJsonAsync(new + { + error = "Service Overloaded", + message = "Too many concurrent requests" + }); + return; + } + + try + { + await _next(context); + } + finally + { + tracker.Release(/* bytes actually used */); + } + } +} +``` + +## IPayloadTracker + +```csharp +public interface IPayloadTracker +{ + bool TryReserve(long estimatedBytes); + void Release(long actualBytes); + long CurrentInflightBytes { get; } + bool IsOverloaded { get; } +} + +internal sealed class PayloadTracker : IPayloadTracker +{ + private long _totalInflightBytes; + private readonly ConcurrentDictionary _perConnectionBytes = new(); + + public bool TryReserve(long estimatedBytes) + { + var newTotal = Interlocked.Add(ref _totalInflightBytes, estimatedBytes); + if (newTotal > _limits.MaxAggregateInflightBytes) + { + Interlocked.Add(ref _totalInflightBytes, -estimatedBytes); + return false; + } + return true; + } + + public void Release(long actualBytes) + { + Interlocked.Add(ref _totalInflightBytes, -actualBytes); + } +} +``` + +## ByteCountingStream + +```csharp +internal sealed class ByteCountingStream : Stream +{ + private readonly Stream _inner; + private readonly long _limit; + private readonly Action _onLimitExceeded; + private long _bytesRead; + + public override async ValueTask ReadAsync(Memory buffer, CancellationToken ct) + { + var read = await _inner.ReadAsync(buffer, ct); + _bytesRead += read; + + if (_bytesRead > _limit) + { + _onLimitExceeded(); + throw new PayloadLimitExceededException(_bytesRead, _limit); + } + + return read; + } + + public long BytesRead => _bytesRead; +} +``` + +## Mid-Stream Limit Breach Flow + +``` +1. Streaming request begins +2. Gateway counts bytes as they flow through ByteCountingStream +3. When _bytesRead > MaxRequestBytesPerCall: + a. Stop reading from HTTP body + b. Send CANCEL frame with reason "PayloadLimitExceeded" + c. Return 413 to client + d. Log the incident with request details +``` + +## Configuration + +```json +{ + "PayloadLimits": { + "MaxRequestBytesPerCall": 10485760, + "MaxRequestBytesPerConnection": 104857600, + "MaxAggregateInflightBytes": 1073741824 + } +} +``` + +## Error Responses + +| Condition | HTTP Status | Error Message | +|-----------|-------------|---------------| +| Content-Length exceeds per-call limit | 413 | Payload Too Large | +| Streaming exceeds per-call limit | 413 | Payload Too Large | +| Per-connection limit exceeded | 429 | Too Many Requests | +| Aggregate limit exceeded | 503 | Service Overloaded | + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] Early rejection for known oversized Content-Length +2. [ ] Mid-stream cancellation when limit exceeded +3. [ ] CANCEL frame sent on limit breach +4. [ ] Per-connection tracking works +5. [ ] Aggregate tracking works +6. [ ] All limit scenarios tested +7. [ ] Metrics/logging in place + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Default limits are conservative; tune for your environment +- Per-connection limit applies to inflight bytes, not lifetime total +- Aggregate limit prevents memory exhaustion but may cause 503s under load +- ByteCountingStream adds minimal overhead +- Limit breach is logged at Warning level diff --git a/docs/router/SPRINT_7000_0006_0001_transport_tcp.md b/docs/router/SPRINT_7000_0006_0001_transport_tcp.md new file mode 100644 index 000000000..efa16e405 --- /dev/null +++ b/docs/router/SPRINT_7000_0006_0001_transport_tcp.md @@ -0,0 +1,231 @@ +# Sprint 7000-0006-0001 · Real Transports · TCP Plugin + +## Topic & Scope + +Implement the TCP transport plugin. This is the primary production transport with length-prefixed framing for reliable frame delivery. + +**Goal:** Replace InMemory transport with production-grade TCP transport. + +**Working directory:** `src/__Libraries/StellaOps.Router.Transport.Tcp/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0005_0005 (all protocol features proven with InMemory) +- **Downstream:** SPRINT_7000_0006_0002 (TLS wraps TCP) +- **Parallel work:** None initially; UDP and RabbitMQ can start after TCP basics work +- **Cross-module impact:** None. New library only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 5 - Transport plugin requirements) +- `docs/router/09-Step.md` (TCP transport section) +- `docs/router/implplan.md` (phase 9 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | TCP-001 | TODO | Create `StellaOps.Router.Transport.Tcp` classlib project | Add to solution | +| 2 | TCP-002 | TODO | Add project reference to Router.Common | | +| 3 | TCP-010 | TODO | Implement `TcpTransportServer` : `ITransportServer` | Gateway side | +| 4 | TCP-011 | TODO | Implement TCP listener with configurable bind address/port | | +| 5 | TCP-012 | TODO | Implement connection accept loop | One connection per microservice | +| 6 | TCP-013 | TODO | Implement connection ID generation | Based on endpoint | +| 7 | TCP-020 | TODO | Implement `TcpTransportClient` : `ITransportClient` | Microservice side | +| 8 | TCP-021 | TODO | Implement connection establishment | With retry | +| 9 | TCP-022 | TODO | Implement reconnection on failure | Exponential backoff | +| 10 | TCP-030 | TODO | Implement length-prefixed framing protocol | | +| 11 | TCP-031 | TODO | Frame format: [4-byte length][payload] | Big-endian length | +| 12 | TCP-032 | TODO | Implement frame reader (async, streaming) | | +| 13 | TCP-033 | TODO | Implement frame writer (async, thread-safe) | | +| 14 | TCP-040 | TODO | Implement frame multiplexing | Multiple correlations on one socket | +| 15 | TCP-041 | TODO | Route responses by CorrelationId | | +| 16 | TCP-042 | TODO | Handle out-of-order responses | | +| 17 | TCP-050 | TODO | Implement keep-alive/ping at TCP level | | +| 18 | TCP-051 | TODO | Detect dead connections | | +| 19 | TCP-052 | TODO | Clean up on connection loss | | +| 20 | TCP-060 | TODO | Create TcpTransportOptions | BindAddress, Port, BufferSize | +| 21 | TCP-061 | TODO | Create DI registration `AddTcpTransport()` | | +| 22 | TCP-070 | TODO | Write integration tests with real sockets | | +| 23 | TCP-071 | TODO | Write tests for reconnection | | +| 24 | TCP-072 | TODO | Write tests for multiplexing | | +| 25 | TCP-073 | TODO | Write load tests | Concurrent requests | + +## Frame Format + +``` +┌─────────────────────────────────────────────────────────────┐ +│ 4 bytes (big-endian) │ N bytes (payload) │ +│ Payload Length │ [FrameType][CorrelationId][Data] │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Payload Structure +``` +Byte 0: FrameType (1 byte enum value) +Bytes 1-16: CorrelationId (16 bytes GUID) +Bytes 17+: Frame-specific data +``` + +## TcpTransportServer + +```csharp +public sealed class TcpTransportServer : ITransportServer, IAsyncDisposable +{ + private TcpListener? _listener; + private readonly ConcurrentDictionary _connections = new(); + + public async Task StartAsync(CancellationToken ct) + { + _listener = new TcpListener(_options.BindAddress, _options.Port); + _listener.Start(); + + _ = AcceptLoopAsync(ct); + } + + private async Task AcceptLoopAsync(CancellationToken ct) + { + while (!ct.IsCancellationRequested) + { + var client = await _listener!.AcceptTcpClientAsync(ct); + var connectionId = GenerateConnectionId(client); + var connection = new TcpConnection(connectionId, client, this); + _connections[connectionId] = connection; + + OnConnection?.Invoke(connectionId); + _ = connection.ReadLoopAsync(ct); + } + } + + public async Task SendFrameAsync(string connectionId, Frame frame) + { + if (_connections.TryGetValue(connectionId, out var conn)) + { + await conn.WriteFrameAsync(frame); + } + } +} +``` + +## TcpConnection (internal) + +```csharp +internal sealed class TcpConnection : IAsyncDisposable +{ + private readonly TcpClient _client; + private readonly NetworkStream _stream; + private readonly SemaphoreSlim _writeLock = new(1, 1); + + public async Task ReadLoopAsync(CancellationToken ct) + { + var lengthBuffer = new byte[4]; + + while (!ct.IsCancellationRequested) + { + // Read length prefix + await ReadExactAsync(_stream, lengthBuffer, ct); + var length = BinaryPrimitives.ReadInt32BigEndian(lengthBuffer); + + // Read payload + var payload = new byte[length]; + await ReadExactAsync(_stream, payload, ct); + + // Parse frame + var frame = ParseFrame(payload); + _server.OnFrame?.Invoke(_connectionId, frame); + } + } + + public async Task WriteFrameAsync(Frame frame) + { + var payload = SerializeFrame(frame); + var lengthBytes = new byte[4]; + BinaryPrimitives.WriteInt32BigEndian(lengthBytes, payload.Length); + + await _writeLock.WaitAsync(); + try + { + await _stream.WriteAsync(lengthBytes); + await _stream.WriteAsync(payload); + } + finally + { + _writeLock.Release(); + } + } +} +``` + +## TcpTransportOptions + +```csharp +public sealed class TcpTransportOptions +{ + public IPAddress BindAddress { get; set; } = IPAddress.Any; + public int Port { get; set; } = 5100; + public int ReceiveBufferSize { get; set; } = 64 * 1024; + public int SendBufferSize { get; set; } = 64 * 1024; + public TimeSpan KeepAliveInterval { get; set; } = TimeSpan.FromSeconds(30); + public TimeSpan ConnectTimeout { get; set; } = TimeSpan.FromSeconds(10); + public int MaxReconnectAttempts { get; set; } = 10; + public TimeSpan MaxReconnectBackoff { get; set; } = TimeSpan.FromMinutes(1); +} +``` + +## Multiplexing + +One TCP connection carries multiple concurrent requests: +- Each request has unique CorrelationId +- Responses can arrive in any order +- `ConcurrentDictionary>` for pending requests + +```csharp +internal sealed class PendingRequestTracker +{ + private readonly ConcurrentDictionary> _pending = new(); + + public Task TrackRequest(Guid correlationId, CancellationToken ct) + { + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + ct.Register(() => tcs.TrySetCanceled()); + _pending[correlationId] = tcs; + return tcs.Task; + } + + public void CompleteRequest(Guid correlationId, Frame response) + { + if (_pending.TryRemove(correlationId, out var tcs)) + { + tcs.TrySetResult(response); + } + } +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] TcpTransportServer accepts connections and reads frames +2. [ ] TcpTransportClient connects and sends frames +3. [ ] Length-prefixed framing works correctly +4. [ ] Multiplexing routes responses to correct callers +5. [ ] Reconnection with backoff works +6. [ ] Keep-alive detects dead connections +7. [ ] Integration tests pass +8. [ ] Load tests demonstrate concurrent request handling + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Big-endian length prefix for network byte order +- Maximum frame size: 16 MB (configurable) +- One socket per microservice instance (not per request) +- Write lock prevents interleaved frames +- No compression at transport level (consider adding later) diff --git a/docs/router/SPRINT_7000_0006_0002_transport_tls.md b/docs/router/SPRINT_7000_0006_0002_transport_tls.md new file mode 100644 index 000000000..35a06fe67 --- /dev/null +++ b/docs/router/SPRINT_7000_0006_0002_transport_tls.md @@ -0,0 +1,227 @@ +# Sprint 7000-0006-0002 · Real Transports · TLS/mTLS Plugin + +## Topic & Scope + +Implement the TLS transport plugin (Certificate transport). Wraps TCP with TLS encryption and supports optional mutual TLS (mTLS) for verifiable peer identity. + +**Goal:** Secure transport with certificate-based authentication. + +**Working directory:** `src/__Libraries/StellaOps.Router.Transport.Tls/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0006_0001 (TCP transport - this wraps it) +- **Downstream:** None. Parallel with UDP and RabbitMQ. +- **Parallel work:** Can run in parallel with UDP and RabbitMQ sprints. +- **Cross-module impact:** None. New library only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 5 - Certificate transport requirements) +- `docs/router/09-Step.md` (TLS transport section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | TLS-001 | TODO | Create `StellaOps.Router.Transport.Tls` classlib project | Add to solution | +| 2 | TLS-002 | TODO | Add project reference to Router.Common and Transport.Tcp | Wraps TCP | +| 3 | TLS-010 | TODO | Implement `TlsTransportServer` : `ITransportServer` | Gateway side | +| 4 | TLS-011 | TODO | Wrap TcpListener with SslStream | | +| 5 | TLS-012 | TODO | Configure server certificate | | +| 6 | TLS-013 | TODO | Implement optional client certificate validation (mTLS) | | +| 7 | TLS-020 | TODO | Implement `TlsTransportClient` : `ITransportClient` | Microservice side | +| 8 | TLS-021 | TODO | Wrap TcpClient with SslStream | | +| 9 | TLS-022 | TODO | Implement server certificate validation | | +| 10 | TLS-023 | TODO | Implement client certificate presentation (mTLS) | | +| 11 | TLS-030 | TODO | Create TlsTransportOptions | Certificates, validation mode | +| 12 | TLS-031 | TODO | Support PEM file paths | | +| 13 | TLS-032 | TODO | Support PFX file paths with password | | +| 14 | TLS-033 | TODO | Support X509Certificate2 objects | For programmatic use | +| 15 | TLS-040 | TODO | Implement certificate chain validation | | +| 16 | TLS-041 | TODO | Implement certificate revocation checking (optional) | | +| 17 | TLS-042 | TODO | Implement hostname verification | | +| 18 | TLS-050 | TODO | Create DI registration `AddTlsTransport()` | | +| 19 | TLS-051 | TODO | Support certificate hot-reload | For rotation | +| 20 | TLS-060 | TODO | Write integration tests with self-signed certs | | +| 21 | TLS-061 | TODO | Write tests for mTLS | | +| 22 | TLS-062 | TODO | Write tests for cert validation failures | | + +## TlsTransportOptions + +```csharp +public sealed class TlsTransportOptions +{ + // Server-side (Gateway) + public X509Certificate2? ServerCertificate { get; set; } + public string? ServerCertificatePath { get; set; } // PEM or PFX + public string? ServerCertificateKeyPath { get; set; } // PEM private key + public string? ServerCertificatePassword { get; set; } // For PFX + + // Client-side (Microservice) + public X509Certificate2? ClientCertificate { get; set; } + public string? ClientCertificatePath { get; set; } + public string? ClientCertificateKeyPath { get; set; } + public string? ClientCertificatePassword { get; set; } + + // Validation + public bool RequireClientCertificate { get; set; } = false; // mTLS + public bool AllowSelfSigned { get; set; } = false; // Dev only + public bool CheckCertificateRevocation { get; set; } = false; + public string? ExpectedServerHostname { get; set; } // For SNI + + // Protocol + public SslProtocols EnabledProtocols { get; set; } = SslProtocols.Tls12 | SslProtocols.Tls13; +} +``` + +## Server Implementation + +```csharp +public sealed class TlsTransportServer : ITransportServer +{ + public async Task StartAsync(CancellationToken ct) + { + _listener = new TcpListener(_tcpOptions.BindAddress, _tcpOptions.Port); + _listener.Start(); + + _ = AcceptLoopAsync(ct); + } + + private async Task AcceptLoopAsync(CancellationToken ct) + { + while (!ct.IsCancellationRequested) + { + var tcpClient = await _listener!.AcceptTcpClientAsync(ct); + + var sslStream = new SslStream( + tcpClient.GetStream(), + leaveInnerStreamOpen: false, + userCertificateValidationCallback: ValidateClientCertificate); + + try + { + await sslStream.AuthenticateAsServerAsync(new SslServerAuthenticationOptions + { + ServerCertificate = _options.ServerCertificate, + ClientCertificateRequired = _options.RequireClientCertificate, + EnabledSslProtocols = _options.EnabledProtocols, + CertificateRevocationCheckMode = _options.CheckCertificateRevocation + ? X509RevocationMode.Online + : X509RevocationMode.NoCheck + }, ct); + + // Connection authenticated, continue with frame reading + var connectionId = GenerateConnectionId(tcpClient, sslStream.RemoteCertificate); + var connection = new TlsConnection(connectionId, tcpClient, sslStream, this); + _connections[connectionId] = connection; + + OnConnection?.Invoke(connectionId); + _ = connection.ReadLoopAsync(ct); + } + catch (AuthenticationException ex) + { + _logger.LogWarning(ex, "TLS handshake failed from {RemoteEndpoint}", + tcpClient.Client.RemoteEndPoint); + tcpClient.Dispose(); + } + } + } + + private bool ValidateClientCertificate( + object sender, X509Certificate? certificate, + X509Chain? chain, SslPolicyErrors errors) + { + if (!_options.RequireClientCertificate && certificate == null) + return true; + + if (_options.AllowSelfSigned) + return true; + + return errors == SslPolicyErrors.None; + } +} +``` + +## Client Implementation + +```csharp +public sealed class TlsTransportClient : ITransportClient +{ + public async Task ConnectAsync(CancellationToken ct) + { + var tcpClient = new TcpClient(); + await tcpClient.ConnectAsync(_options.Host, _options.Port, ct); + + var sslStream = new SslStream( + tcpClient.GetStream(), + leaveInnerStreamOpen: false, + userCertificateValidationCallback: ValidateServerCertificate); + + await sslStream.AuthenticateAsClientAsync(new SslClientAuthenticationOptions + { + TargetHost = _options.ExpectedServerHostname ?? _options.Host, + ClientCertificates = _options.ClientCertificate != null + ? new X509CertificateCollection { _options.ClientCertificate } + : null, + EnabledSslProtocols = _options.EnabledProtocols, + CertificateRevocationCheckMode = _options.CheckCertificateRevocation + ? X509RevocationMode.Online + : X509RevocationMode.NoCheck + }, ct); + + // Connected and authenticated + _stream = sslStream; + _tcpClient = tcpClient; + } +} +``` + +## mTLS Identity Extraction + +With mTLS, the microservice identity can be verified from the client certificate: + +```csharp +internal string ExtractIdentityFromCertificate(X509Certificate2 cert) +{ + // Common patterns: + // 1. Common Name (CN) + var cn = cert.GetNameInfo(X509NameType.SimpleName, forIssuer: false); + + // 2. Subject Alternative Name (SAN) - DNS or URI + var san = cert.Extensions["2.5.29.17"]; // SAN OID + + // 3. Custom extension for service identity + // ... + + return cn; +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] TlsTransportServer accepts TLS connections +2. [ ] TlsTransportClient connects with TLS +3. [ ] Server and client certificate configuration works +4. [ ] mTLS (mutual TLS) works when enabled +5. [ ] Certificate validation works (chain, revocation, hostname) +6. [ ] AllowSelfSigned works for dev environments +7. [ ] Certificate hot-reload works +8. [ ] Integration tests pass + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- TLS 1.2 and 1.3 enabled by default (1.0/1.1 disabled) +- Certificate revocation checking is optional (can slow down) +- mTLS is optional (RequireClientCertificate = false by default) +- Identity extraction from cert is customizable +- Certificate hot-reload uses file system watcher diff --git a/docs/router/SPRINT_7000_0006_0003_transport_udp.md b/docs/router/SPRINT_7000_0006_0003_transport_udp.md new file mode 100644 index 000000000..59156138f --- /dev/null +++ b/docs/router/SPRINT_7000_0006_0003_transport_udp.md @@ -0,0 +1,221 @@ +# Sprint 7000-0006-0003 · Real Transports · UDP Plugin + +## Topic & Scope + +Implement the UDP transport plugin for small, bounded payloads. UDP provides low-latency communication for simple operations but cannot handle streaming or large payloads. + +**Goal:** Fast transport for small, idempotent operations. + +**Working directory:** `src/__Libraries/StellaOps.Router.Transport.Udp/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0006_0001 (TCP transport for reference patterns) +- **Downstream:** None. +- **Parallel work:** Can run in parallel with TLS and RabbitMQ sprints. +- **Cross-module impact:** None. New library only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 5 - UDP transport requirements) +- `docs/router/09-Step.md` (UDP transport section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | UDP-001 | TODO | Create `StellaOps.Router.Transport.Udp` classlib project | Add to solution | +| 2 | UDP-002 | TODO | Add project reference to Router.Common | | +| 3 | UDP-010 | TODO | Implement `UdpTransportServer` : `ITransportServer` | Gateway side | +| 4 | UDP-011 | TODO | Implement UDP socket listener | | +| 5 | UDP-012 | TODO | Implement datagram receive loop | | +| 6 | UDP-013 | TODO | Route received datagrams by source address | | +| 7 | UDP-020 | TODO | Implement `UdpTransportClient` : `ITransportClient` | Microservice side | +| 8 | UDP-021 | TODO | Implement UDP socket for sending | | +| 9 | UDP-022 | TODO | Implement receive for responses | | +| 10 | UDP-030 | TODO | Enforce MaxRequestBytesPerCall limit | Single datagram | +| 11 | UDP-031 | TODO | Reject oversized payloads | | +| 12 | UDP-032 | TODO | Set maximum datagram size from config | | +| 13 | UDP-040 | TODO | Implement request/response correlation | Per-datagram matching | +| 14 | UDP-041 | TODO | Track pending requests with timeout | | +| 15 | UDP-042 | TODO | Handle out-of-order responses | | +| 16 | UDP-050 | TODO | Implement HELLO via UDP | | +| 17 | UDP-051 | TODO | Implement HEARTBEAT via UDP | | +| 18 | UDP-052 | TODO | Implement REQUEST/RESPONSE via UDP | No streaming | +| 19 | UDP-060 | TODO | Disable streaming for UDP transport | | +| 20 | UDP-061 | TODO | Reject endpoints with SupportsStreaming | | +| 21 | UDP-062 | TODO | Log streaming attempts as errors | | +| 22 | UDP-070 | TODO | Create UdpTransportOptions | BindAddress, Port, MaxDatagramSize | +| 23 | UDP-071 | TODO | Create DI registration `AddUdpTransport()` | | +| 24 | UDP-080 | TODO | Write integration tests | | +| 25 | UDP-081 | TODO | Write tests for size limit enforcement | | + +## Constraints + +From specs.md: +> UDP transport: +> * MUST be used only for small/bounded payloads (no unbounded streaming). +> * MUST respect configured `MaxRequestBytesPerCall`. + +- **No streaming:** REQUEST_STREAM_DATA and RESPONSE_STREAM_DATA are not supported +- **Size limit:** Entire request must fit in one datagram +- **Best for:** Ping, health checks, small queries, commands + +## Datagram Format + +Single UDP datagram = single frame: +``` +┌─────────────────────────────────────────────────────────────┐ +│ FrameType (1 byte) │ CorrelationId (16 bytes) │ Data (N) │ +└─────────────────────────────────────────────────────────────┘ +``` + +Maximum datagram size: Typically 65,507 bytes (IPv4) but practical limit ~1400 for MTU safety. + +## UdpTransportServer + +```csharp +public sealed class UdpTransportServer : ITransportServer +{ + private UdpClient? _listener; + private readonly ConcurrentDictionary _endpointToConnectionId = new(); + + public async Task StartAsync(CancellationToken ct) + { + _listener = new UdpClient(_options.Port); + _ = ReceiveLoopAsync(ct); + } + + private async Task ReceiveLoopAsync(CancellationToken ct) + { + while (!ct.IsCancellationRequested) + { + var result = await _listener!.ReceiveAsync(ct); + var remoteEndpoint = result.RemoteEndPoint; + var data = result.Buffer; + + // Parse frame + var frame = ParseFrame(data); + + // Get or create connection ID for this endpoint + var connectionId = _endpointToConnectionId.GetOrAdd( + remoteEndpoint, + ep => $"udp-{ep}"); + + // Handle HELLO specially to register connection + if (frame.Type == FrameType.Hello) + { + OnConnection?.Invoke(connectionId); + } + + OnFrame?.Invoke(connectionId, frame); + } + } + + public async Task SendFrameAsync(string connectionId, Frame frame) + { + var endpoint = ResolveEndpoint(connectionId); + var data = SerializeFrame(frame); + + if (data.Length > _options.MaxDatagramSize) + throw new PayloadTooLargeException(data.Length, _options.MaxDatagramSize); + + await _listener!.SendAsync(data, data.Length, endpoint); + } +} +``` + +## UdpTransportClient + +```csharp +public sealed class UdpTransportClient : ITransportClient +{ + private UdpClient? _client; + private readonly ConcurrentDictionary> _pending = new(); + + public async Task ConnectAsync(string host, int port, CancellationToken ct) + { + _client = new UdpClient(); + _client.Connect(host, port); + _ = ReceiveLoopAsync(ct); + } + + public async Task SendRequestAsync( + ConnectionState connection, Frame request, + TimeSpan timeout, CancellationToken ct) + { + var data = SerializeFrame(request); + + if (data.Length > _options.MaxDatagramSize) + throw new PayloadTooLargeException(data.Length, _options.MaxDatagramSize); + + var tcs = new TaskCompletionSource(); + using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct); + cts.CancelAfter(timeout); + cts.Token.Register(() => tcs.TrySetCanceled()); + + _pending[request.CorrelationId] = tcs; + + await _client!.SendAsync(data, data.Length); + + return await tcs.Task; + } + + // Streaming not supported + public Task SendStreamingAsync(...) => throw new NotSupportedException( + "UDP transport does not support streaming. Use TCP or TLS transport."); +} +``` + +## UdpTransportOptions + +```csharp +public sealed class UdpTransportOptions +{ + public IPAddress BindAddress { get; set; } = IPAddress.Any; + public int Port { get; set; } = 5101; + public int MaxDatagramSize { get; set; } = 8192; // Conservative default + public TimeSpan DefaultTimeout { get; set; } = TimeSpan.FromSeconds(5); + public bool AllowBroadcast { get; set; } = false; +} +``` + +## Use Cases + +UDP is appropriate for: +- **Health checks:** Small, frequent, non-critical +- **Metrics collection:** Fire-and-forget updates +- **Cache invalidation:** Small notifications +- **DNS-like lookups:** Quick request/response + +UDP is NOT appropriate for: +- **File uploads/downloads:** Requires streaming +- **Large requests/responses:** Exceeds datagram limit +- **Critical operations:** No delivery guarantee +- **Ordered sequences:** Out-of-order possible + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] UdpTransportServer receives datagrams +2. [ ] UdpTransportClient sends and receives +3. [ ] Size limits enforced +4. [ ] Streaming disabled/rejected +5. [ ] Request/response correlation works +6. [ ] Integration tests pass + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Default max datagram: 8KB (well under MTU) +- No retry/reliability - UDP is fire-and-forget +- Connection is logical (based on source IP:port) +- Timeout is per-request, no keepalive needed +- CANCEL is sent but may not arrive (best effort) diff --git a/docs/router/SPRINT_7000_0006_0004_transport_rabbitmq.md b/docs/router/SPRINT_7000_0006_0004_transport_rabbitmq.md new file mode 100644 index 000000000..03561bca8 --- /dev/null +++ b/docs/router/SPRINT_7000_0006_0004_transport_rabbitmq.md @@ -0,0 +1,218 @@ +# Sprint 7000-0006-0004 · Real Transports · RabbitMQ Plugin + +## Topic & Scope + +Implement the RabbitMQ transport plugin. Uses message queue infrastructure for reliable asynchronous communication with built-in durability options. + +**Goal:** Reliable transport using existing message queue infrastructure. + +**Working directory:** `src/__Libraries/StellaOps.Router.Transport.RabbitMq/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0006_0001 (TCP transport for reference patterns) +- **Downstream:** None. +- **Parallel work:** Can run in parallel with TLS and UDP sprints. +- **Cross-module impact:** None. New library only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 5 - RabbitMQ transport requirements) +- `docs/router/09-Step.md` (RabbitMQ transport section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | RMQ-001 | TODO | Create `StellaOps.Router.Transport.RabbitMq` classlib project | Add to solution | +| 2 | RMQ-002 | TODO | Add project reference to Router.Common | | +| 3 | RMQ-003 | TODO | Add RabbitMQ.Client NuGet package | | +| 4 | RMQ-010 | TODO | Implement `RabbitMqTransportServer` : `ITransportServer` | Gateway side | +| 5 | RMQ-011 | TODO | Implement connection to RabbitMQ broker | | +| 6 | RMQ-012 | TODO | Create request queue per gateway node | | +| 7 | RMQ-013 | TODO | Create response exchange for routing | | +| 8 | RMQ-014 | TODO | Implement consumer for incoming frames | | +| 9 | RMQ-020 | TODO | Implement `RabbitMqTransportClient` : `ITransportClient` | Microservice side | +| 10 | RMQ-021 | TODO | Implement connection to RabbitMQ broker | | +| 11 | RMQ-022 | TODO | Create response queue per microservice instance | | +| 12 | RMQ-023 | TODO | Bind response queue to exchange | | +| 13 | RMQ-030 | TODO | Implement queue/exchange naming convention | | +| 14 | RMQ-031 | TODO | Format: `stella.router.{nodeId}.requests` | Gateway request queue | +| 15 | RMQ-032 | TODO | Format: `stella.router.responses` | Response exchange | +| 16 | RMQ-033 | TODO | Routing key: `{connectionId}` | For response routing | +| 17 | RMQ-040 | TODO | Use CorrelationId for request/response matching | BasicProperties | +| 18 | RMQ-041 | TODO | Set ReplyTo for response routing | | +| 19 | RMQ-042 | TODO | Implement pending request tracking | | +| 20 | RMQ-050 | TODO | Implement HELLO via RabbitMQ | | +| 21 | RMQ-051 | TODO | Implement HEARTBEAT via RabbitMQ | | +| 22 | RMQ-052 | TODO | Implement REQUEST/RESPONSE via RabbitMQ | | +| 23 | RMQ-053 | TODO | Implement CANCEL via RabbitMQ | | +| 24 | RMQ-060 | TODO | Implement streaming via RabbitMQ (optional) | Chunked messages | +| 25 | RMQ-061 | TODO | Consider at-most-once delivery semantics | | +| 26 | RMQ-070 | TODO | Create RabbitMqTransportOptions | Connection, queues, durability | +| 27 | RMQ-071 | TODO | Create DI registration `AddRabbitMqTransport()` | | +| 28 | RMQ-080 | TODO | Write integration tests with local RabbitMQ | | +| 29 | RMQ-081 | TODO | Write tests for connection recovery | | + +## Queue/Exchange Topology + +``` + ┌─────────────────────────┐ + Microservice ──────────►│ stella.router.requests │ + (HELLO, HEARTBEAT, │ (Direct Exchange) │ + RESPONSE) └───────────┬─────────────┘ + │ routing_key = nodeId + ▼ + ┌─────────────────────────┐ + │ stella.gw.{nodeId}.in │◄─── Gateway consumes + │ (Queue) │ + └─────────────────────────┘ + + Gateway ───────────────►┌─────────────────────────┐ + (REQUEST, CANCEL) │ stella.router.responses │ + │ (Topic Exchange) │ + └───────────┬─────────────┘ + │ routing_key = instanceId + ▼ + ┌─────────────────────────┐ + │ stella.svc.{instanceId} │◄─── Microservice consumes + │ (Queue) │ + └─────────────────────────┘ +``` + +## Message Properties + +```csharp +var properties = channel.CreateBasicProperties(); +properties.CorrelationId = correlationId.ToString(); +properties.ReplyTo = replyQueueName; +properties.Type = frameType.ToString(); +properties.Timestamp = new AmqpTimestamp(DateTimeOffset.UtcNow.ToUnixTimeSeconds()); +properties.Expiration = timeout.TotalMilliseconds.ToString(); +properties.DeliveryMode = 1; // Non-persistent (or 2 for persistent) +``` + +## RabbitMqTransportOptions + +```csharp +public sealed class RabbitMqTransportOptions +{ + // Connection + public string HostName { get; set; } = "localhost"; + public int Port { get; set; } = 5672; + public string VirtualHost { get; set; } = "/"; + public string UserName { get; set; } = "guest"; + public string Password { get; set; } = "guest"; + + // TLS + public bool UseSsl { get; set; } = false; + public string? SslCertPath { get; set; } + + // Queues + public bool DurableQueues { get; set; } = false; // For dev, true for prod + public bool AutoDeleteQueues { get; set; } = true; // Clean up on disconnect + public int PrefetchCount { get; set; } = 10; // Concurrent messages + + // Naming + public string ExchangePrefix { get; set; } = "stella.router"; + public string QueuePrefix { get; set; } = "stella"; +} +``` + +## RabbitMqTransportServer + +```csharp +public sealed class RabbitMqTransportServer : ITransportServer +{ + private IConnection? _connection; + private IModel? _channel; + private readonly string _requestQueueName; + + public async Task StartAsync(CancellationToken ct) + { + var factory = new ConnectionFactory + { + HostName = _options.HostName, + Port = _options.Port, + VirtualHost = _options.VirtualHost, + UserName = _options.UserName, + Password = _options.Password + }; + + _connection = factory.CreateConnection(); + _channel = _connection.CreateModel(); + + // Declare exchanges + _channel.ExchangeDeclare(_options.RequestExchange, ExchangeType.Direct, durable: true); + _channel.ExchangeDeclare(_options.ResponseExchange, ExchangeType.Topic, durable: true); + + // Declare and bind request queue + _requestQueueName = $"{_options.QueuePrefix}.gw.{_nodeId}.in"; + _channel.QueueDeclare(_requestQueueName, + durable: _options.DurableQueues, + exclusive: false, + autoDelete: _options.AutoDeleteQueues); + _channel.QueueBind(_requestQueueName, _options.RequestExchange, routingKey: _nodeId); + + // Start consuming + var consumer = new EventingBasicConsumer(_channel); + consumer.Received += OnMessageReceived; + _channel.BasicConsume(_requestQueueName, autoAck: true, consumer); + } + + private void OnMessageReceived(object? sender, BasicDeliverEventArgs e) + { + var frame = ParseFrame(e.Body.ToArray(), e.BasicProperties); + var connectionId = ExtractConnectionId(e.BasicProperties); + + if (frame.Type == FrameType.Hello) + { + OnConnection?.Invoke(connectionId); + } + + OnFrame?.Invoke(connectionId, frame); + } +} +``` + +## At-Most-Once Semantics + +From specs.md: +> * Guarantee at-most-once semantics where practical. + +This means: +- Auto-ack messages (no redelivery on failure) +- Non-durable queues/messages by default +- Idempotent handlers are caller's responsibility + +For at-least-once (if needed later): +- Manual ack after processing +- Durable queues and persistent messages +- Deduplication in handler + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] RabbitMqTransportServer connects and consumes +2. [ ] RabbitMqTransportClient publishes and consumes +3. [ ] Queue/exchange topology correct +4. [ ] CorrelationId matching works +5. [ ] HELLO/HEARTBEAT/REQUEST/RESPONSE flow works +6. [ ] Connection recovery works +7. [ ] Integration tests pass with local RabbitMQ + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Auto-delete queues by default (clean up on disconnect) +- Non-persistent messages by default (speed over durability) +- Prefetch count limits concurrent processing +- Connection recovery uses RabbitMQ.Client built-in recovery +- Streaming is optional (can chunk large messages) diff --git a/docs/router/SPRINT_7000_0007_0001_router_config.md b/docs/router/SPRINT_7000_0007_0001_router_config.md new file mode 100644 index 000000000..a24789f3a --- /dev/null +++ b/docs/router/SPRINT_7000_0007_0001_router_config.md @@ -0,0 +1,220 @@ +# Sprint 7000-0007-0001 · Configuration · Router Config Library + +## Topic & Scope + +Implement the Router.Config library with YAML configuration support and hot-reload. Provides centralized configuration for services, endpoints, static instances, and payload limits. + +**Goal:** Configuration-driven router behavior with runtime updates. + +**Working directory:** `src/__Libraries/StellaOps.Router.Config/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0006_* (all transports - config applies to transport selection) +- **Downstream:** SPRINT_7000_0007_0002 (microservice YAML) +- **Parallel work:** None. Sequential. +- **Cross-module impact:** Gateway consumes this library. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 11 - Configuration and YAML requirements) +- `docs/router/10-Step.md` (configuration section) +- `docs/router/implplan.md` (phase 10 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | CFG-001 | TODO | Implement `RouterConfig` root object | | +| 2 | CFG-002 | TODO | Implement `ServiceConfig` for service definitions | | +| 3 | CFG-003 | TODO | Implement `EndpointConfig` for endpoint definitions | | +| 4 | CFG-004 | TODO | Implement `StaticInstanceConfig` for known instances | | +| 5 | CFG-010 | TODO | Implement YAML configuration binding | YamlDotNet | +| 6 | CFG-011 | TODO | Implement JSON configuration binding | System.Text.Json | +| 7 | CFG-012 | TODO | Implement environment variable overrides | | +| 8 | CFG-013 | TODO | Support configuration layering (base + overrides) | | +| 9 | CFG-020 | TODO | Implement hot-reload via IOptionsMonitor | | +| 10 | CFG-021 | TODO | Implement file system watcher for YAML | | +| 11 | CFG-022 | TODO | Trigger routing state refresh on config change | | +| 12 | CFG-023 | TODO | Handle errors in reloaded config (keep previous) | | +| 13 | CFG-030 | TODO | Implement `IRouterConfigProvider` interface | | +| 14 | CFG-031 | TODO | Implement validation on load | Required fields, format | +| 15 | CFG-032 | TODO | Log configuration changes | | +| 16 | CFG-040 | TODO | Create DI registration `AddRouterConfig()` | | +| 17 | CFG-041 | TODO | Integrate with Gateway startup | | +| 18 | CFG-050 | TODO | Write sample router.yaml | | +| 19 | CFG-051 | TODO | Write unit tests for binding | | +| 20 | CFG-052 | TODO | Write tests for hot-reload | | + +## RouterConfig Structure + +```csharp +public sealed class RouterConfig +{ + public IList Services { get; init; } = new List(); + public IList StaticInstances { get; init; } = new List(); + public PayloadLimits PayloadLimits { get; init; } = new(); + public RoutingOptions Routing { get; init; } = new(); +} + +public sealed class ServiceConfig +{ + public string Name { get; init; } = string.Empty; + public string DefaultVersion { get; init; } = "1.0.0"; + public TransportType DefaultTransport { get; init; } = TransportType.Tcp; + public IList Endpoints { get; init; } = new List(); +} + +public sealed class EndpointConfig +{ + public string Method { get; init; } = "GET"; + public string Path { get; init; } = string.Empty; + public TimeSpan? DefaultTimeout { get; init; } + public IList RequiringClaims { get; init; } = new List(); + public bool? SupportsStreaming { get; init; } +} + +public sealed class StaticInstanceConfig +{ + public string ServiceName { get; init; } = string.Empty; + public string Version { get; init; } = string.Empty; + public string Region { get; init; } = string.Empty; + public string Host { get; init; } = string.Empty; + public int Port { get; init; } + public TransportType Transport { get; init; } +} +``` + +## Sample router.yaml + +```yaml +# Router configuration +payloadLimits: + maxRequestBytesPerCall: 10485760 # 10 MB + maxRequestBytesPerConnection: 104857600 + maxAggregateInflightBytes: 1073741824 + +routing: + neighborRegions: + - eu2 + - us1 + tieBreaker: roundRobin + +services: + - name: billing + defaultVersion: "1.0.0" + defaultTransport: tcp + endpoints: + - method: POST + path: /invoices + defaultTimeout: 30s + requiringClaims: + - type: role + value: billing-admin + - method: GET + path: /invoices/{id} + defaultTimeout: 5s + + - name: inventory + defaultVersion: "2.1.0" + defaultTransport: tls + endpoints: + - method: GET + path: /items + supportsStreaming: true + +# Optional: static instances (usually discovered via HELLO) +staticInstances: + - serviceName: billing + version: "1.0.0" + region: eu1 + host: billing-eu1-01.internal + port: 5100 + transport: tcp +``` + +## Hot-Reload Implementation + +```csharp +public sealed class RouterConfigProvider : IRouterConfigProvider, IDisposable +{ + private RouterConfig _current; + private readonly FileSystemWatcher? _watcher; + private readonly ILogger _logger; + + public RouterConfigProvider(IOptions options, ILogger logger) + { + _logger = logger; + _current = LoadConfig(options.Value.ConfigPath); + + if (options.Value.EnableHotReload) + { + _watcher = new FileSystemWatcher(Path.GetDirectoryName(options.Value.ConfigPath)!) + { + Filter = Path.GetFileName(options.Value.ConfigPath), + NotifyFilter = NotifyFilters.LastWrite + }; + _watcher.Changed += OnConfigFileChanged; + _watcher.EnableRaisingEvents = true; + } + } + + private void OnConfigFileChanged(object sender, FileSystemEventArgs e) + { + try + { + var newConfig = LoadConfig(e.FullPath); + ValidateConfig(newConfig); + + var previous = _current; + _current = newConfig; + + _logger.LogInformation("Router configuration reloaded successfully"); + ConfigurationChanged?.Invoke(this, new ConfigChangedEventArgs(previous, newConfig)); + } + catch (Exception ex) + { + _logger.LogError(ex, "Failed to reload configuration, keeping previous"); + } + } + + public RouterConfig Current => _current; + public event EventHandler? ConfigurationChanged; +} +``` + +## Configuration Precedence + +1. **Code defaults** (in Common library) +2. **YAML configuration** (router.yaml) +3. **JSON configuration** (appsettings.json) +4. **Environment variables** (STELLAOPS_ROUTER_*) +5. **Microservice HELLO** (dynamic registration) +6. **Authority overrides** (for RequiringClaims) + +Later sources override earlier ones. + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] RouterConfig binds from YAML correctly +2. [ ] JSON and environment variables also work +3. [ ] Hot-reload updates config without restart +4. [ ] Validation rejects invalid config +5. [ ] Sample router.yaml documents all options +6. [ ] DI integration works with Gateway + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- YamlDotNet for YAML parsing (mature, well-supported) +- File watcher has debounce to avoid multiple reloads +- Invalid hot-reload keeps previous config (fail-safe) +- Static instances are optional (most discover via HELLO) diff --git a/docs/router/SPRINT_7000_0007_0002_microservice_yaml.md b/docs/router/SPRINT_7000_0007_0002_microservice_yaml.md new file mode 100644 index 000000000..9b95d9770 --- /dev/null +++ b/docs/router/SPRINT_7000_0007_0002_microservice_yaml.md @@ -0,0 +1,213 @@ +# Sprint 7000-0007-0002 · Configuration · Microservice YAML Config + +## Topic & Scope + +Implement YAML configuration support for microservices. Allows endpoint-level overrides for timeouts, RequiringClaims, and streaming flags without code changes. + +**Goal:** Microservices can customize endpoint behavior via YAML without rebuilding. + +**Working directory:** `src/__Libraries/StellaOps.Microservice/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0007_0001 (Router.Config patterns) +- **Downstream:** SPRINT_7000_0008_0001 (Authority integration) +- **Parallel work:** None. Sequential. +- **Cross-module impact:** Microservice SDK only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (sections 7.3, 11 - Microservice config requirements) +- `docs/router/10-Step.md` (microservice YAML section) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | MCFG-001 | TODO | Create `MicroserviceEndpointConfig` class | | +| 2 | MCFG-002 | TODO | Create `MicroserviceYamlConfig` root object | | +| 3 | MCFG-010 | TODO | Implement YAML loading from ConfigFilePath | | +| 4 | MCFG-011 | TODO | Implement endpoint matching by (Method, Path) | | +| 5 | MCFG-012 | TODO | Implement override merge with code defaults | | +| 6 | MCFG-020 | TODO | Override DefaultTimeout per endpoint | | +| 7 | MCFG-021 | TODO | Override RequiringClaims per endpoint | | +| 8 | MCFG-022 | TODO | Override SupportsStreaming per endpoint | | +| 9 | MCFG-030 | TODO | Implement precedence: code → YAML | | +| 10 | MCFG-031 | TODO | Document that YAML cannot create endpoints (only modify) | | +| 11 | MCFG-032 | TODO | Warn on YAML entries that don't match code endpoints | | +| 12 | MCFG-040 | TODO | Integrate with endpoint discovery | | +| 13 | MCFG-041 | TODO | Apply overrides before HELLO construction | | +| 14 | MCFG-050 | TODO | Create sample microservice.yaml | | +| 15 | MCFG-051 | TODO | Write unit tests for merge logic | | +| 16 | MCFG-052 | TODO | Write tests for precedence | | + +## MicroserviceYamlConfig Structure + +```csharp +public sealed class MicroserviceYamlConfig +{ + public IList Endpoints { get; init; } = new List(); +} + +public sealed class EndpointOverrideConfig +{ + public string Method { get; init; } = string.Empty; + public string Path { get; init; } = string.Empty; + public TimeSpan? DefaultTimeout { get; init; } + public bool? SupportsStreaming { get; init; } + public IList? RequiringClaims { get; init; } +} +``` + +## Sample microservice.yaml + +```yaml +# Microservice endpoint overrides +# Note: Only modifies endpoints declared in code; cannot create new endpoints + +endpoints: + - method: POST + path: /invoices + defaultTimeout: 60s # Override code default of 30s + requiringClaims: + - type: role + value: invoice-creator + - type: department + value: finance + + - method: GET + path: /invoices/{id} + defaultTimeout: 10s + + - method: POST + path: /reports/generate + supportsStreaming: true # Enable streaming for large reports + defaultTimeout: 300s # 5 minutes for long-running reports +``` + +## Merge Logic + +```csharp +internal sealed class EndpointOverrideMerger +{ + public EndpointDescriptor Merge( + EndpointDescriptor codeDefault, + EndpointOverrideConfig? yamlOverride) + { + if (yamlOverride == null) + return codeDefault; + + return codeDefault with + { + DefaultTimeout = yamlOverride.DefaultTimeout ?? codeDefault.DefaultTimeout, + SupportsStreaming = yamlOverride.SupportsStreaming ?? codeDefault.SupportsStreaming, + RequiringClaims = yamlOverride.RequiringClaims?.Select(c => + new ClaimRequirement { Type = c.Type, Value = c.Value }).ToList() + ?? codeDefault.RequiringClaims + }; + } +} +``` + +## Precedence Rules + +From specs.md section 7.3: +> Precedence rules MUST be clearly defined and honored: +> * Service identity & router pool: from `StellaMicroserviceOptions` (not YAML). +> * Endpoint set: from code (attributes/source gen); YAML MAY override properties but ideally not create endpoints not present in code. +> * `RequiringClaims` and timeouts: YAML overrides defaults from code, unless overridden by central Authority. + +``` +┌─────────────────┐ +│ Code defaults │ [StellaEndpoint] attribute values +└────────┬────────┘ + │ YAML overrides (if present) + ▼ +┌─────────────────┐ +│ YAML config │ Endpoint-specific overrides +└────────┬────────┘ + │ Authority overrides (later sprint) + ▼ +┌─────────────────┐ +│ Effective │ Final values sent in HELLO +└─────────────────┘ +``` + +## Integration with Discovery + +```csharp +internal sealed class EndpointDiscoveryService +{ + private readonly IMicroserviceYamlLoader _yamlLoader; + private readonly EndpointOverrideMerger _merger; + + public IReadOnlyList DiscoverEndpoints() + { + // 1. Discover from code + var codeEndpoints = DiscoverFromReflection(); + + // 2. Load YAML overrides + var yamlConfig = _yamlLoader.Load(); + + // 3. Merge + return codeEndpoints.Select(ep => + { + var yamlOverride = yamlConfig?.Endpoints + .FirstOrDefault(y => y.Method == ep.Method && y.Path == ep.Path); + + if (yamlOverride == null) + return ep; + + return _merger.Merge(ep, yamlOverride); + }).ToList(); + } +} +``` + +## Warning on Unmatched YAML + +```csharp +private void WarnUnmatchedOverrides( + IEnumerable codeEndpoints, + MicroserviceYamlConfig? yamlConfig) +{ + if (yamlConfig == null) return; + + var codeKeys = codeEndpoints.Select(e => (e.Method, e.Path)).ToHashSet(); + + foreach (var yamlEntry in yamlConfig.Endpoints) + { + if (!codeKeys.Contains((yamlEntry.Method, yamlEntry.Path))) + { + _logger.LogWarning( + "YAML override for {Method} {Path} does not match any code endpoint", + yamlEntry.Method, yamlEntry.Path); + } + } +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] YAML loading works from ConfigFilePath +2. [ ] Merge applies YAML overrides to code defaults +3. [ ] Precedence is code → YAML +4. [ ] Unmatched YAML entries logged as warnings +5. [ ] Sample microservice.yaml documented +6. [ ] Unit tests for merge logic + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- YAML cannot create endpoints (only modify) per spec +- Missing YAML file is not an error (optional config) +- Hot-reload of microservice YAML is not supported (restart required) +- RequiringClaims in YAML fully replaces code defaults (not merged) diff --git a/docs/router/SPRINT_7000_0008_0001_authority_integration.md b/docs/router/SPRINT_7000_0008_0001_authority_integration.md new file mode 100644 index 000000000..22ff6241c --- /dev/null +++ b/docs/router/SPRINT_7000_0008_0001_authority_integration.md @@ -0,0 +1,204 @@ +# Sprint 7000-0008-0001 · Integration · Authority Claims Override + +## Topic & Scope + +Implement Authority integration for RequiringClaims overrides. The central Authority service can push endpoint authorization requirements that override microservice defaults. + +**Goal:** Centralized authorization policy that takes precedence over microservice-defined claims. + +**Working directories:** +- `src/Gateway/StellaOps.Gateway.WebService/` (apply overrides) +- `src/Authority/` (if Authority changes needed) + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0007_0002 (microservice YAML - establishes precedence) +- **Downstream:** SPRINT_7000_0008_0002 (source generator) +- **Parallel work:** Can run in parallel with source generator sprint. +- **Cross-module impact:** May require Authority module changes. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 9 - Authorization / requiringClaims / Authority requirements) +- `docs/modules/authority/architecture.md` (Authority module design) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Working Directory | +|---|---------|--------|-------------|-------------------| +| 1 | AUTH-001 | TODO | Define `IAuthorityClaimsProvider` interface | Common/Gateway | +| 2 | AUTH-002 | TODO | Define `ClaimsOverride` model | Common | +| 3 | AUTH-010 | TODO | Implement Gateway startup claims fetch | Gateway | +| 4 | AUTH-011 | TODO | Request overrides from Authority on startup | | +| 5 | AUTH-012 | TODO | Wait for Authority before handling traffic (configurable) | | +| 6 | AUTH-020 | TODO | Implement runtime claims update | Gateway | +| 7 | AUTH-021 | TODO | Periodically refresh from Authority | | +| 8 | AUTH-022 | TODO | Or subscribe to Authority push notifications | | +| 9 | AUTH-030 | TODO | Merge Authority overrides with microservice defaults | Gateway | +| 10 | AUTH-031 | TODO | Authority takes precedence over YAML and code | | +| 11 | AUTH-032 | TODO | Store effective RequiringClaims per endpoint | | +| 12 | AUTH-040 | TODO | Implement AuthorizationMiddleware with claims enforcement | Gateway | +| 13 | AUTH-041 | TODO | Check user principal has all required claims | | +| 14 | AUTH-042 | TODO | Return 403 Forbidden on claim failure | | +| 15 | AUTH-050 | TODO | Create configuration for Authority connection | Gateway | +| 16 | AUTH-051 | TODO | Handle Authority unavailable (use cached/defaults) | | +| 17 | AUTH-060 | TODO | Write integration tests for claims enforcement | | +| 18 | AUTH-061 | TODO | Write tests for Authority override precedence | | + +## IAuthorityClaimsProvider + +```csharp +public interface IAuthorityClaimsProvider +{ + Task>> GetOverridesAsync( + CancellationToken cancellationToken); + + event EventHandler? OverridesChanged; +} + +public readonly record struct EndpointKey(string ServiceName, string Method, string Path); + +public sealed class ClaimsOverrideChangedEventArgs : EventArgs +{ + public IReadOnlyDictionary> Overrides { get; init; } = new Dictionary>(); +} +``` + +## Final Precedence Chain + +``` +┌─────────────────────┐ +│ Code defaults │ [StellaEndpoint] RequiringClaims +└──────────┬──────────┘ + │ YAML overrides + ▼ +┌─────────────────────┐ +│ Microservice YAML │ Endpoint-specific claims +└──────────┬──────────┘ + │ Authority overrides (highest priority) + ▼ +┌─────────────────────┐ +│ Authority Policy │ Central claims requirements +└──────────┬──────────┘ + │ + ▼ +┌─────────────────────┐ +│ Effective Claims │ What Gateway enforces +└─────────────────────┘ +``` + +## AuthorizationMiddleware (Updated) + +```csharp +public class AuthorizationMiddleware +{ + public async Task InvokeAsync(HttpContext context, IEffectiveClaimsStore claimsStore) + { + var endpoint = (EndpointDescriptor)context.Items["ResolvedEndpoint"]!; + + // Get effective claims (already merged with Authority) + var effectiveClaims = claimsStore.GetEffectiveClaims( + endpoint.ServiceName, endpoint.Method, endpoint.Path); + + // Check each required claim + foreach (var required in effectiveClaims) + { + var userClaims = context.User.Claims; + + bool hasClaim = required.Value == null + ? userClaims.Any(c => c.Type == required.Type) + : userClaims.Any(c => c.Type == required.Type && c.Value == required.Value); + + if (!hasClaim) + { + _logger.LogWarning( + "Authorization failed: user lacks claim {ClaimType}={ClaimValue}", + required.Type, required.Value ?? "(any)"); + context.Response.StatusCode = 403; + await context.Response.WriteAsJsonAsync(new + { + error = "Forbidden", + requiredClaim = new { type = required.Type, value = required.Value } + }); + return; + } + } + + await _next(context); + } +} +``` + +## IEffectiveClaimsStore + +```csharp +public interface IEffectiveClaimsStore +{ + IReadOnlyList GetEffectiveClaims( + string serviceName, string method, string path); + + void UpdateFromMicroservice(string serviceName, IReadOnlyList endpoints); + void UpdateFromAuthority(IReadOnlyDictionary> overrides); +} + +internal sealed class EffectiveClaimsStore : IEffectiveClaimsStore +{ + private readonly ConcurrentDictionary> _microserviceClaims = new(); + private readonly ConcurrentDictionary> _authorityClaims = new(); + + public IReadOnlyList GetEffectiveClaims( + string serviceName, string method, string path) + { + var key = new EndpointKey(serviceName, method, path); + + // Authority takes precedence + if (_authorityClaims.TryGetValue(key, out var authorityClaims)) + return authorityClaims; + + // Fall back to microservice defaults + if (_microserviceClaims.TryGetValue(key, out var msClaims)) + return msClaims; + + return Array.Empty(); + } +} +``` + +## Authority Connection Options + +```csharp +public sealed class AuthorityConnectionOptions +{ + public string AuthorityUrl { get; set; } = string.Empty; + public bool WaitForAuthorityOnStartup { get; set; } = true; + public TimeSpan StartupTimeout { get; set; } = TimeSpan.FromSeconds(30); + public TimeSpan RefreshInterval { get; set; } = TimeSpan.FromMinutes(5); + public bool UseAuthorityPushNotifications { get; set; } = false; +} +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] IAuthorityClaimsProvider implemented +2. [ ] Gateway fetches overrides on startup +3. [ ] Authority overrides take precedence +4. [ ] AuthorizationMiddleware enforces effective claims +5. [ ] Graceful handling when Authority unavailable +6. [ ] Integration tests verify claims enforcement + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Authority overrides fully replace microservice claims (not merged) +- Startup can optionally wait for Authority (fail-safe mode proceeds without) +- Refresh interval is 5 minutes by default (tune for your environment) +- Authority push notifications optional (polling is default) +- This sprint assumes Authority module exists; coordinate with Authority team diff --git a/docs/router/SPRINT_7000_0008_0002_source_generator.md b/docs/router/SPRINT_7000_0008_0002_source_generator.md new file mode 100644 index 000000000..a801ab775 --- /dev/null +++ b/docs/router/SPRINT_7000_0008_0002_source_generator.md @@ -0,0 +1,231 @@ +# Sprint 7000-0008-0002 · Integration · Endpoint Source Generator + +## Topic & Scope + +Implement a Roslyn source generator for compile-time endpoint discovery. Generates endpoint metadata at build time, eliminating runtime reflection overhead. + +**Goal:** Faster startup and AOT compatibility via build-time endpoint discovery. + +**Working directory:** `src/__Libraries/StellaOps.Microservice.SourceGen/` + +## Dependencies & Concurrency + +- **Upstream:** SPRINT_7000_0003_0001 (SDK core with reflection-based discovery) +- **Downstream:** None. +- **Parallel work:** Can run in parallel with Authority integration. +- **Cross-module impact:** Microservice SDK consumes generated code. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 7.2 - Endpoint definition & discovery) +- Roslyn Source Generator documentation + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | GEN-001 | TODO | Convert project to source generator | Microsoft.CodeAnalysis.CSharp | +| 2 | GEN-002 | TODO | Implement `[StellaEndpoint]` attribute detection | Syntax receiver | +| 3 | GEN-003 | TODO | Extract Method, Path, and other attribute properties | | +| 4 | GEN-010 | TODO | Detect handler interface implementation | IStellaEndpoint, etc. | +| 5 | GEN-011 | TODO | Generate `EndpointDescriptor` instances | | +| 6 | GEN-012 | TODO | Generate `IGeneratedEndpointProvider` implementation | | +| 7 | GEN-020 | TODO | Generate registration code for DI | | +| 8 | GEN-021 | TODO | Generate handler factory methods | | +| 9 | GEN-030 | TODO | Implement incremental generation | For fast builds | +| 10 | GEN-031 | TODO | Cache compilation results | | +| 11 | GEN-040 | TODO | Add analyzer for invalid [StellaEndpoint] usage | Diagnostics | +| 12 | GEN-041 | TODO | Error on missing handler interface | | +| 13 | GEN-042 | TODO | Warning on duplicate Method+Path | | +| 14 | GEN-050 | TODO | Hook into SDK to prefer generated over reflection | | +| 15 | GEN-051 | TODO | Fall back to reflection if generation not available | | +| 16 | GEN-060 | TODO | Write unit tests for generator | | +| 17 | GEN-061 | TODO | Test generated code compiles and works | | +| 18 | GEN-062 | TODO | Test incremental generation | | + +## Source Generator Output + +Given this input: +```csharp +[StellaEndpoint("POST", "/invoices", DefaultTimeout = 30)] +public sealed class CreateInvoiceEndpoint : IStellaEndpoint +{ + public Task HandleAsync(CreateInvoiceRequest request, CancellationToken ct) => ...; +} +``` + +The generator produces: +```csharp +// +namespace StellaOps.Microservice.Generated +{ + [global::System.CodeDom.Compiler.GeneratedCode("StellaOps.Microservice.SourceGen", "1.0.0")] + internal static class StellaEndpoints + { + public static global::System.Collections.Generic.IReadOnlyList + GetEndpoints() + { + return new global::StellaOps.Router.Common.EndpointDescriptor[] + { + new global::StellaOps.Router.Common.EndpointDescriptor + { + Method = "POST", + Path = "/invoices", + DefaultTimeout = global::System.TimeSpan.FromSeconds(30), + SupportsStreaming = false, + RequiringClaims = global::System.Array.Empty(), + HandlerType = typeof(global::MyApp.CreateInvoiceEndpoint) + }, + // ... more endpoints + }; + } + + public static void RegisterHandlers( + global::Microsoft.Extensions.DependencyInjection.IServiceCollection services) + { + services.AddTransient(); + // ... more handlers + } + } +} +``` + +## Generator Implementation + +```csharp +[Generator] +public class StellaEndpointGenerator : IIncrementalGenerator +{ + public void Initialize(IncrementalGeneratorInitializationContext context) + { + // Find all classes with [StellaEndpoint] + var endpointClasses = context.SyntaxProvider + .ForAttributeWithMetadataName( + "StellaOps.Microservice.StellaEndpointAttribute", + predicate: static (node, _) => node is ClassDeclarationSyntax, + transform: static (ctx, _) => GetEndpointInfo(ctx)) + .Where(static info => info is not null); + + // Combine and generate + context.RegisterSourceOutput( + endpointClasses.Collect(), + static (spc, endpoints) => GenerateEndpointsClass(spc, endpoints!)); + } + + private static EndpointInfo? GetEndpointInfo(GeneratorAttributeSyntaxContext context) + { + var classSymbol = (INamedTypeSymbol)context.TargetSymbol; + var attribute = context.Attributes[0]; + + // Extract attribute parameters + var method = attribute.ConstructorArguments[0].Value as string; + var path = attribute.ConstructorArguments[1].Value as string; + + // Find timeout, streaming, etc. from named arguments + var timeout = attribute.NamedArguments + .FirstOrDefault(a => a.Key == "DefaultTimeout").Value.Value as int? ?? 30; + + // Verify handler interface + var implementsHandler = classSymbol.AllInterfaces + .Any(i => i.Name.StartsWith("IStellaEndpoint")); + + if (!implementsHandler) + { + // Report diagnostic + return null; + } + + return new EndpointInfo(classSymbol, method!, path!, timeout); + } +} +``` + +## IGeneratedEndpointProvider + +```csharp +public interface IGeneratedEndpointProvider +{ + IReadOnlyList GetEndpoints(); + void RegisterHandlers(IServiceCollection services); +} + +// Generated implementation +internal sealed class GeneratedEndpointProvider : IGeneratedEndpointProvider +{ + public IReadOnlyList GetEndpoints() + => StellaEndpoints.GetEndpoints(); + + public void RegisterHandlers(IServiceCollection services) + => StellaEndpoints.RegisterHandlers(services); +} +``` + +## SDK Integration + +```csharp +internal sealed class EndpointDiscoveryService +{ + public IReadOnlyList DiscoverEndpoints() + { + // Prefer generated + var generated = TryGetGeneratedProvider(); + if (generated != null) + { + _logger.LogDebug("Using source-generated endpoint discovery"); + return generated.GetEndpoints(); + } + + // Fall back to reflection + _logger.LogDebug("Using reflection-based endpoint discovery"); + return DiscoverFromReflection(); + } + + private IGeneratedEndpointProvider? TryGetGeneratedProvider() + { + // Look for generated type in entry assembly + var entryAssembly = Assembly.GetEntryAssembly(); + var providerType = entryAssembly?.GetType( + "StellaOps.Microservice.Generated.GeneratedEndpointProvider"); + + if (providerType != null) + return (IGeneratedEndpointProvider)Activator.CreateInstance(providerType)!; + + return null; + } +} +``` + +## Diagnostics + +| ID | Severity | Message | +|----|----------|---------| +| STELLA001 | Error | Class with [StellaEndpoint] must implement IStellaEndpoint<> or IRawStellaEndpoint | +| STELLA002 | Warning | Duplicate endpoint: {Method} {Path} | +| STELLA003 | Warning | [StellaEndpoint] on abstract class is ignored | +| STELLA004 | Info | Generated {N} endpoint descriptors | + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] Source generator detects [StellaEndpoint] classes +2. [ ] Generates EndpointDescriptor array +3. [ ] Generates DI registration +4. [ ] Incremental generation for fast builds +5. [ ] Analyzers report invalid usage +6. [ ] SDK prefers generated over reflection +7. [ ] All tests pass + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Incremental generation is essential for large projects +- Generated code uses fully qualified names to avoid conflicts +- Fallback to reflection ensures compatibility with older projects +- AOT scenarios require source generation (no reflection) diff --git a/docs/router/SPRINT_7000_0009_0001_reference_example.md b/docs/router/SPRINT_7000_0009_0001_reference_example.md new file mode 100644 index 000000000..69497cb99 --- /dev/null +++ b/docs/router/SPRINT_7000_0009_0001_reference_example.md @@ -0,0 +1,260 @@ +# Sprint 7000-0009-0001 · Examples · Reference Implementation + +## Topic & Scope + +Build a complete reference example demonstrating the router, gateway, and microservice SDK working together. Provides templates for common patterns and validates the entire system end-to-end. + +**Goal:** Working example that developers can copy and adapt. + +**Working directory:** `examples/router/` + +## Dependencies & Concurrency + +- **Upstream:** All feature sprints complete (7000-0001 through 7000-0008) +- **Downstream:** SPRINT_7000_0009_0002 (migration docs) +- **Parallel work:** Can run in parallel with migration docs. +- **Cross-module impact:** None. Examples only. + +## Documentation Prerequisites + +- `docs/router/specs.md` (complete specification) +- `docs/router/implplan.md` (phase 11 guidance) + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | EX-001 | TODO | Create `examples/router/` directory structure | | +| 2 | EX-002 | TODO | Create example solution `Examples.Router.sln` | | +| 3 | EX-010 | TODO | Create `Examples.Gateway` project | Full gateway setup | +| 4 | EX-011 | TODO | Configure gateway with all middleware | | +| 5 | EX-012 | TODO | Create example router.yaml | | +| 6 | EX-013 | TODO | Configure TCP and TLS transports | | +| 7 | EX-020 | TODO | Create `Examples.Billing.Microservice` project | | +| 8 | EX-021 | TODO | Implement simple GET/POST endpoints | | +| 9 | EX-022 | TODO | Implement streaming upload endpoint | IRawStellaEndpoint | +| 10 | EX-023 | TODO | Create example microservice.yaml | | +| 11 | EX-030 | TODO | Create `Examples.Inventory.Microservice` project | Second service | +| 12 | EX-031 | TODO | Demonstrate multi-service routing | | +| 13 | EX-040 | TODO | Create docker-compose.yaml | Local dev environment | +| 14 | EX-041 | TODO | Include RabbitMQ for transport option | | +| 15 | EX-042 | TODO | Include health monitoring | | +| 16 | EX-050 | TODO | Write README.md with run instructions | | +| 17 | EX-051 | TODO | Document adding new endpoints | | +| 18 | EX-052 | TODO | Document cancellation behavior | | +| 19 | EX-053 | TODO | Document payload limit testing | | +| 20 | EX-060 | TODO | Create integration test project | | +| 21 | EX-061 | TODO | Test full end-to-end flow | | + +## Directory Structure + +``` +examples/router/ +├── Examples.Router.sln +├── docker-compose.yaml +├── README.md +├── src/ +│ ├── Examples.Gateway/ +│ │ ├── Program.cs +│ │ ├── appsettings.json +│ │ └── router.yaml +│ ├── Examples.Billing.Microservice/ +│ │ ├── Program.cs +│ │ ├── appsettings.json +│ │ ├── microservice.yaml +│ │ └── Endpoints/ +│ │ ├── CreateInvoiceEndpoint.cs +│ │ ├── GetInvoiceEndpoint.cs +│ │ └── UploadAttachmentEndpoint.cs +│ └── Examples.Inventory.Microservice/ +│ ├── Program.cs +│ └── Endpoints/ +│ ├── ListItemsEndpoint.cs +│ └── GetItemEndpoint.cs +└── tests/ + └── Examples.Integration.Tests/ +``` + +## Example Gateway Program.cs + +```csharp +var builder = WebApplication.CreateBuilder(args); + +// Router configuration +builder.Services.AddRouterConfig(options => +{ + options.ConfigPath = "router.yaml"; + options.EnableHotReload = true; +}); + +// Gateway node configuration +builder.Services.Configure( + builder.Configuration.GetSection("GatewayNode")); + +// Transports +builder.Services.AddTcpTransport(options => +{ + options.Port = 5100; +}); +builder.Services.AddTlsTransport(options => +{ + options.Port = 5101; + options.ServerCertificatePath = "certs/gateway.pfx"; +}); + +// Routing +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); + +// Authority integration +builder.Services.AddAuthorityClaimsProvider(options => +{ + options.AuthorityUrl = builder.Configuration["Authority:Url"]; +}); + +var app = builder.Build(); + +// Middleware pipeline +app.UseForwardedHeaders(); +app.UseMiddleware(); +app.UseMiddleware(); +app.UseMiddleware(); +app.UseAuthentication(); +app.UseMiddleware(); +app.UseMiddleware(); +app.UseMiddleware(); +app.UseMiddleware(); + +app.Run(); +``` + +## Example Microservice Program.cs + +```csharp +var builder = Host.CreateApplicationBuilder(args); + +builder.Services.AddStellaMicroservice(options => +{ + options.ServiceName = "billing"; + options.Version = "1.0.0"; + options.Region = "eu1"; + options.InstanceId = $"billing-{Environment.MachineName}"; + options.ConfigFilePath = "microservice.yaml"; + options.Routers = new[] + { + new RouterEndpointConfig + { + Host = "gateway.local", + Port = 5100, + TransportType = TransportType.Tcp + } + }; +}); + +var host = builder.Build(); +await host.RunAsync(); +``` + +## Example Endpoints + +### Typed Endpoint +```csharp +[StellaEndpoint("POST", "/invoices", DefaultTimeout = 30)] +public sealed class CreateInvoiceEndpoint : IStellaEndpoint +{ + private readonly IInvoiceService _service; + + public CreateInvoiceEndpoint(IInvoiceService service) => _service = service; + + public async Task HandleAsync( + CreateInvoiceRequest request, + CancellationToken ct) + { + var invoice = await _service.CreateAsync(request, ct); + return new CreateInvoiceResponse { InvoiceId = invoice.Id }; + } +} +``` + +### Streaming Endpoint +```csharp +[StellaEndpoint("POST", "/invoices/{id}/attachments", SupportsStreaming = true)] +public sealed class UploadAttachmentEndpoint : IRawStellaEndpoint +{ + private readonly IStorageService _storage; + + public async Task HandleAsync(RawRequestContext context, CancellationToken ct) + { + var invoiceId = context.PathParameters["id"]; + + // Stream body directly to storage + var path = await _storage.StoreAsync(invoiceId, context.Body, ct); + + return RawResponse.Ok(JsonSerializer.Serialize(new { path })); + } +} +``` + +## docker-compose.yaml + +```yaml +version: '3.8' +services: + gateway: + build: ./src/Examples.Gateway + ports: + - "8080:8080" # HTTP ingress + - "5100:5100" # TCP transport + - "5101:5101" # TLS transport + environment: + - GatewayNode__Region=eu1 + - GatewayNode__NodeId=gw-01 + + billing: + build: ./src/Examples.Billing.Microservice + environment: + - Stella__Routers__0__Host=gateway + - Stella__Routers__0__Port=5100 + depends_on: + - gateway + + inventory: + build: ./src/Examples.Inventory.Microservice + environment: + - Stella__Routers__0__Host=gateway + - Stella__Routers__0__Port=5100 + depends_on: + - gateway + + rabbitmq: + image: rabbitmq:3-management + ports: + - "5672:5672" + - "15672:15672" +``` + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] All example projects build +2. [ ] docker-compose starts full environment +3. [ ] HTTP requests route through gateway to microservices +4. [ ] Streaming upload works +5. [ ] Multiple microservices register correctly +6. [ ] README documents all usage patterns +7. [ ] Integration tests pass + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Examples are separate solution from main StellaOps +- Uses Docker for easy local dev +- Includes both TCP and TLS examples +- RabbitMQ included for transport option demo diff --git a/docs/router/SPRINT_7000_0010_0001_migration.md b/docs/router/SPRINT_7000_0010_0001_migration.md new file mode 100644 index 000000000..6a65fc446 --- /dev/null +++ b/docs/router/SPRINT_7000_0010_0001_migration.md @@ -0,0 +1,267 @@ +# Sprint 7000-0010-0001 · Migration · WebService to Microservice + +## Topic & Scope + +Define and document the migration path from existing `StellaOps.*.WebService` projects to the new microservice pattern with router. This is the final sprint that connects the router infrastructure to the rest of StellaOps. + +**Goal:** Clear migration guide and tooling for converting WebServices to Microservices. + +**Working directories:** +- `docs/router/` (migration documentation) +- Potentially existing WebService projects (for pilot migration) + +## Dependencies & Concurrency + +- **Upstream:** All router sprints complete (7000-0001 through 7000-0009) +- **Downstream:** None. Final sprint. +- **Parallel work:** None. +- **Cross-module impact:** YES - This sprint affects existing StellaOps modules. + +## Documentation Prerequisites + +- `docs/router/specs.md` (section 14 - Migration requirements) +- `docs/router/implplan.md` (phase 11-12 guidance) +- Existing WebService project structures + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +## Delivery Tracker + +| # | Task ID | Status | Description | Notes | +|---|---------|--------|-------------|-------| +| 1 | MIG-001 | TODO | Inventory all existing WebService projects | List all services | +| 2 | MIG-002 | TODO | Document HTTP routes per service | Method + Path | +| 3 | MIG-010 | TODO | Document Strategy A: In-place adaptation | | +| 4 | MIG-011 | TODO | Add SDK to existing WebService | | +| 5 | MIG-012 | TODO | Wrap controllers in [StellaEndpoint] handlers | | +| 6 | MIG-013 | TODO | Register with router alongside HTTP | | +| 7 | MIG-014 | TODO | Gradual traffic shift from HTTP to router | | +| 8 | MIG-020 | TODO | Document Strategy B: Clean split | | +| 9 | MIG-021 | TODO | Extract domain logic to shared library | | +| 10 | MIG-022 | TODO | Create new Microservice project | | +| 11 | MIG-023 | TODO | Map routes to handlers | | +| 12 | MIG-024 | TODO | Phase out original WebService | | +| 13 | MIG-030 | TODO | Document CancellationToken wiring | | +| 14 | MIG-031 | TODO | Identify async operations needing token | | +| 15 | MIG-032 | TODO | Update DB calls, HTTP calls, etc. | | +| 16 | MIG-040 | TODO | Document streaming migration | | +| 17 | MIG-041 | TODO | Convert file upload controllers | | +| 18 | MIG-042 | TODO | Convert file download controllers | | +| 19 | MIG-050 | TODO | Create migration checklist template | | +| 20 | MIG-051 | TODO | Create automated route inventory tool | Optional | +| 21 | MIG-060 | TODO | Pilot migration: choose one WebService | | +| 22 | MIG-061 | TODO | Execute pilot migration | | +| 23 | MIG-062 | TODO | Document lessons learned | | +| 24 | MIG-070 | TODO | Merge Router.sln into StellaOps.sln | | +| 25 | MIG-071 | TODO | Update CI/CD for router components | | + +## Migration Strategies + +### Strategy A: In-Place Adaptation + +Best for: Services that need to maintain HTTP compatibility during transition. + +``` +┌─────────────────────────────────────┐ +│ StellaOps.Billing.WebService │ +│ ┌─────────────────────────────┐ │ +│ │ Existing HTTP Controllers │◄───┼──── HTTP clients (legacy) +│ └─────────────────────────────┘ │ +│ ┌─────────────────────────────┐ │ +│ │ [StellaEndpoint] Handlers │◄───┼──── Router (new) +│ └─────────────────────────────┘ │ +│ ┌─────────────────────────────┐ │ +│ │ Shared Domain Logic │ │ +│ └─────────────────────────────┘ │ +└─────────────────────────────────────┘ +``` + +Steps: +1. Add `StellaOps.Microservice` package reference +2. Create handler classes for each route +3. Handlers call existing service layer +4. Register with router pool +5. Test via router +6. Shift traffic gradually +7. Remove HTTP controllers when ready + +### Strategy B: Clean Split + +Best for: Major refactoring or when HTTP compatibility not needed. + +``` +┌─────────────────────────────────────┐ +│ StellaOps.Billing.Domain │ ◄── Shared library +│ (extracted business logic) │ +└─────────────────────────────────────┘ + ▲ ▲ + │ │ +┌─────────┴───────┐ ┌───────┴─────────┐ +│ (Legacy) │ │ (New) │ +│ Billing.Web │ │ Billing.Micro │ +│ Service │ │ service │ +│ HTTP only │ │ Router only │ +└─────────────────┘ └─────────────────┘ +``` + +Steps: +1. Extract domain logic to `.Domain` library +2. Create new `.Microservice` project +3. Implement handlers using domain library +4. Deploy alongside WebService +5. Shift traffic to router +6. Deprecate WebService + +## Controller to Handler Mapping + +### Before (ASP.NET Controller) +```csharp +[ApiController] +[Route("api/invoices")] +public class InvoicesController : ControllerBase +{ + private readonly IInvoiceService _service; + + [HttpPost] + [Authorize(Roles = "billing-admin")] + public async Task Create( + [FromBody] CreateInvoiceRequest request, + CancellationToken ct) // <-- Often missing! + { + var invoice = await _service.CreateAsync(request); + return Ok(new { invoice.Id }); + } +} +``` + +### After (Microservice Handler) +```csharp +[StellaEndpoint("POST", "/api/invoices")] +public sealed class CreateInvoiceEndpoint : IStellaEndpoint +{ + private readonly IInvoiceService _service; + + public CreateInvoiceEndpoint(IInvoiceService service) => _service = service; + + public async Task HandleAsync( + CreateInvoiceRequest request, + CancellationToken ct) // <-- Required, propagated + { + var invoice = await _service.CreateAsync(request, ct); // Pass token! + return new CreateInvoiceResponse { InvoiceId = invoice.Id }; + } +} +``` + +## CancellationToken Checklist + +For each migrated handler, verify: +- [ ] Handler accepts CancellationToken parameter +- [ ] Token passed to all database calls +- [ ] Token passed to all HTTP client calls +- [ ] Token passed to all file I/O operations +- [ ] Long-running loops check `ct.IsCancellationRequested` +- [ ] Token passed to Task.Delay, WaitAsync, etc. + +## Streaming Migration + +### File Upload (Before) +```csharp +[HttpPost("upload")] +public async Task Upload(IFormFile file) +{ + using var stream = file.OpenReadStream(); + await _storage.SaveAsync(stream); + return Ok(); +} +``` + +### File Upload (After) +```csharp +[StellaEndpoint("POST", "/upload", SupportsStreaming = true)] +public sealed class UploadEndpoint : IRawStellaEndpoint +{ + public async Task HandleAsync(RawRequestContext ctx, CancellationToken ct) + { + await _storage.SaveAsync(ctx.Body, ct); // Body is already a stream + return RawResponse.Ok(); + } +} +``` + +## Migration Checklist Template + +```markdown +# Migration Checklist: [ServiceName] + +## Inventory +- [ ] List all HTTP routes (Method + Path) +- [ ] Identify streaming endpoints +- [ ] Identify authorization requirements +- [ ] Document external dependencies + +## Preparation +- [ ] Add StellaOps.Microservice package +- [ ] Configure router connection +- [ ] Set up local gateway for testing + +## Per-Route Migration +For each route: +- [ ] Create [StellaEndpoint] handler class +- [ ] Map request/response types +- [ ] Wire CancellationToken throughout +- [ ] Convert to IRawStellaEndpoint if streaming +- [ ] Write unit tests +- [ ] Write integration tests + +## Cutover +- [ ] Deploy alongside existing WebService +- [ ] Verify via router routing +- [ ] Shift percentage of traffic +- [ ] Monitor for errors +- [ ] Full cutover +- [ ] Remove WebService HTTP listeners + +## Cleanup +- [ ] Remove unused controller code +- [ ] Remove HTTP pipeline configuration +- [ ] Update documentation +``` + +## StellaOps Modules to Migrate + +| Module | WebService | Priority | Complexity | +|--------|------------|----------|------------| +| Concelier | StellaOps.Concelier.WebService | High | Medium | +| Scanner | StellaOps.Scanner.WebService | High | High (streaming) | +| Authority | StellaOps.Authority.WebService | Medium | Low | +| Orchestrator | StellaOps.Orchestrator.WebService | Medium | Medium | +| Scheduler | StellaOps.Scheduler.WebService | Low | Low | +| Notify | StellaOps.Notify.WebService | Low | Low | + +## Exit Criteria + +Before marking this sprint DONE: +1. [ ] Migration strategies documented +2. [ ] Controller-to-handler mapping guide complete +3. [ ] CancellationToken checklist complete +4. [ ] Streaming migration guide complete +5. [ ] Migration checklist template created +6. [ ] Pilot migration executed successfully +7. [ ] Router.sln merged into StellaOps.sln +8. [ ] CI/CD updated + +## Execution Log + +| Date (UTC) | Update | Owner | +|------------|--------|-------| +| | | | + +## Decisions & Risks + +- Pilot migration should be a low-risk service first +- Strategy A preferred for gradual transition +- Strategy B preferred for greenfield-like rewrites +- CancellationToken wiring is the #1 source of migration bugs +- Streaming endpoints require IRawStellaEndpoint, not typed handlers +- Authorization migrates from [Authorize(Roles)] to RequiringClaims diff --git a/docs/router/SPRINT_INDEX.md b/docs/router/SPRINT_INDEX.md new file mode 100644 index 000000000..59ea1f8aa --- /dev/null +++ b/docs/router/SPRINT_INDEX.md @@ -0,0 +1,200 @@ +# Stella Ops Router - Sprint Index + +> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies. + +This document provides an overview of all sprints for implementing the StellaOps Router infrastructure. Sprints are organized for maximum agent independence while respecting dependencies. + +## Key Documents + +| Document | Purpose | +|----------|---------| +| [specs.md](./specs.md) | **Canonical specification** - READ FIRST | +| [implplan.md](./implplan.md) | High-level implementation plan | +| Step files (01-29) | Detailed task breakdowns per phase | + +## Sprint Epochs + +All router sprints use **Epoch 7000** to maintain isolation from existing StellaOps work. + +| Batch | Focus Area | Sprints | +|-------|------------|---------| +| 0001 | Foundation | Skeleton, Common library | +| 0002 | InMemory Transport | Prove the design before real transports | +| 0003 | Microservice SDK | Core infrastructure, request handling | +| 0004 | Gateway | Core, middleware, connection handling | +| 0005 | Protocol Features | Heartbeat, routing, cancellation, streaming, limits | +| 0006 | Real Transports | TCP, TLS, UDP, RabbitMQ | +| 0007 | Configuration | Router config, microservice YAML | +| 0008 | Integration | Authority, source generator | +| 0009 | Examples | Reference implementation | +| 0010 | Migration | WebService → Microservice | + +## Sprint Dependency Graph + +``` + ┌─────────────────────────────────────┐ + │ SPRINT_7000_0001_0001 │ + │ Router Skeleton │ + └───────────────┬─────────────────────┘ + │ + ┌───────────────▼─────────────────────┐ + │ SPRINT_7000_0001_0002 │ + │ Common Library Models │ + └───────────────┬─────────────────────┘ + │ + ┌───────────────▼─────────────────────┐ + │ SPRINT_7000_0002_0001 │ + │ InMemory Transport │ + └───────────────┬─────────────────────┘ + │ + ┌──────────────────────────┼──────────────────────────┐ + │ │ │ + ▼ │ ▼ +┌─────────────────────┐ │ ┌─────────────────────┐ +│ SPRINT_7000_0003_* │ │ │ SPRINT_7000_0004_* │ +│ Microservice SDK │ │ │ Gateway │ +│ (2 sprints) │◄────────────┼────────────►│ (3 sprints) │ +└─────────┬───────────┘ │ └─────────┬───────────┘ + │ │ │ + └─────────────────────────┼───────────────────────┘ + │ + ┌───────────────▼─────────────────────┐ + │ SPRINT_7000_0005_0001-0005 │ + │ Protocol Features (sequential) │ + │ Heartbeat → Routing → Cancel │ + │ → Streaming → Payload Limits │ + └───────────────┬─────────────────────┘ + │ + ┌──────────────────────────┼──────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ TCP Transport │ │ UDP Transport │ │ RabbitMQ │ +│ 7000_0006_0001 │ │ 7000_0006_0003 │ │ 7000_0006_0004 │ +└────────┬────────┘ └─────────────────┘ └─────────────────┘ + │ + ▼ +┌─────────────────┐ +│ TLS Transport │ +│ 7000_0006_0002 │ +└────────┬────────┘ + │ + └──────────────────────────┬──────────────────────────┘ + │ + ┌───────────────▼─────────────────────┐ + │ SPRINT_7000_0007_0001-0002 │ + │ Configuration (sequential) │ + └───────────────┬─────────────────────┘ + │ + ┌──────────────────────────┼──────────────────────────┐ + │ │ │ + ▼ │ ▼ +┌─────────────────────┐ │ ┌─────────────────────┐ +│ Authority Integration│ │ │ Source Generator │ +│ 7000_0008_0001 │◄────────────┼────────────►│ 7000_0008_0002 │ +└─────────────────────┘ │ └─────────────────────┘ + │ + ┌───────────────▼─────────────────────┐ + │ SPRINT_7000_0009_0001 │ + │ Reference Example │ + └───────────────┬─────────────────────┘ + │ + ┌───────────────▼─────────────────────┐ + │ SPRINT_7000_0010_0001 │ + │ Migration │ + │ (Connects to rest of StellaOps) │ + └─────────────────────────────────────┘ +``` + +## Parallel Execution Opportunities + +These sprints can run in parallel: + +| Phase | Parallel Track A | Parallel Track B | Parallel Track C | +|-------|------------------|------------------|------------------| +| After InMemory | SDK Core (0003_0001) | Gateway Core (0004_0001) | - | +| After Protocol | TCP (0006_0001) | UDP (0006_0003) | RabbitMQ (0006_0004) | +| After TCP | TLS (0006_0002) | (continues above) | (continues above) | +| After Config | Authority (0008_0001) | Source Gen (0008_0002) | - | + +## Sprint Status Overview + +| Sprint | Name | Status | Working Directory | +|--------|------|--------|-------------------| +| 7000-0001-0001 | Router Skeleton | TODO | Multiple (see sprint) | +| 7000-0001-0002 | Common Library | TODO | `src/__Libraries/StellaOps.Router.Common/` | +| 7000-0002-0001 | InMemory Transport | TODO | `src/__Libraries/StellaOps.Router.Transport.InMemory/` | +| 7000-0003-0001 | SDK Core | TODO | `src/__Libraries/StellaOps.Microservice/` | +| 7000-0003-0002 | SDK Handlers | TODO | `src/__Libraries/StellaOps.Microservice/` | +| 7000-0004-0001 | Gateway Core | TODO | `src/Gateway/StellaOps.Gateway.WebService/` | +| 7000-0004-0002 | Gateway Middleware | TODO | `src/Gateway/StellaOps.Gateway.WebService/` | +| 7000-0004-0003 | Gateway Connections | TODO | `src/Gateway/StellaOps.Gateway.WebService/` | +| 7000-0005-0001 | Heartbeat & Health | TODO | SDK + Gateway | +| 7000-0005-0002 | Routing Algorithm | TODO | `src/Gateway/StellaOps.Gateway.WebService/` | +| 7000-0005-0003 | Cancellation | TODO | SDK + Gateway | +| 7000-0005-0004 | Streaming | TODO | SDK + Gateway + InMemory | +| 7000-0005-0005 | Payload Limits | TODO | `src/Gateway/StellaOps.Gateway.WebService/` | +| 7000-0006-0001 | TCP Transport | TODO | `src/__Libraries/StellaOps.Router.Transport.Tcp/` | +| 7000-0006-0002 | TLS Transport | TODO | `src/__Libraries/StellaOps.Router.Transport.Tls/` | +| 7000-0006-0003 | UDP Transport | TODO | `src/__Libraries/StellaOps.Router.Transport.Udp/` | +| 7000-0006-0004 | RabbitMQ Transport | TODO | `src/__Libraries/StellaOps.Router.Transport.RabbitMq/` | +| 7000-0007-0001 | Router Config | TODO | `src/__Libraries/StellaOps.Router.Config/` | +| 7000-0007-0002 | Microservice YAML | TODO | `src/__Libraries/StellaOps.Microservice/` | +| 7000-0008-0001 | Authority Integration | TODO | Gateway + Authority | +| 7000-0008-0002 | Source Generator | TODO | `src/__Libraries/StellaOps.Microservice.SourceGen/` | +| 7000-0009-0001 | Reference Example | TODO | `examples/router/` | +| 7000-0010-0001 | Migration | TODO | Multiple (final integration) | + +## Critical Path + +The minimum path to a working router: + +1. **7000-0001-0001** → Skeleton +2. **7000-0001-0002** → Common models +3. **7000-0002-0001** → InMemory transport +4. **7000-0003-0001** → SDK core +5. **7000-0003-0002** → SDK handlers +6. **7000-0004-0001** → Gateway core +7. **7000-0004-0002** → Gateway middleware +8. **7000-0004-0003** → Gateway connections + +After these 8 sprints, you have a working router with InMemory transport for testing. + +## Isolation Strategy + +The router is developed in isolation using: + +1. **Separate solution file:** `StellaOps.Router.sln` +2. **Dedicated directories:** All router code in new directories +3. **No changes to existing modules:** Until migration sprint +4. **InMemory transport first:** No network dependencies during core development + +This ensures: +- Router development doesn't impact existing StellaOps builds +- Agents can work independently on router without merge conflicts +- Full testing possible without real infrastructure +- Migration is a conscious, controlled step + +## Agent Assignment Guidance + +For maximum parallelization: +- **Foundation Agent:** Sprints 7000-0001-0001, 7000-0001-0002 +- **SDK Agent:** Sprints 7000-0003-0001, 7000-0003-0002 +- **Gateway Agent:** Sprints 7000-0004-0001, 7000-0004-0002, 7000-0004-0003 +- **Transport Agent:** Sprints 7000-0002-0001, 7000-0006-* +- **Protocol Agent:** Sprints 7000-0005-* +- **Config Agent:** Sprints 7000-0007-* +- **Integration Agent:** Sprints 7000-0008-*, 7000-0010-0001 +- **Documentation Agent:** Sprint 7000-0009-0001 + +## Invariants (Never Violate) + +From `specs.md`, these are non-negotiable: +- **Method + Path** is the endpoint identity +- **Strict semver** for version matching +- **Region from GatewayNodeConfig.Region** (never from headers/host) +- **No HTTP transport** between gateway and microservices +- **RequiringClaims** (not AllowedRoles) for authorization +- **Opaque body handling** (router doesn't interpret payloads) + +Any change to these invariants requires updating `specs.md` first. diff --git a/scripts/add_blocked_reference.py b/scripts/add_blocked_reference.py new file mode 100644 index 000000000..337ded9cb --- /dev/null +++ b/scripts/add_blocked_reference.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +""" +Add BLOCKED dependency tree reference to all sprint files. +""" + +import os +import re +from pathlib import Path + +DOCS_DIR = Path(__file__).parent.parent / "docs" +IMPLPLAN_DIR = DOCS_DIR / "implplan" +ROUTER_DIR = DOCS_DIR / "router" + +# Reference lines with correct relative paths +REFERENCE_LINE_IMPLPLAN = "\n> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [BLOCKED_DEPENDENCY_TREE.md](./BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies.\n" +REFERENCE_LINE_ROUTER = "\n> **BLOCKED Tasks:** Before working on BLOCKED tasks, review [../implplan/BLOCKED_DEPENDENCY_TREE.md](../implplan/BLOCKED_DEPENDENCY_TREE.md) for root blockers and dependencies.\n" + +def add_reference_to_sprint(filepath: Path, reference_line: str) -> bool: + """Add BLOCKED reference to a sprint file. Returns True if modified.""" + content = filepath.read_text(encoding="utf-8") + + # Skip if reference already exists + if "BLOCKED_DEPENDENCY_TREE.md" in content: + return False + + # Find the best insertion point + # Priority 1: After "## Documentation Prerequisites" section (before next ##) + # Priority 2: After "## Dependencies & Concurrency" section + # Priority 3: After the first line (title) + + lines = content.split("\n") + insert_index = None + + # Look for Documentation Prerequisites section + for i, line in enumerate(lines): + if line.strip().startswith("## Documentation Prerequisites"): + # Find the next section header or end of list + for j in range(i + 1, len(lines)): + if lines[j].strip().startswith("## "): + insert_index = j + break + elif lines[j].strip() == "" and j + 1 < len(lines) and lines[j + 1].strip().startswith("## "): + insert_index = j + 1 + break + if insert_index is None: + # No next section found, insert after last non-empty line in prerequisites + for j in range(i + 1, len(lines)): + if lines[j].strip().startswith("## "): + insert_index = j + break + break + + # Fallback: after Dependencies & Concurrency + if insert_index is None: + for i, line in enumerate(lines): + if line.strip().startswith("## Dependencies"): + for j in range(i + 1, len(lines)): + if lines[j].strip().startswith("## "): + insert_index = j + break + break + + # Fallback: after first heading + if insert_index is None: + for i, line in enumerate(lines): + if line.strip().startswith("# "): + insert_index = i + 2 # After title and blank line + break + + # Final fallback: beginning of file + if insert_index is None: + insert_index = 1 + + # Insert the reference + new_lines = lines[:insert_index] + [reference_line.strip(), ""] + lines[insert_index:] + new_content = "\n".join(new_lines) + + filepath.write_text(new_content, encoding="utf-8") + return True + + +def main(): + modified = 0 + skipped = 0 + + # Process implplan directory + print("Processing docs/implplan...") + for filepath in sorted(IMPLPLAN_DIR.glob("SPRINT_*.md")): + if add_reference_to_sprint(filepath, REFERENCE_LINE_IMPLPLAN): + print(f"Modified: {filepath.name}") + modified += 1 + else: + print(f"Skipped: {filepath.name}") + skipped += 1 + + # Process router directory + print("\nProcessing docs/router...") + for filepath in sorted(ROUTER_DIR.glob("SPRINT_*.md")): + if add_reference_to_sprint(filepath, REFERENCE_LINE_ROUTER): + print(f"Modified: {filepath.name}") + modified += 1 + else: + print(f"Skipped: {filepath.name}") + skipped += 1 + + print(f"\nSummary: {modified} files modified, {skipped} files skipped") + + +if __name__ == "__main__": + main() diff --git a/src/Cli/StellaOps.Cli/Commands/CommandFactory.cs b/src/Cli/StellaOps.Cli/Commands/CommandFactory.cs index ddfc5d9ff..5283ce312 100644 --- a/src/Cli/StellaOps.Cli/Commands/CommandFactory.cs +++ b/src/Cli/StellaOps.Cli/Commands/CommandFactory.cs @@ -4,6 +4,7 @@ using System.Threading; using System.Threading.Tasks; using Microsoft.Extensions.Logging; using StellaOps.Cli.Configuration; +using StellaOps.Cli.Extensions; using StellaOps.Cli.Plugins; using StellaOps.Cli.Services.Models.AdvisoryAi; @@ -5182,13 +5183,13 @@ internal static class CommandFactory Description = "Image digests to test (can be specified multiple times).", AllowMultipleArgumentsPerToken = true }; - imagesOption.IsRequired = true; + imagesOption.Required = true; var scannerOption = new Option("--scanner", "-s") { Description = "Scanner container image reference." }; - scannerOption.IsRequired = true; + scannerOption.Required = true; var policyBundleOption = new Option("--policy-bundle") { @@ -5582,13 +5583,13 @@ internal static class CommandFactory { Description = "Start timestamp (ISO-8601). Required." }; - fromOption.IsRequired = true; + fromOption.Required = true; var toOption = new Option("--to") { Description = "End timestamp (ISO-8601). Required." }; - toOption.IsRequired = true; + toOption.Required = true; var logsTenantOption = new Option("--tenant", "-t") { @@ -6544,7 +6545,7 @@ internal static class CommandFactory var secretsInjectRefOption = new Option("--secret-ref") { Description = "Secret reference (provider-specific path).", - IsRequired = true + Required = true }; var secretsInjectProviderOption = new Option("--provider") @@ -6844,19 +6845,18 @@ internal static class CommandFactory return CommandHandlers.HandleExceptionsListAsync( services, + tenant, vuln, scopeType, scopeValue, statuses, owner, effect, - expiringDays, + expiringDays.HasValue ? DateTimeOffset.UtcNow.AddDays(expiringDays.Value) : null, includeExpired, pageSize, pageToken, - tenant, - json, - csv, + json || csv, verbose, cancellationToken); }); @@ -6977,7 +6977,8 @@ internal static class CommandFactory var effect = parseResult.GetValue(createEffectOption) ?? string.Empty; var justification = parseResult.GetValue(createJustificationOption) ?? string.Empty; var owner = parseResult.GetValue(createOwnerOption) ?? string.Empty; - var expiration = parseResult.GetValue(createExpirationOption); + var expirationStr = parseResult.GetValue(createExpirationOption); + var expiration = !string.IsNullOrWhiteSpace(expirationStr) && DateTimeOffset.TryParse(expirationStr, out var exp) ? exp : (DateTimeOffset?)null; var evidence = parseResult.GetValue(createEvidenceOption) ?? Array.Empty(); var policy = parseResult.GetValue(createPolicyOption); var stage = parseResult.GetValue(createStageOption); @@ -6987,17 +6988,17 @@ internal static class CommandFactory return CommandHandlers.HandleExceptionsCreateAsync( services, + tenant ?? string.Empty, vuln, scopeType, scopeValue, effect, justification, - owner, + owner ?? string.Empty, expiration, evidence, policy, stage, - tenant, json, verbose, cancellationToken); @@ -7042,9 +7043,9 @@ internal static class CommandFactory return CommandHandlers.HandleExceptionsPromoteAsync( services, exceptionId, - target, - comment, tenant, + target ?? "active", + comment, json, verbose, cancellationToken); @@ -7128,10 +7129,10 @@ internal static class CommandFactory return CommandHandlers.HandleExceptionsImportAsync( services, + tenant ?? string.Empty, file, stage, source, - tenant, json, verbose, cancellationToken); @@ -7184,11 +7185,13 @@ internal static class CommandFactory return CommandHandlers.HandleExceptionsExportAsync( services, - output, + tenant, statuses, format, + output, + false, // includeManifest signed, - tenant, + false, // json output verbose, cancellationToken); }); @@ -7470,13 +7473,13 @@ internal static class CommandFactory var backfillFromOption = new Option("--from") { Description = "Start date/time for backfill (ISO 8601 format).", - IsRequired = true + Required = true }; var backfillToOption = new Option("--to") { Description = "End date/time for backfill (ISO 8601 format).", - IsRequired = true + Required = true }; var backfillDryRunOption = new Option("--dry-run") @@ -7732,19 +7735,19 @@ internal static class CommandFactory var quotaSetTenantOption = new Option("--tenant") { Description = "Tenant ID.", - IsRequired = true + Required = true }; var quotaSetResourceTypeOption = new Option("--resource-type") { Description = "Resource type (api_calls, data_ingested_bytes, items_processed, backfills, concurrent_jobs, storage_bytes).", - IsRequired = true + Required = true }; var quotaSetLimitOption = new Option("--limit") { Description = "Quota limit value.", - IsRequired = true + Required = true }; var quotaSetPeriodOption = new Option("--period") @@ -7800,13 +7803,13 @@ internal static class CommandFactory var quotaResetTenantOption = new Option("--tenant") { Description = "Tenant ID.", - IsRequired = true + Required = true }; var quotaResetResourceTypeOption = new Option("--resource-type") { Description = "Resource type to reset.", - IsRequired = true + Required = true }; var quotaResetReasonOption = new Option("--reason") @@ -9547,7 +9550,7 @@ internal static class CommandFactory var outputOption = new Option("--output", "-o") { Description = "Output path for the downloaded spec (file or directory).", - IsRequired = true + Required = true }; var serviceOption = new Option("--service", "-s") diff --git a/src/Cli/StellaOps.Cli/Commands/CommandHandlers.cs b/src/Cli/StellaOps.Cli/Commands/CommandHandlers.cs index 2738e45c4..906ceab25 100644 --- a/src/Cli/StellaOps.Cli/Commands/CommandHandlers.cs +++ b/src/Cli/StellaOps.Cli/Commands/CommandHandlers.cs @@ -48,43 +48,72 @@ namespace StellaOps.Cli.Commands; internal static class CommandHandlers { private const string KmsPassphraseEnvironmentVariable = "STELLAOPS_KMS_PASSPHRASE"; - private static readonly JsonSerializerOptions KmsJsonOptions = new(JsonSerializerDefaults.Web) - { - WriteIndented = true - }; - - private static async Task VerifyBundleAsync(string path, ILogger logger, CancellationToken cancellationToken) - { - // Simple SHA256 check using sidecar .sha256 file if present; fail closed on mismatch. - var shaPath = path + ".sha256"; - if (!File.Exists(shaPath)) - { - logger.LogError("Checksum file missing for bundle {Bundle}. Expected sidecar {Sidecar}.", path, shaPath); - Environment.ExitCode = 21; - throw new InvalidOperationException("Checksum file missing"); - } - - var expected = (await File.ReadAllTextAsync(shaPath, cancellationToken).ConfigureAwait(false)).Trim(); - using var stream = File.OpenRead(path); - var hash = await SHA256.HashDataAsync(stream, cancellationToken).ConfigureAwait(false); - var actual = Convert.ToHexString(hash).ToLowerInvariant(); - - if (!string.Equals(expected, actual, StringComparison.OrdinalIgnoreCase)) - { - logger.LogError("Checksum mismatch for {Bundle}. Expected {Expected} but found {Actual}", path, expected, actual); - Environment.ExitCode = 22; - throw new InvalidOperationException("Checksum verification failed"); - } - - logger.LogInformation("Checksum verified for {Bundle}", path); - } + private static readonly JsonSerializerOptions KmsJsonOptions = new(JsonSerializerDefaults.Web) + { + WriteIndented = true + }; - public static async Task HandleScannerDownloadAsync( - IServiceProvider services, - string channel, - string? output, - bool overwrite, - bool install, + /// + /// Standard JSON serializer options for CLI output. + /// + private static readonly JsonSerializerOptions JsonOptions = new(JsonSerializerDefaults.Web) + { + WriteIndented = true, + PropertyNamingPolicy = JsonNamingPolicy.CamelCase + }; + + /// + /// JSON serializer options for output (alias for JsonOptions). + /// + private static readonly JsonSerializerOptions JsonOutputOptions = JsonOptions; + + /// + /// Sets the verbosity level for logging. + /// + private static void SetVerbosity(IServiceProvider services, bool verbose) + { + // Configure logging level based on verbose flag + var loggerFactory = services.GetService(); + if (loggerFactory is not null && verbose) + { + // Enable debug logging when verbose is true + var logger = loggerFactory.CreateLogger("StellaOps.Cli.Commands.CommandHandlers"); + logger.LogDebug("Verbose logging enabled"); + } + } + + private static async Task VerifyBundleAsync(string path, ILogger logger, CancellationToken cancellationToken) + { + // Simple SHA256 check using sidecar .sha256 file if present; fail closed on mismatch. + var shaPath = path + ".sha256"; + if (!File.Exists(shaPath)) + { + logger.LogError("Checksum file missing for bundle {Bundle}. Expected sidecar {Sidecar}.", path, shaPath); + Environment.ExitCode = 21; + throw new InvalidOperationException("Checksum file missing"); + } + + var expected = (await File.ReadAllTextAsync(shaPath, cancellationToken).ConfigureAwait(false)).Trim(); + using var stream = File.OpenRead(path); + var hash = await SHA256.HashDataAsync(stream, cancellationToken).ConfigureAwait(false); + var actual = Convert.ToHexString(hash).ToLowerInvariant(); + + if (!string.Equals(expected, actual, StringComparison.OrdinalIgnoreCase)) + { + logger.LogError("Checksum mismatch for {Bundle}. Expected {Expected} but found {Actual}", path, expected, actual); + Environment.ExitCode = 22; + throw new InvalidOperationException("Checksum verification failed"); + } + + logger.LogInformation("Checksum verified for {Bundle}", path); + } + + public static async Task HandleScannerDownloadAsync( + IServiceProvider services, + string channel, + string? output, + bool overwrite, + bool install, bool verbose, CancellationToken cancellationToken) { @@ -114,29 +143,29 @@ internal static class CommandHandlers CliMetrics.RecordScannerDownload(channel, result.FromCache); - if (install) - { - await VerifyBundleAsync(result.Path, logger, cancellationToken).ConfigureAwait(false); - - var installer = scope.ServiceProvider.GetRequiredService(); - await installer.InstallAsync(result.Path, verbose, cancellationToken).ConfigureAwait(false); - CliMetrics.RecordScannerInstall(channel); - } + if (install) + { + await VerifyBundleAsync(result.Path, logger, cancellationToken).ConfigureAwait(false); + + var installer = scope.ServiceProvider.GetRequiredService(); + await installer.InstallAsync(result.Path, verbose, cancellationToken).ConfigureAwait(false); + CliMetrics.RecordScannerInstall(channel); + } Environment.ExitCode = 0; - } - catch (Exception ex) - { - logger.LogError(ex, "Failed to download scanner bundle."); - if (Environment.ExitCode == 0) - { - Environment.ExitCode = 1; - } - } - finally - { - verbosity.MinimumLevel = previousLevel; - } + } + catch (Exception ex) + { + logger.LogError(ex, "Failed to download scanner bundle."); + if (Environment.ExitCode == 0) + { + Environment.ExitCode = 1; + } + } + finally + { + verbosity.MinimumLevel = previousLevel; + } } public static async Task HandleTaskRunnerSimulateAsync( @@ -264,15 +293,15 @@ internal static class CommandHandlers { var console = AnsiConsole.Console; - console.MarkupLine($"[bold]Scan[/]: {result.ScanId}"); - console.MarkupLine($"Image: {result.ImageDigest}"); - console.MarkupLine($"Generated: {result.GeneratedAt:O}"); - console.MarkupLine($"Outcome: {result.Graph.Outcome}"); - - if (result.BestPlan is not null) - { - console.MarkupLine($"Best Terminal: {result.BestPlan.TerminalPath} (conf {result.BestPlan.Confidence:F1}, user {result.BestPlan.User}, cwd {result.BestPlan.WorkingDirectory})"); - } + console.MarkupLine($"[bold]Scan[/]: {result.ScanId}"); + console.MarkupLine($"Image: {result.ImageDigest}"); + console.MarkupLine($"Generated: {result.GeneratedAt:O}"); + console.MarkupLine($"Outcome: {result.Graph.Outcome}"); + + if (result.BestPlan is not null) + { + console.MarkupLine($"Best Terminal: {result.BestPlan.TerminalPath} (conf {result.BestPlan.Confidence:F1}, user {result.BestPlan.User}, cwd {result.BestPlan.WorkingDirectory})"); + } var planTable = new Table() .AddColumn("Terminal") @@ -284,15 +313,15 @@ internal static class CommandHandlers foreach (var plan in result.Graph.Plans.OrderByDescending(p => p.Confidence)) { - var confidence = plan.Confidence.ToString("F1", CultureInfo.InvariantCulture); - planTable.AddRow( - plan.TerminalPath, - plan.Runtime ?? "-", - plan.Type.ToString(), - confidence, - plan.User, - plan.WorkingDirectory); - } + var confidence = plan.Confidence.ToString("F1", CultureInfo.InvariantCulture); + planTable.AddRow( + plan.TerminalPath, + plan.Runtime ?? "-", + plan.Type.ToString(), + confidence, + plan.User, + plan.WorkingDirectory); + } if (planTable.Rows.Count > 0) { @@ -6860,7 +6889,7 @@ internal static class CommandHandlers } AnsiConsole.Write(violationTable); -} + } private static int DetermineVerifyExitCode(AocVerifyResponse response) { @@ -10895,13 +10924,10 @@ stella policy test {policyName}.stella Code = diag.Code, Message = diag.Message, Severity = diag.Severity.ToString().ToLowerInvariant(), - Line = diag.Line, - Column = diag.Column, - Span = diag.Span, - Suggestion = diag.Suggestion + Path = diag.Path }; - if (diag.Severity == PolicyDsl.DiagnosticSeverity.Error) + if (diag.Severity == PolicyIssueSeverity.Error) { errors.Add(diagnostic); } @@ -10939,7 +10965,7 @@ stella policy test {policyName}.stella InputPath = fullPath, IrPath = irPath, Digest = digest, - SyntaxVersion = compileResult.Document?.SyntaxVersion, + SyntaxVersion = compileResult.Document?.Syntax, PolicyName = compileResult.Document?.Name, RuleCount = compileResult.Document?.Rules.Length ?? 0, ProfileCount = compileResult.Document?.Profiles.Length ?? 0, @@ -10985,24 +11011,14 @@ stella policy test {policyName}.stella foreach (var err in errors) { - var location = err.Line.HasValue ? $":{err.Line}" : ""; - if (err.Column.HasValue) location += $":{err.Column}"; - AnsiConsole.MarkupLine($"[red]error[{Markup.Escape(err.Code)}]{location}: {Markup.Escape(err.Message)}[/]"); - if (!string.IsNullOrWhiteSpace(err.Suggestion)) - { - AnsiConsole.MarkupLine($" [cyan]suggestion: {Markup.Escape(err.Suggestion)}[/]"); - } + var location = !string.IsNullOrWhiteSpace(err.Path) ? $" at {err.Path}" : ""; + AnsiConsole.MarkupLine($"[red]error[{Markup.Escape(err.Code)}]{Markup.Escape(location)}: {Markup.Escape(err.Message)}[/]"); } foreach (var warn in warnings) { - var location = warn.Line.HasValue ? $":{warn.Line}" : ""; - if (warn.Column.HasValue) location += $":{warn.Column}"; - AnsiConsole.MarkupLine($"[yellow]warning[{Markup.Escape(warn.Code)}]{location}: {Markup.Escape(warn.Message)}[/]"); - if (!string.IsNullOrWhiteSpace(warn.Suggestion)) - { - AnsiConsole.MarkupLine($" [cyan]suggestion: {Markup.Escape(warn.Suggestion)}[/]"); - } + var location = !string.IsNullOrWhiteSpace(warn.Path) ? $" at {warn.Path}" : ""; + AnsiConsole.MarkupLine($"[yellow]warning[{Markup.Escape(warn.Code)}]{Markup.Escape(location)}: {Markup.Escape(warn.Message)}[/]"); } } @@ -13248,18 +13264,6 @@ stella policy test {policyName}.stella } } - private static string GetVexStatusMarkup(string status) - { - return status?.ToLowerInvariant() switch - { - "affected" => "[red]affected[/]", - "not_affected" => "[green]not_affected[/]", - "fixed" => "[blue]fixed[/]", - "under_investigation" => "[yellow]under_investigation[/]", - _ => Markup.Escape(status ?? "(unknown)") - }; - } - #endregion #region Vulnerability Explorer (CLI-VULN-29-001) @@ -14543,13 +14547,13 @@ stella policy test {policyName}.stella var fixText = obs.Fix?.Available == true ? "[green]available[/]" : "[grey]none[/]"; table.AddRow( - Markup.Escape(obs.ObservationId), - Markup.Escape(sourceVendor), - Markup.Escape(aliasesText), - Markup.Escape(severityText), + new Markup(Markup.Escape(obs.ObservationId)), + new Markup(Markup.Escape(sourceVendor)), + new Markup(Markup.Escape(aliasesText)), + new Markup(Markup.Escape(severityText)), new Markup(kevText), new Markup(fixText), - obs.CreatedAt.ToUniversalTime().ToString("u", CultureInfo.InvariantCulture)); + new Markup(Markup.Escape(obs.CreatedAt.ToUniversalTime().ToString("u", CultureInfo.InvariantCulture)))); } AnsiConsole.Write(table); @@ -15386,12 +15390,12 @@ stella policy test {policyName}.stella var size = FormatSize(snapshot.SizeBytes); table.AddRow( - Markup.Escape(snapshot.SnapshotId.Length > 20 ? snapshot.SnapshotId[..17] + "..." : snapshot.SnapshotId), - Markup.Escape(snapshot.CaseId), + new Markup(Markup.Escape(snapshot.SnapshotId.Length > 20 ? snapshot.SnapshotId[..17] + "..." : snapshot.SnapshotId)), + new Markup(Markup.Escape(snapshot.CaseId)), new Markup(statusMarkup), - artifactCount, - size, - snapshot.CreatedAt.ToUniversalTime().ToString("u", CultureInfo.InvariantCulture)); + new Markup(Markup.Escape(artifactCount)), + new Markup(Markup.Escape(size)), + new Markup(Markup.Escape(snapshot.CreatedAt.ToUniversalTime().ToString("u", CultureInfo.InvariantCulture)))); } AnsiConsole.Write(table); diff --git a/src/Cli/StellaOps.Cli/Configuration/CliProfile.cs b/src/Cli/StellaOps.Cli/Configuration/CliProfile.cs index 9347a4d05..2066afa7e 100644 --- a/src/Cli/StellaOps.Cli/Configuration/CliProfile.cs +++ b/src/Cli/StellaOps.Cli/Configuration/CliProfile.cs @@ -12,7 +12,7 @@ namespace StellaOps.Cli.Configuration; /// CLI profile for storing named configurations. /// Per CLI-CORE-41-001, supports profiles/contexts for multi-environment workflows. /// -public sealed class CliProfile +public sealed record CliProfile { /// /// Profile name (e.g., "prod", "staging", "dev"). diff --git a/src/Cli/StellaOps.Cli/Configuration/GlobalOptions.cs b/src/Cli/StellaOps.Cli/Configuration/GlobalOptions.cs index 29ab2cbd4..a885d86fc 100644 --- a/src/Cli/StellaOps.Cli/Configuration/GlobalOptions.cs +++ b/src/Cli/StellaOps.Cli/Configuration/GlobalOptions.cs @@ -54,87 +54,45 @@ public sealed class GlobalOptions /// public static IEnumerable