diff --git a/deploy/helm/stellaops/values-orchestrator.yaml b/deploy/helm/stellaops/values-orchestrator.yaml new file mode 100644 index 000000000..a4e889e8b --- /dev/null +++ b/deploy/helm/stellaops/values-orchestrator.yaml @@ -0,0 +1,209 @@ +# Orchestrator Service Helm Values Overlay +# Enables job scheduling, DAG planning, and worker coordination. +# +# Usage: +# helm upgrade stellaops ./stellaops -f values.yaml -f values-orchestrator.yaml + +global: + labels: + stellaops.io/component: orchestrator + +# Orchestrator-specific ConfigMaps +configMaps: + orchestrator-config: + data: + orchestrator.yaml: | + Orchestrator: + # Telemetry configuration + telemetry: + minimumLogLevel: Information + enableRequestLogging: true + otelEndpoint: "" + + # Authority integration (disable for standalone testing) + authority: + enabled: true + issuer: https://authority.svc.cluster.local/realms/stellaops + requireHttpsMetadata: true + audiences: + - stellaops-platform + readScope: orchestrator:read + writeScope: orchestrator:write + adminScope: orchestrator:admin + + # Tenant resolution + tenantHeader: X-StellaOps-Tenant + + # PostgreSQL connection + storage: + connectionString: "Host=orchestrator-postgres;Database=stellaops_orchestrator;Username=orchestrator;Password=${POSTGRES_PASSWORD}" + commandTimeoutSeconds: 60 + enableSensitiveDataLogging: false + + # Scheduler configuration + scheduler: + # Maximum concurrent jobs per tenant + defaultConcurrencyLimit: 100 + # Default rate limit (requests per second) + defaultRateLimit: 50 + # Job claim timeout before re-queue + claimTimeoutMinutes: 30 + # Heartbeat interval for active jobs + heartbeatIntervalSeconds: 30 + # Maximum heartbeat misses before job marked stale + maxHeartbeatMisses: 3 + + # Autoscaling configuration + autoscaling: + # Enable autoscaling metrics endpoint + enabled: true + # Queue depth threshold for scale-up signal + queueDepthThreshold: 10000 + # Dispatch latency P95 threshold (ms) + latencyP95ThresholdMs: 150 + # Scale-up cooldown period + scaleUpCooldownSeconds: 60 + # Scale-down cooldown period + scaleDownCooldownSeconds: 300 + + # Load shedding configuration + loadShedding: + enabled: true + # Warning threshold (load factor) + warningThreshold: 0.8 + # Critical threshold (load factor) + criticalThreshold: 1.0 + # Emergency threshold (load factor) + emergencyThreshold: 1.5 + # Recovery cooldown + recoveryCooldownSeconds: 30 + + # Dead letter configuration + deadLetter: + # Maximum replay attempts + maxReplayAttempts: 3 + # Entry expiration (days) + expirationDays: 30 + # Purge interval + purgeIntervalHours: 24 + + # Backfill configuration + backfill: + # Maximum concurrent backfill requests + maxConcurrentRequests: 5 + # Default batch size + defaultBatchSize: 1000 + # Maximum retention lookback (days) + maxRetentionDays: 90 + +# Service definitions +services: + orchestrator-web: + image: registry.stella-ops.org/stellaops/orchestrator-web:2025.10.0-edge + replicas: 2 + service: + port: 8080 + configMounts: + - name: orchestrator-config + configMap: orchestrator-config + mountPath: /app/etc/orchestrator.yaml + subPath: orchestrator.yaml + envFrom: + - secretRef: + name: orchestrator-secrets + env: + ASPNETCORE_ENVIRONMENT: Production + ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml + ports: + - containerPort: 8080 + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "1000m" + readinessProbe: + httpGet: + path: /readyz + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 + 
failureThreshold: 3 + livenessProbe: + httpGet: + path: /livez + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 20 + timeoutSeconds: 5 + failureThreshold: 3 + startupProbe: + httpGet: + path: /startupz + port: 8080 + initialDelaySeconds: 3 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 30 + + orchestrator-worker: + image: registry.stella-ops.org/stellaops/orchestrator-worker:2025.10.0-edge + replicas: 1 + configMounts: + - name: orchestrator-config + configMap: orchestrator-config + mountPath: /app/etc/orchestrator.yaml + subPath: orchestrator.yaml + envFrom: + - secretRef: + name: orchestrator-secrets + env: + DOTNET_ENVIRONMENT: Production + ORCHESTRATOR__CONFIG: /app/etc/orchestrator.yaml + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + + orchestrator-postgres: + class: infrastructure + image: docker.io/library/postgres:16-alpine + service: + port: 5432 + envFrom: + - secretRef: + name: orchestrator-postgres-secrets + env: + POSTGRES_DB: stellaops_orchestrator + POSTGRES_USER: orchestrator + volumeMounts: + - name: postgres-data + mountPath: /var/lib/postgresql/data + volumeClaims: + - name: postgres-data + claimName: orchestrator-postgres-data + readinessProbe: + exec: + command: + - pg_isready + - -U + - orchestrator + - -d + - stellaops_orchestrator + initialDelaySeconds: 5 + periodSeconds: 10 + livenessProbe: + exec: + command: + - pg_isready + - -U + - orchestrator + - -d + - stellaops_orchestrator + initialDelaySeconds: 15 + periodSeconds: 30 diff --git a/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md b/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md index fca432dba..9a66af93d 100644 --- a/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md +++ b/docs/implplan/SPRINT_0152_0001_0002_orchestrator_ii.md @@ -31,7 +31,7 @@ | 9 | ORCH-SVC-34-001 | DONE | Depends on 33-004. | Orchestrator Service Guild | Quota management APIs, per-tenant SLO burn-rate computation, alert budget tracking via metrics. | | 10 | ORCH-SVC-34-002 | DONE | Depends on 34-001. | Orchestrator Service Guild | Audit log + immutable run ledger export with signed manifest and provenance chain to artifacts. | | 11 | ORCH-SVC-34-003 | DONE | Depends on 34-002. | Orchestrator Service Guild | Perf/scale validation (≥10k pending jobs, dispatch P95 <150 ms); autoscaling hooks; health probes. | -| 12 | ORCH-SVC-34-004 | TODO | Depends on 34-003. | Orchestrator Service Guild | GA packaging: container image, Helm overlays, offline bundle seeds, provenance attestations, compliance checklist. | +| 12 | ORCH-SVC-34-004 | DONE | Depends on 34-003. | Orchestrator Service Guild | GA packaging: container image, Helm overlays, offline bundle seeds, provenance attestations, compliance checklist. | | 13 | ORCH-SVC-35-101 | TODO | Depends on 34-004. | Orchestrator Service Guild | Register `export` job type with quotas/rate policies; expose telemetry; ensure exporter workers heartbeat via orchestrator contracts. | | 14 | ORCH-SVC-36-101 | TODO | Depends on 35-101. | Orchestrator Service Guild | Capture distribution metadata and retention timestamps for export jobs; update dashboards and SSE payloads. | | 15 | ORCH-SVC-37-101 | TODO | Depends on 36-101. | Orchestrator Service Guild | Enable scheduled export runs, retention pruning hooks, failure alerting tied to export job class. | @@ -53,6 +53,7 @@ | 2025-11-28 | ORCH-SVC-34-001 DONE: Implemented quota management APIs with SLO burn-rate computation and alert budget tracking. 
Created Slo domain model (Domain/Slo.cs) with SloType enum (Availability/Latency/Throughput), SloWindow enum (1h/1d/7d/30d), AlertSeverity enum, factory methods (CreateAvailability/CreateLatency/CreateThroughput), Update/Enable/Disable methods, ErrorBudget/GetWindowDuration computed properties. Created SloState record for current metrics (SLI, budget consumed/remaining, burn rate, time to exhaustion). Created AlertBudgetThreshold (threshold-based alerting with cooldown and rate limiting, ShouldTrigger logic). Created SloAlert (alert lifecycle with Acknowledge/Resolve). Built BurnRateEngine (SloManagement/BurnRateEngine.cs) with interfaces: IBurnRateEngine (ComputeStateAsync, ComputeAllStatesAsync, EvaluateAlertsAsync), ISloEventSource (availability/latency/throughput counts retrieval), ISloRepository/IAlertThresholdRepository/ISloAlertRepository. Created database migration (004_slo_quotas.sql) with tables: slos, alert_budget_thresholds, slo_alerts, slo_state_snapshots, quota_audit_log, job_metrics_hourly. Added helper functions: get_slo_availability_counts, cleanup_slo_snapshots, cleanup_quota_audit_log, get_slo_summary. Created REST API contracts (QuotaContracts.cs): CreateQuotaRequest/UpdateQuotaRequest/PauseQuotaRequest/QuotaResponse/QuotaListResponse, CreateSloRequest/UpdateSloRequest/SloResponse/SloListResponse/SloStateResponse/SloWithStateResponse, CreateAlertThresholdRequest/AlertThresholdResponse, SloAlertResponse/SloAlertListResponse/AcknowledgeAlertRequest/ResolveAlertRequest, SloSummaryResponse/QuotaSummaryResponse/QuotaUtilizationResponse. Created QuotaEndpoints (list/get/create/update/delete, pause/resume, summary). Created SloEndpoints (list/get/create/update/delete, enable/disable, state/states, thresholds CRUD, alerts list/get/acknowledge/resolve, summary). Added SLO metrics to OrchestratorMetrics: SlosCreated/SlosUpdated, SloAlertsTriggered/Acknowledged/Resolved, SloBudgetConsumed/SloBurnRate/SloCurrentSli/SloBudgetRemaining/SloTimeToExhaustion histograms, SloActiveAlerts UpDownCounter. Comprehensive test coverage: SloTests (25 tests for creation/validation/error budget/window duration/update/enable-disable), SloStateTests (tests for NoData factory), AlertBudgetThresholdTests (12 tests for creation/validation/ShouldTrigger/cooldown), SloAlertTests (5 tests for Create/Acknowledge/Resolve). Build succeeds, 450 tests pass (+48 new tests). | Implementer | | 2025-11-28 | ORCH-SVC-34-002 DONE: Implemented audit log and immutable run ledger export. Created AuditLog domain model (Domain/Audit/AuditLog.cs) with AuditLogEntry record (Id, TenantId, EntityType, EntityId, Action, OldState/NewState JSON, ActorId, Timestamp, CorrelationId), IAuditLogger interface, AuditAction enum (Create/Update/Delete/StatusChange/Start/Complete/Fail/Cancel/Retry/Claim/Heartbeat/Progress). Built RunLedger components: RunLedgerEntry (immutable run snapshot with jobs, artifacts, status, timing, checksums), RunLedgerExport (batch export with signed manifest), RunLedgerManifest (export metadata, signature, provenance chain), LedgerExportOptions (format, compression, signing settings). Created IAuditLogRepository/IRunLedgerRepository interfaces. Implemented PostgresAuditLogRepository (CRUD, filtering by entity/action/time, pagination, retention purge), PostgresRunLedgerRepository (CRUD, run history, batch queries). Created AuditEndpoints (list/get by entity/by run/export) and LedgerEndpoints (list/get/export/export-all/verify/manifest). 
Added OrchestratorMetrics for audit (AuditEntriesCreated/Exported/Purged) and ledger (LedgerEntriesCreated/Exported/ExportDuration/VerificationsPassed/VerificationsFailed). Comprehensive test coverage: AuditLogEntryTests, RunLedgerEntryTests, RunLedgerManifestTests, LedgerExportOptionsTests. Build succeeds, 487 tests pass (+37 new tests). | Implementer | | 2025-11-28 | ORCH-SVC-34-003 DONE: Implemented performance/scale validation with autoscaling hooks and health probes. Created ScaleMetrics service (Core/Scale/ScaleMetrics.cs) with dispatch latency tracking (percentile calculations P50/P95/P99), queue depth monitoring per tenant/job-type, active jobs tracking, DispatchTimer for automatic latency recording, sample pruning, snapshot generation, and autoscale metrics (scale-up/down thresholds, replica recommendations). Built LoadShedder (Core/Scale/LoadShedder.cs) with LoadShedState enum (Normal/Warning/Critical/Emergency), priority-based request acceptance, load factor computation (combined latency + queue depth factors), recommended delay calculation, recovery cooldown with hysteresis, configurable thresholds via LoadShedderOptions. Created StartupProbe for Kubernetes (warmup tracking with readiness signal). Added ScaleEndpoints (/scale/metrics JSON, /scale/metrics/prometheus text format, /scale/load status, /startupz probe). Enhanced HealthEndpoints integration. Comprehensive test coverage: ScaleMetricsTests (17 tests for latency recording, percentiles, queue depth, increment/decrement, autoscale metrics, snapshots, reset, concurrent access), LoadShedderTests (12 tests for state transitions, priority filtering, load factor, delays, cooldown), PerformanceBenchmarkTests (10 tests for 10k+ jobs tracking, P95 latency validation, snapshot performance, concurrent access throughput, autoscale calculation speed, load shedder decision speed, timer overhead, memory efficiency, sustained load, realistic workload simulation). Build succeeds, 37 scale tests pass (487 total). | Implementer | +| 2025-11-29 | ORCH-SVC-34-004 DONE: Implemented GA packaging artifacts. Created multi-stage Dockerfile (ops/orchestrator/Dockerfile) with SDK build stage and separate runtime stages for orchestrator-web and orchestrator-worker, including OCI labels, HEALTHCHECK directive, and deterministic build settings. Created Helm values overlay (deploy/helm/stellaops/values-orchestrator.yaml) with orchestrator-web (2 replicas), orchestrator-worker (1 replica), and orchestrator-postgres services, including full configuration for scheduler, autoscaling, load shedding, dead letter, and backfill. Created air-gap bundle script (ops/orchestrator/build-airgap-bundle.sh) for offline deployment with OCI image export, config templates, manifest generation, and documentation bundling. Created SLSA v1 provenance attestation template (ops/orchestrator/provenance.json) with build definition, resolved dependencies, and byproducts. Created GA compliance checklist (ops/orchestrator/GA_CHECKLIST.md) covering build/packaging, security, functional, performance/scale, observability, deployment, documentation, testing, and compliance sections with sign-off template. All YAML/JSON syntax validated, build succeeds. | Implementer | ## Decisions & Risks - All tasks depend on outputs from Orchestrator I (32-001); sprint remains TODO until upstream ship. 
diff --git a/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md b/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md index a8701d077..90f1d29f7 100644 --- a/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md +++ b/docs/implplan/SPRINT_0190_0001_0001_cvss_v4_receipts.md @@ -80,4 +80,5 @@ | 2025-11-28 | CVSS-RECEIPT-190-005 DONE: Added `ReceiptBuilder` with deterministic input hashing, evidence validation (policy-driven), vector/scoring via CvssV4Engine, and persistence through repository abstraction. Added `CreateReceiptRequest`, `IReceiptRepository`, unit tests (`ReceiptBuilderTests`) with in-memory repo; all 37 tests passing. | Implementer | | 2025-11-28 | CVSS-DSSE-190-006 DONE: Integrated Attestor DSSE signing into receipt builder. Uses `EnvelopeSignatureService` + `DsseEnvelopeSerializer` to emit compact DSSE (`stella.ops/cvssReceipt@v1`) and stores base64 DSSE ref in `AttestationRefs`. Added signing test with Ed25519 fixture; total tests 38 passing. | Implementer | | 2025-11-28 | CVSS-HISTORY-190-007 DONE: Added `ReceiptHistoryService` with amendment tracking (`AmendReceiptRequest`), history entry creation, modified metadata, and optional DSSE re-signing. Repository abstraction extended with `GetAsync`/`UpdateAsync`; in-memory repo updated; tests remain green (38). | Implementer | +| 2025-11-29 | CVSS-RECEIPT/DSSE/HISTORY tasks wired to PostgreSQL: added `policy.cvss_receipts` migration, `PostgresReceiptRepository`, DI registration, and integration test (`PostgresReceiptRepositoryTests`). Test run failed locally because Docker/Testcontainers not available; code compiles and unit tests still pass. | Implementer | | 2025-11-28 | Ran `dotnet test src/Policy/__Tests/StellaOps.Policy.Scoring.Tests` (Release); 35 tests passed. Adjusted MacroVector lookup for FIRST sample vectors; duplicate PackageReference warnings remain to be cleaned separately. | Implementer | diff --git a/docs/product-advisories/25-Nov-2025 - Half‑Life Confidence Decay for Unknowns.md b/docs/product-advisories/25-Nov-2025 - Half‑Life Confidence Decay for Unknowns.md new file mode 100644 index 000000000..cd89d8061 --- /dev/null +++ b/docs/product-advisories/25-Nov-2025 - Half‑Life Confidence Decay for Unknowns.md @@ -0,0 +1,602 @@ +Here’s a simple, low‑friction way to keep priorities fresh without constant manual grooming: **let confidence decay over time**. + +![A small curve sloping down over time, illustrating exponential decay](https://dummyimage.com/800x250/ffffff/000000\&text=confidence\(t\)%20=%20e^{-t/τ}) + +# Exponential confidence decay (what & why) + +* **Idea:** Every item (task, lead, bug, doc, hypothesis) has a confidence score that **automatically shrinks with time** if you don’t touch it. +* **Formula:** `confidence(t) = e^(−t/τ)` where `t` is days since last signal (edit, comment, commit, new data), and **τ (“tau”)** is the decay constant. +* **Rule of thumb:** With **τ = 30 days**, at **t = 30** the confidence is **e^(−1) ≈ 0.37**—about a **63% drop**. This surfaces long‑ignored items *gradually*, not with harsh “stale/expired” flips. + +# How to use it in practice + +* **Signals that reset t → 0:** comment on the ticket, new benchmark, fresh log sample, doc update, CI run, new market news. +* **Sort queues by:** `priority × confidence(t)` (or severity × confidence). Quiet items drift down; truly active ones stay up. 
+
+* **Escalation bands:**
+
+  * `>0.6` = green (recently touched)
+  * `0.3–0.6` = amber (review soon)
+  * `<0.3` = red (poke or close)
+
+# Quick presets
+
+* **Fast‑moving queues (incidents, hot leads):** τ = **7–14** days
+* **Engineering tasks / product docs:** τ = **30** days
+* **Research bets / roadmaps:** τ = **60–90** days
+
+# For your world (Stella Ops + ops/dev work)
+
+* **Vuln tickets:** `risk_score = CVSS × reachability × e^(−t/30)`
+* **Roadmap epics:** `value_score = impact × e^(−t/60)` to re‑rank quarterly.
+* **Docs:** show a badge “freshness: 42%” derived from last edit age to nudge updates.
+
+# Minimal implementation sketch
+
+* Store per‑item: `last_signal_at`, `base_priority`.
+* Compute on read:
+
+  ```
+  days = (now - last_signal_at).days
+  conf = exp(-days / tau)
+  score = base_priority * conf
+  ```
+* Recompute in your API layer or materialize nightly; no cron spam needed.
+
+Below is an implementation plan you can drop into a ticketing/PRD — with clear phases, data model changes, APIs, and some sample code (C# + SQL), including Stella Ops–specific notes.
+
+---
+
+## 0. Scope & Objectives
+
+**Goal:** Introduce `confidence(t)` as an automatic freshness factor that decays with time and is used to rank and highlight work.
+
+We’ll apply it to:
+
+* Vulnerabilities (Stella Ops)
+* General issues / tasks / epics
+* (Optional) Docs, leads, hypotheses later
+
+**Core behavior:**
+
+* Each item has:
+
+  * A base priority / risk (from severity, business impact, etc.)
+  * A timestamp of last signal (meaningful activity)
+  * A decay rate τ (tau) in days
+* Effective priority = `base_priority × confidence(t)`
+* `confidence(t) = exp(− t / τ)` where `t` = days since last_signal
+
+---
+
+## 1. Data Model Changes
+
+### 1.1. Add fields to core “work item” tables
+
+For each relevant table (`Issues`, `Vulnerabilities`, `Epics`, …):
+
+**New columns:**
+
+* `base_priority` (FLOAT or INT)
+
+  * Example: 1–100, or derived from severity.
+* `last_signal_at` (DATETIME, NOT NULL, default = `created_at`)
+* `tau_days` (FLOAT, nullable, falls back to type default)
+* (Optional) `confidence_cached` (FLOAT, for materialized score)
+* (Optional) `is_confidence_frozen` (BOOL, default FALSE)
+  For pinned items that should not decay.
+
+**Example Postgres migration (Issues):**
+
+```sql
+ALTER TABLE issues
+  ADD COLUMN base_priority DOUBLE PRECISION,
+  ADD COLUMN last_signal_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+  ADD COLUMN tau_days DOUBLE PRECISION,
+  ADD COLUMN confidence_cached DOUBLE PRECISION,
+  ADD COLUMN is_confidence_frozen BOOLEAN NOT NULL DEFAULT FALSE;
+```
+
+For Stella Ops:
+
+```sql
+ALTER TABLE vulnerabilities
+  ADD COLUMN base_risk DOUBLE PRECISION,
+  ADD COLUMN last_signal_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+  ADD COLUMN tau_days DOUBLE PRECISION,
+  ADD COLUMN confidence_cached DOUBLE PRECISION,
+  ADD COLUMN is_confidence_frozen BOOLEAN NOT NULL DEFAULT FALSE;
+```
+
+### 1.2. Add a config table for τ per entity type
+
+```sql
+CREATE TABLE confidence_decay_config (
+    id SERIAL PRIMARY KEY,
+    entity_type TEXT NOT NULL, -- 'issue', 'vulnerability', 'epic', 'doc'
+    tau_days_default DOUBLE PRECISION NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+INSERT INTO confidence_decay_config (entity_type, tau_days_default) VALUES
+('incident', 7),
+('vulnerability', 30),
+('issue', 30),
+('epic', 60),
+('doc', 90);
+```
+
+---
+
+## 2. Define “signal” events & instrumentation
+
+We need a standardized way to say: “this item got activity → reset last_signal_at”.
+
+### 2.1. Signals that should reset `last_signal_at`
+
+For **issues / epics:**
+
+* New comment
+* Status change (e.g., Open → In Progress)
+* Field change that matters (severity, owner, milestone)
+* Attachment added
+* Link to PR added or updated
+* New CI failure linked
+
+For **vulnerabilities (Stella Ops):**
+
+* New scanner result attached or status updated (e.g., “Verified”, “False Positive”)
+* New evidence (PoC, exploit notes)
+* SLA override change
+* Assignment / ownership change
+* Integration events (e.g., PR merge that references the vuln)
+
+For **docs (if you do it):**
+
+* Any edit
+* Comment/annotation
+
+### 2.2. Implement a shared helper to record a signal
+
+**Service-level helper (C#; repository/config interfaces elided):**
+
+```csharp
+public interface IConfidenceSignalService
+{
+    Task RecordSignalAsync(WorkItemType type, Guid itemId, DateTime? signalTimeUtc = null);
+}
+
+public class ConfidenceSignalService : IConfidenceSignalService
+{
+    private readonly IWorkItemRepository _repo;
+    private readonly IConfidenceConfigService _config;
+
+    public ConfidenceSignalService(IWorkItemRepository repo, IConfidenceConfigService config)
+    {
+        _repo = repo;
+        _config = config;
+    }
+
+    public async Task RecordSignalAsync(WorkItemType type, Guid itemId, DateTime? signalTimeUtc = null)
+    {
+        var now = signalTimeUtc ?? DateTime.UtcNow;
+        var item = await _repo.GetByIdAsync(type, itemId);
+        if (item == null) return;
+
+        item.LastSignalAt = now;
+
+        if (item.TauDays == null)
+        {
+            item.TauDays = await _config.GetDefaultTauAsync(type);
+        }
+
+        await _repo.UpdateAsync(item);
+    }
+}
+```
+
+### 2.3. Wire signals into existing flows
+
+Create small tasks for devs like:
+
+* **ISS-01:** Call `RecordSignalAsync` on:
+
+  * New issue comment handler
+  * Issue status update handler
+  * Issue field update handler (severity/priority/owner)
+* **VULN-01:** Call `RecordSignalAsync` when:
+
+  * New scanner result ingested for a vuln
+  * Vulnerability status, SLA, or owner changes
+  * New exploit evidence is attached
+
+---
+
+## 3. Confidence & scoring calculation
+
+### 3.1. Shared confidence function
+
+Definition:
+
+```csharp
+public static class ConfidenceMath
+{
+    // t = days since last signal
+    public static double ConfidenceScore(DateTime lastSignalAtUtc, double tauDays, DateTime? nowUtc = null)
+    {
+        var now = nowUtc ?? DateTime.UtcNow;
+        var tDays = (now - lastSignalAtUtc).TotalDays;
+
+        if (tDays <= 0) return 1.0;
+        if (tauDays <= 0) return 1.0; // guard / fallback
+
+        var score = Math.Exp(-tDays / tauDays);
+
+        // Optional: never drop below a tiny floor, so items never "disappear"
+        const double floor = 0.01;
+        return Math.Max(score, floor);
+    }
+}
+```
+
+### 3.2. Effective priority formulas
+
+**Generic issues / tasks:**
+
+```csharp
+double effectiveScore = issue.BasePriority * ConfidenceMath.ConfidenceScore(issue.LastSignalAt, issue.TauDays ?? defaultTau);
+```
+
+**Vulnerabilities (Stella Ops):**
+
+Let’s define:
+
+* `severity_weight`: map CVSS or severity string to numeric (e.g.
Critical=100, High=80, Medium=50, Low=20). +* `reachability`: 0–1 (e.g. from your reachability analysis). +* `exploitability`: 0–1 (optional, based on known exploits). +* `confidence`: as above. + +```csharp +double baseRisk = severityWeight * reachability * exploitability; // or simpler: severityWeight * reachability +double conf = ConfidenceMath.ConfidenceScore(vuln.LastSignalAt, vuln.TauDays ?? defaultTau); +double effectiveRisk = baseRisk * conf; +``` + +Store `baseRisk` → `vulnerabilities.base_risk`, and compute `effectiveRisk` on the fly or via job. + +### 3.3. SQL implementation (optional for server-side sorting) + +**Postgres example:** + +```sql +-- t_days = age in days +-- tau = tau_days +-- score = exp(-t_days / tau) + +SELECT + i.*, + i.base_priority * + GREATEST( + EXP(- EXTRACT(EPOCH FROM (NOW() - i.last_signal_at)) / (86400 * COALESCE(i.tau_days, 30))), + 0.01 + ) AS effective_priority +FROM issues i +ORDER BY effective_priority DESC; +``` + +You can wrap that in a view: + +```sql +CREATE VIEW issues_with_confidence AS +SELECT + i.*, + GREATEST( + EXP(- EXTRACT(EPOCH FROM (NOW() - i.last_signal_at)) / (86400 * COALESCE(i.tau_days, 30))), + 0.01 + ) AS confidence, + i.base_priority * + GREATEST( + EXP(- EXTRACT(EPOCH FROM (NOW() - i.last_signal_at)) / (86400 * COALESCE(i.tau_days, 30))), + 0.01 + ) AS effective_priority +FROM issues i; +``` + +--- + +## 4. Caching & performance + +You have two options: + +### 4.1. Compute on read (simplest to start) + +* Use the helper function in your service layer or a DB view. +* Pros: + + * No jobs, always fresh. +* Cons: + + * Slight CPU cost on heavy lists. + +**Plan:** Start with this. If you see perf issues, move to 4.2. + +### 4.2. Periodic materialization job (optional later) + +Add a scheduled job (e.g. hourly) that: + +1. Selects all active items. +2. Computes `confidence_score` and `effective_priority`. +3. Writes to `confidence_cached` and `effective_priority_cached` (if you add such a column). + +Service then sorts by cached values. + +--- + +## 5. Backfill & migration + +### 5.1. Initial backfill script + +For existing records: + +* If `last_signal_at` is NULL → set to `created_at`. +* Derive `base_priority` / `base_risk` from existing severity fields. +* Set `tau_days` from config. + +**Example:** + +```sql +UPDATE issues +SET last_signal_at = created_at +WHERE last_signal_at IS NULL; + +UPDATE issues +SET base_priority = CASE severity + WHEN 'critical' THEN 100 + WHEN 'high' THEN 80 + WHEN 'medium' THEN 50 + WHEN 'low' THEN 20 + ELSE 10 +END +WHERE base_priority IS NULL; + +UPDATE issues i +SET tau_days = c.tau_days_default +FROM confidence_decay_config c +WHERE c.entity_type = 'issue' + AND i.tau_days IS NULL; +``` + +Do similarly for `vulnerabilities` using severity / CVSS. + +### 5.2. Sanity checks + +Add a small script/test to verify: + +* Newly created items → `confidence ≈ 1.0`. +* 30-day-old items with τ=30 → `confidence ≈ 0.37`. +* Ordering changes when you edit/comment on items. + +--- + +## 6. API & Query Layer + +### 6.1. New sorting options + +Update list APIs: + +* Accept parameter: `sort=effective_priority` or `sort=confidence`. +* Default sort for some views: + + * Vulnerabilities backlog: `sort=effective_risk` (risk × confidence). + * Issues backlog: `sort=effective_priority`. 
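+
+To make the sorting concrete before the wire contract below, here is a minimal ASP.NET Core endpoint sketch that computes the fields on read. `ListOpenAsync` and `WorkItemType.Issue` are assumed names for illustration, not an existing API; the math reuses `ConfidenceMath` from §3.1 and the bands from §6.2:
+
+```csharp
+// Sketch only: repository/listing names are assumptions.
+app.MapGet("/api/issues", async (
+    string? sort,
+    IWorkItemRepository repo,
+    IConfidenceConfigService config) =>
+{
+    var defaultTau = await config.GetDefaultTauAsync(WorkItemType.Issue);
+
+    var items = (await repo.ListOpenAsync(WorkItemType.Issue))
+        .Select(i =>
+        {
+            var conf = ConfidenceMath.ConfidenceScore(i.LastSignalAt, i.TauDays ?? defaultTau);
+            return new
+            {
+                id = i.Id,
+                title = i.Title,
+                base_priority = i.BasePriority,
+                last_signal_at = i.LastSignalAt,
+                tau_days = i.TauDays ?? defaultTau,
+                confidence = Math.Round(conf, 2),
+                effective_priority = Math.Round(i.BasePriority * conf, 1),
+                confidence_band = conf >= 0.6 ? "green" : conf >= 0.3 ? "amber" : "red",
+            };
+        });
+
+    if (sort == "effective_priority")
+        items = items.OrderByDescending(x => x.effective_priority);
+
+    return Results.Ok(items.ToList());
+});
+```
+
+For large lists, push the ordering into SQL via the §3.3 view instead of sorting in memory.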
+
+**Example REST API contract:**
+
+`GET /api/issues?sort=effective_priority&state=open`
+
+**Response fields (additions):**
+
+```json
+{
+  "id": "ISS-123",
+  "title": "Fix login bug",
+  "base_priority": 80,
+  "last_signal_at": "2025-11-01T10:00:00Z",
+  "tau_days": 30,
+  "confidence": 0.63,
+  "effective_priority": 50.4,
+  "confidence_band": "amber"
+}
+```
+
+### 6.2. Confidence banding (for UI)
+
+Define bands server-side (easy to change):
+
+* Green: `confidence >= 0.6`
+* Amber: `0.3 ≤ confidence < 0.6`
+* Red: `confidence < 0.3`
+
+You can compute on server:
+
+```csharp
+string ConfidenceBand(double confidence) =>
+    confidence >= 0.6 ? "green"
+    : confidence >= 0.3 ? "amber"
+    : "red";
+```
+
+---
+
+## 7. UI / UX changes
+
+### 7.1. List views (issues / vulns / epics)
+
+For each item row:
+
+* Show a small freshness pill:
+
+  * Text: `Active`, `Review soon`, `Stale`
+  * Derived from confidence band.
+  * Tooltip:
+
+    * “Confidence 90%. Last activity 3 days ago. τ = 30 days.”
+
+* Sort default: by `effective_priority` / `effective_risk`.
+
+* Filters:
+
+  * `Freshness: [All | Active | Review soon | Stale]`
+  * Optionally: “Show stale only” toggle.
+
+**Example labels:**
+
+* Green: “Active (confidence 82%)”
+* Amber: “Review soon (confidence 45%)”
+* Red: “Stale (confidence 18%)”
+
+### 7.2. Detail views
+
+On an issue / vuln page:
+
+* Add a “Confidence” section:
+
+  * “Confidence: **67%**”
+  * “Last signal: **12 days ago**”
+  * “Decay τ: **30 days**”
+  * “Effective priority: **Base 80 × 0.67 ≈ 54**”
+
+* (Optional) small mini-chart (text-only or simple bar) showing approximate decay, but not necessary for first iteration.
+
+### 7.3. Admin / settings UI
+
+Add an internal settings page:
+
+* Table of entity types with editable τ:
+
+  | Entity type   | τ (days) | Notes                        |
+  | ------------- | -------- | ---------------------------- |
+  | Incident      | 7        | Fast-moving                  |
+  | Vulnerability | 30       | Standard risk review cadence |
+  | Issue         | 30       | Sprint-level decay           |
+  | Epic          | 60       | Quarterly                    |
+  | Doc           | 90       | Slow decay                   |
+
+* Optionally: toggle to pin item (`is_confidence_frozen`) from UI.
+
+---
+
+## 8. Stella Ops–specific behavior
+
+For vulnerabilities:
+
+### 8.1. Base risk calculation
+
+Ingested fields you likely already have:
+
+* `cvss_score` or `severity`
+* `reachable` (true/false or numeric)
+* (Optional) `exploit_available` (bool) or exploitability score
+* `asset_criticality` (1–5)
+
+Define `base_risk` as:
+
+```text
+severity_weight = f(cvss_score or severity)
+reachability    = reachable ? 1.0 : 0.5          -- example
+exploitability  = exploit_available ? 1.0 : 0.7
+asset_factor    = 0.5 + 0.1 * asset_criticality  -- 1 → 0.6, 5 → 1.0
+
+base_risk = severity_weight * reachability * exploitability * asset_factor
+```
+
+Store `base_risk` on vuln row.
+
+Then:
+
+```text
+effective_risk = base_risk * confidence(t)
+```
+
+Use `effective_risk` for backlog ordering and SLA dashboards. (A C# sketch of these formulas appears at the end of this section.)
+
+### 8.2. Signals for vulns
+
+Make sure these all call `RecordSignalAsync(Vulnerability, vulnId)`:
+
+* New scan result for same vuln (re-detected).
+* Change status to “In Progress”, “Ready for Deploy”, “Verified Fixed”, etc.
+* Assigning an owner.
+* Attaching PoC / exploit details.
+
+### 8.3. Vuln UI copy ideas
+
+* Pill text:
+
+  * “Risk: 850 (confidence 68%)”
+  * “Last analyst activity 11 days ago”
+
+* In backlog view: show **Effective Risk** as main sort, with a smaller subtext “Base 1200 × Confidence 71%”.
+
+---
+
+## 9. Rollout plan
+
+### Phase 1 – Infrastructure (backend-only)
+
+* [ ] DB migrations & config table
+* [ ] Implement `ConfidenceMath` and helper functions
+* [ ] Implement `IConfidenceSignalService`
+* [ ] Wire signals into key flows (comments, state changes, scanner ingestion)
+* [ ] Add `confidence` and `effective_priority/risk` to API responses
+* [ ] Backfill script + dry run in staging
+
+### Phase 2 – Internal UI & feature flag
+
+* [ ] Add optional sorting by effective score to internal/staff views
+* [ ] Add confidence pill (hidden behind feature flag `confidence_decay_v1`)
+* [ ] Dogfood internally:
+
+  * Do items bubble up/down as expected?
+  * Are any items “disappearing” because decay is too aggressive?
+
+### Phase 3 – Parameter tuning
+
+* [ ] Adjust τ per type based on feedback:
+
+  * If things decay too fast → increase τ
+  * If queues rarely change → decrease τ
+* [ ] Decide on confidence floor (0.01? 0.05?) so nothing goes to literal 0.
+
+### Phase 4 – General release
+
+* [ ] Make effective score the default sort for key views:
+
+  * Vulnerabilities backlog
+  * Issues backlog
+* [ ] Document behavior for users (help center / inline tooltip)
+* [ ] Add admin UI to tweak τ per entity type.
+
+---
+
+## 10. Edge cases & safeguards
+
+* **New items**
+
+  * `last_signal_at = created_at`, confidence = 1.0.
+* **Pinned items**
+
+  * If `is_confidence_frozen = true` → treat confidence as 1.0.
+* **Items without τ**
+
+  * Always fallback to entity type default.
+* **Timezones**
+
+  * Always store & compute in UTC.
+* **Very old items**
+
+  * Floor the confidence so they’re still visible when explicitly searched.
+
+---
+
+Natural follow-ups: a short **technical design doc** (sections: Problem, Proposal, Alternatives, Rollout), or a **set of Jira tickets** grouped by backend / frontend / infra that your team can pick up directly.
diff --git a/ops/orchestrator/Dockerfile b/ops/orchestrator/Dockerfile
new file mode 100644
index 000000000..1f691aa53
--- /dev/null
+++ b/ops/orchestrator/Dockerfile
@@ -0,0 +1,124 @@
+# syntax=docker/dockerfile:1.7-labs
+
+# Orchestrator Service Dockerfile
+# Multi-stage build for deterministic, reproducible container images.
+# Supports air-gapped deployment via digest-pinned base images.
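+
+# Example build invocation (illustrative only; adjust registry/tag to your release flow):
+#
+#   docker buildx build -f ops/orchestrator/Dockerfile \
+#     --target orchestrator-web \
+#     --build-arg VERSION=2025.10.0-edge \
+#     --build-arg CHANNEL=edge \
+#     --build-arg GIT_SHA="$(git rev-parse --short=7 HEAD)" \
+#     --build-arg SOURCE_DATE_EPOCH="$(git log -1 --pretty=%ct)" \
+#     -t registry.stella-ops.org/stellaops/orchestrator-web:2025.10.0-edge .
+#
+# Build the worker image the same way with --target orchestrator-worker.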
+ +ARG SDK_IMAGE=mcr.microsoft.com/dotnet/nightly/sdk:10.0 +ARG RUNTIME_IMAGE=mcr.microsoft.com/dotnet/nightly/aspnet:10.0 + +ARG VERSION=0.0.0 +ARG CHANNEL=dev +ARG GIT_SHA=0000000 +ARG SOURCE_DATE_EPOCH=0 + +# ============================================================================== +# Stage 1: Build +# ============================================================================== +FROM ${SDK_IMAGE} AS build +ARG GIT_SHA +ARG SOURCE_DATE_EPOCH +WORKDIR /src + +ENV DOTNET_CLI_TELEMETRY_OPTOUT=1 \ + DOTNET_SKIP_FIRST_TIME_EXPERIENCE=1 \ + NUGET_XMLDOC_MODE=skip \ + SOURCE_DATE_EPOCH=${SOURCE_DATE_EPOCH} + +# Copy solution and project files for restore +COPY src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.sln ./ +COPY src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Core/StellaOps.Orchestrator.Core.csproj StellaOps.Orchestrator.Core/ +COPY src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Infrastructure/StellaOps.Orchestrator.Infrastructure.csproj StellaOps.Orchestrator.Infrastructure/ +COPY src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.WebService/StellaOps.Orchestrator.WebService.csproj StellaOps.Orchestrator.WebService/ +COPY src/Orchestrator/StellaOps.Orchestrator/StellaOps.Orchestrator.Worker/StellaOps.Orchestrator.Worker.csproj StellaOps.Orchestrator.Worker/ +COPY Directory.Build.props Directory.Packages.props ./ + +# Restore dependencies with cache mount +RUN --mount=type=cache,target=/root/.nuget/packages \ + dotnet restore StellaOps.Orchestrator.sln + +# Copy source files +COPY src/Orchestrator/StellaOps.Orchestrator/ ./ + +# Publish WebService +RUN --mount=type=cache,target=/root/.nuget/packages \ + dotnet publish StellaOps.Orchestrator.WebService/StellaOps.Orchestrator.WebService.csproj \ + -c Release \ + -o /app/publish/webservice \ + /p:UseAppHost=false \ + /p:ContinuousIntegrationBuild=true \ + /p:SourceRevisionId=${GIT_SHA} \ + /p:Deterministic=true \ + /p:TreatWarningsAsErrors=true + +# Publish Worker (optional, for hybrid deployments) +RUN --mount=type=cache,target=/root/.nuget/packages \ + dotnet publish StellaOps.Orchestrator.Worker/StellaOps.Orchestrator.Worker.csproj \ + -c Release \ + -o /app/publish/worker \ + /p:UseAppHost=false \ + /p:ContinuousIntegrationBuild=true \ + /p:SourceRevisionId=${GIT_SHA} \ + /p:Deterministic=true \ + /p:TreatWarningsAsErrors=true + +# ============================================================================== +# Stage 2: Runtime (WebService) +# ============================================================================== +FROM ${RUNTIME_IMAGE} AS orchestrator-web +WORKDIR /app +ARG VERSION +ARG CHANNEL +ARG GIT_SHA + +ENV DOTNET_EnableDiagnostics=0 \ + ASPNETCORE_URLS=http://0.0.0.0:8080 \ + ASPNETCORE_ENVIRONMENT=Production \ + ORCHESTRATOR__TELEMETRY__MINIMUMLOGLEVEL=Information + +COPY --from=build /app/publish/webservice/ ./ + +# Health check endpoints +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:8080/healthz || exit 1 + +EXPOSE 8080 + +LABEL org.opencontainers.image.title="StellaOps Orchestrator WebService" \ + org.opencontainers.image.description="Job scheduling, DAG planning, and worker coordination service" \ + org.opencontainers.image.version="${VERSION}" \ + org.opencontainers.image.revision="${GIT_SHA}" \ + org.opencontainers.image.source="https://git.stella-ops.org/stella-ops/stellaops" \ + org.opencontainers.image.vendor="StellaOps" \ + 
org.opencontainers.image.licenses="AGPL-3.0-or-later" \ + org.stellaops.release.channel="${CHANNEL}" \ + org.stellaops.component="orchestrator-web" + +ENTRYPOINT ["dotnet", "StellaOps.Orchestrator.WebService.dll"] + +# ============================================================================== +# Stage 3: Runtime (Worker) +# ============================================================================== +FROM ${RUNTIME_IMAGE} AS orchestrator-worker +WORKDIR /app +ARG VERSION +ARG CHANNEL +ARG GIT_SHA + +ENV DOTNET_EnableDiagnostics=0 \ + ASPNETCORE_ENVIRONMENT=Production \ + ORCHESTRATOR__TELEMETRY__MINIMUMLOGLEVEL=Information + +COPY --from=build /app/publish/worker/ ./ + +LABEL org.opencontainers.image.title="StellaOps Orchestrator Worker" \ + org.opencontainers.image.description="Background worker for job execution and orchestration tasks" \ + org.opencontainers.image.version="${VERSION}" \ + org.opencontainers.image.revision="${GIT_SHA}" \ + org.opencontainers.image.source="https://git.stella-ops.org/stella-ops/stellaops" \ + org.opencontainers.image.vendor="StellaOps" \ + org.opencontainers.image.licenses="AGPL-3.0-or-later" \ + org.stellaops.release.channel="${CHANNEL}" \ + org.stellaops.component="orchestrator-worker" + +ENTRYPOINT ["dotnet", "StellaOps.Orchestrator.Worker.dll"] diff --git a/ops/orchestrator/GA_CHECKLIST.md b/ops/orchestrator/GA_CHECKLIST.md new file mode 100644 index 000000000..b502e4245 --- /dev/null +++ b/ops/orchestrator/GA_CHECKLIST.md @@ -0,0 +1,108 @@ +# Orchestrator Service GA Checklist + +> Pre-release validation checklist for StellaOps Orchestrator Service. +> All items must be verified before promoting to `stable` channel. + +## Build & Packaging + +- [ ] Container images build successfully for all target architectures (amd64, arm64) +- [ ] Multi-stage Dockerfile produces minimal runtime images (<100MB compressed) +- [ ] OCI labels include version, git SHA, and license metadata +- [ ] HEALTHCHECK directive validates endpoint availability +- [ ] Build is reproducible (same inputs produce byte-identical outputs) +- [ ] SBOM generated and attached to container images (SPDX 3.0.1 or CycloneDX 1.6) +- [ ] Provenance attestation generated per SLSA v1 specification +- [ ] Air-gap bundle script creates valid offline deployment package + +## Security + +- [ ] Container runs as non-root user (UID 1000+) +- [ ] No secrets baked into container image layers +- [ ] Base image digest-pinned to known-good version +- [ ] Vulnerability scan passes with no HIGH/CRITICAL unfixed CVEs +- [ ] TLS 1.3 enforced for all external endpoints +- [ ] Authority JWT validation enabled and tested +- [ ] Tenant isolation enforced at API and storage layers +- [ ] Sensitive configuration loaded from Kubernetes secrets only + +## Functional + +- [ ] Job scheduling CRUD operations work correctly +- [ ] Cron expression parsing handles edge cases (DST, leap years) +- [ ] DAG planning respects dependency ordering +- [ ] Dead letter queue captures failed jobs with full context +- [ ] Backfill API handles large date ranges without OOM +- [ ] Worker heartbeat detection marks stale jobs correctly +- [ ] Rate limiting and concurrency limits enforced per tenant + +## Performance & Scale + +- [ ] System tracks 10,000+ pending jobs without degradation +- [ ] Dispatch latency P95 < 150ms under normal load +- [ ] Queue depth metrics exposed for autoscaling (KEDA/HPA) +- [ ] Load shedding activates at configured thresholds +- [ ] Database connection pooling sized appropriately +- [ ] Memory usage stable 
under sustained load (no leaks)
+
+## Observability
+
+- [ ] Structured logging with correlation IDs enabled
+- [ ] OpenTelemetry traces exported to configured endpoint
+- [ ] Prometheus metrics exposed at `/metrics` endpoint
+- [ ] Health probes respond correctly:
+  - `/healthz` - basic liveness
+  - `/livez` - deep liveness with dependency checks
+  - `/readyz` - readiness for traffic
+  - `/startupz` - startup completion check
+- [ ] Autoscaling metrics endpoint returns valid JSON
+
+## Deployment
+
+- [ ] Helm values overlay tested with production-like configuration
+- [ ] PostgreSQL schema migrations run idempotently
+- [ ] Rolling update strategy configured (maxSurge/maxUnavailable)
+- [ ] Pod disruption budget prevents full outage
+- [ ] Resource requests/limits appropriate for target workload
+- [ ] Network policies restrict traffic to required paths only
+- [ ] Service mesh (Istio/Linkerd) integration tested if applicable
+
+## Documentation
+
+- [ ] Architecture document updated in `docs/modules/orchestrator/`
+- [ ] API reference generated from OpenAPI spec
+- [ ] Runbook for common operations (restart, scale, failover)
+- [ ] Troubleshooting guide for known issues
+- [ ] Upgrade path documented from previous versions
+
+## Testing
+
+- [ ] Unit tests pass (100% of Core, 80%+ of Infrastructure)
+- [ ] Integration tests pass against real PostgreSQL
+- [ ] Performance benchmarks meet targets
+- [ ] Chaos testing validates graceful degradation
+- [ ] E2E tests cover critical user journeys
+
+## Compliance
+
+- [ ] AGPL-3.0-or-later license headers in all source files
+- [ ] Third-party license notices collected and bundled
+- [ ] Attestation chain verifiable via `stella attest verify`
+- [ ] Air-gap deployment tested in isolated network
+- [ ] CryptoProfile compatibility verified (FIPS/eIDAS if required)
+
+---
+
+## Sign-off
+
+| Role | Name | Date | Signature |
+|------|------|------|-----------|
+| Engineering Lead | | | |
+| QA Lead | | | |
+| Security Review | | | |
+| Release Manager | | | |
+
+**Release Version:** ________________
+
+**Release Channel:** [ ] edge [ ] stable [ ] lts
+
+**Notes:**
diff --git a/ops/orchestrator/build-airgap-bundle.sh b/ops/orchestrator/build-airgap-bundle.sh
new file mode 100644
index 000000000..48336e267
--- /dev/null
+++ b/ops/orchestrator/build-airgap-bundle.sh
@@ -0,0 +1,276 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# ORCH-SVC-34-004: Build air-gap bundle for Orchestrator service
+# Packages container images, configs, and manifests for offline deployment.
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+VERSION="${VERSION:-2025.10.0-edge}"
+CHANNEL="${CHANNEL:-edge}"
+BUNDLE_DIR="${BUNDLE_DIR:-$REPO_ROOT/out/bundles/orchestrator-${VERSION}}"
+SRC_DIR="${SRC_DIR:-$REPO_ROOT/out/buildx/orchestrator}"
+
+usage() {
+  cat <<EOF
+Usage: $(basename "$0") [options]
+
+Options:
+  --version VERSION   Bundle version (default: ${VERSION})
+  --channel CHANNEL   Release channel (default: ${CHANNEL})
+  --skip-images       Skip container image export
+  -h, --help          Show this help
+EOF
+  exit "${1:-0}"
+}
+
+SKIP_IMAGES="${SKIP_IMAGES:-false}"
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --version) VERSION="$2"; shift 2 ;;
+    --channel) CHANNEL="$2"; shift 2 ;;
+    --skip-images) SKIP_IMAGES="true"; shift ;;
+    -h|--help) usage 0 ;;
+    *) echo "Unknown option: $1" >&2; usage 64 ;;
+  esac
+done
+
+BUNDLE_DIR="${BUNDLE_DIR:-$REPO_ROOT/out/bundles/orchestrator-${VERSION}}"
+TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+echo "[orchestrator-airgap] Building bundle v${VERSION} (${CHANNEL})"
+echo "[orchestrator-airgap] Output: ${BUNDLE_DIR}"
+
+mkdir -p "$BUNDLE_DIR"/{images,configs,manifests,docs}
+
+# ------------------------------------------------------------------------------
+# Stage 1: Export container images as OCI archives
+# ------------------------------------------------------------------------------
+if [[ "$SKIP_IMAGES" == "false" ]]; then
+  echo "[orchestrator-airgap] Exporting container images..."
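+  # Two sources are supported below: a pre-exported OCI archive from the
+  # buildx pipeline under $SRC_DIR, or a `docker save` fallback that reads
+  # the image from the local Docker daemon. Either way the result is a
+  # gzipped tarball plus a .sha256 checksum under images/, and any syft
+  # SBOM found next to the buildx output is copied into manifests/.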
+ + IMAGES=( + "orchestrator-web:${VERSION}" + "orchestrator-worker:${VERSION}" + ) + + for img in "${IMAGES[@]}"; do + img_name="${img%%:*}" + img_file="${BUNDLE_DIR}/images/${img_name}.oci.tar.gz" + + if [[ -f "${SRC_DIR}/${img_name}/image.oci" ]]; then + echo "[orchestrator-airgap] Packaging ${img_name} from buildx output..." + gzip -c "${SRC_DIR}/${img_name}/image.oci" > "$img_file" + else + echo "[orchestrator-airgap] Exporting ${img_name} via docker save..." + docker save "registry.stella-ops.org/stellaops/${img}" | gzip > "$img_file" + fi + + # Generate checksum + sha256sum "$img_file" | cut -d' ' -f1 > "${img_file}.sha256" + + # Copy SBOM if available + if [[ -f "${SRC_DIR}/${img_name}/sbom.syft.json" ]]; then + cp "${SRC_DIR}/${img_name}/sbom.syft.json" "${BUNDLE_DIR}/manifests/${img_name}.sbom.json" + fi + done +else + echo "[orchestrator-airgap] Skipping image export (--skip-images)" +fi + +# ------------------------------------------------------------------------------ +# Stage 2: Copy configuration templates +# ------------------------------------------------------------------------------ +echo "[orchestrator-airgap] Copying configuration templates..." + +# Helm values overlay +if [[ -f "$REPO_ROOT/deploy/helm/stellaops/values-orchestrator.yaml" ]]; then + cp "$REPO_ROOT/deploy/helm/stellaops/values-orchestrator.yaml" \ + "${BUNDLE_DIR}/configs/values-orchestrator.yaml" +fi + +# Sample configuration +if [[ -f "$REPO_ROOT/etc/orchestrator.yaml.sample" ]]; then + cp "$REPO_ROOT/etc/orchestrator.yaml.sample" \ + "${BUNDLE_DIR}/configs/orchestrator.yaml.sample" +fi + +# PostgreSQL migration scripts +if [[ -d "$REPO_ROOT/src/Orchestrator/StellaOps.Orchestrator/migrations" ]]; then + mkdir -p "${BUNDLE_DIR}/configs/migrations" + cp "$REPO_ROOT/src/Orchestrator/StellaOps.Orchestrator/migrations/"*.sql \ + "${BUNDLE_DIR}/configs/migrations/" 2>/dev/null || true +fi + +# Bootstrap secrets template +cat > "${BUNDLE_DIR}/configs/secrets.env.example" <<'SECRETS_EOF' +# Orchestrator Secrets Template +# Copy to secrets.env and fill in values before deployment + +# PostgreSQL password (required) +POSTGRES_PASSWORD= + +# Authority JWT signing key (if using local Authority) +AUTHORITY_SIGNING_KEY= + +# OpenTelemetry endpoint (optional) +OTEL_EXPORTER_OTLP_ENDPOINT= + +# Tenant encryption key for multi-tenant isolation (optional) +TENANT_ENCRYPTION_KEY= +SECRETS_EOF + +# ------------------------------------------------------------------------------ +# Stage 3: Generate bundle manifest +# ------------------------------------------------------------------------------ +echo "[orchestrator-airgap] Generating bundle manifest..." + +# Calculate checksums for all bundle files +MANIFEST_FILE="${BUNDLE_DIR}/manifests/bundle-manifest.json" + +# Build file list with checksums +FILES_JSON="[]" +while IFS= read -r -d '' file; do + rel_path="${file#$BUNDLE_DIR/}" + if [[ "$rel_path" != "manifests/bundle-manifest.json" ]]; then + sha=$(sha256sum "$file" | cut -d' ' -f1) + size=$(stat -f%z "$file" 2>/dev/null || stat -c%s "$file" 2>/dev/null || echo "0") + FILES_JSON=$(echo "$FILES_JSON" | jq --arg name "$rel_path" --arg sha "$sha" --arg size "$size" \ + '. 
+ [{"name": $name, "sha256": $sha, "size": ($size | tonumber)}]')
+  fi
+done < <(find "$BUNDLE_DIR" -type f -print0 | sort -z)
+
+cat > "$MANIFEST_FILE" <<EOF
+{
+  "bundle": "orchestrator",
+  "version": "${VERSION}",
+  "channel": "${CHANNEL}",
+  "createdAt": "${TIMESTAMP}",
+  "files": ${FILES_JSON}
+}
+EOF
+
+# Checksum the manifest itself
+sha256sum "$MANIFEST_FILE" | cut -d' ' -f1 > "${MANIFEST_FILE}.sha256"
+
+# ------------------------------------------------------------------------------
+# Stage 4: Copy documentation
+# ------------------------------------------------------------------------------
+echo "[orchestrator-airgap] Copying documentation..."
+
+# Module architecture
+if [[ -f "$REPO_ROOT/docs/modules/orchestrator/architecture.md" ]]; then
+  cp "$REPO_ROOT/docs/modules/orchestrator/architecture.md" \
+    "${BUNDLE_DIR}/docs/architecture.md"
+fi
+
+# GA checklist
+if [[ -f "$REPO_ROOT/ops/orchestrator/GA_CHECKLIST.md" ]]; then
+  cp "$REPO_ROOT/ops/orchestrator/GA_CHECKLIST.md" \
+    "${BUNDLE_DIR}/docs/GA_CHECKLIST.md"
+fi
+
+# Quick deployment guide
+cat > "${BUNDLE_DIR}/docs/DEPLOY.md" <<'DEPLOY_EOF'
+# Orchestrator Air-Gap Deployment Guide
+
+## Prerequisites
+
+- Docker or containerd runtime
+- Kubernetes 1.28+ (for Helm deployment) or Docker Compose
+- PostgreSQL 16+ (included as container or external)
+
+## Quick Start (Docker)
+
+1. Load images:
+   ```bash
+   for img in images/*.oci.tar.gz; do
+     gunzip -c "$img" | docker load
+   done
+   ```
+
+2. Configure secrets:
+   ```bash
+   cp configs/secrets.env.example secrets.env
+   # Edit secrets.env with your values
+   ```
+
+3. Start services:
+   ```bash
+   docker compose -f docker-compose.orchestrator.yaml up -d
+   ```
+
+## Helm Deployment
+
+1. Import images to registry:
+   ```bash
+   for img in images/*.oci.tar.gz; do
+     crane push "$img" your-registry.local/stellaops/$(basename "$img" .oci.tar.gz)
+   done
+   ```
+
+2. Install chart:
+   ```bash
+   helm upgrade --install stellaops ./stellaops \
+     -f configs/values-orchestrator.yaml \
+     --set global.imageRegistry=your-registry.local
+   ```
+
+## Verification
+
+Check health endpoints:
+```bash
+curl http://localhost:8080/healthz
+curl http://localhost:8080/readyz
+```
+DEPLOY_EOF
+
+# ------------------------------------------------------------------------------
+# Stage 5: Create final tarball
+# ------------------------------------------------------------------------------
+echo "[orchestrator-airgap] Creating final tarball..."
+
+TARBALL="${BUNDLE_DIR}.tar.gz"
+tar -C "$(dirname "$BUNDLE_DIR")" -czf "$TARBALL" "$(basename "$BUNDLE_DIR")"
+
+# Checksum the tarball
+sha256sum "$TARBALL" | cut -d' ' -f1 > "${TARBALL}.sha256"
+
+echo "[orchestrator-airgap] Bundle created successfully:"
+echo "  Tarball: ${TARBALL}"
+echo "  SHA256:  $(cat "${TARBALL}.sha256")"
+echo "  Size:    $(du -h "$TARBALL" | cut -f1)"
diff --git a/ops/orchestrator/provenance.json b/ops/orchestrator/provenance.json
new file mode 100644
index 000000000..b8716cf8d
--- /dev/null
+++ b/ops/orchestrator/provenance.json
@@ -0,0 +1,106 @@
+{
+  "_type": "https://in-toto.io/Statement/v1",
+  "subject": [
+    {
+      "name": "registry.stella-ops.org/stellaops/orchestrator-web",
+      "digest": {
+        "sha256": ""
+      }
+    },
+    {
+      "name": "registry.stella-ops.org/stellaops/orchestrator-worker",
+      "digest": {
+        "sha256": ""
+      }
+    }
+  ],
+  "predicateType": "https://slsa.dev/provenance/v1",
+  "predicate": {
+    "buildDefinition": {
+      "buildType": "https://stella-ops.org/OrchestratorBuild/v1",
+      "externalParameters": {
+        "source": {
+          "uri": "git+https://git.stella-ops.org/stella-ops/stellaops.git",
+          "digest": {
+            "gitCommit": ""
+          }
+        },
+        "builderImage": {
+          "uri": "mcr.microsoft.com/dotnet/nightly/sdk:10.0",
+          "digest": {
+            "sha256": ""
+          }
+        }
+      },
+      "internalParameters": {
+        "dockerfile": "ops/orchestrator/Dockerfile",
+        "targetStages": ["orchestrator-web", "orchestrator-worker"],
+        "buildArgs": {
+          "VERSION": "",
+          "CHANNEL": "",
+          "GIT_SHA": "",
+          "SOURCE_DATE_EPOCH": ""
+        }
+      },
+      "resolvedDependencies": [
+        {
+          "uri": "pkg:nuget/Microsoft.Extensions.Hosting@10.0.0",
+          "digest": {
+            "sha256": ""
+          }
+        },
+        {
+          "uri": "pkg:nuget/Npgsql.EntityFrameworkCore.PostgreSQL@10.0.0",
+          "digest": {
+            "sha256": ""
+          }
+        },
+        {
+          "uri": "pkg:nuget/Cronos@0.10.0",
+          "digest": {
+            "sha256": ""
+          }
+        }
+      ]
+    },
+    "runDetails": {
+      "builder": {
+        "id": "https://git.stella-ops.org/stella-ops/stellaops/-/runners/1",
+        "builderDependencies": [
+          {
+            "uri": "docker.io/moby/buildkit:latest",
+            "digest": {
+              "sha256": ""
+            }
+          }
+        ],
+        "version": {
+          "buildkit": "0.14.0"
+        }
+      },
+      "metadata": {
+        "invocationId": "",
+        "startedOn": "",
+        "finishedOn": ""
+      },
+      "byproducts": [
+        {
+          "name": "sbom-web",
+          "uri": "registry.stella-ops.org/stellaops/orchestrator-web:sbom",
+          "mediaType": "application/spdx+json",
+          "digest": {
+            "sha256": ""
+          }
+        },
+        {
+          "name": "sbom-worker",
+          "uri": "registry.stella-ops.org/stellaops/orchestrator-worker:sbom",
+          "mediaType": "application/spdx+json",
+          "digest": {
+            "sha256": ""
+          }
+        }
+      ]
+    }
+  }
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/DigestEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/DigestEntity.cs
new file mode 100644
index 000000000..b634a6909
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/DigestEntity.cs
@@ -0,0 +1,30 @@
+namespace StellaOps.Notify.Storage.Postgres.Models;
+
+/// <summary>
+/// Digest status values.
+/// </summary>
+public static class DigestStatus
+{
+    public const string Collecting = "collecting";
+    public const string Sending = "sending";
+    public const string Sent = "sent";
+}
+
+/// <summary>
+/// Represents a digest of aggregated notifications.
+/// </summary>
+public sealed class DigestEntity
+{
+    public required Guid Id { get; init; }
+    public required string TenantId { get; init; }
+    public required Guid ChannelId { get; init; }
+    public required string Recipient { get; init; }
+    public required string DigestKey { get; init; }
+    public int EventCount { get; init; }
+    public string Events { get; init; } = "[]";
+    public string Status { get; init; } = DigestStatus.Collecting;
+    public DateTimeOffset CollectUntil { get; init; }
+    public DateTimeOffset? SentAt { get; init; }
+    public DateTimeOffset CreatedAt { get; init; }
+    public DateTimeOffset UpdatedAt { get; init; }
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/EscalationEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/EscalationEntity.cs
new file mode 100644
index 000000000..fc1463102
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/EscalationEntity.cs
@@ -0,0 +1,51 @@
+namespace StellaOps.Notify.Storage.Postgres.Models;
+
+/// <summary>
+/// Represents an escalation policy.
+/// </summary>
+public sealed class EscalationPolicyEntity
+{
+    public required Guid Id { get; init; }
+    public required string TenantId { get; init; }
+    public required string Name { get; init; }
+    public string? Description { get; init; }
+    public bool Enabled { get; init; } = true;
+    public string Steps { get; init; } = "[]";
+    public int RepeatCount { get; init; }
+    public string Metadata { get; init; } = "{}";
+    public DateTimeOffset CreatedAt { get; init; }
+    public DateTimeOffset UpdatedAt { get; init; }
+}
+
+/// <summary>
+/// Escalation state status values.
+/// </summary>
+public static class EscalationStatus
+{
+    public const string Active = "active";
+    public const string Acknowledged = "acknowledged";
+    public const string Resolved = "resolved";
+    public const string Expired = "expired";
+}
+
+/// <summary>
+/// Represents the state of an escalation.
+/// </summary>
+public sealed class EscalationStateEntity
+{
+    public required Guid Id { get; init; }
+    public required string TenantId { get; init; }
+    public required Guid PolicyId { get; init; }
+    public Guid? IncidentId { get; init; }
+    public required string CorrelationId { get; init; }
+    public int CurrentStep { get; init; }
+    public int RepeatIteration { get; init; }
+    public string Status { get; init; } = EscalationStatus.Active;
+    public DateTimeOffset StartedAt { get; init; }
+    public DateTimeOffset? NextEscalationAt { get; init; }
+    public DateTimeOffset? AcknowledgedAt { get; init; }
+    public string? AcknowledgedBy { get; init; }
+    public DateTimeOffset? ResolvedAt { get; init; }
+    public string? ResolvedBy { get; init; }
+    public string Metadata { get; init; } = "{}";
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/InboxEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/InboxEntity.cs
new file mode 100644
index 000000000..133ef679a
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/InboxEntity.cs
@@ -0,0 +1,22 @@
+namespace StellaOps.Notify.Storage.Postgres.Models;
+
+/// <summary>
+/// Represents an in-app notification inbox item.
+/// </summary>
+public sealed class InboxEntity
+{
+    public required Guid Id { get; init; }
+    public required string TenantId { get; init; }
+    public required Guid UserId { get; init; }
+    public required string Title { get; init; }
+    public string?
Body { get; init; }
+    public required string EventType { get; init; }
+    public string EventPayload { get; init; } = "{}";
+    public bool Read { get; init; }
+    public bool Archived { get; init; }
+    public string? ActionUrl { get; init; }
+    public string? CorrelationId { get; init; }
+    public DateTimeOffset CreatedAt { get; init; }
+    public DateTimeOffset? ReadAt { get; init; }
+    public DateTimeOffset? ArchivedAt { get; init; }
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/IncidentEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/IncidentEntity.cs
new file mode 100644
index 000000000..f0859bb5a
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/IncidentEntity.cs
@@ -0,0 +1,46 @@
+namespace StellaOps.Notify.Storage.Postgres.Models;
+
+/// <summary>
+/// Incident severity values.
+/// </summary>
+public static class IncidentSeverity
+{
+    public const string Critical = "critical";
+    public const string High = "high";
+    public const string Medium = "medium";
+    public const string Low = "low";
+}
+
+/// <summary>
+/// Incident status values.
+/// </summary>
+public static class IncidentStatus
+{
+    public const string Open = "open";
+    public const string Acknowledged = "acknowledged";
+    public const string Resolved = "resolved";
+    public const string Closed = "closed";
+}
+
+/// <summary>
+/// Represents an incident.
+/// </summary>
+public sealed class IncidentEntity
+{
+    public required Guid Id { get; init; }
+    public required string TenantId { get; init; }
+    public required string Title { get; init; }
+    public string? Description { get; init; }
+    public string Severity { get; init; } = IncidentSeverity.Medium;
+    public string Status { get; init; } = IncidentStatus.Open;
+    public string? Source { get; init; }
+    public string? CorrelationId { get; init; }
+    public Guid? AssignedTo { get; init; }
+    public Guid? EscalationPolicyId { get; init; }
+    public string Metadata { get; init; } = "{}";
+    public DateTimeOffset CreatedAt { get; init; }
+    public DateTimeOffset? AcknowledgedAt { get; init; }
+    public DateTimeOffset? ResolvedAt { get; init; }
+    public DateTimeOffset? ClosedAt { get; init; }
+    public string? CreatedBy { get; init; }
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/MaintenanceWindowEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/MaintenanceWindowEntity.cs
new file mode 100644
index 000000000..913c0ce36
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/MaintenanceWindowEntity.cs
@@ -0,0 +1,18 @@
+namespace StellaOps.Notify.Storage.Postgres.Models;
+
+/// <summary>
+/// Represents a maintenance window for suppressing notifications.
+/// </summary>
+public sealed class MaintenanceWindowEntity
+{
+    public required Guid Id { get; init; }
+    public required string TenantId { get; init; }
+    public required string Name { get; init; }
+    public string? Description { get; init; }
+    public DateTimeOffset StartAt { get; init; }
+    public DateTimeOffset EndAt { get; init; }
+    public Guid[]? SuppressChannels { get; init; }
+    public string[]? SuppressEventTypes { get; init; }
+    public DateTimeOffset CreatedAt { get; init; }
+    public string?
CreatedBy { get; init; } +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/NotifyAuditEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/NotifyAuditEntity.cs new file mode 100644 index 000000000..c3ffd44fa --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/NotifyAuditEntity.cs @@ -0,0 +1,17 @@ +namespace StellaOps.Notify.Storage.Postgres.Models; + +/// +/// Represents an audit log entry for the notify module. +/// +public sealed class NotifyAuditEntity +{ + public long Id { get; init; } + public required string TenantId { get; init; } + public Guid? UserId { get; init; } + public required string Action { get; init; } + public required string ResourceType { get; init; } + public string? ResourceId { get; init; } + public string? Details { get; init; } + public string? CorrelationId { get; init; } + public DateTimeOffset CreatedAt { get; init; } +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/OnCallScheduleEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/OnCallScheduleEntity.cs new file mode 100644 index 000000000..175851177 --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/OnCallScheduleEntity.cs @@ -0,0 +1,29 @@ +namespace StellaOps.Notify.Storage.Postgres.Models; + +/// +/// Rotation type values. +/// +public static class RotationType +{ + public const string Daily = "daily"; + public const string Weekly = "weekly"; + public const string Custom = "custom"; +} + +/// +/// Represents an on-call schedule. +/// +public sealed class OnCallScheduleEntity +{ + public required Guid Id { get; init; } + public required string TenantId { get; init; } + public required string Name { get; init; } + public string? Description { get; init; } + public string Timezone { get; init; } = "UTC"; + public string RotationType { get; init; } = Models.RotationType.Weekly; + public string Participants { get; init; } = "[]"; + public string Overrides { get; init; } = "[]"; + public string Metadata { get; init; } = "{}"; + public DateTimeOffset CreatedAt { get; init; } + public DateTimeOffset UpdatedAt { get; init; } +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/QuietHoursEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/QuietHoursEntity.cs new file mode 100644 index 000000000..126ed611e --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/QuietHoursEntity.cs @@ -0,0 +1,19 @@ +namespace StellaOps.Notify.Storage.Postgres.Models; + +/// +/// Represents quiet hours configuration. +/// +public sealed class QuietHoursEntity +{ + public required Guid Id { get; init; } + public required string TenantId { get; init; } + public Guid? UserId { get; init; } + public Guid? 
ChannelId { get; init; } + public required TimeOnly StartTime { get; init; } + public required TimeOnly EndTime { get; init; } + public string Timezone { get; init; } = "UTC"; + public int[] DaysOfWeek { get; init; } = [0, 1, 2, 3, 4, 5, 6]; + public bool Enabled { get; init; } = true; + public DateTimeOffset CreatedAt { get; init; } + public DateTimeOffset UpdatedAt { get; init; } +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/RuleEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/RuleEntity.cs new file mode 100644 index 000000000..6694b000d --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/RuleEntity.cs @@ -0,0 +1,21 @@ +namespace StellaOps.Notify.Storage.Postgres.Models; + +/// +/// Represents a notification routing rule. +/// +public sealed class RuleEntity +{ + public required Guid Id { get; init; } + public required string TenantId { get; init; } + public required string Name { get; init; } + public string? Description { get; init; } + public bool Enabled { get; init; } = true; + public int Priority { get; init; } + public string[] EventTypes { get; init; } = []; + public string Filter { get; init; } = "{}"; + public Guid[] ChannelIds { get; init; } = []; + public Guid? TemplateId { get; init; } + public string Metadata { get; init; } = "{}"; + public DateTimeOffset CreatedAt { get; init; } + public DateTimeOffset UpdatedAt { get; init; } +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/TemplateEntity.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/TemplateEntity.cs new file mode 100644 index 000000000..52c748aa0 --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Models/TemplateEntity.cs @@ -0,0 +1,18 @@ +namespace StellaOps.Notify.Storage.Postgres.Models; + +/// +/// Represents a notification template. +/// +public sealed class TemplateEntity +{ + public required Guid Id { get; init; } + public required string TenantId { get; init; } + public required string Name { get; init; } + public required ChannelType ChannelType { get; init; } + public string? 
SubjectTemplate { get; init; }
+    public required string BodyTemplate { get; init; }
+    public string Locale { get; init; } = "en";
+    public string Metadata { get; init; } = "{}";
+    public DateTimeOffset CreatedAt { get; init; }
+    public DateTimeOffset UpdatedAt { get; init; }
+}
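Note for reviewers: the entity models above deliberately keep JSONB-backed columns (`Events`, `Steps`, `Participants`, `Overrides`, `Metadata`) as raw JSON strings, leaving (de)serialization to callers. A minimal sketch of that boundary, assuming `System.Text.Json` and a hypothetical `EscalationStep` shape (the steps schema itself is not defined in this diff):

```csharp
using System.Text.Json;
using StellaOps.Notify.Storage.Postgres.Models;

// Hypothetical step shape for illustration; not part of this change.
public sealed record EscalationStep(int DelayMinutes, Guid[] NotifyChannels);

public static class PolicySerializationSketch
{
    private static readonly JsonSerializerOptions Options = new(JsonSerializerDefaults.Web);

    public static EscalationPolicyEntity BuildPolicy(string tenantId, string name, IReadOnlyList<EscalationStep> steps) => new()
    {
        Id = Guid.NewGuid(),
        TenantId = tenantId,
        Name = name,
        Steps = JsonSerializer.Serialize(steps, Options) // stored verbatim in the jsonb column
    };

    public static IReadOnlyList<EscalationStep> ReadSteps(EscalationPolicyEntity policy) =>
        JsonSerializer.Deserialize<List<EscalationStep>>(policy.Steps, Options) ?? [];
}
```

Keeping the column types as plain strings insulates the storage layer from payload schema churn; only callers that interpret a payload need to agree on its shape.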
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/DigestRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/DigestRepository.cs
new file mode 100644
index 000000000..4bfa32b7a
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/DigestRepository.cs
@@ -0,0 +1,142 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public sealed class DigestRepository : RepositoryBase, IDigestRepository
+{
+    public DigestRepository(NotifyDataSource dataSource, ILogger<DigestRepository> logger)
+        : base(dataSource, logger) { }
+
+    public async Task<DigestEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, channel_id, recipient, digest_key, event_count, events, status, collect_until, sent_at, created_at, updated_at
+            FROM notify.digests WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            MapDigest, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<DigestEntity?> GetByKeyAsync(string tenantId, Guid channelId, string recipient, string digestKey, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, channel_id, recipient, digest_key, event_count, events, status, collect_until, sent_at, created_at, updated_at
+            FROM notify.digests WHERE tenant_id = @tenant_id AND channel_id = @channel_id AND recipient = @recipient AND digest_key = @digest_key
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "channel_id", channelId);
+            AddParameter(cmd, "recipient", recipient);
+            AddParameter(cmd, "digest_key", digestKey);
+        }, MapDigest, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<DigestEntity>> GetReadyToSendAsync(int limit = 100, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, channel_id, recipient, digest_key, event_count, events, status, collect_until, sent_at, created_at, updated_at
+            FROM notify.digests WHERE status = 'collecting' AND collect_until <= NOW()
+            ORDER BY collect_until LIMIT @limit
+            """;
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "limit", limit);
+        var results = new List<DigestEntity>();
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
+            results.Add(MapDigest(reader));
+        return results;
+    }
+
+    public async Task<DigestEntity> UpsertAsync(DigestEntity digest, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO notify.digests (id, tenant_id, channel_id, recipient, digest_key, event_count, events, status, collect_until)
+            VALUES (@id, @tenant_id, @channel_id, @recipient, @digest_key, @event_count, @events::jsonb, @status, @collect_until)
+            ON CONFLICT (tenant_id, channel_id, recipient, digest_key) DO UPDATE SET
+                event_count = notify.digests.event_count + EXCLUDED.event_count,
+                events = notify.digests.events || EXCLUDED.events,
+                collect_until = GREATEST(notify.digests.collect_until, EXCLUDED.collect_until)
+            RETURNING *
+            """;
+        var id = digest.Id == Guid.Empty ? Guid.NewGuid() : digest.Id;
+        await using var connection = await DataSource.OpenConnectionAsync(digest.TenantId, "writer", cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "tenant_id", digest.TenantId);
+        AddParameter(command, "channel_id", digest.ChannelId);
+        AddParameter(command, "recipient", digest.Recipient);
+        AddParameter(command, "digest_key", digest.DigestKey);
+        AddParameter(command, "event_count", digest.EventCount);
+        AddJsonbParameter(command, "events", digest.Events);
+        AddParameter(command, "status", digest.Status);
+        AddParameter(command, "collect_until", digest.CollectUntil);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapDigest(reader);
+    }
+
+    public async Task<bool> AddEventAsync(string tenantId, Guid id, string eventJson, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.digests SET event_count = event_count + 1, events = events || @event::jsonb
+            WHERE tenant_id = @tenant_id AND id = @id AND status = 'collecting'
+            """;
+        var rows = await ExecuteAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "id", id);
+            AddJsonbParameter(cmd, "event", eventJson);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> MarkSendingAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "UPDATE notify.digests SET status = 'sending' WHERE tenant_id = @tenant_id AND id = @id AND status = 'collecting'";
+        var rows = await ExecuteAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> MarkSentAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "UPDATE notify.digests SET status = 'sent', sent_at = NOW() WHERE tenant_id = @tenant_id AND id = @id";
+        var rows = await ExecuteAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<int> DeleteOldAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.digests WHERE status = 'sent' AND sent_at < @cutoff";
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "cutoff", cutoff);
+        return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+    }
+
+    private static DigestEntity MapDigest(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(0),
+        TenantId = reader.GetString(1),
+        ChannelId = reader.GetGuid(2),
+        Recipient = reader.GetString(3),
+        DigestKey = reader.GetString(4),
+        EventCount =
reader.GetInt32(5), + Events = reader.GetString(6), + Status = reader.GetString(7), + CollectUntil = reader.GetFieldValue(8), + SentAt = GetNullableDateTimeOffset(reader, 9), + CreatedAt = reader.GetFieldValue(10), + UpdatedAt = reader.GetFieldValue(11) + }; +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/EscalationRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/EscalationRepository.cs new file mode 100644 index 000000000..f1788abde --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/EscalationRepository.cs @@ -0,0 +1,252 @@ +using Microsoft.Extensions.Logging; +using Npgsql; +using StellaOps.Infrastructure.Postgres.Repositories; +using StellaOps.Notify.Storage.Postgres.Models; + +namespace StellaOps.Notify.Storage.Postgres.Repositories; + +public sealed class EscalationPolicyRepository : RepositoryBase, IEscalationPolicyRepository +{ + public EscalationPolicyRepository(NotifyDataSource dataSource, ILogger logger) + : base(dataSource, logger) { } + + public async Task GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, name, description, enabled, steps, repeat_count, metadata, created_at, updated_at + FROM notify.escalation_policies WHERE tenant_id = @tenant_id AND id = @id + """; + return await QuerySingleOrDefaultAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + MapPolicy, cancellationToken).ConfigureAwait(false); + } + + public async Task GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, name, description, enabled, steps, repeat_count, metadata, created_at, updated_at + FROM notify.escalation_policies WHERE tenant_id = @tenant_id AND name = @name + """; + return await QuerySingleOrDefaultAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "name", name); }, + MapPolicy, cancellationToken).ConfigureAwait(false); + } + + public async Task> ListAsync(string tenantId, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, name, description, enabled, steps, repeat_count, metadata, created_at, updated_at + FROM notify.escalation_policies WHERE tenant_id = @tenant_id ORDER BY name + """; + return await QueryAsync(tenantId, sql, + cmd => AddParameter(cmd, "tenant_id", tenantId), + MapPolicy, cancellationToken).ConfigureAwait(false); + } + + public async Task CreateAsync(EscalationPolicyEntity policy, CancellationToken cancellationToken = default) + { + const string sql = """ + INSERT INTO notify.escalation_policies (id, tenant_id, name, description, enabled, steps, repeat_count, metadata) + VALUES (@id, @tenant_id, @name, @description, @enabled, @steps::jsonb, @repeat_count, @metadata::jsonb) + RETURNING * + """; + var id = policy.Id == Guid.Empty ? 
Guid.NewGuid() : policy.Id; + await using var connection = await DataSource.OpenConnectionAsync(policy.TenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = CreateCommand(sql, connection); + AddParameter(command, "id", id); + AddParameter(command, "tenant_id", policy.TenantId); + AddParameter(command, "name", policy.Name); + AddParameter(command, "description", policy.Description); + AddParameter(command, "enabled", policy.Enabled); + AddJsonbParameter(command, "steps", policy.Steps); + AddParameter(command, "repeat_count", policy.RepeatCount); + AddJsonbParameter(command, "metadata", policy.Metadata); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + await reader.ReadAsync(cancellationToken).ConfigureAwait(false); + return MapPolicy(reader); + } + + public async Task UpdateAsync(EscalationPolicyEntity policy, CancellationToken cancellationToken = default) + { + const string sql = """ + UPDATE notify.escalation_policies SET name = @name, description = @description, enabled = @enabled, + steps = @steps::jsonb, repeat_count = @repeat_count, metadata = @metadata::jsonb + WHERE tenant_id = @tenant_id AND id = @id + """; + var rows = await ExecuteAsync(policy.TenantId, sql, cmd => + { + AddParameter(cmd, "tenant_id", policy.TenantId); + AddParameter(cmd, "id", policy.Id); + AddParameter(cmd, "name", policy.Name); + AddParameter(cmd, "description", policy.Description); + AddParameter(cmd, "enabled", policy.Enabled); + AddJsonbParameter(cmd, "steps", policy.Steps); + AddParameter(cmd, "repeat_count", policy.RepeatCount); + AddJsonbParameter(cmd, "metadata", policy.Metadata); + }, cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = "DELETE FROM notify.escalation_policies WHERE tenant_id = @tenant_id AND id = @id"; + var rows = await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + private static EscalationPolicyEntity MapPolicy(NpgsqlDataReader reader) => new() + { + Id = reader.GetGuid(0), + TenantId = reader.GetString(1), + Name = reader.GetString(2), + Description = GetNullableString(reader, 3), + Enabled = reader.GetBoolean(4), + Steps = reader.GetString(5), + RepeatCount = reader.GetInt32(6), + Metadata = reader.GetString(7), + CreatedAt = reader.GetFieldValue(8), + UpdatedAt = reader.GetFieldValue(9) + }; +} + +public sealed class EscalationStateRepository : RepositoryBase, IEscalationStateRepository +{ + public EscalationStateRepository(NotifyDataSource dataSource, ILogger logger) + : base(dataSource, logger) { } + + public async Task GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, policy_id, incident_id, correlation_id, current_step, repeat_iteration, status, + started_at, next_escalation_at, acknowledged_at, acknowledged_by, resolved_at, resolved_by, metadata + FROM notify.escalation_states WHERE tenant_id = @tenant_id AND id = @id + """; + return await QuerySingleOrDefaultAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + MapState, cancellationToken).ConfigureAwait(false); + } + + public async Task GetByCorrelationIdAsync(string tenantId, string correlationId, CancellationToken 
cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, policy_id, incident_id, correlation_id, current_step, repeat_iteration, status, + started_at, next_escalation_at, acknowledged_at, acknowledged_by, resolved_at, resolved_by, metadata + FROM notify.escalation_states WHERE tenant_id = @tenant_id AND correlation_id = @correlation_id AND status = 'active' + """; + return await QuerySingleOrDefaultAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "correlation_id", correlationId); }, + MapState, cancellationToken).ConfigureAwait(false); + } + + public async Task> GetActiveAsync(int limit = 100, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, policy_id, incident_id, correlation_id, current_step, repeat_iteration, status, + started_at, next_escalation_at, acknowledged_at, acknowledged_by, resolved_at, resolved_by, metadata + FROM notify.escalation_states WHERE status = 'active' AND next_escalation_at <= NOW() + ORDER BY next_escalation_at LIMIT @limit + """; + await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false); + await using var command = CreateCommand(sql, connection); + AddParameter(command, "limit", limit); + var results = new List(); + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false)) + results.Add(MapState(reader)); + return results; + } + + public async Task CreateAsync(EscalationStateEntity state, CancellationToken cancellationToken = default) + { + const string sql = """ + INSERT INTO notify.escalation_states (id, tenant_id, policy_id, incident_id, correlation_id, current_step, repeat_iteration, status, next_escalation_at, metadata) + VALUES (@id, @tenant_id, @policy_id, @incident_id, @correlation_id, @current_step, @repeat_iteration, @status, @next_escalation_at, @metadata::jsonb) + RETURNING * + """; + var id = state.Id == Guid.Empty ? Guid.NewGuid() : state.Id; + await using var connection = await DataSource.OpenConnectionAsync(state.TenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = CreateCommand(sql, connection); + AddParameter(command, "id", id); + AddParameter(command, "tenant_id", state.TenantId); + AddParameter(command, "policy_id", state.PolicyId); + AddParameter(command, "incident_id", state.IncidentId); + AddParameter(command, "correlation_id", state.CorrelationId); + AddParameter(command, "current_step", state.CurrentStep); + AddParameter(command, "repeat_iteration", state.RepeatIteration); + AddParameter(command, "status", state.Status); + AddParameter(command, "next_escalation_at", state.NextEscalationAt); + AddJsonbParameter(command, "metadata", state.Metadata); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + await reader.ReadAsync(cancellationToken).ConfigureAwait(false); + return MapState(reader); + } + + public async Task EscalateAsync(string tenantId, Guid id, int newStep, DateTimeOffset? 
nextEscalationAt, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.escalation_states SET current_step = @new_step, next_escalation_at = @next_escalation_at
+            WHERE tenant_id = @tenant_id AND id = @id AND status = 'active'
+            """;
+        var rows = await ExecuteAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "id", id);
+            AddParameter(cmd, "new_step", newStep);
+            AddParameter(cmd, "next_escalation_at", nextEscalationAt);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> AcknowledgeAsync(string tenantId, Guid id, string acknowledgedBy, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.escalation_states SET status = 'acknowledged', acknowledged_at = NOW(), acknowledged_by = @acknowledged_by
+            WHERE tenant_id = @tenant_id AND id = @id AND status = 'active'
+            """;
+        var rows = await ExecuteAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "id", id);
+            AddParameter(cmd, "acknowledged_by", acknowledgedBy);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> ResolveAsync(string tenantId, Guid id, string resolvedBy, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.escalation_states SET status = 'resolved', resolved_at = NOW(), resolved_by = @resolved_by
+            WHERE tenant_id = @tenant_id AND id = @id AND status IN ('active', 'acknowledged')
+            """;
+        var rows = await ExecuteAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "id", id);
+            AddParameter(cmd, "resolved_by", resolvedBy);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    private static EscalationStateEntity MapState(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(0),
+        TenantId = reader.GetString(1),
+        PolicyId = reader.GetGuid(2),
+        IncidentId = GetNullableGuid(reader, 3),
+        CorrelationId = reader.GetString(4),
+        CurrentStep = reader.GetInt32(5),
+        RepeatIteration = reader.GetInt32(6),
+        Status = reader.GetString(7),
+        StartedAt = reader.GetFieldValue<DateTimeOffset>(8),
+        NextEscalationAt = GetNullableDateTimeOffset(reader, 9),
+        AcknowledgedAt = GetNullableDateTimeOffset(reader, 10),
+        AcknowledgedBy = GetNullableString(reader, 11),
+        ResolvedAt = GetNullableDateTimeOffset(reader, 12),
+        ResolvedBy = GetNullableString(reader, 13),
+        Metadata = reader.GetString(14)
+    };
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IDigestRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IDigestRepository.cs
new file mode 100644
index 000000000..1b5ae0ba6
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IDigestRepository.cs
@@ -0,0 +1,15 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IDigestRepository
+{
+    Task<DigestEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<DigestEntity?> GetByKeyAsync(string tenantId, Guid channelId, string recipient, string digestKey, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<DigestEntity>> GetReadyToSendAsync(int limit = 100, CancellationToken cancellationToken = default);
+    Task<DigestEntity> UpsertAsync(DigestEntity digest, CancellationToken cancellationToken = default);
+    Task<bool> AddEventAsync(string tenantId, Guid id, string eventJson, CancellationToken cancellationToken = default);
+    Task<bool> MarkSendingAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<bool> MarkSentAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<int> DeleteOldAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
+}
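For reviewers, the digest lifecycle is easiest to see end to end. A minimal sketch of a caller folding an event into a digest and a worker draining due digests, assuming the repository is resolved from DI; the tenant, channel, and payload values are placeholders. Repeated `UpsertAsync` calls with the same key accumulate via the `ON CONFLICT` arm, which adds `event_count` and appends to the `events` array:

```csharp
using StellaOps.Notify.Storage.Postgres.Models;
using StellaOps.Notify.Storage.Postgres.Repositories;

public static class DigestFlowSketch
{
    public static async Task CollectAndDrainAsync(IDigestRepository digests, CancellationToken ct)
    {
        // Fold one event into the hourly digest for this channel/recipient pair.
        await digests.UpsertAsync(new DigestEntity
        {
            Id = Guid.Empty,                       // repository assigns a fresh id
            TenantId = "tenant-a",                 // placeholder tenant
            ChannelId = Guid.NewGuid(),            // placeholder channel
            Recipient = "ops@example.com",
            DigestKey = "scan.completed:hourly",
            EventCount = 1,
            Events = """[{"eventType":"scan.completed"}]""",
            Status = DigestStatus.Collecting,
            CollectUntil = DateTimeOffset.UtcNow.AddHours(1)
        }, ct);

        // A background worker later drains digests whose window has closed.
        foreach (var due in await digests.GetReadyToSendAsync(100, ct))
        {
            // MarkSendingAsync only succeeds from 'collecting', so two workers
            // cannot both claim the same digest.
            if (await digests.MarkSendingAsync(due.TenantId, due.Id, ct))
            {
                // render and deliver elsewhere, then:
                await digests.MarkSentAsync(due.TenantId, due.Id, ct);
            }
        }
    }
}
```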
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IEscalationRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IEscalationRepository.cs
new file mode 100644
index 000000000..19593bd7e
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IEscalationRepository.cs
@@ -0,0 +1,24 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IEscalationPolicyRepository
+{
+    Task<EscalationPolicyEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<EscalationPolicyEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<EscalationPolicyEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default);
+    Task<EscalationPolicyEntity> CreateAsync(EscalationPolicyEntity policy, CancellationToken cancellationToken = default);
+    Task<bool> UpdateAsync(EscalationPolicyEntity policy, CancellationToken cancellationToken = default);
+    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+}
+
+public interface IEscalationStateRepository
+{
+    Task<EscalationStateEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<EscalationStateEntity?> GetByCorrelationIdAsync(string tenantId, string correlationId, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<EscalationStateEntity>> GetActiveAsync(int limit = 100, CancellationToken cancellationToken = default);
+    Task<EscalationStateEntity> CreateAsync(EscalationStateEntity state, CancellationToken cancellationToken = default);
+    Task<bool> EscalateAsync(string tenantId, Guid id, int newStep, DateTimeOffset? nextEscalationAt, CancellationToken cancellationToken = default);
+    Task<bool> AcknowledgeAsync(string tenantId, Guid id, string acknowledgedBy, CancellationToken cancellationToken = default);
+    Task<bool> ResolveAsync(string tenantId, Guid id, string resolvedBy, CancellationToken cancellationToken = default);
+}
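A possible polling loop over `IEscalationStateRepository.GetActiveAsync`, which only returns active states whose `next_escalation_at` is due. The five-minute re-check delay and the skip-on-disabled-policy behavior are assumptions for illustration, not part of this change:

```csharp
using StellaOps.Notify.Storage.Postgres.Repositories;

public static class EscalationTickSketch
{
    public static async Task TickAsync(
        IEscalationStateRepository states,
        IEscalationPolicyRepository policies,
        CancellationToken ct)
    {
        foreach (var state in await states.GetActiveAsync(100, ct))
        {
            var policy = await policies.GetByIdAsync(state.TenantId, state.PolicyId, ct);
            if (policy is null || !policy.Enabled)
            {
                continue; // assumption: leave the state for a cleanup pass to expire
            }

            // Page the next step's targets out of band, then advance the step
            // pointer and schedule the next check (delay is illustrative only).
            await states.EscalateAsync(
                state.TenantId,
                state.Id,
                state.CurrentStep + 1,
                DateTimeOffset.UtcNow.AddMinutes(5),
                ct);
        }
    }
}
```

Because `EscalateAsync` is guarded by `status = 'active'`, an acknowledgement that lands between the read and the update simply makes the escalation a no-op.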
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IInboxRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IInboxRepository.cs
new file mode 100644
index 000000000..084f4b515
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IInboxRepository.cs
@@ -0,0 +1,16 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IInboxRepository
+{
+    Task<InboxEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<InboxEntity>> GetForUserAsync(string tenantId, Guid userId, bool unreadOnly = false, int limit = 50, int offset = 0, CancellationToken cancellationToken = default);
+    Task<int> GetUnreadCountAsync(string tenantId, Guid userId, CancellationToken cancellationToken = default);
+    Task<InboxEntity> CreateAsync(InboxEntity inbox, CancellationToken cancellationToken = default);
+    Task<bool> MarkReadAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<int> MarkAllReadAsync(string tenantId, Guid userId, CancellationToken cancellationToken = default);
+    Task<bool> ArchiveAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<int> DeleteOldAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
+}
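Typical inbox usage from an API handler might look like the following sketch; the tenant id, payload, and deep link are placeholders, and `CreateAsync` substitutes a fresh `Guid` when `Guid.Empty` is supplied:

```csharp
using StellaOps.Notify.Storage.Postgres.Models;
using StellaOps.Notify.Storage.Postgres.Repositories;

public static class InboxSketch
{
    public static async Task<int> PushAndCountAsync(IInboxRepository inbox, Guid userId, CancellationToken ct)
    {
        await inbox.CreateAsync(new InboxEntity
        {
            Id = Guid.Empty,                      // repository assigns a fresh id
            TenantId = "tenant-a",                // placeholder tenant
            UserId = userId,
            Title = "Policy violation detected",
            EventType = "policy.violation",
            EventPayload = """{"policyId":"P-42"}""",
            ActionUrl = "/findings/123"           // hypothetical deep link
        }, ct);

        // Unread badge for the user's inbox; archived items are excluded.
        return await inbox.GetUnreadCountAsync("tenant-a", userId, ct);
    }
}
```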
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IIncidentRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IIncidentRepository.cs
new file mode 100644
index 000000000..30695ed15
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IIncidentRepository.cs
@@ -0,0 +1,16 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IIncidentRepository
+{
+    Task<IncidentEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<IncidentEntity?> GetByCorrelationIdAsync(string tenantId, string correlationId, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<IncidentEntity>> ListAsync(string tenantId, string? status = null, string? severity = null, int limit = 100, int offset = 0, CancellationToken cancellationToken = default);
+    Task<IncidentEntity> CreateAsync(IncidentEntity incident, CancellationToken cancellationToken = default);
+    Task<bool> UpdateAsync(IncidentEntity incident, CancellationToken cancellationToken = default);
+    Task<bool> AcknowledgeAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<bool> ResolveAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<bool> CloseAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<bool> AssignAsync(string tenantId, Guid id, Guid assignedTo, CancellationToken cancellationToken = default);
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IMaintenanceWindowRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IMaintenanceWindowRepository.cs
new file mode 100644
index 000000000..ffe78bcdd
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IMaintenanceWindowRepository.cs
@@ -0,0 +1,14 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IMaintenanceWindowRepository
+{
+    Task<MaintenanceWindowEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<MaintenanceWindowEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<MaintenanceWindowEntity>> GetActiveAsync(string tenantId, CancellationToken cancellationToken = default);
+    Task<MaintenanceWindowEntity> CreateAsync(MaintenanceWindowEntity window, CancellationToken cancellationToken = default);
+    Task<bool> UpdateAsync(MaintenanceWindowEntity window, CancellationToken cancellationToken = default);
+    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<int> DeleteExpiredAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
+}
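A dispatcher could consult `GetActiveAsync` before delivery, as in this sketch. Treating a `NULL` suppression list as "applies to all" is an assumption made here for illustration; the diff itself does not pin down that semantic:

```csharp
using System.Linq;
using StellaOps.Notify.Storage.Postgres.Repositories;

public static class SuppressionSketch
{
    public static async Task<bool> IsSuppressedAsync(
        IMaintenanceWindowRepository windows,
        string tenantId,
        Guid channelId,
        string eventType,
        CancellationToken ct)
    {
        foreach (var window in await windows.GetActiveAsync(tenantId, ct))
        {
            // Assumption: a null list means "suppress everything";
            // a non-null list acts as a filter.
            var channelMatch = window.SuppressChannels?.Contains(channelId) ?? true;
            var eventMatch = window.SuppressEventTypes?.Contains(eventType) ?? true;
            if (channelMatch && eventMatch)
            {
                return true;
            }
        }

        return false;
    }
}
```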
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/INotifyAuditRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/INotifyAuditRepository.cs
new file mode 100644
index 000000000..15a98c0d1
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/INotifyAuditRepository.cs
@@ -0,0 +1,12 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface INotifyAuditRepository
+{
+    Task<long> CreateAsync(NotifyAuditEntity audit, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<NotifyAuditEntity>> ListAsync(string tenantId, int limit = 100, int offset = 0, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<NotifyAuditEntity>> GetByResourceAsync(string tenantId, string resourceType, string? resourceId = null, int limit = 100, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<NotifyAuditEntity>> GetByCorrelationIdAsync(string tenantId, string correlationId, CancellationToken cancellationToken = default);
+    Task<int> DeleteOldAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IOnCallScheduleRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IOnCallScheduleRepository.cs
new file mode 100644
index 000000000..373dda5ec
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IOnCallScheduleRepository.cs
@@ -0,0 +1,13 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IOnCallScheduleRepository
+{
+    Task<OnCallScheduleEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<OnCallScheduleEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<OnCallScheduleEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default);
+    Task<OnCallScheduleEntity> CreateAsync(OnCallScheduleEntity schedule, CancellationToken cancellationToken = default);
+    Task<bool> UpdateAsync(OnCallScheduleEntity schedule, CancellationToken cancellationToken = default);
+    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IQuietHoursRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IQuietHoursRepository.cs
new file mode 100644
index 000000000..d7f0000b2
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IQuietHoursRepository.cs
@@ -0,0 +1,13 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IQuietHoursRepository
+{
+    Task<QuietHoursEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<QuietHoursEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<QuietHoursEntity>> GetForUserAsync(string tenantId, Guid userId, CancellationToken cancellationToken = default);
+    Task<QuietHoursEntity> CreateAsync(QuietHoursEntity quietHours, CancellationToken cancellationToken = default);
+    Task<bool> UpdateAsync(QuietHoursEntity quietHours, CancellationToken cancellationToken = default);
+    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IRuleRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IRuleRepository.cs
new file mode 100644
index 000000000..df8233431
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IRuleRepository.cs
@@ -0,0 +1,14 @@
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public interface IRuleRepository
+{
+    Task<RuleEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+    Task<RuleEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default);
+    Task<IReadOnlyList<RuleEntity>> ListAsync(string tenantId, bool?
enabled = null, CancellationToken cancellationToken = default); + Task> GetMatchingRulesAsync(string tenantId, string eventType, CancellationToken cancellationToken = default); + Task CreateAsync(RuleEntity rule, CancellationToken cancellationToken = default); + Task UpdateAsync(RuleEntity rule, CancellationToken cancellationToken = default); + Task DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default); +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/ITemplateRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/ITemplateRepository.cs new file mode 100644 index 000000000..5199a71bf --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/ITemplateRepository.cs @@ -0,0 +1,13 @@ +using StellaOps.Notify.Storage.Postgres.Models; + +namespace StellaOps.Notify.Storage.Postgres.Repositories; + +public interface ITemplateRepository +{ + Task GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default); + Task GetByNameAsync(string tenantId, string name, ChannelType channelType, string locale = "en", CancellationToken cancellationToken = default); + Task> ListAsync(string tenantId, ChannelType? channelType = null, CancellationToken cancellationToken = default); + Task CreateAsync(TemplateEntity template, CancellationToken cancellationToken = default); + Task UpdateAsync(TemplateEntity template, CancellationToken cancellationToken = default); + Task DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default); +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/InboxRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/InboxRepository.cs new file mode 100644 index 000000000..2cea605bc --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/InboxRepository.cs @@ -0,0 +1,139 @@ +using Microsoft.Extensions.Logging; +using Npgsql; +using StellaOps.Infrastructure.Postgres.Repositories; +using StellaOps.Notify.Storage.Postgres.Models; + +namespace StellaOps.Notify.Storage.Postgres.Repositories; + +public sealed class InboxRepository : RepositoryBase, IInboxRepository +{ + public InboxRepository(NotifyDataSource dataSource, ILogger logger) + : base(dataSource, logger) { } + + public async Task GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, user_id, title, body, event_type, event_payload, read, archived, action_url, correlation_id, created_at, read_at, archived_at + FROM notify.inbox WHERE tenant_id = @tenant_id AND id = @id + """; + return await QuerySingleOrDefaultAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + MapInbox, cancellationToken).ConfigureAwait(false); + } + + public async Task> GetForUserAsync(string tenantId, Guid userId, bool unreadOnly = false, int limit = 50, int offset = 0, CancellationToken cancellationToken = default) + { + var sql = """ + SELECT id, tenant_id, user_id, title, body, event_type, event_payload, read, archived, action_url, correlation_id, created_at, read_at, archived_at + FROM notify.inbox WHERE tenant_id = @tenant_id AND user_id = @user_id AND archived = FALSE + """; + if (unreadOnly) sql += " AND read = FALSE"; + sql += " ORDER BY created_at DESC LIMIT @limit OFFSET @offset"; + + return await QueryAsync(tenantId, sql, cmd => + { + 
AddParameter(cmd, "tenant_id", tenantId); + AddParameter(cmd, "user_id", userId); + AddParameter(cmd, "limit", limit); + AddParameter(cmd, "offset", offset); + }, MapInbox, cancellationToken).ConfigureAwait(false); + } + + public async Task GetUnreadCountAsync(string tenantId, Guid userId, CancellationToken cancellationToken = default) + { + const string sql = "SELECT COUNT(*) FROM notify.inbox WHERE tenant_id = @tenant_id AND user_id = @user_id AND read = FALSE AND archived = FALSE"; + await using var connection = await DataSource.OpenConnectionAsync(tenantId, "reader", cancellationToken).ConfigureAwait(false); + await using var command = CreateCommand(sql, connection); + AddParameter(command, "tenant_id", tenantId); + AddParameter(command, "user_id", userId); + var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false); + return Convert.ToInt32(result); + } + + public async Task CreateAsync(InboxEntity inbox, CancellationToken cancellationToken = default) + { + const string sql = """ + INSERT INTO notify.inbox (id, tenant_id, user_id, title, body, event_type, event_payload, action_url, correlation_id) + VALUES (@id, @tenant_id, @user_id, @title, @body, @event_type, @event_payload::jsonb, @action_url, @correlation_id) + RETURNING * + """; + var id = inbox.Id == Guid.Empty ? Guid.NewGuid() : inbox.Id; + await using var connection = await DataSource.OpenConnectionAsync(inbox.TenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = CreateCommand(sql, connection); + AddParameter(command, "id", id); + AddParameter(command, "tenant_id", inbox.TenantId); + AddParameter(command, "user_id", inbox.UserId); + AddParameter(command, "title", inbox.Title); + AddParameter(command, "body", inbox.Body); + AddParameter(command, "event_type", inbox.EventType); + AddJsonbParameter(command, "event_payload", inbox.EventPayload); + AddParameter(command, "action_url", inbox.ActionUrl); + AddParameter(command, "correlation_id", inbox.CorrelationId); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + await reader.ReadAsync(cancellationToken).ConfigureAwait(false); + return MapInbox(reader); + } + + public async Task MarkReadAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = "UPDATE notify.inbox SET read = TRUE, read_at = NOW() WHERE tenant_id = @tenant_id AND id = @id AND read = FALSE"; + var rows = await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task MarkAllReadAsync(string tenantId, Guid userId, CancellationToken cancellationToken = default) + { + const string sql = "UPDATE notify.inbox SET read = TRUE, read_at = NOW() WHERE tenant_id = @tenant_id AND user_id = @user_id AND read = FALSE"; + return await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "user_id", userId); }, + cancellationToken).ConfigureAwait(false); + } + + public async Task ArchiveAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = "UPDATE notify.inbox SET archived = TRUE, archived_at = NOW() WHERE tenant_id = @tenant_id AND id = @id"; + var rows = await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + cancellationToken).ConfigureAwait(false); + return rows > 
0; + } + + public async Task DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = "DELETE FROM notify.inbox WHERE tenant_id = @tenant_id AND id = @id"; + var rows = await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task DeleteOldAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default) + { + const string sql = "DELETE FROM notify.inbox WHERE archived = TRUE AND archived_at < @cutoff"; + await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false); + await using var command = CreateCommand(sql, connection); + AddParameter(command, "cutoff", cutoff); + return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); + } + + private static InboxEntity MapInbox(NpgsqlDataReader reader) => new() + { + Id = reader.GetGuid(0), + TenantId = reader.GetString(1), + UserId = reader.GetGuid(2), + Title = reader.GetString(3), + Body = GetNullableString(reader, 4), + EventType = reader.GetString(5), + EventPayload = reader.GetString(6), + Read = reader.GetBoolean(7), + Archived = reader.GetBoolean(8), + ActionUrl = GetNullableString(reader, 9), + CorrelationId = GetNullableString(reader, 10), + CreatedAt = reader.GetFieldValue(11), + ReadAt = GetNullableDateTimeOffset(reader, 12), + ArchivedAt = GetNullableDateTimeOffset(reader, 13) + }; +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IncidentRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IncidentRepository.cs new file mode 100644 index 000000000..3537f8b38 --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/IncidentRepository.cs @@ -0,0 +1,167 @@ +using Microsoft.Extensions.Logging; +using Npgsql; +using StellaOps.Infrastructure.Postgres.Repositories; +using StellaOps.Notify.Storage.Postgres.Models; + +namespace StellaOps.Notify.Storage.Postgres.Repositories; + +public sealed class IncidentRepository : RepositoryBase, IIncidentRepository +{ + public IncidentRepository(NotifyDataSource dataSource, ILogger logger) + : base(dataSource, logger) { } + + public async Task GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, title, description, severity, status, source, correlation_id, assigned_to, escalation_policy_id, + metadata, created_at, acknowledged_at, resolved_at, closed_at, created_by + FROM notify.incidents WHERE tenant_id = @tenant_id AND id = @id + """; + return await QuerySingleOrDefaultAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + MapIncident, cancellationToken).ConfigureAwait(false); + } + + public async Task GetByCorrelationIdAsync(string tenantId, string correlationId, CancellationToken cancellationToken = default) + { + const string sql = """ + SELECT id, tenant_id, title, description, severity, status, source, correlation_id, assigned_to, escalation_policy_id, + metadata, created_at, acknowledged_at, resolved_at, closed_at, created_by + FROM notify.incidents WHERE tenant_id = @tenant_id AND correlation_id = @correlation_id + """; + return await QuerySingleOrDefaultAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "correlation_id", correlationId); 
}, + MapIncident, cancellationToken).ConfigureAwait(false); + } + + public async Task> ListAsync(string tenantId, string? status = null, string? severity = null, int limit = 100, int offset = 0, CancellationToken cancellationToken = default) + { + var sql = """ + SELECT id, tenant_id, title, description, severity, status, source, correlation_id, assigned_to, escalation_policy_id, + metadata, created_at, acknowledged_at, resolved_at, closed_at, created_by + FROM notify.incidents WHERE tenant_id = @tenant_id + """; + if (status != null) sql += " AND status = @status"; + if (severity != null) sql += " AND severity = @severity"; + sql += " ORDER BY created_at DESC LIMIT @limit OFFSET @offset"; + + return await QueryAsync(tenantId, sql, cmd => + { + AddParameter(cmd, "tenant_id", tenantId); + if (status != null) AddParameter(cmd, "status", status); + if (severity != null) AddParameter(cmd, "severity", severity); + AddParameter(cmd, "limit", limit); + AddParameter(cmd, "offset", offset); + }, MapIncident, cancellationToken).ConfigureAwait(false); + } + + public async Task CreateAsync(IncidentEntity incident, CancellationToken cancellationToken = default) + { + const string sql = """ + INSERT INTO notify.incidents (id, tenant_id, title, description, severity, status, source, correlation_id, assigned_to, escalation_policy_id, metadata, created_by) + VALUES (@id, @tenant_id, @title, @description, @severity, @status, @source, @correlation_id, @assigned_to, @escalation_policy_id, @metadata::jsonb, @created_by) + RETURNING * + """; + var id = incident.Id == Guid.Empty ? Guid.NewGuid() : incident.Id; + await using var connection = await DataSource.OpenConnectionAsync(incident.TenantId, "writer", cancellationToken).ConfigureAwait(false); + await using var command = CreateCommand(sql, connection); + AddParameter(command, "id", id); + AddParameter(command, "tenant_id", incident.TenantId); + AddParameter(command, "title", incident.Title); + AddParameter(command, "description", incident.Description); + AddParameter(command, "severity", incident.Severity); + AddParameter(command, "status", incident.Status); + AddParameter(command, "source", incident.Source); + AddParameter(command, "correlation_id", incident.CorrelationId); + AddParameter(command, "assigned_to", incident.AssignedTo); + AddParameter(command, "escalation_policy_id", incident.EscalationPolicyId); + AddJsonbParameter(command, "metadata", incident.Metadata); + AddParameter(command, "created_by", incident.CreatedBy); + + await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); + await reader.ReadAsync(cancellationToken).ConfigureAwait(false); + return MapIncident(reader); + } + + public async Task UpdateAsync(IncidentEntity incident, CancellationToken cancellationToken = default) + { + const string sql = """ + UPDATE notify.incidents SET title = @title, description = @description, severity = @severity, status = @status, + source = @source, assigned_to = @assigned_to, escalation_policy_id = @escalation_policy_id, metadata = @metadata::jsonb + WHERE tenant_id = @tenant_id AND id = @id + """; + var rows = await ExecuteAsync(incident.TenantId, sql, cmd => + { + AddParameter(cmd, "tenant_id", incident.TenantId); + AddParameter(cmd, "id", incident.Id); + AddParameter(cmd, "title", incident.Title); + AddParameter(cmd, "description", incident.Description); + AddParameter(cmd, "severity", incident.Severity); + AddParameter(cmd, "status", incident.Status); + AddParameter(cmd, "source", incident.Source); + 
AddParameter(cmd, "assigned_to", incident.AssignedTo); + AddParameter(cmd, "escalation_policy_id", incident.EscalationPolicyId); + AddJsonbParameter(cmd, "metadata", incident.Metadata); + }, cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task AcknowledgeAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = "UPDATE notify.incidents SET status = 'acknowledged', acknowledged_at = NOW() WHERE tenant_id = @tenant_id AND id = @id AND status = 'open'"; + var rows = await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task ResolveAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = "UPDATE notify.incidents SET status = 'resolved', resolved_at = NOW() WHERE tenant_id = @tenant_id AND id = @id AND status IN ('open', 'acknowledged')"; + var rows = await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task CloseAsync(string tenantId, Guid id, CancellationToken cancellationToken = default) + { + const string sql = "UPDATE notify.incidents SET status = 'closed', closed_at = NOW() WHERE tenant_id = @tenant_id AND id = @id"; + var rows = await ExecuteAsync(tenantId, sql, + cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); }, + cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + public async Task AssignAsync(string tenantId, Guid id, Guid assignedTo, CancellationToken cancellationToken = default) + { + const string sql = "UPDATE notify.incidents SET assigned_to = @assigned_to WHERE tenant_id = @tenant_id AND id = @id"; + var rows = await ExecuteAsync(tenantId, sql, cmd => + { + AddParameter(cmd, "tenant_id", tenantId); + AddParameter(cmd, "id", id); + AddParameter(cmd, "assigned_to", assignedTo); + }, cancellationToken).ConfigureAwait(false); + return rows > 0; + } + + private static IncidentEntity MapIncident(NpgsqlDataReader reader) => new() + { + Id = reader.GetGuid(0), + TenantId = reader.GetString(1), + Title = reader.GetString(2), + Description = GetNullableString(reader, 3), + Severity = reader.GetString(4), + Status = reader.GetString(5), + Source = GetNullableString(reader, 6), + CorrelationId = GetNullableString(reader, 7), + AssignedTo = GetNullableGuid(reader, 8), + EscalationPolicyId = GetNullableGuid(reader, 9), + Metadata = reader.GetString(10), + CreatedAt = reader.GetFieldValue(11), + AcknowledgedAt = GetNullableDateTimeOffset(reader, 12), + ResolvedAt = GetNullableDateTimeOffset(reader, 13), + ClosedAt = GetNullableDateTimeOffset(reader, 14), + CreatedBy = GetNullableString(reader, 15) + }; +} diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/MaintenanceWindowRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/MaintenanceWindowRepository.cs new file mode 100644 index 000000000..04e658a77 --- /dev/null +++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/MaintenanceWindowRepository.cs @@ -0,0 +1,123 @@ +using Microsoft.Extensions.Logging; +using Npgsql; +using StellaOps.Infrastructure.Postgres.Repositories; +using StellaOps.Notify.Storage.Postgres.Models; + +namespace StellaOps.Notify.Storage.Postgres.Repositories; + +public 
sealed class MaintenanceWindowRepository : RepositoryBase, IMaintenanceWindowRepository
+{
+    public MaintenanceWindowRepository(NotifyDataSource dataSource, ILogger<MaintenanceWindowRepository> logger)
+        : base(dataSource, logger) { }
+
+    public async Task<MaintenanceWindowEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, start_at, end_at, suppress_channels, suppress_event_types, created_at, created_by
+            FROM notify.maintenance_windows WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            MapWindow, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<MaintenanceWindowEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, start_at, end_at, suppress_channels, suppress_event_types, created_at, created_by
+            FROM notify.maintenance_windows WHERE tenant_id = @tenant_id ORDER BY start_at DESC
+            """;
+        return await QueryAsync(tenantId, sql,
+            cmd => AddParameter(cmd, "tenant_id", tenantId),
+            MapWindow, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<MaintenanceWindowEntity>> GetActiveAsync(string tenantId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, start_at, end_at, suppress_channels, suppress_event_types, created_at, created_by
+            FROM notify.maintenance_windows WHERE tenant_id = @tenant_id AND start_at <= NOW() AND end_at > NOW()
+            """;
+        return await QueryAsync(tenantId, sql,
+            cmd => AddParameter(cmd, "tenant_id", tenantId),
+            MapWindow, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<MaintenanceWindowEntity> CreateAsync(MaintenanceWindowEntity window, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO notify.maintenance_windows (id, tenant_id, name, description, start_at, end_at, suppress_channels, suppress_event_types, created_by)
+            VALUES (@id, @tenant_id, @name, @description, @start_at, @end_at, @suppress_channels, @suppress_event_types, @created_by)
+            RETURNING *
+            """;
+        var id = window.Id == Guid.Empty ? Guid.NewGuid() : window.Id;
+        await using var connection = await DataSource.OpenConnectionAsync(window.TenantId, "writer", cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "tenant_id", window.TenantId);
+        AddParameter(command, "name", window.Name);
+        AddParameter(command, "description", window.Description);
+        AddParameter(command, "start_at", window.StartAt);
+        AddParameter(command, "end_at", window.EndAt);
+        AddParameter(command, "suppress_channels", window.SuppressChannels);
+        AddTextArrayParameter(command, "suppress_event_types", window.SuppressEventTypes ?? []);
+        AddParameter(command, "created_by", window.CreatedBy);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapWindow(reader);
+    }
+
+    public async Task<bool> UpdateAsync(MaintenanceWindowEntity window, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.maintenance_windows SET name = @name, description = @description, start_at = @start_at, end_at = @end_at,
+                suppress_channels = @suppress_channels, suppress_event_types = @suppress_event_types
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        var rows = await ExecuteAsync(window.TenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", window.TenantId);
+            AddParameter(cmd, "id", window.Id);
+            AddParameter(cmd, "name", window.Name);
+            AddParameter(cmd, "description", window.Description);
+            AddParameter(cmd, "start_at", window.StartAt);
+            AddParameter(cmd, "end_at", window.EndAt);
+            AddParameter(cmd, "suppress_channels", window.SuppressChannels);
+            AddTextArrayParameter(cmd, "suppress_event_types", window.SuppressEventTypes ?? []);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.maintenance_windows WHERE tenant_id = @tenant_id AND id = @id";
+        var rows = await ExecuteAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<int> DeleteExpiredAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.maintenance_windows WHERE end_at < @cutoff";
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "cutoff", cutoff);
+        return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+    }
+
+    private static MaintenanceWindowEntity MapWindow(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(0),
+        TenantId = reader.GetString(1),
+        Name = reader.GetString(2),
+        Description = GetNullableString(reader, 3),
+        StartAt = reader.GetFieldValue<DateTimeOffset>(4),
+        EndAt = reader.GetFieldValue<DateTimeOffset>(5),
+        SuppressChannels = reader.IsDBNull(6) ? null : reader.GetFieldValue<string[]>(6),
+        SuppressEventTypes = reader.IsDBNull(7) ? null : reader.GetFieldValue<string[]>(7),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(8),
+        CreatedBy = GetNullableString(reader, 9)
+    };
+}
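// ---------------------------------------------------------------------------
// Editor's sketch (illustrative only, not part of the patch): how a delivery
// path might consult GetActiveAsync before sending. Assumes the interface
// IMaintenanceWindowRepository exposes the same members as the class above;
// eventType/channel and the caller are hypothetical. Needs System.Linq.
// ---------------------------------------------------------------------------
static async Task<bool> IsSuppressedAsync(
    IMaintenanceWindowRepository maintenanceWindows,
    string tenantId, string eventType, string channel, CancellationToken ct)
{
    // Null suppress lists are treated as "suppress everything" for that axis,
    // matching the nullable columns in the mapper above.
    var active = await maintenanceWindows.GetActiveAsync(tenantId, ct);
    return active.Any(w =>
        (w.SuppressEventTypes is null || w.SuppressEventTypes.Contains(eventType)) &&
        (w.SuppressChannels is null || w.SuppressChannels.Contains(channel)));
}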
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/NotifyAuditRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/NotifyAuditRepository.cs
new file mode 100644
index 000000000..1f0854127
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/NotifyAuditRepository.cs
@@ -0,0 +1,100 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public sealed class NotifyAuditRepository : RepositoryBase, INotifyAuditRepository
+{
+    public NotifyAuditRepository(NotifyDataSource dataSource, ILogger<NotifyAuditRepository> logger)
+        : base(dataSource, logger) { }
+
+    public async Task<long> CreateAsync(NotifyAuditEntity audit, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO notify.audit (tenant_id, user_id, action, resource_type, resource_id, details, correlation_id)
+            VALUES (@tenant_id, @user_id, @action, @resource_type, @resource_id, @details::jsonb, @correlation_id)
+            RETURNING id
+            """;
+        await using var connection = await DataSource.OpenConnectionAsync(audit.TenantId, "writer", cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "tenant_id", audit.TenantId);
+        AddParameter(command, "user_id", audit.UserId);
+        AddParameter(command, "action", audit.Action);
+        AddParameter(command, "resource_type", audit.ResourceType);
+        AddParameter(command, "resource_id", audit.ResourceId);
+        AddJsonbParameter(command, "details", audit.Details);
+        AddParameter(command, "correlation_id", audit.CorrelationId);
+
+        var result = await command.ExecuteScalarAsync(cancellationToken).ConfigureAwait(false);
+        return (long)result!;
+    }
+
+    public async Task<IReadOnlyList<NotifyAuditEntity>> ListAsync(string tenantId, int limit = 100, int offset = 0, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, user_id, action, resource_type, resource_id, details, correlation_id, created_at
+            FROM notify.audit WHERE tenant_id = @tenant_id
+            ORDER BY created_at DESC LIMIT @limit OFFSET @offset
+            """;
+        return await QueryAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "limit", limit);
+            AddParameter(cmd, "offset", offset);
+        }, MapAudit, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<NotifyAuditEntity>> GetByResourceAsync(string tenantId, string resourceType, string? resourceId = null, int limit = 100, CancellationToken cancellationToken = default)
+    {
+        var sql = """
+            SELECT id, tenant_id, user_id, action, resource_type, resource_id, details, correlation_id, created_at
+            FROM notify.audit WHERE tenant_id = @tenant_id AND resource_type = @resource_type
+            """;
+        if (resourceId != null) sql += " AND resource_id = @resource_id";
+        sql += " ORDER BY created_at DESC LIMIT @limit";
+
+        return await QueryAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "resource_type", resourceType);
+            if (resourceId != null) AddParameter(cmd, "resource_id", resourceId);
+            AddParameter(cmd, "limit", limit);
+        }, MapAudit, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<NotifyAuditEntity>> GetByCorrelationIdAsync(string tenantId, string correlationId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, user_id, action, resource_type, resource_id, details, correlation_id, created_at
+            FROM notify.audit WHERE tenant_id = @tenant_id AND correlation_id = @correlation_id
+            ORDER BY created_at
+            """;
+        return await QueryAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "correlation_id", correlationId); },
+            MapAudit, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<int> DeleteOldAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.audit WHERE created_at < @cutoff";
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "cutoff", cutoff);
+        return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+    }
+
+    private static NotifyAuditEntity MapAudit(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetInt64(0),
+        TenantId = reader.GetString(1),
+        UserId = GetNullableGuid(reader, 2),
+        Action = reader.GetString(3),
+        ResourceType = reader.GetString(4),
+        ResourceId = GetNullableString(reader, 5),
+        Details = GetNullableString(reader, 6),
+        CorrelationId = GetNullableString(reader, 7),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(8)
+    };
+}
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/OnCallScheduleRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/OnCallScheduleRepository.cs
new file mode 100644
index 000000000..720299a34
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/OnCallScheduleRepository.cs
@@ -0,0 +1,116 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public sealed class OnCallScheduleRepository : RepositoryBase, IOnCallScheduleRepository
+{
+    public OnCallScheduleRepository(NotifyDataSource dataSource, ILogger<OnCallScheduleRepository> logger)
+        : base(dataSource, logger) { }
+
+    public async Task<OnCallScheduleEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, timezone, rotation_type, participants, overrides, metadata, created_at, updated_at
+            FROM notify.on_call_schedules WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            MapSchedule, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<OnCallScheduleEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, timezone, rotation_type, participants, overrides, metadata, created_at, updated_at
+            FROM notify.on_call_schedules WHERE tenant_id = @tenant_id AND name = @name
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "name", name); },
+            MapSchedule, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<OnCallScheduleEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, timezone, rotation_type, participants, overrides, metadata, created_at, updated_at
+            FROM notify.on_call_schedules WHERE tenant_id = @tenant_id ORDER BY name
+            """;
+        return await QueryAsync(tenantId, sql,
+            cmd => AddParameter(cmd, "tenant_id", tenantId),
+            MapSchedule, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<OnCallScheduleEntity> CreateAsync(OnCallScheduleEntity schedule, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO notify.on_call_schedules (id, tenant_id, name, description, timezone, rotation_type, participants, overrides, metadata)
+            VALUES (@id, @tenant_id, @name, @description, @timezone, @rotation_type, @participants::jsonb, @overrides::jsonb, @metadata::jsonb)
+            RETURNING *
+            """;
+        var id = schedule.Id == Guid.Empty ? Guid.NewGuid() : schedule.Id;
+        await using var connection = await DataSource.OpenConnectionAsync(schedule.TenantId, "writer", cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "tenant_id", schedule.TenantId);
+        AddParameter(command, "name", schedule.Name);
+        AddParameter(command, "description", schedule.Description);
+        AddParameter(command, "timezone", schedule.Timezone);
+        AddParameter(command, "rotation_type", schedule.RotationType);
+        AddJsonbParameter(command, "participants", schedule.Participants);
+        AddJsonbParameter(command, "overrides", schedule.Overrides);
+        AddJsonbParameter(command, "metadata", schedule.Metadata);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapSchedule(reader);
+    }
+
+    public async Task<bool> UpdateAsync(OnCallScheduleEntity schedule, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.on_call_schedules SET name = @name, description = @description, timezone = @timezone,
+                rotation_type = @rotation_type, participants = @participants::jsonb, overrides = @overrides::jsonb, metadata = @metadata::jsonb
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        var rows = await ExecuteAsync(schedule.TenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", schedule.TenantId);
+            AddParameter(cmd, "id", schedule.Id);
+            AddParameter(cmd, "name", schedule.Name);
+            AddParameter(cmd, "description", schedule.Description);
+            AddParameter(cmd, "timezone", schedule.Timezone);
+            AddParameter(cmd, "rotation_type", schedule.RotationType);
+            AddJsonbParameter(cmd, "participants", schedule.Participants);
+            AddJsonbParameter(cmd, "overrides", schedule.Overrides);
+            AddJsonbParameter(cmd, "metadata", schedule.Metadata);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.on_call_schedules WHERE tenant_id = @tenant_id AND id = @id";
+        var rows = await ExecuteAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    private static OnCallScheduleEntity MapSchedule(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(0),
+        TenantId = reader.GetString(1),
+        Name = reader.GetString(2),
+        Description = GetNullableString(reader, 3),
+        Timezone = reader.GetString(4),
+        RotationType = reader.GetString(5),
+        Participants = reader.GetString(6),
+        Overrides = reader.GetString(7),
+        Metadata = reader.GetString(8),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(9),
+        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(10)
+    };
+}
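// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): resolving the current
// on-call user from the jsonb `participants` document. The element shape
// (UserId/Order) and the weekly round-robin are assumptions for the example;
// a real resolver would honor RotationType, Timezone, and `overrides`.
// Needs System.Text.Json and System.Linq.
// ---------------------------------------------------------------------------
sealed record Participant(string UserId, int Order); // hypothetical element shape

static string? CurrentOnCall(OnCallScheduleEntity schedule, DateTimeOffset nowUtc)
{
    var options = new JsonSerializerOptions { PropertyNameCaseInsensitive = true };
    var people = JsonSerializer.Deserialize<List<Participant>>(schedule.Participants, options);
    if (people is null || people.Count == 0) return null;
    var ordered = people.OrderBy(p => p.Order).ToList();
    var week = (int)((nowUtc - DateTimeOffset.UnixEpoch).TotalDays / 7); // naive weekly rotation
    return ordered[week % ordered.Count].UserId;
}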
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/QuietHoursRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/QuietHoursRepository.cs
new file mode 100644
index 000000000..0947a78f2
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/QuietHoursRepository.cs
@@ -0,0 +1,116 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public sealed class QuietHoursRepository : RepositoryBase, IQuietHoursRepository
+{
+    public QuietHoursRepository(NotifyDataSource dataSource, ILogger<QuietHoursRepository> logger)
+        : base(dataSource, logger) { }
+
+    public async Task<QuietHoursEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, user_id, channel_id, start_time, end_time, timezone, days_of_week, enabled, created_at, updated_at
+            FROM notify.quiet_hours WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            MapQuietHours, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<QuietHoursEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, user_id, channel_id, start_time, end_time, timezone, days_of_week, enabled, created_at, updated_at
+            FROM notify.quiet_hours WHERE tenant_id = @tenant_id ORDER BY start_time
+            """;
+        return await QueryAsync(tenantId, sql,
+            cmd => AddParameter(cmd, "tenant_id", tenantId),
+            MapQuietHours, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<QuietHoursEntity>> GetForUserAsync(string tenantId, Guid userId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, user_id, channel_id, start_time, end_time, timezone, days_of_week, enabled, created_at, updated_at
+            FROM notify.quiet_hours WHERE tenant_id = @tenant_id AND (user_id IS NULL OR user_id = @user_id) AND enabled = TRUE
+            """;
+        return await QueryAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "user_id", userId); },
+            MapQuietHours, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<QuietHoursEntity> CreateAsync(QuietHoursEntity quietHours, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO notify.quiet_hours (id, tenant_id, user_id, channel_id, start_time, end_time, timezone, days_of_week, enabled)
+            VALUES (@id, @tenant_id, @user_id, @channel_id, @start_time, @end_time, @timezone, @days_of_week, @enabled)
+            RETURNING *
+            """;
+        var id = quietHours.Id == Guid.Empty ? Guid.NewGuid() : quietHours.Id;
+        await using var connection = await DataSource.OpenConnectionAsync(quietHours.TenantId, "writer", cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "tenant_id", quietHours.TenantId);
+        AddParameter(command, "user_id", quietHours.UserId);
+        AddParameter(command, "channel_id", quietHours.ChannelId);
+        AddParameter(command, "start_time", quietHours.StartTime);
+        AddParameter(command, "end_time", quietHours.EndTime);
+        AddParameter(command, "timezone", quietHours.Timezone);
+        AddParameter(command, "days_of_week", quietHours.DaysOfWeek);
+        AddParameter(command, "enabled", quietHours.Enabled);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapQuietHours(reader);
+    }
+
+    public async Task<bool> UpdateAsync(QuietHoursEntity quietHours, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.quiet_hours SET user_id = @user_id, channel_id = @channel_id, start_time = @start_time, end_time = @end_time,
+                timezone = @timezone, days_of_week = @days_of_week, enabled = @enabled
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        var rows = await ExecuteAsync(quietHours.TenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", quietHours.TenantId);
+            AddParameter(cmd, "id", quietHours.Id);
+            AddParameter(cmd, "user_id", quietHours.UserId);
+            AddParameter(cmd, "channel_id", quietHours.ChannelId);
+            AddParameter(cmd, "start_time", quietHours.StartTime);
+            AddParameter(cmd, "end_time", quietHours.EndTime);
+            AddParameter(cmd, "timezone", quietHours.Timezone);
+            AddParameter(cmd, "days_of_week", quietHours.DaysOfWeek);
+            AddParameter(cmd, "enabled", quietHours.Enabled);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.quiet_hours WHERE tenant_id = @tenant_id AND id = @id";
+        var rows = await ExecuteAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    private static QuietHoursEntity MapQuietHours(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(0),
+        TenantId = reader.GetString(1),
+        UserId = GetNullableGuid(reader, 2),
+        ChannelId = GetNullableGuid(reader, 3),
+        StartTime = reader.GetFieldValue<TimeOnly>(4),
+        EndTime = reader.GetFieldValue<TimeOnly>(5),
+        Timezone = reader.GetString(6),
+        DaysOfWeek = reader.IsDBNull(7) ? [0, 1, 2, 3, 4, 5, 6] : reader.GetFieldValue<int[]>(7),
+        Enabled = reader.GetBoolean(8),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(9),
+        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(10)
+    };
+}
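// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): evaluating a quiet-
// hours window, including ranges that wrap past midnight. Assumes StartTime/
// EndTime map to TimeOnly (as in the mapper above) and that days_of_week uses
// 0 = Sunday, matching System.DayOfWeek. Needs System.Linq.
// ---------------------------------------------------------------------------
static bool IsQuietNow(QuietHoursEntity q, DateTimeOffset nowUtc)
{
    if (!q.Enabled) return false;
    var tz = TimeZoneInfo.FindSystemTimeZoneById(q.Timezone);
    var local = TimeZoneInfo.ConvertTime(nowUtc, tz);
    if (!q.DaysOfWeek.Contains((int)local.DayOfWeek)) return false;
    var now = TimeOnly.FromDateTime(local.DateTime);
    return q.StartTime <= q.EndTime
        ? now >= q.StartTime && now < q.EndTime   // same-day window
        : now >= q.StartTime || now < q.EndTime;  // wraps past midnight
}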
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/RuleRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/RuleRepository.cs
new file mode 100644
index 000000000..853711e80
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/RuleRepository.cs
@@ -0,0 +1,139 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public sealed class RuleRepository : RepositoryBase, IRuleRepository
+{
+    public RuleRepository(NotifyDataSource dataSource, ILogger<RuleRepository> logger)
+        : base(dataSource, logger) { }
+
+    public async Task<RuleEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, enabled, priority, event_types, filter, channel_ids, template_id, metadata, created_at, updated_at
+            FROM notify.rules WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            MapRule, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<RuleEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, enabled, priority, event_types, filter, channel_ids, template_id, metadata, created_at, updated_at
+            FROM notify.rules WHERE tenant_id = @tenant_id AND name = @name
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "name", name); },
+            MapRule, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<RuleEntity>> ListAsync(string tenantId, bool? enabled = null, CancellationToken cancellationToken = default)
+    {
+        var sql = """
+            SELECT id, tenant_id, name, description, enabled, priority, event_types, filter, channel_ids, template_id, metadata, created_at, updated_at
+            FROM notify.rules WHERE tenant_id = @tenant_id
+            """;
+        if (enabled.HasValue) sql += " AND enabled = @enabled";
+        sql += " ORDER BY priority DESC, name";
+
+        return await QueryAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            if (enabled.HasValue) AddParameter(cmd, "enabled", enabled.Value);
+        }, MapRule, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<RuleEntity>> GetMatchingRulesAsync(string tenantId, string eventType, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, enabled, priority, event_types, filter, channel_ids, template_id, metadata, created_at, updated_at
+            FROM notify.rules WHERE tenant_id = @tenant_id AND enabled = TRUE AND @event_type = ANY(event_types)
+            ORDER BY priority DESC
+            """;
+        return await QueryAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "event_type", eventType); },
+            MapRule, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<RuleEntity> CreateAsync(RuleEntity rule, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO notify.rules (id, tenant_id, name, description, enabled, priority, event_types, filter, channel_ids, template_id, metadata)
+            VALUES (@id, @tenant_id, @name, @description, @enabled, @priority, @event_types, @filter::jsonb, @channel_ids, @template_id, @metadata::jsonb)
+            RETURNING *
+            """;
+        var id = rule.Id == Guid.Empty ? Guid.NewGuid() : rule.Id;
+        await using var connection = await DataSource.OpenConnectionAsync(rule.TenantId, "writer", cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "tenant_id", rule.TenantId);
+        AddParameter(command, "name", rule.Name);
+        AddParameter(command, "description", rule.Description);
+        AddParameter(command, "enabled", rule.Enabled);
+        AddParameter(command, "priority", rule.Priority);
+        AddTextArrayParameter(command, "event_types", rule.EventTypes);
+        AddJsonbParameter(command, "filter", rule.Filter);
+        AddParameter(command, "channel_ids", rule.ChannelIds);
+        AddParameter(command, "template_id", rule.TemplateId);
+        AddJsonbParameter(command, "metadata", rule.Metadata);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapRule(reader);
+    }
+
+    public async Task<bool> UpdateAsync(RuleEntity rule, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.rules SET name = @name, description = @description, enabled = @enabled, priority = @priority,
+                event_types = @event_types, filter = @filter::jsonb, channel_ids = @channel_ids, template_id = @template_id, metadata = @metadata::jsonb
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        var rows = await ExecuteAsync(rule.TenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", rule.TenantId);
+            AddParameter(cmd, "id", rule.Id);
+            AddParameter(cmd, "name", rule.Name);
+            AddParameter(cmd, "description", rule.Description);
+            AddParameter(cmd, "enabled", rule.Enabled);
+            AddParameter(cmd, "priority", rule.Priority);
+            AddTextArrayParameter(cmd, "event_types", rule.EventTypes);
+            AddJsonbParameter(cmd, "filter", rule.Filter);
+            AddParameter(cmd, "channel_ids", rule.ChannelIds);
+            AddParameter(cmd, "template_id", rule.TemplateId);
+            AddJsonbParameter(cmd, "metadata", rule.Metadata);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.rules WHERE tenant_id = @tenant_id AND id = @id";
+        var rows = await ExecuteAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    private static RuleEntity MapRule(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(0),
+        TenantId = reader.GetString(1),
+        Name = reader.GetString(2),
+        Description = GetNullableString(reader, 3),
+        Enabled = reader.GetBoolean(4),
+        Priority = reader.GetInt32(5),
+        EventTypes = reader.IsDBNull(6) ? [] : reader.GetFieldValue<string[]>(6),
+        Filter = reader.GetString(7),
+        ChannelIds = reader.IsDBNull(8) ? [] : reader.GetFieldValue<Guid[]>(8),
+        TemplateId = GetNullableGuid(reader, 9),
+        Metadata = reader.GetString(10),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(11),
+        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(12)
+    };
+}
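// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): dispatching an event
// through matching rules. GetMatchingRulesAsync pushes the
// `@event_type = ANY(event_types)` filter into Postgres and returns rules
// ordered by priority DESC; the jsonb `filter` column still needs client-side
// evaluation. MatchesFilter and EnqueueDeliveryAsync are hypothetical helpers.
// ---------------------------------------------------------------------------
static async Task DispatchAsync(
    IRuleRepository rules, string tenantId, string eventType, string payload, CancellationToken ct)
{
    foreach (var rule in await rules.GetMatchingRulesAsync(tenantId, eventType, ct))
    {
        if (!MatchesFilter(rule.Filter, payload)) continue;                      // hypothetical
        foreach (var channelId in rule.ChannelIds)
            await EnqueueDeliveryAsync(channelId, rule.TemplateId, payload, ct); // hypothetical
    }
}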
diff --git a/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/TemplateRepository.cs b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/TemplateRepository.cs
new file mode 100644
index 000000000..04f64729e
--- /dev/null
+++ b/src/Notify/__Libraries/StellaOps.Notify.Storage.Postgres/Repositories/TemplateRepository.cs
@@ -0,0 +1,136 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Notify.Storage.Postgres.Models;
+
+namespace StellaOps.Notify.Storage.Postgres.Repositories;
+
+public sealed class TemplateRepository : RepositoryBase, ITemplateRepository
+{
+    public TemplateRepository(NotifyDataSource dataSource, ILogger<TemplateRepository> logger)
+        : base(dataSource, logger) { }
+
+    public async Task<TemplateEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, channel_type::text, subject_template, body_template, locale, metadata, created_at, updated_at
+            FROM notify.templates WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            MapTemplate, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<TemplateEntity?> GetByNameAsync(string tenantId, string name, ChannelType channelType, string locale = "en", CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, channel_type::text, subject_template, body_template, locale, metadata, created_at, updated_at
+            FROM notify.templates WHERE tenant_id = @tenant_id AND name = @name AND channel_type = @channel_type::notify.channel_type AND locale = @locale
+            """;
+        return await QuerySingleOrDefaultAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            AddParameter(cmd, "name", name);
+            AddParameter(cmd, "channel_type", ChannelTypeToString(channelType));
+            AddParameter(cmd, "locale", locale);
+        }, MapTemplate, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<IReadOnlyList<TemplateEntity>> ListAsync(string tenantId, ChannelType? channelType = null, CancellationToken cancellationToken = default)
+    {
+        var sql = """
+            SELECT id, tenant_id, name, channel_type::text, subject_template, body_template, locale, metadata, created_at, updated_at
+            FROM notify.templates WHERE tenant_id = @tenant_id
+            """;
+        if (channelType.HasValue) sql += " AND channel_type = @channel_type::notify.channel_type";
+        sql += " ORDER BY name, locale";
+
+        return await QueryAsync(tenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", tenantId);
+            if (channelType.HasValue) AddParameter(cmd, "channel_type", ChannelTypeToString(channelType.Value));
+        }, MapTemplate, cancellationToken).ConfigureAwait(false);
+    }
+
+    public async Task<TemplateEntity> CreateAsync(TemplateEntity template, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO notify.templates (id, tenant_id, name, channel_type, subject_template, body_template, locale, metadata)
+            VALUES (@id, @tenant_id, @name, @channel_type::notify.channel_type, @subject_template, @body_template, @locale, @metadata::jsonb)
+            RETURNING id, tenant_id, name, channel_type::text, subject_template, body_template, locale, metadata, created_at, updated_at
+            """;
+        var id = template.Id == Guid.Empty ? Guid.NewGuid() : template.Id;
+        await using var connection = await DataSource.OpenConnectionAsync(template.TenantId, "writer", cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "tenant_id", template.TenantId);
+        AddParameter(command, "name", template.Name);
+        AddParameter(command, "channel_type", ChannelTypeToString(template.ChannelType));
+        AddParameter(command, "subject_template", template.SubjectTemplate);
+        AddParameter(command, "body_template", template.BodyTemplate);
+        AddParameter(command, "locale", template.Locale);
+        AddJsonbParameter(command, "metadata", template.Metadata);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapTemplate(reader);
+    }
+
+    public async Task<bool> UpdateAsync(TemplateEntity template, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE notify.templates SET name = @name, channel_type = @channel_type::notify.channel_type,
+                subject_template = @subject_template, body_template = @body_template, locale = @locale, metadata = @metadata::jsonb
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+        var rows = await ExecuteAsync(template.TenantId, sql, cmd =>
+        {
+            AddParameter(cmd, "tenant_id", template.TenantId);
+            AddParameter(cmd, "id", template.Id);
+            AddParameter(cmd, "name", template.Name);
+            AddParameter(cmd, "channel_type", ChannelTypeToString(template.ChannelType));
+            AddParameter(cmd, "subject_template", template.SubjectTemplate);
+            AddParameter(cmd, "body_template", template.BodyTemplate);
+            AddParameter(cmd, "locale", template.Locale);
+            AddJsonbParameter(cmd, "metadata", template.Metadata);
+        }, cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM notify.templates WHERE tenant_id = @tenant_id AND id = @id";
+        var rows = await ExecuteAsync(tenantId, sql,
+            cmd => { AddParameter(cmd, "tenant_id", tenantId); AddParameter(cmd, "id", id); },
+            cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    private static TemplateEntity MapTemplate(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(0),
+        TenantId = reader.GetString(1),
+        Name = reader.GetString(2),
+        ChannelType = ParseChannelType(reader.GetString(3)),
+        SubjectTemplate = GetNullableString(reader, 4),
+        BodyTemplate = reader.GetString(5),
+        Locale = reader.GetString(6),
+        Metadata = reader.GetString(7),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(8),
+        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(9)
+    };
+
+    private static string ChannelTypeToString(ChannelType t) => t switch
+    {
+        ChannelType.Email => "email", ChannelType.Slack => "slack", ChannelType.Teams => "teams",
+        ChannelType.Webhook => "webhook", ChannelType.PagerDuty => "pagerduty", ChannelType.OpsGenie => "opsgenie",
+        _ => throw new ArgumentException($"Unknown channel type: {t}", nameof(t))
+    };
+
+    private static ChannelType ParseChannelType(string s) => s switch
+    {
+        "email" => ChannelType.Email, "slack" => ChannelType.Slack, "teams" => ChannelType.Teams,
+        "webhook" => ChannelType.Webhook, "pagerduty" => ChannelType.PagerDuty, "opsgenie" => ChannelType.OpsGenie,
+        _ => throw new ArgumentException($"Unknown channel type: {s}", nameof(s))
+    };
+}
diff --git a/src/Policy/StellaOps.Policy.Scoring/Receipts/ReceiptBuilder.cs b/src/Policy/StellaOps.Policy.Scoring/Receipts/ReceiptBuilder.cs
index a9f0d0e6f..d708c842a 100644
--- a/src/Policy/StellaOps.Policy.Scoring/Receipts/ReceiptBuilder.cs
+++ b/src/Policy/StellaOps.Policy.Scoring/Receipts/ReceiptBuilder.cs
@@ -34,7 +34,7 @@ public interface IReceiptBuilder
 /// </summary>
 public sealed class ReceiptBuilder : IReceiptBuilder
 {
-    internal static readonly JsonSerializerOptions SerializerOptions = new()
+    public static readonly JsonSerializerOptions SerializerOptions = new()
     {
         PropertyNamingPolicy = null,
         DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
diff --git a/src/Policy/__Libraries/StellaOps.Policy.Storage.Postgres/Repositories/PostgresReceiptRepository.cs b/src/Policy/__Libraries/StellaOps.Policy.Storage.Postgres/Repositories/PostgresReceiptRepository.cs
index b5900ab06..b781e4a6b 100644
--- a/src/Policy/__Libraries/StellaOps.Policy.Storage.Postgres/Repositories/PostgresReceiptRepository.cs
+++ b/src/Policy/__Libraries/StellaOps.Policy.Storage.Postgres/Repositories/PostgresReceiptRepository.cs
@@ -1,5 +1,6 @@
 using System.Text.Json;
 using System;
+using System.Collections.Immutable;
 using Microsoft.Extensions.Logging;
 using Npgsql;
 using NpgsqlTypes;
@@ -164,7 +165,8 @@ returning {Columns};";
             Hash = reader.GetString(idx.PolicyHash),
             ActivatedAt = null
         },
-        BaseMetrics = Deserialize(reader, idx.BaseMetrics),
+        BaseMetrics = Deserialize(reader, idx.BaseMetrics)
+            ?? throw new InvalidOperationException("cvss_receipts.base_metrics missing"),
         ThreatMetrics = Deserialize(reader, idx.ThreatMetrics),
         EnvironmentalMetrics = Deserialize(reader, idx.EnvironmentalMetrics),
         SupplementalMetrics = Deserialize(reader, idx.SupplementalMetrics),
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/JobHistoryEntity.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/JobHistoryEntity.cs
new file mode 100644
index 000000000..fa376ab70
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/JobHistoryEntity.cs
@@ -0,0 +1,82 @@
+namespace StellaOps.Scheduler.Storage.Postgres.Models;
+
+/// <summary>
+/// Represents a job history entity in the scheduler schema.
+/// </summary>
+public sealed class JobHistoryEntity
+{
+    /// <summary>
+    /// Unique history entry identifier.
+    /// </summary>
+    public long Id { get; init; }
+
+    /// <summary>
+    /// Original job ID.
+    /// </summary>
+    public required Guid JobId { get; init; }
+
+    /// <summary>
+    /// Tenant this job belonged to.
+    /// </summary>
+    public required string TenantId { get; init; }
+
+    /// <summary>
+    /// Optional project identifier.
+    /// </summary>
+    public string? ProjectId { get; init; }
+
+    /// <summary>
+    /// Type of job that was executed.
+    /// </summary>
+    public required string JobType { get; init; }
+
+    /// <summary>
+    /// Final job status.
+    /// </summary>
+    public JobStatus Status { get; init; }
+
+    /// <summary>
+    /// Attempt number when archived.
+    /// </summary>
+    public int Attempt { get; init; }
+
+    /// <summary>
+    /// SHA256 digest of payload.
+    /// </summary>
+    public required string PayloadDigest { get; init; }
+
+    /// <summary>
+    /// Job result as JSON.
+    /// </summary>
+    public string? Result { get; init; }
+
+    /// <summary>
+    /// Reason for failure/cancellation.
+    /// </summary>
+    public string? Reason { get; init; }
+
+    /// <summary>
+    /// Worker that executed the job.
+    /// </summary>
+    public string? WorkerId { get; init; }
+
+    /// <summary>
+    /// Duration in milliseconds.
+    /// </summary>
+    public long? DurationMs { get; init; }
+
+    /// <summary>
+    /// When the job was created.
+    /// </summary>
+    public DateTimeOffset CreatedAt { get; init; }
+
+    /// <summary>
+    /// When the job completed.
+    /// </summary>
+    public DateTimeOffset CompletedAt { get; init; }
+
+    /// <summary>
+    /// When the job was archived to history.
+    /// </summary>
+    public DateTimeOffset ArchivedAt { get; init; }
+}
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/LockEntity.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/LockEntity.cs
new file mode 100644
index 000000000..a8bb6fd63
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/LockEntity.cs
@@ -0,0 +1,37 @@
+namespace StellaOps.Scheduler.Storage.Postgres.Models;
+
+/// <summary>
+/// Represents a distributed lock entity in the scheduler schema.
+/// </summary>
+public sealed class LockEntity
+{
+    /// <summary>
+    /// Lock key (primary key).
+    /// </summary>
+    public required string LockKey { get; init; }
+
+    /// <summary>
+    /// Tenant this lock belongs to.
+    /// </summary>
+    public required string TenantId { get; init; }
+
+    /// <summary>
+    /// ID of the holder that acquired the lock.
+    /// </summary>
+    public required string HolderId { get; init; }
+
+    /// <summary>
+    /// When the lock was acquired.
+    /// </summary>
+    public DateTimeOffset AcquiredAt { get; init; }
+
+    /// <summary>
+    /// When the lock expires.
+    /// </summary>
+    public DateTimeOffset ExpiresAt { get; init; }
+
+    /// <summary>
+    /// Lock metadata as JSON.
+    /// </summary>
+    public string Metadata { get; init; } = "{}";
+}
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/MetricsEntity.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/MetricsEntity.cs
new file mode 100644
index 000000000..4f84b31f5
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/MetricsEntity.cs
@@ -0,0 +1,77 @@
+namespace StellaOps.Scheduler.Storage.Postgres.Models;
+
+/// <summary>
+/// Represents a metrics entity in the scheduler schema.
+/// </summary>
+public sealed class MetricsEntity
+{
+    /// <summary>
+    /// Unique metrics entry identifier.
+    /// </summary>
+    public long Id { get; init; }
+
+    /// <summary>
+    /// Tenant this metric belongs to.
+    /// </summary>
+    public required string TenantId { get; init; }
+
+    /// <summary>
+    /// Job type for this metric.
+    /// </summary>
+    public required string JobType { get; init; }
+
+    /// <summary>
+    /// Period start time.
+    /// </summary>
+    public DateTimeOffset PeriodStart { get; init; }
+
+    /// <summary>
+    /// Period end time.
+    /// </summary>
+    public DateTimeOffset PeriodEnd { get; init; }
+
+    /// <summary>
+    /// Number of jobs created in this period.
+    /// </summary>
+    public long JobsCreated { get; init; }
+
+    /// <summary>
+    /// Number of jobs completed in this period.
+    /// </summary>
+    public long JobsCompleted { get; init; }
+
+    /// <summary>
+    /// Number of jobs failed in this period.
+    /// </summary>
+    public long JobsFailed { get; init; }
+
+    /// <summary>
+    /// Number of jobs timed out in this period.
+    /// </summary>
+    public long JobsTimedOut { get; init; }
+
+    /// <summary>
+    /// Average duration in milliseconds.
+    /// </summary>
+    public long? AvgDurationMs { get; init; }
+
+    /// <summary>
+    /// 50th percentile duration.
+    /// </summary>
+    public long? P50DurationMs { get; init; }
+
+    /// <summary>
+    /// 95th percentile duration.
+    /// </summary>
+    public long? P95DurationMs { get; init; }
+
+    /// <summary>
+    /// 99th percentile duration.
+    /// </summary>
+    public long? P99DurationMs { get; init; }
+
+    /// <summary>
+    /// When this metric was created.
+    /// </summary>
+    public DateTimeOffset CreatedAt { get; init; }
+}
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/WorkerEntity.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/WorkerEntity.cs
new file mode 100644
index 000000000..0a0e8ce9a
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Models/WorkerEntity.cs
@@ -0,0 +1,72 @@
+namespace StellaOps.Scheduler.Storage.Postgres.Models;
+
+/// <summary>
+/// Worker status values.
+/// </summary>
+public static class WorkerStatus
+{
+    public const string Active = "active";
+    public const string Draining = "draining";
+    public const string Stopped = "stopped";
+}
+
+/// <summary>
+/// Represents a worker entity in the scheduler schema.
+/// </summary>
+public sealed class WorkerEntity
+{
+    /// <summary>
+    /// Unique worker identifier.
+    /// </summary>
+    public required string Id { get; init; }
+
+    /// <summary>
+    /// Optional tenant this worker is dedicated to.
+    /// </summary>
+    public string? TenantId { get; init; }
+
+    /// <summary>
+    /// Hostname of the worker.
+    /// </summary>
+    public required string Hostname { get; init; }
+
+    /// <summary>
+    /// Process ID of the worker.
+    /// </summary>
+    public int? ProcessId { get; init; }
+
+    /// <summary>
+    /// Job types this worker can process.
+    /// </summary>
+    public string[] JobTypes { get; init; } = [];
+
+    /// <summary>
+    /// Maximum concurrent jobs this worker can handle.
+    /// </summary>
+    public int MaxConcurrentJobs { get; init; } = 1;
+
+    /// <summary>
+    /// Current number of jobs being processed.
+    /// </summary>
+    public int CurrentJobs { get; init; }
+
+    /// <summary>
+    /// Worker status.
+    /// </summary>
+    public string Status { get; init; } = WorkerStatus.Active;
+
+    /// <summary>
+    /// Last heartbeat timestamp.
+    /// </summary>
+    public DateTimeOffset LastHeartbeatAt { get; init; }
+
+    /// <summary>
+    /// When the worker was registered.
+    /// </summary>
+    public DateTimeOffset RegisteredAt { get; init; }
+
+    /// <summary>
+    /// Worker metadata as JSON.
+    /// </summary>
+    public string Metadata { get; init; } = "{}";
+}
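// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): building a history
// row from a finished job before archiving. Field names match the entities
// above; the `job` source object (and its StartedAt property) is hypothetical.
// ---------------------------------------------------------------------------
static JobHistoryEntity ToHistory(dynamic job, DateTimeOffset completedAt) => new()
{
    JobId = job.Id,
    TenantId = job.TenantId,
    JobType = job.JobType,
    Status = JobStatus.Succeeded,
    Attempt = job.Attempt,
    PayloadDigest = job.PayloadDigest,
    WorkerId = job.WorkerId,
    CreatedAt = job.CreatedAt,
    CompletedAt = completedAt,
    // duration measured from lease start, not enqueue time
    DurationMs = (long)(completedAt - job.StartedAt).TotalMilliseconds
};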
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/DistributedLockRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/DistributedLockRepository.cs
new file mode 100644
index 000000000..e49a1ef57
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/DistributedLockRepository.cs
@@ -0,0 +1,145 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// PostgreSQL repository for distributed lock operations.
+/// </summary>
+public sealed class DistributedLockRepository : RepositoryBase, IDistributedLockRepository
+{
+    /// <summary>
+    /// Creates a new distributed lock repository.
+    /// </summary>
+    public DistributedLockRepository(SchedulerDataSource dataSource, ILogger<DistributedLockRepository> logger)
+        : base(dataSource, logger)
+    {
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> TryAcquireAsync(string tenantId, string lockKey, string holderId, TimeSpan duration, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO scheduler.locks (lock_key, tenant_id, holder_id, expires_at)
+            VALUES (@lock_key, @tenant_id, @holder_id, NOW() + @duration)
+            ON CONFLICT (lock_key) DO UPDATE SET
+                holder_id = EXCLUDED.holder_id,
+                tenant_id = EXCLUDED.tenant_id,
+                acquired_at = NOW(),
+                expires_at = NOW() + EXCLUDED.expires_at - EXCLUDED.acquired_at
+            WHERE scheduler.locks.expires_at < NOW()
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        AddParameter(command, "lock_key", lockKey);
+        AddParameter(command, "tenant_id", tenantId);
+        AddParameter(command, "holder_id", holderId);
+        AddParameter(command, "duration", duration);
+
+        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<LockEntity?> GetAsync(string lockKey, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT lock_key, tenant_id, holder_id, acquired_at, expires_at, metadata
+            FROM scheduler.locks
+            WHERE lock_key = @lock_key AND expires_at > NOW()
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "lock_key", lockKey);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapLock(reader) : null;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> ExtendAsync(string lockKey, string holderId, TimeSpan extension, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE scheduler.locks
+            SET expires_at = expires_at + @extension
+            WHERE lock_key = @lock_key AND holder_id = @holder_id AND expires_at > NOW()
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        AddParameter(command, "lock_key", lockKey);
+        AddParameter(command, "holder_id", holderId);
+        AddParameter(command, "extension", extension);
+
+        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> ReleaseAsync(string lockKey, string holderId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            DELETE FROM scheduler.locks
+            WHERE lock_key = @lock_key AND holder_id = @holder_id
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        AddParameter(command, "lock_key", lockKey);
+        AddParameter(command, "holder_id", holderId);
+
+        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<int> CleanupExpiredAsync(CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM scheduler.locks WHERE expires_at < NOW()";
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<LockEntity>> ListByTenantAsync(string tenantId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT lock_key, tenant_id, holder_id, acquired_at, expires_at, metadata
+            FROM scheduler.locks
+            WHERE tenant_id = @tenant_id AND expires_at > NOW()
+            ORDER BY acquired_at DESC
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "tenant_id", tenantId);
+
+        var results = new List<LockEntity>();
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
+        {
+            results.Add(MapLock(reader));
+        }
+        return results;
+    }
+
+    private static LockEntity MapLock(NpgsqlDataReader reader) => new()
+    {
+        LockKey = reader.GetString(reader.GetOrdinal("lock_key")),
+        TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
+        HolderId = reader.GetString(reader.GetOrdinal("holder_id")),
+        AcquiredAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("acquired_at")),
+        ExpiresAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("expires_at")),
+        Metadata = reader.GetString(reader.GetOrdinal("metadata"))
+    };
+}
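// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): the acquire/work/
// release cycle against the lock repository above. The lock key and `work`
// callback are hypothetical; note that TryAcquireAsync only steals rows whose
// lease has already expired, so a `false` result means another holder is live.
// ---------------------------------------------------------------------------
static async Task RunExclusiveAsync(
    IDistributedLockRepository locks, string tenantId,
    Func<CancellationToken, Task> work, CancellationToken ct)
{
    const string lockKey = "reconcile:nightly"; // hypothetical
    var holderId = $"{Environment.MachineName}:{Environment.ProcessId}";
    if (!await locks.TryAcquireAsync(tenantId, lockKey, holderId, TimeSpan.FromMinutes(5), ct))
        return; // someone else holds a live lease
    try
    {
        // a long-running holder would periodically renew before expiry:
        // await locks.ExtendAsync(lockKey, holderId, TimeSpan.FromMinutes(5), ct);
        await work(ct);
    }
    finally
    {
        await locks.ReleaseAsync(lockKey, holderId, ct);
    }
}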
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IDistributedLockRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IDistributedLockRepository.cs
new file mode 100644
index 000000000..f58a10cd5
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IDistributedLockRepository.cs
@@ -0,0 +1,39 @@
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// Repository interface for distributed lock operations.
+/// </summary>
+public interface IDistributedLockRepository
+{
+    /// <summary>
+    /// Tries to acquire a lock.
+    /// </summary>
+    Task<bool> TryAcquireAsync(string tenantId, string lockKey, string holderId, TimeSpan duration, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets a lock by key.
+    /// </summary>
+    Task<LockEntity?> GetAsync(string lockKey, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Extends a lock.
+    /// </summary>
+    Task<bool> ExtendAsync(string lockKey, string holderId, TimeSpan extension, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Releases a lock.
+    /// </summary>
+    Task<bool> ReleaseAsync(string lockKey, string holderId, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Cleans up expired locks.
+    /// </summary>
+    Task<int> CleanupExpiredAsync(CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists all locks for a tenant.
+    /// </summary>
+    Task<IReadOnlyList<LockEntity>> ListByTenantAsync(string tenantId, CancellationToken cancellationToken = default);
+}
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IJobHistoryRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IJobHistoryRepository.cs
new file mode 100644
index 000000000..51f92d0ef
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IJobHistoryRepository.cs
@@ -0,0 +1,61 @@
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// Repository interface for job history operations.
+/// </summary>
+public interface IJobHistoryRepository
+{
+    /// <summary>
+    /// Archives a completed job.
+    /// </summary>
+    Task<JobHistoryEntity> ArchiveAsync(JobHistoryEntity history, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets history for a specific job.
+    /// </summary>
+    Task<IReadOnlyList<JobHistoryEntity>> GetByJobIdAsync(Guid jobId, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists job history for a tenant.
+    /// </summary>
+    Task<IReadOnlyList<JobHistoryEntity>> ListAsync(
+        string tenantId,
+        int limit = 100,
+        int offset = 0,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists job history by type.
+    /// </summary>
+    Task<IReadOnlyList<JobHistoryEntity>> ListByJobTypeAsync(
+        string tenantId,
+        string jobType,
+        int limit = 100,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists job history by status.
+    /// </summary>
+    Task<IReadOnlyList<JobHistoryEntity>> ListByStatusAsync(
+        string tenantId,
+        JobStatus status,
+        int limit = 100,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists job history in a time range.
+    /// </summary>
+    Task<IReadOnlyList<JobHistoryEntity>> ListByTimeRangeAsync(
+        string tenantId,
+        DateTimeOffset from,
+        DateTimeOffset to,
+        int limit = 1000,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Deletes old history entries.
+    /// </summary>
+    Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
+}
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IMetricsRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IMetricsRepository.cs
new file mode 100644
index 000000000..44a269776
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IMetricsRepository.cs
@@ -0,0 +1,45 @@
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// Repository interface for metrics operations.
+/// </summary>
+public interface IMetricsRepository
+{
+    /// <summary>
+    /// Records or updates metrics for a period.
+    /// </summary>
+    Task<MetricsEntity> UpsertAsync(MetricsEntity metrics, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets metrics for a tenant and job type.
+    /// </summary>
+    Task<IReadOnlyList<MetricsEntity>> GetAsync(
+        string tenantId,
+        string jobType,
+        DateTimeOffset from,
+        DateTimeOffset to,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets aggregated metrics for a tenant.
+    /// </summary>
+    Task<IReadOnlyList<MetricsEntity>> GetByTenantAsync(
+        string tenantId,
+        DateTimeOffset from,
+        DateTimeOffset to,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets latest metrics for all job types.
+    /// </summary>
+    Task<IReadOnlyList<MetricsEntity>> GetLatestAsync(
+        string tenantId,
+        CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Deletes old metrics.
+    /// </summary>
+    Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default);
+}
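// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): deriving a failure
// rate from the aggregated rows IMetricsRepository returns. Uses only fields
// declared on MetricsEntity above; needs System.Linq.
// ---------------------------------------------------------------------------
static async Task<double> FailureRateAsync(
    IMetricsRepository metrics, string tenantId,
    DateTimeOffset from, DateTimeOffset to, CancellationToken ct)
{
    var rows = await metrics.GetByTenantAsync(tenantId, from, to, ct);
    var failed = rows.Sum(m => m.JobsFailed + m.JobsTimedOut);
    var total = rows.Sum(m => m.JobsCompleted) + failed;
    return total == 0 ? 0.0 : (double)failed / total;
}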
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/ITriggerRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/ITriggerRepository.cs
new file mode 100644
index 000000000..0b78c7470
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/ITriggerRepository.cs
@@ -0,0 +1,59 @@
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// Repository interface for trigger operations.
+/// </summary>
+public interface ITriggerRepository
+{
+    /// <summary>
+    /// Gets a trigger by ID.
+    /// </summary>
+    Task<TriggerEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets a trigger by name.
+    /// </summary>
+    Task<TriggerEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists all triggers for a tenant.
+    /// </summary>
+    Task<IReadOnlyList<TriggerEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets triggers that are due to fire.
+    /// </summary>
+    Task<IReadOnlyList<TriggerEntity>> GetDueTriggersAsync(int limit = 100, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Creates a new trigger.
+    /// </summary>
+    Task<TriggerEntity> CreateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Updates a trigger.
+    /// </summary>
+    Task<bool> UpdateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Records a trigger fire event.
+    /// </summary>
+    Task RecordFireAsync(string tenantId, Guid triggerId, Guid jobId, DateTimeOffset? nextFireAt, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Records a trigger misfire.
+    /// </summary>
+    Task RecordMisfireAsync(string tenantId, Guid triggerId, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Enables or disables a trigger.
+    /// </summary>
+    Task<bool> SetEnabledAsync(string tenantId, Guid id, bool enabled, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Deletes a trigger.
+    /// </summary>
+    Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default);
+}
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IWorkerRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IWorkerRepository.cs
new file mode 100644
index 000000000..770e102fc
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/IWorkerRepository.cs
@@ -0,0 +1,54 @@
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// Repository interface for worker operations.
+/// </summary>
+public interface IWorkerRepository
+{
+    /// <summary>
+    /// Gets a worker by ID.
+    /// </summary>
+    Task<WorkerEntity?> GetByIdAsync(string id, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists all workers.
+    /// </summary>
+    Task<IReadOnlyList<WorkerEntity>> ListAsync(CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Lists workers by status.
+    /// </summary>
+    Task<IReadOnlyList<WorkerEntity>> ListByStatusAsync(string status, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets workers for a specific tenant.
+    /// </summary>
+    Task<IReadOnlyList<WorkerEntity>> GetByTenantIdAsync(string tenantId, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Registers a new worker or updates existing.
+    /// </summary>
+    Task<WorkerEntity> UpsertAsync(WorkerEntity worker, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Updates worker heartbeat.
+    /// </summary>
+    Task<bool> HeartbeatAsync(string id, int currentJobs, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Updates worker status.
+    /// </summary>
+    Task<bool> SetStatusAsync(string id, string status, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Removes a worker.
+    /// </summary>
+    Task<bool> DeleteAsync(string id, CancellationToken cancellationToken = default);
+
+    /// <summary>
+    /// Gets stale workers (no heartbeat in duration).
+    /// </summary>
+    Task<IReadOnlyList<WorkerEntity>> GetStaleWorkersAsync(TimeSpan staleDuration, CancellationToken cancellationToken = default);
+}
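// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not part of the patch): a worker heartbeat
// loop and the supervisor-side reaper that pairs with it, using the
// IWorkerRepository contract above. Interval and staleness threshold are
// assumptions (3 missed 15 s beats ≈ 45 s, mirroring maxHeartbeatMisses).
// ---------------------------------------------------------------------------
static async Task HeartbeatLoopAsync(
    IWorkerRepository workers, string workerId, Func<int> inFlight, CancellationToken ct)
{
    while (!ct.IsCancellationRequested)
    {
        await workers.HeartbeatAsync(workerId, inFlight(), ct);
        await Task.Delay(TimeSpan.FromSeconds(15), ct);
    }
}

static async Task ReapStaleWorkersAsync(IWorkerRepository workers, CancellationToken ct)
{
    foreach (var stale in await workers.GetStaleWorkersAsync(TimeSpan.FromSeconds(45), ct))
        await workers.SetStatusAsync(stale.Id, WorkerStatus.Stopped, ct);
}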
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<JobHistoryEntity>> ListByJobTypeAsync(
+        string tenantId,
+        string jobType,
+        int limit = 100,
+        CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
+                   result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
+            FROM scheduler.job_history
+            WHERE tenant_id = @tenant_id AND job_type = @job_type
+            ORDER BY completed_at DESC
+            LIMIT @limit
+            """;
+
+        return await QueryAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "job_type", jobType);
+                AddParameter(cmd, "limit", limit);
+            },
+            MapJobHistory,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<JobHistoryEntity>> ListByStatusAsync(
+        string tenantId,
+        JobStatus status,
+        int limit = 100,
+        CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
+                   result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
+            FROM scheduler.job_history
+            WHERE tenant_id = @tenant_id AND status = @status::scheduler.job_status
+            ORDER BY completed_at DESC
+            LIMIT @limit
+            """;
+
+        return await QueryAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "status", status.ToString().ToLowerInvariant());
+                AddParameter(cmd, "limit", limit);
+            },
+            MapJobHistory,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<JobHistoryEntity>> ListByTimeRangeAsync(
+        string tenantId,
+        DateTimeOffset from,
+        DateTimeOffset to,
+        int limit = 1000,
+        CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, job_id, tenant_id, project_id, job_type, status, attempt, payload_digest,
+                   result, reason, worker_id, duration_ms, created_at, completed_at, archived_at
+            FROM scheduler.job_history
+            WHERE tenant_id = @tenant_id AND completed_at >= @from AND completed_at < @to
+            ORDER BY completed_at DESC
+            LIMIT @limit
+            """;
+
+        return await QueryAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "from", from);
+                AddParameter(cmd, "to", to);
+                AddParameter(cmd, "limit", limit);
+            },
+            MapJobHistory,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM scheduler.job_history WHERE archived_at < @cutoff";
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "cutoff", cutoff);
+
+        return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+    }
+
+    private static JobHistoryEntity MapJobHistory(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetInt64(reader.GetOrdinal("id")),
+        JobId = reader.GetGuid(reader.GetOrdinal("job_id")),
+        TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
+        ProjectId = GetNullableString(reader, reader.GetOrdinal("project_id")),
+        JobType = reader.GetString(reader.GetOrdinal("job_type")),
+        Status = ParseJobStatus(reader.GetString(reader.GetOrdinal("status"))),
+        Attempt = reader.GetInt32(reader.GetOrdinal("attempt")),
+        PayloadDigest = reader.GetString(reader.GetOrdinal("payload_digest")),
+        Result = GetNullableString(reader, reader.GetOrdinal("result")),
+        Reason = GetNullableString(reader, reader.GetOrdinal("reason")),
+        WorkerId = GetNullableString(reader, reader.GetOrdinal("worker_id")),
+        DurationMs = reader.IsDBNull(reader.GetOrdinal("duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("duration_ms")),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
+        CompletedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("completed_at")),
+        ArchivedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("archived_at"))
+    };
+
+    private static JobStatus ParseJobStatus(string status) => status switch
+    {
+        "pending" => JobStatus.Pending,
+        "scheduled" => JobStatus.Scheduled,
+        "leased" => JobStatus.Leased,
+        "running" => JobStatus.Running,
+        "succeeded" => JobStatus.Succeeded,
+        "failed" => JobStatus.Failed,
+        "canceled" => JobStatus.Canceled,
+        "timed_out" => JobStatus.TimedOut,
+        _ => throw new ArgumentException($"Unknown job status: {status}", nameof(status))
+    };
+}
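ArchiveAsync writes one immutable row per finished attempt on the tenant-scoped writer connection, while DeleteOlderThanAsync prunes globally on archived_at via the system connection, so retention reduces to a single periodic call. A hedged sketch follows; the wrapper class and the 30-day retention window are chosen purely for illustration.

using StellaOps.Scheduler.Storage.Postgres.Models;
using StellaOps.Scheduler.Storage.Postgres.Repositories;

// Sketch only: archive a finished attempt, then prune expired history.
// The 30-day window is an assumption, not taken from this diff.
public static class JobHistoryRetentionSketch
{
    public static async Task ArchiveAndPruneAsync(
        IJobHistoryRepository history,
        JobHistoryEntity finished,
        CancellationToken ct)
    {
        // One row per attempt; replays of the same job add further rows.
        await history.ArchiveAsync(finished, ct);

        // Global prune keyed on archived_at; runs on the system connection.
        var cutoff = DateTimeOffset.UtcNow.AddDays(-30);
        var removed = await history.DeleteOlderThanAsync(cutoff, ct);
        _ = removed; // a real caller would log or meter the pruned count
    }
}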
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/MetricsRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/MetricsRepository.cs
new file mode 100644
index 000000000..87490085a
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/MetricsRepository.cs
@@ -0,0 +1,178 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// PostgreSQL repository for metrics operations.
+/// </summary>
+public sealed class MetricsRepository : RepositoryBase, IMetricsRepository
+{
+    /// <summary>
+    /// Creates a new metrics repository.
+    /// </summary>
+    public MetricsRepository(SchedulerDataSource dataSource, ILogger<MetricsRepository> logger)
+        : base(dataSource, logger)
+    {
+    }
+
+    /// <inheritdoc/>
+    public async Task<MetricsEntity> UpsertAsync(MetricsEntity metrics, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO scheduler.metrics (
+                tenant_id, job_type, period_start, period_end, jobs_created, jobs_completed,
+                jobs_failed, jobs_timed_out, avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms
+            )
+            VALUES (
+                @tenant_id, @job_type, @period_start, @period_end, @jobs_created, @jobs_completed,
+                @jobs_failed, @jobs_timed_out, @avg_duration_ms, @p50_duration_ms, @p95_duration_ms, @p99_duration_ms
+            )
+            ON CONFLICT (tenant_id, job_type, period_start) DO UPDATE SET
+                period_end = EXCLUDED.period_end,
+                jobs_created = EXCLUDED.jobs_created,
+                jobs_completed = EXCLUDED.jobs_completed,
+                jobs_failed = EXCLUDED.jobs_failed,
+                jobs_timed_out = EXCLUDED.jobs_timed_out,
+                avg_duration_ms = EXCLUDED.avg_duration_ms,
+                p50_duration_ms = EXCLUDED.p50_duration_ms,
+                p95_duration_ms = EXCLUDED.p95_duration_ms,
+                p99_duration_ms = EXCLUDED.p99_duration_ms
+            RETURNING *
+            """;
+
+        await using var connection = await DataSource.OpenConnectionAsync(metrics.TenantId, "writer", cancellationToken)
+            .ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        AddParameter(command, "tenant_id", metrics.TenantId);
+        AddParameter(command, "job_type", metrics.JobType);
+        AddParameter(command, "period_start", metrics.PeriodStart);
+        AddParameter(command, "period_end", metrics.PeriodEnd);
+        AddParameter(command, "jobs_created", metrics.JobsCreated);
+        AddParameter(command, "jobs_completed", metrics.JobsCompleted);
+        AddParameter(command, "jobs_failed", metrics.JobsFailed);
+        AddParameter(command, "jobs_timed_out", metrics.JobsTimedOut);
+        AddParameter(command, "avg_duration_ms", metrics.AvgDurationMs);
+        AddParameter(command, "p50_duration_ms", metrics.P50DurationMs);
+        AddParameter(command, "p95_duration_ms", metrics.P95DurationMs);
+        AddParameter(command, "p99_duration_ms", metrics.P99DurationMs);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapMetrics(reader);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<MetricsEntity>> GetAsync(
+        string tenantId,
+        string jobType,
+        DateTimeOffset from,
+        DateTimeOffset to,
+        CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, job_type, period_start, period_end, jobs_created, jobs_completed,
+                   jobs_failed, jobs_timed_out, avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms, created_at
+            FROM scheduler.metrics
+            WHERE tenant_id = @tenant_id AND job_type = @job_type
+              AND period_start >= @from AND period_start < @to
+            ORDER BY period_start
+            """;
+
+        return await QueryAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "job_type", jobType);
+                AddParameter(cmd, "from", from);
+                AddParameter(cmd, "to", to);
+            },
+            MapMetrics,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<MetricsEntity>> GetByTenantAsync(
+        string tenantId,
+        DateTimeOffset from,
+        DateTimeOffset to,
+        CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, job_type, period_start, period_end, jobs_created, jobs_completed,
+                   jobs_failed, jobs_timed_out, avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms, created_at
+            FROM scheduler.metrics
+            WHERE tenant_id = @tenant_id AND period_start >= @from AND period_start < @to
+            ORDER BY period_start, job_type
+            """;
+
+        return await QueryAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "from", from);
+                AddParameter(cmd, "to", to);
+            },
+            MapMetrics,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<MetricsEntity>> GetLatestAsync(
+        string tenantId,
+        CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT DISTINCT ON (job_type) id, tenant_id, job_type, period_start, period_end,
+                   jobs_created, jobs_completed, jobs_failed, jobs_timed_out,
+                   avg_duration_ms, p50_duration_ms, p95_duration_ms, p99_duration_ms, created_at
+            FROM scheduler.metrics
+            WHERE tenant_id = @tenant_id
+            ORDER BY job_type, period_start DESC
+            """;
+
+        return await QueryAsync(
+            tenantId,
+            sql,
+            cmd => AddParameter(cmd, "tenant_id", tenantId),
+            MapMetrics,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<int> DeleteOlderThanAsync(DateTimeOffset cutoff, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM scheduler.metrics WHERE period_end < @cutoff";
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "cutoff", cutoff);
+
+        return await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+    }
+
+    private static MetricsEntity MapMetrics(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetInt64(reader.GetOrdinal("id")),
+        TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
+        JobType = reader.GetString(reader.GetOrdinal("job_type")),
+        PeriodStart = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("period_start")),
+        PeriodEnd = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("period_end")),
+        JobsCreated = reader.GetInt64(reader.GetOrdinal("jobs_created")),
+        JobsCompleted = reader.GetInt64(reader.GetOrdinal("jobs_completed")),
+        JobsFailed = reader.GetInt64(reader.GetOrdinal("jobs_failed")),
+        JobsTimedOut = reader.GetInt64(reader.GetOrdinal("jobs_timed_out")),
+        AvgDurationMs = reader.IsDBNull(reader.GetOrdinal("avg_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("avg_duration_ms")),
+        P50DurationMs = reader.IsDBNull(reader.GetOrdinal("p50_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("p50_duration_ms")),
+        P95DurationMs = reader.IsDBNull(reader.GetOrdinal("p95_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("p95_duration_ms")),
+        P99DurationMs = reader.IsDBNull(reader.GetOrdinal("p99_duration_ms")) ? null : reader.GetInt64(reader.GetOrdinal("p99_duration_ms")),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at"))
+    };
+}
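The ON CONFLICT (tenant_id, job_type, period_start) DO UPDATE clause in UpsertAsync makes rollups idempotent: recomputing a window overwrites the earlier row instead of duplicating it. A sketch of a windowed rollup writer follows; the one-hour window and every numeric value are placeholders, not figures from this diff.

using StellaOps.Scheduler.Storage.Postgres.Models;
using StellaOps.Scheduler.Storage.Postgres.Repositories;

// Sketch only: write (or rewrite) a one-hour metrics window for one job type.
// Window size and all counts/percentiles here are illustrative placeholders.
public static class MetricsRollupSketch
{
    public static Task<MetricsEntity> WriteHourlyRollupAsync(
        IMetricsRepository metrics,
        string tenantId,
        string jobType,
        DateTimeOffset windowStart,
        CancellationToken ct)
    {
        var entity = new MetricsEntity
        {
            TenantId = tenantId,
            JobType = jobType,
            PeriodStart = windowStart,
            PeriodEnd = windowStart.AddHours(1),
            JobsCreated = 120,
            JobsCompleted = 115,
            JobsFailed = 4,
            JobsTimedOut = 1,
            AvgDurationMs = 820,
            P50DurationMs = 640,
            P95DurationMs = 2100,
            P99DurationMs = 4800
        };

        // Re-running the same (tenant, job type, window) replaces the prior
        // row, so a crashed rollup pass can simply be retried end to end.
        return metrics.UpsertAsync(entity, ct);
    }
}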
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/TriggerRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/TriggerRepository.cs
new file mode 100644
index 000000000..f456b6ae4
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/TriggerRepository.cs
@@ -0,0 +1,301 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// PostgreSQL repository for trigger operations.
+/// </summary>
+public sealed class TriggerRepository : RepositoryBase, ITriggerRepository
+{
+    /// <summary>
+    /// Creates a new trigger repository.
+    /// </summary>
+    public TriggerRepository(SchedulerDataSource dataSource, ILogger<TriggerRepository> logger)
+        : base(dataSource, logger)
+    {
+    }
+
+    /// <inheritdoc/>
+    public async Task<TriggerEntity?> GetByIdAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
+                   enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
+                   metadata, created_at, updated_at, created_by
+            FROM scheduler.triggers
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+
+        return await QuerySingleOrDefaultAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "id", id);
+            },
+            MapTrigger,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<TriggerEntity?> GetByNameAsync(string tenantId, string name, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
+                   enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
+                   metadata, created_at, updated_at, created_by
+            FROM scheduler.triggers
+            WHERE tenant_id = @tenant_id AND name = @name
+            """;
+
+        return await QuerySingleOrDefaultAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "name", name);
+            },
+            MapTrigger,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<TriggerEntity>> ListAsync(string tenantId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
+                   enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
+                   metadata, created_at, updated_at, created_by
+            FROM scheduler.triggers
+            WHERE tenant_id = @tenant_id
+            ORDER BY name
+            """;
+
+        return await QueryAsync(
+            tenantId,
+            sql,
+            cmd => AddParameter(cmd, "tenant_id", tenantId),
+            MapTrigger,
+            cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<TriggerEntity>> GetDueTriggersAsync(int limit = 100, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
+                   enabled, next_fire_at, last_fire_at, last_job_id, fire_count, misfire_count,
+                   metadata, created_at, updated_at, created_by
+            FROM scheduler.triggers
+            WHERE enabled = TRUE AND next_fire_at <= NOW()
+            ORDER BY next_fire_at
+            LIMIT @limit
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "limit", limit);
+
+        var results = new List<TriggerEntity>();
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
+        {
+            results.Add(MapTrigger(reader));
+        }
+        return results;
+    }
+
+    /// <inheritdoc/>
+    public async Task<TriggerEntity> CreateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO scheduler.triggers (
+                id, tenant_id, name, description, job_type, job_payload, cron_expression, timezone,
+                enabled, next_fire_at, metadata, created_by
+            )
+            VALUES (
+                @id, @tenant_id, @name, @description, @job_type, @job_payload::jsonb, @cron_expression, @timezone,
+                @enabled, @next_fire_at, @metadata::jsonb, @created_by
+            )
+            RETURNING *
+            """;
+
+        var id = trigger.Id == Guid.Empty ? Guid.NewGuid() : trigger.Id;
+        await using var connection = await DataSource.OpenConnectionAsync(trigger.TenantId, "writer", cancellationToken)
+            .ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        AddParameter(command, "id", id);
+        AddParameter(command, "tenant_id", trigger.TenantId);
+        AddParameter(command, "name", trigger.Name);
+        AddParameter(command, "description", trigger.Description);
+        AddParameter(command, "job_type", trigger.JobType);
+        AddJsonbParameter(command, "job_payload", trigger.JobPayload);
+        AddParameter(command, "cron_expression", trigger.CronExpression);
+        AddParameter(command, "timezone", trigger.Timezone);
+        AddParameter(command, "enabled", trigger.Enabled);
+        AddParameter(command, "next_fire_at", trigger.NextFireAt);
+        AddJsonbParameter(command, "metadata", trigger.Metadata);
+        AddParameter(command, "created_by", trigger.CreatedBy);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapTrigger(reader);
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> UpdateAsync(TriggerEntity trigger, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE scheduler.triggers
+            SET name = @name,
+                description = @description,
+                job_type = @job_type,
+                job_payload = @job_payload::jsonb,
+                cron_expression = @cron_expression,
+                timezone = @timezone,
+                enabled = @enabled,
+                next_fire_at = @next_fire_at,
+                metadata = @metadata::jsonb
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+
+        var rows = await ExecuteAsync(
+            trigger.TenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", trigger.TenantId);
+                AddParameter(cmd, "id", trigger.Id);
+                AddParameter(cmd, "name", trigger.Name);
+                AddParameter(cmd, "description", trigger.Description);
+                AddParameter(cmd, "job_type", trigger.JobType);
+                AddJsonbParameter(cmd, "job_payload", trigger.JobPayload);
+                AddParameter(cmd, "cron_expression", trigger.CronExpression);
+                AddParameter(cmd, "timezone", trigger.Timezone);
+                AddParameter(cmd, "enabled", trigger.Enabled);
+                AddParameter(cmd, "next_fire_at", trigger.NextFireAt);
+                AddJsonbParameter(cmd, "metadata", trigger.Metadata);
+            },
+            cancellationToken).ConfigureAwait(false);
+
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> RecordFireAsync(string tenantId, Guid triggerId, Guid jobId, DateTimeOffset? nextFireAt, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE scheduler.triggers
+            SET last_fire_at = NOW(),
+                last_job_id = @job_id,
+                next_fire_at = @next_fire_at,
+                fire_count = fire_count + 1
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+
+        var rows = await ExecuteAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "id", triggerId);
+                AddParameter(cmd, "job_id", jobId);
+                AddParameter(cmd, "next_fire_at", nextFireAt);
+            },
+            cancellationToken).ConfigureAwait(false);
+
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> RecordMisfireAsync(string tenantId, Guid triggerId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE scheduler.triggers
+            SET misfire_count = misfire_count + 1
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+
+        var rows = await ExecuteAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "id", triggerId);
+            },
+            cancellationToken).ConfigureAwait(false);
+
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> SetEnabledAsync(string tenantId, Guid id, bool enabled, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE scheduler.triggers
+            SET enabled = @enabled
+            WHERE tenant_id = @tenant_id AND id = @id
+            """;
+
+        var rows = await ExecuteAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "id", id);
+                AddParameter(cmd, "enabled", enabled);
+            },
+            cancellationToken).ConfigureAwait(false);
+
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> DeleteAsync(string tenantId, Guid id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM scheduler.triggers WHERE tenant_id = @tenant_id AND id = @id";
+
+        var rows = await ExecuteAsync(
+            tenantId,
+            sql,
+            cmd =>
+            {
+                AddParameter(cmd, "tenant_id", tenantId);
+                AddParameter(cmd, "id", id);
+            },
+            cancellationToken).ConfigureAwait(false);
+
+        return rows > 0;
+    }
+
+    private static TriggerEntity MapTrigger(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetGuid(reader.GetOrdinal("id")),
+        TenantId = reader.GetString(reader.GetOrdinal("tenant_id")),
+        Name = reader.GetString(reader.GetOrdinal("name")),
+        Description = GetNullableString(reader, reader.GetOrdinal("description")),
+        JobType = reader.GetString(reader.GetOrdinal("job_type")),
+        JobPayload = reader.GetString(reader.GetOrdinal("job_payload")),
+        CronExpression = reader.GetString(reader.GetOrdinal("cron_expression")),
+        Timezone = reader.GetString(reader.GetOrdinal("timezone")),
+        Enabled = reader.GetBoolean(reader.GetOrdinal("enabled")),
+        NextFireAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("next_fire_at")),
+        LastFireAt = GetNullableDateTimeOffset(reader, reader.GetOrdinal("last_fire_at")),
+        LastJobId = GetNullableGuid(reader, reader.GetOrdinal("last_job_id")),
+        FireCount = reader.GetInt64(reader.GetOrdinal("fire_count")),
+        MisfireCount = reader.GetInt32(reader.GetOrdinal("misfire_count")),
+        Metadata = reader.GetString(reader.GetOrdinal("metadata")),
+        CreatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("created_at")),
+        UpdatedAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("updated_at")),
+        CreatedBy = GetNullableString(reader, reader.GetOrdinal("created_by"))
+    };
+}
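GetDueTriggersAsync and RecordFireAsync between them define the scheduler tick: fetch enabled triggers whose next_fire_at has passed, enqueue a job for each, then advance next_fire_at together with the fire bookkeeping in one UPDATE. A sketch of one tick follows, assuming the Cronos library for computing the next occurrence and a hypothetical enqueue delegate; the diff does not say which cron library the service actually uses.

using Cronos; // assumption: any cron parser with timezone support would do
using StellaOps.Scheduler.Storage.Postgres.Models;
using StellaOps.Scheduler.Storage.Postgres.Repositories;

// Sketch only. enqueueJobAsync stands in for the real dispatch path.
public static class SchedulerTickSketch
{
    public static async Task RunOnceAsync(
        ITriggerRepository triggers,
        Func<TriggerEntity, CancellationToken, Task<Guid>> enqueueJobAsync,
        CancellationToken ct)
    {
        foreach (var trigger in await triggers.GetDueTriggersAsync(100, ct))
        {
            var jobId = await enqueueJobAsync(trigger, ct);

            var cron = CronExpression.Parse(trigger.CronExpression);
            var zone = TimeZoneInfo.FindSystemTimeZoneById(trigger.Timezone);
            var next = cron.GetNextOccurrence(DateTimeOffset.UtcNow, zone);

            // One UPDATE stamps last_fire_at/last_job_id, advances
            // next_fire_at, and increments fire_count.
            await triggers.RecordFireAsync(trigger.TenantId, trigger.Id, jobId, next, ct);
        }
    }
}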
diff --git a/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/WorkerRepository.cs b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/WorkerRepository.cs
new file mode 100644
index 000000000..3dd957c12
--- /dev/null
+++ b/src/Scheduler/__Libraries/StellaOps.Scheduler.Storage.Postgres/Repositories/WorkerRepository.cs
@@ -0,0 +1,230 @@
+using Microsoft.Extensions.Logging;
+using Npgsql;
+using StellaOps.Infrastructure.Postgres.Repositories;
+using StellaOps.Scheduler.Storage.Postgres.Models;
+
+namespace StellaOps.Scheduler.Storage.Postgres.Repositories;
+
+/// <summary>
+/// PostgreSQL repository for worker operations.
+/// </summary>
+public sealed class WorkerRepository : RepositoryBase, IWorkerRepository
+{
+    /// <summary>
+    /// Creates a new worker repository.
+    /// </summary>
+    public WorkerRepository(SchedulerDataSource dataSource, ILogger<WorkerRepository> logger)
+        : base(dataSource, logger)
+    {
+    }
+
+    /// <inheritdoc/>
+    public async Task<WorkerEntity?> GetByIdAsync(string id, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
+                   status, last_heartbeat_at, registered_at, metadata
+            FROM scheduler.workers
+            WHERE id = @id
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        return await reader.ReadAsync(cancellationToken).ConfigureAwait(false) ? MapWorker(reader) : null;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<WorkerEntity>> ListAsync(CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
+                   status, last_heartbeat_at, registered_at, metadata
+            FROM scheduler.workers
+            ORDER BY registered_at DESC
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        var results = new List<WorkerEntity>();
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
+        {
+            results.Add(MapWorker(reader));
+        }
+        return results;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<WorkerEntity>> ListByStatusAsync(string status, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
+                   status, last_heartbeat_at, registered_at, metadata
+            FROM scheduler.workers
+            WHERE status = @status
+            ORDER BY registered_at DESC
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "status", status);
+
+        var results = new List<WorkerEntity>();
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
+        {
+            results.Add(MapWorker(reader));
+        }
+        return results;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<WorkerEntity>> GetByTenantIdAsync(string tenantId, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
+                   status, last_heartbeat_at, registered_at, metadata
+            FROM scheduler.workers
+            WHERE tenant_id = @tenant_id OR tenant_id IS NULL
+            ORDER BY registered_at DESC
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "tenant_id", tenantId);
+
+        var results = new List<WorkerEntity>();
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
+        {
+            results.Add(MapWorker(reader));
+        }
+        return results;
+    }
+
+    /// <inheritdoc/>
+    public async Task<WorkerEntity> UpsertAsync(WorkerEntity worker, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            INSERT INTO scheduler.workers (id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, metadata)
+            VALUES (@id, @tenant_id, @hostname, @process_id, @job_types, @max_concurrent_jobs, @metadata::jsonb)
+            ON CONFLICT (id) DO UPDATE SET
+                tenant_id = EXCLUDED.tenant_id,
+                hostname = EXCLUDED.hostname,
+                process_id = EXCLUDED.process_id,
+                job_types = EXCLUDED.job_types,
+                max_concurrent_jobs = EXCLUDED.max_concurrent_jobs,
+                metadata = EXCLUDED.metadata,
+                last_heartbeat_at = NOW(),
+                status = 'active'
+            RETURNING *
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+
+        AddParameter(command, "id", worker.Id);
+        AddParameter(command, "tenant_id", worker.TenantId);
+        AddParameter(command, "hostname", worker.Hostname);
+        AddParameter(command, "process_id", worker.ProcessId);
+        AddTextArrayParameter(command, "job_types", worker.JobTypes);
+        AddParameter(command, "max_concurrent_jobs", worker.MaxConcurrentJobs);
+        AddJsonbParameter(command, "metadata", worker.Metadata);
+
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        await reader.ReadAsync(cancellationToken).ConfigureAwait(false);
+        return MapWorker(reader);
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> HeartbeatAsync(string id, int currentJobs, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE scheduler.workers
+            SET last_heartbeat_at = NOW(), current_jobs = @current_jobs
+            WHERE id = @id
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "current_jobs", currentJobs);
+
+        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> SetStatusAsync(string id, string status, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            UPDATE scheduler.workers
+            SET status = @status
+            WHERE id = @id
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+        AddParameter(command, "status", status);
+
+        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<bool> DeleteAsync(string id, CancellationToken cancellationToken = default)
+    {
+        const string sql = "DELETE FROM scheduler.workers WHERE id = @id";
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "id", id);
+
+        var rows = await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false);
+        return rows > 0;
+    }
+
+    /// <inheritdoc/>
+    public async Task<IReadOnlyList<WorkerEntity>> GetStaleWorkersAsync(TimeSpan staleDuration, CancellationToken cancellationToken = default)
+    {
+        const string sql = """
+            SELECT id, tenant_id, hostname, process_id, job_types, max_concurrent_jobs, current_jobs,
+                   status, last_heartbeat_at, registered_at, metadata
+            FROM scheduler.workers
+            WHERE status = 'active' AND last_heartbeat_at < NOW() - @stale_duration
+            ORDER BY last_heartbeat_at
+            """;
+
+        await using var connection = await DataSource.OpenSystemConnectionAsync(cancellationToken).ConfigureAwait(false);
+        await using var command = CreateCommand(sql, connection);
+        AddParameter(command, "stale_duration", staleDuration);
+
+        var results = new List<WorkerEntity>();
+        await using var reader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false);
+        while (await reader.ReadAsync(cancellationToken).ConfigureAwait(false))
+        {
+            results.Add(MapWorker(reader));
+        }
+        return results;
+    }
+
+    private static WorkerEntity MapWorker(NpgsqlDataReader reader) => new()
+    {
+        Id = reader.GetString(reader.GetOrdinal("id")),
+        TenantId = GetNullableString(reader, reader.GetOrdinal("tenant_id")),
+        Hostname = reader.GetString(reader.GetOrdinal("hostname")),
+        ProcessId = reader.IsDBNull(reader.GetOrdinal("process_id")) ? null : reader.GetInt32(reader.GetOrdinal("process_id")),
+        JobTypes = reader.IsDBNull(reader.GetOrdinal("job_types")) ? [] : reader.GetFieldValue<string[]>(reader.GetOrdinal("job_types")),
+        MaxConcurrentJobs = reader.GetInt32(reader.GetOrdinal("max_concurrent_jobs")),
+        CurrentJobs = reader.GetInt32(reader.GetOrdinal("current_jobs")),
+        Status = reader.GetString(reader.GetOrdinal("status")),
+        LastHeartbeatAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("last_heartbeat_at")),
+        RegisteredAt = reader.GetFieldValue<DateTimeOffset>(reader.GetOrdinal("registered_at")),
+        Metadata = reader.GetString(reader.GetOrdinal("metadata"))
+    };
+}
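All four implementations share the same constructor shape, SchedulerDataSource plus a typed logger, so composition-root wiring is uniform. A hedged sketch of the registration; the extension-method name and the chosen lifetimes are assumptions, since the composition root is not part of this diff.

using Microsoft.Extensions.DependencyInjection;
using StellaOps.Scheduler.Storage.Postgres.Repositories;

// Sketch only. AddSchedulerPostgresStorage and the lifetimes shown are
// assumptions; only the repository and data-source type names are real.
public static class SchedulerStorageServiceCollectionExtensions
{
    public static IServiceCollection AddSchedulerPostgresStorage(this IServiceCollection services)
    {
        services.AddSingleton<SchedulerDataSource>();
        services.AddScoped<IJobHistoryRepository, JobHistoryRepository>();
        services.AddScoped<IMetricsRepository, MetricsRepository>();
        services.AddScoped<ITriggerRepository, TriggerRepository>();
        services.AddScoped<IWorkerRepository, WorkerRepository>();
        return services;
    }
}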