diff --git a/engine/src/engine/journal.rs b/engine/src/engine/journal.rs
index 05ce204..8ca5807 100644
--- a/engine/src/engine/journal.rs
+++ b/engine/src/engine/journal.rs
@@ -120,6 +120,34 @@ pub enum Decision {
         /// decision / reason / count / last-seen). Low-cardinality, no secret values.
         record: crate::engine::policy_log::PolicyDecisionRecord,
     },
+    /// A per-repository TOFU signing baseline (JEF-263, ADR-0020): the learned set of
+    /// identities/issuers that have signed images under one `registry/repo`, plus when the
+    /// repo was first seen signed and whether that history is `established` yet. Written as a
+    /// **compacted, full-state** line — the latest line for a repo supersedes every earlier
+    /// one on replay (last-write-wins), so re-appending it (on change / per pass) keeps a live
+    /// repo's baseline inside the rotation window instead of silently aging out and re-arming
+    /// cold-start trust. Every field is `#[serde(default)]` so a future field can be added
+    /// without breaking replay of older lines. The identities/issuers are UNTRUSTED Fulcio
+    /// cert text — a consumer MUST escape them at render (the zero-egress state never leaves
+    /// the cluster).
+    SigningBaseline {
+        /// The canonical `registry/repo` key (host-normalized, tag/digest stripped).
+        #[serde(default)]
+        repo: String,
+        /// Every signer identity observed signing an image under this repo (sorted, deduped).
+        #[serde(default)]
+        identities: Vec<String>,
+        /// Every OIDC issuer observed signing under this repo (sorted, deduped).
+        #[serde(default)]
+        issuers: Vec<String>,
+        /// When the repo was first observed with a verifying signature, Unix epoch millis.
+        #[serde(default)]
+        first_seen_ms: u64,
+        /// Whether the signed history is `established` (matured past the TOFU grace window) —
+        /// `false` is a freshly-learned baseline (weaker evidence).
+        #[serde(default)]
+        established: bool,
+    },
 }
 
 /// One journal line: a [`Decision`] stamped with when it was recorded.
The timestamp is diff --git a/engine/src/engine/mod.rs b/engine/src/engine/mod.rs index cfbb8cc..1fba018 100644 --- a/engine/src/engine/mod.rs +++ b/engine/src/engine/mod.rs @@ -406,6 +406,11 @@ impl Engine { // restore from the same journal, since it (not the engine) holds the shared // decision ring. journal::Decision::Admission { .. } => {} + // Per-repo signing baselines (JEF-263) restore into the dedicated + // `SigningBaselineStore`, not the engine's findings/reversion state — + // `run_watch` does that restore from the same journal, since it (not the engine + // core) owns the baseline store the sweep feeds each pass. + journal::Decision::SigningBaseline { .. } => {} } } if latest_at > std::time::SystemTime::UNIX_EPOCH { diff --git a/engine/src/engine/run_loop.rs b/engine/src/engine/run_loop.rs index 3c9409e..0fca1a3 100644 --- a/engine/src/engine/run_loop.rs +++ b/engine/src/engine/run_loop.rs @@ -294,6 +294,22 @@ pub async fn run_watch( // were already running when protector started (no admission event ever replays them). let signing_observer = build_signing_observer(); + // The durable per-repo TOFU signing baseline (JEF-263, ADR-0020): learned from the sweep's + // observed postures, persisted to (and, here on boot, replayed from) the SAME decision + // journal the engine already owns — so a repo's established signed history survives a + // restart instead of resetting to cold-start trust. Built once and mutated each pass; + // per-pass compaction inside the sweep keeps live baselines inside the journal's rotation + // window. A disabled journal ⇒ in-memory only (honest re-learn on restart). 
+ let signing_journal = engine.journal(); + let mut signing_baselines = state::SigningBaselineStore::new(); + let restored_baselines = signing_baselines.restore(signing_journal.as_ref()); + if restored_baselines > 0 { + tracing::info!( + restored_baselines, + "restored per-repo signing baselines from the durable journal" + ); + } + // Runtime evidence (Falco alerts + the eBPF agent's behaviors) is a stream, not a // an HTTP endpoint falcosidekick POSTs to, are held in a TTL'd store, and wake // the loop so a "happening now" signal is acted on immediately (it flips a @@ -433,7 +449,14 @@ pub async fn run_watch( // shared admission-decision log (JEF-261). Bounded by the observer's cache + MAX_IMAGES; // a no-op when no observer is configured. Run before `process` so the inventory reflects // the same snapshot the engine just reasoned over. - super::signing_sweep::sweep(signing_observer.as_ref(), &snapshot, &policy_log).await; + super::signing_sweep::sweep( + signing_observer.as_ref(), + &snapshot, + &policy_log, + Some(&mut signing_baselines), + signing_journal.as_ref(), + ) + .await; engine.process(&snapshot).await; } diff --git a/engine/src/engine/signing_sweep.rs b/engine/src/engine/signing_sweep.rs index ac9d2b0..d29e01c 100644 --- a/engine/src/engine/signing_sweep.rs +++ b/engine/src/engine/signing_sweep.rs @@ -16,11 +16,14 @@ //! the bounded [`PolicyDecisionLog`] ring — no durable schema. 
use std::sync::Arc;
+use std::time::SystemTime;
 
 use k8s_openapi::api::core::v1::Pod;
 
+use super::journal::DecisionJournal;
 use super::observe::Snapshot;
 use super::policy_log::{PolicyDecisionLog, PolicyDecisionRecord};
+use super::state::SigningBaselineStore;
 use crate::policies::signature::{PostureMap, SigningObserver, SigningPosture};
 
 /// Collect every distinct container image a running Pod references — regular, init, and
@@ -96,15 +99,54 @@ fn record_postures(log: &PolicyDecisionLog, map: &PostureMap) {
     }
 }
 
+/// Fold this pass's observed postures into the durable per-repo signing baseline (JEF-263),
+/// then compact the whole store back to the journal so a live repo's baseline stays inside
+/// the rotation window (never aged out). Only `Signed` postures learn a baseline; the store
+/// itself ignores the rest. Compaction is a no-op on a disabled journal or an empty store,
+/// so this is safe to call unconditionally each pass.
+///
+/// If the system clock cannot be expressed as Unix-epoch milliseconds (it reads before
+/// 1970, or overflows `u64`), learning is skipped for this pass rather than stamped with a
+/// sentinel: a `first_seen_ms` of 0 would make a brand-new baseline look decades old and
+/// flip it to `established` on the next observation, defeating the TOFU grace window.
+/// Compaction still runs so already-learned baselines stay inside the rotation window.
+fn learn_baselines(store: &mut SigningBaselineStore, journal: &DecisionJournal, map: &PostureMap) {
+    let now_ms = SystemTime::now()
+        .duration_since(SystemTime::UNIX_EPOCH)
+        .ok()
+        .and_then(|elapsed| u64::try_from(elapsed.as_millis()).ok());
+    if let Some(now_ms) = now_ms {
+        for (image, posture) in map.entries() {
+            store.observe(image, posture, now_ms);
+        }
+    } else {
+        tracing::warn!(
+            "system clock is not a valid Unix-epoch time; skipping signing-baseline learning"
+        );
+    }
+    // Full-state compaction per pass: re-append every live repo so rotation can never drop an
+    // established baseline (the durability discipline that keeps cold-start trust from
+    // silently re-arming). Bounded by the store's repo cap; a handful of small lines for a
+    // real cluster.
+    store.compact(journal);
+}
+
 /// Run one signing-posture sweep over the snapshot's running pods and record the result.
 /// A no-op (zero outbound calls, nothing recorded) when no observer is configured — so a
 /// deploy without signature config behaves exactly as before. Bounded by the observer's
 /// `max_images` cap + TTL cache, so a steady cluster re-sweeps for free and a churny one
 /// can't amplify outbound verification.
+/// +/// The observed postures also feed the durable per-repo signing baseline (JEF-263) when a +/// `baseline` store + `journal` are wired: a signed image teaches the repo's TOFU baseline, +/// which is persisted to (and, on boot, replayed from) the SAME decision journal. This is +/// pure learning — never a gate (ADR-0016); drift/enforcement are later stages. pub async fn sweep( observer: Option<&SigningObserver>, snapshot: &Snapshot, log: &Arc, + baseline: Option<&mut SigningBaselineStore>, + journal: &DecisionJournal, ) { let Some(observer) = observer else { return; @@ -115,6 +145,9 @@ pub async fn sweep( } let map = observer.sweep(images).await; record_postures(log, &map); + if let Some(store) = baseline { + learn_baselines(store, journal, &map); + } } #[cfg(test)] @@ -126,6 +159,7 @@ mod tests { use async_trait::async_trait; use super::*; + use crate::engine::state::SigningBaselineStore; use crate::policies::signature::{SignatureObserver, Signer}; fn pod(images: &[&str], init: &[&str]) -> Pod { @@ -211,7 +245,14 @@ mod tests { ..Default::default() }; let log = Arc::new(PolicyDecisionLog::new()); - sweep(Some(&obs), &snapshot, &log).await; + sweep( + Some(&obs), + &snapshot, + &log, + None, + &DecisionJournal::disabled(), + ) + .await; let rows = log.snapshot(); assert_eq!(rows.len(), 3, "one row per distinct running image"); let by_image: HashMap<_, _> = rows.iter().map(|r| (r.image.as_str(), r)).collect(); @@ -244,7 +285,14 @@ mod tests { ..Default::default() }; let log = Arc::new(PolicyDecisionLog::new()); - sweep(Some(&obs), &snapshot, &log).await; + sweep( + Some(&obs), + &snapshot, + &log, + None, + &DecisionJournal::disabled(), + ) + .await; let rows = log.snapshot(); assert_eq!(rows[0].signature, "checking"); assert_ne!( @@ -261,7 +309,7 @@ mod tests { ..Default::default() }; let log = Arc::new(PolicyDecisionLog::new()); - sweep(None, &snapshot, &log).await; + sweep(None, &snapshot, &log, None, &DecisionJournal::disabled()).await; 
assert!(log.snapshot().is_empty()); } @@ -276,12 +324,83 @@ mod tests { ..Default::default() }; let log = Arc::new(PolicyDecisionLog::new()); - sweep(Some(&obs), &snapshot, &log).await; - sweep(Some(&obs), &snapshot, &log).await; + sweep( + Some(&obs), + &snapshot, + &log, + None, + &DecisionJournal::disabled(), + ) + .await; + sweep( + Some(&obs), + &snapshot, + &log, + None, + &DecisionJournal::disabled(), + ) + .await; assert_eq!( calls.load(Ordering::SeqCst), 1, "the second sweep is served from the observer cache — zero new outbound calls" ); } + + #[tokio::test] + async fn sweep_teaches_the_repo_baseline_from_a_signed_image() { + // The JEF-263 wiring: a signed image observed by the sweep learns a per-repo baseline, + // keyed by registry/repo. Pure learning — the log still records `allow`. + let (obs, _calls) = observer(vec![( + "ghcr.io/org/app:1", + signed("https://github.com/org/app/.github/workflows/r.yaml@refs/tags/v1"), + )]); + let snapshot = Snapshot { + pods: vec![pod(&["ghcr.io/org/app:1"], &[])], + ..Default::default() + }; + let log = Arc::new(PolicyDecisionLog::new()); + let mut baseline = SigningBaselineStore::new(); + sweep( + Some(&obs), + &snapshot, + &log, + Some(&mut baseline), + &DecisionJournal::disabled(), + ) + .await; + let learned = baseline + .get("ghcr.io/org/app") + .expect("the signed image taught a repo baseline"); + assert!( + learned + .identities + .contains("https://github.com/org/app/.github/workflows/r.yaml@refs/tags/v1") + ); + assert!( + !learned.established, + "first sight is a fresh, weak baseline" + ); + } + + #[tokio::test] + async fn sweep_does_not_learn_a_baseline_for_an_unsigned_image() { + // A not-signed posture must never create a baseline (that's JEF-264 drift territory). 
+ let (obs, _calls) = observer(vec![]); // unknown image ⇒ FakeObserver returns NotSigned + let snapshot = Snapshot { + pods: vec![pod(&["docker.io/library/postgres:16"], &[])], + ..Default::default() + }; + let log = Arc::new(PolicyDecisionLog::new()); + let mut baseline = SigningBaselineStore::new(); + sweep( + Some(&obs), + &snapshot, + &log, + Some(&mut baseline), + &DecisionJournal::disabled(), + ) + .await; + assert!(baseline.is_empty(), "an unsigned image learns no baseline"); + } } diff --git a/engine/src/engine/state/mod.rs b/engine/src/engine/state/mod.rs index ef12b05..8adc840 100644 --- a/engine/src/engine/state/mod.rs +++ b/engine/src/engine/state/mod.rs @@ -18,6 +18,7 @@ mod readiness; mod recency; mod report; mod reversion; +mod signing_baseline; mod verdict_store; pub use evidence::{CveEvidence, EntryEvidence, FindingEvidence}; @@ -30,4 +31,5 @@ pub(crate) use readiness::derive_readiness; pub use recency::{Delta, RecencyInfo, StoredPosture}; pub use report::{LeftAloneEntry, Report, WouldActEntry, default_window_report}; pub use reversion::{ReversionLog, ReversionRecord}; +pub use signing_baseline::{DEFAULT_MAX_REPOS, SigningBaseline, SigningBaselineStore}; pub use verdict_store::{BakeStats, ModelHealth, ReadinessConfig, VerdictEntry, VerdictStore}; diff --git a/engine/src/engine/state/signing_baseline.rs b/engine/src/engine/state/signing_baseline.rs new file mode 100644 index 0000000..b6273ac --- /dev/null +++ b/engine/src/engine/state/signing_baseline.rs @@ -0,0 +1,300 @@ +//! The durable, per-repository TOFU signing baseline (JEF-263, ADR-0020 §2). +//! +//! Observation (JEF-261) is a *snapshot*: it says "this image is signed by X right now". It +//! cannot see a **change** in signing posture over time — which is the actual supply-chain +//! attack (a repo that has always shipped signed by one CI identity suddenly ships signed by +//! a different one, or unsigned). This module learns and remembers the missing history: for +//! 
each **repository** (`registry/repo`, never tag/digest), the set of identities/issuers +//! that have signed images under it, when it was first seen signed, and whether that history +//! is `established` yet (Trust On First Use). +//! +//! ## Durability (same footing as the decision journal) +//! +//! The baseline rides the SAME durable [`DecisionJournal`](crate::engine::journal::DecisionJournal) +//! as every other decision atom — one file, one `PROTECTOR_ENGINE_JOURNAL_PATH`, no second +//! store or env var. On boot the engine [`restore`](SigningBaselineStore::restore)s the +//! journal's tail into memory (exactly how the admission log is repopulated), so a learned +//! baseline survives a restart. A disabled journal ⇒ in-memory only: the store still works, +//! but a restart resets it and it honestly re-learns from observation (all cold-start until +//! re-observed). +//! +//! ## Compaction, NOT rotation-aging +//! +//! The journal is bounded by size with a single-generation rotation that trims old lines. A +//! naive append-once would let an *established* baseline age out of the window on a busy +//! journal and silently re-arm cold-start trust. So each baseline line is **full state** +//! (last-write-wins on replay) and the store [`compact`](SigningBaselineStore::compact)s — +//! re-appends every live repo's baseline each pass — so a live repo's line is always inside +//! the rotation window. In practice a cluster has tens to low-hundreds of distinct repos, so +//! per-pass compaction is a handful of small lines, negligible against the journal cap; the +//! [`DEFAULT_MAX_REPOS`] cap bounds the pathological case. +//! +//! ## Scope (JEF-263) +//! +//! Persistence + in-memory store + boot replay ONLY. Drift *detection*/findings (JEF-264), +//! enforcement (JEF-265), the dashboard render (JEF-262), and Rekor history (JEF-266) consume +//! the baseline this exposes; they are NOT built here. The store only ever *learns* from a +//! 
`Signed` posture — it never emits a verdict, never gates, and treats a new tag/digest +//! under a known repo as the same baseline (not drift). The identities/issuers are UNTRUSTED +//! Fulcio cert text; a consumer MUST escape them at render (this state never leaves the +//! cluster). + +use std::collections::{BTreeSet, HashMap}; + +use crate::engine::journal::{Decision, DecisionJournal}; +use crate::policies::signature::{SigningPosture, repo_key}; + +/// How long after `first_seen` a baseline is considered [`established`](SigningBaseline::established). +/// +/// **Design decision (ADR-0020 addendum, JEF-263): `established` = wall-clock age, not +/// digest-count.** A baseline matures 24h after the repo was first observed signed. Rationale: +/// the whole point of a TOFU baseline is that the FIRST observation is the weakest evidence +/// (it could be the attacker's first signed push), so trust should mature over time rather +/// than on a counter an attacker can inflate by pushing many digests in a burst. Wall-clock +/// age needs no extra durable state (we already persist `first_seen_ms`) and is monotonic — +/// once established, a baseline never un-establishes on a later observation. A digest-count or +/// distinct-day refinement is a future option; `established` + `first_seen` are exposed so +/// JEF-262/JEF-264 can render/weigh the distinction however they choose. +const ESTABLISH_AGE_MS: u64 = 24 * 60 * 60 * 1000; + +/// Upper bound on distinct repositories tracked in memory. A safety cap for the pathological +/// case (thousands of distinct repos churning through the cluster); a real cluster stays far +/// below it. When inserting a NEW repo would exceed this, one entry is evicted — preferring a +/// non-`established` (cold, cheaply re-learned) entry, oldest-updated first, so a matured +/// baseline is never dropped in favour of churn. +pub const DEFAULT_MAX_REPOS: usize = 4096; + +/// One repository's learned signing baseline (JEF-263). 
+/// Keyed elsewhere by the `registry/repo` string; this is the value.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SigningBaseline {
+    /// Every signer identity observed signing an image under this repo. A `BTreeSet` so the
+    /// set is deduped and deterministically ordered (stable journal lines, stable render).
+    /// UNTRUSTED Fulcio cert text — escape at render.
+    pub identities: BTreeSet<String>,
+    /// Every OIDC issuer observed signing under this repo (deduped, sorted). UNTRUSTED.
+    pub issuers: BTreeSet<String>,
+    /// When the repo was first observed with a verifying signature, Unix epoch millis.
+    pub first_seen_ms: u64,
+    /// Whether the signed history has matured past the TOFU grace window (see
+    /// [`ESTABLISH_AGE_MS`]). `false` ⇒ a freshly-learned baseline: weaker evidence.
+    pub established: bool,
+    /// When this baseline was last updated (observed or replayed), Unix epoch millis. In-memory
+    /// only (not journaled) — used solely to order eviction. `pub(crate)` so it isn't part of
+    /// the public value shape.
+    pub(crate) last_updated_ms: u64,
+}
+
+impl SigningBaseline {
+    /// Serialize this repo's baseline to a full-state journal decision (compaction line).
+    /// The sets are emitted in `BTreeSet` (sorted) order; `last_updated_ms` is deliberately
+    /// left out because it only orders in-memory eviction.
+    fn to_decision(&self, repo: &str) -> Decision {
+        Decision::SigningBaseline {
+            repo: repo.to_string(),
+            identities: self.identities.iter().cloned().collect(),
+            issuers: self.issuers.iter().cloned().collect(),
+            first_seen_ms: self.first_seen_ms,
+            established: self.established,
+        }
+    }
+}
+
+/// The in-memory, per-repository signing-baseline store (JEF-263). Learns from observed
+/// `Signed` postures and is written to the durable journal as full-state lines when the
+/// caller invokes [`persist`](Self::persist) or [`compact`](Self::compact) — the store never
+/// writes on its own. It is [`restore`](Self::restore)d from that journal on boot. Bounded by
+/// [`DEFAULT_MAX_REPOS`] with the eviction policy documented there.
+#[derive(Debug, Clone)]
+pub struct SigningBaselineStore {
+    /// Learned baselines keyed by the canonical `registry/repo` string.
+    baselines: HashMap<String, SigningBaseline>,
+    /// Maximum number of repos tracked at once; never below 1.
+    max_repos: usize,
+}
+
+impl Default for SigningBaselineStore {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SigningBaselineStore {
+    /// A store with the default repo cap.
+    pub fn new() -> Self {
+        Self::with_capacity(DEFAULT_MAX_REPOS)
+    }
+
+    /// A store with an explicit repo cap (for tests that exercise eviction cheaply).
+    /// `max_repos` is an upper bound on tracked repos, not a preallocation hint; a cap of 0
+    /// is clamped to 1 so the store can always hold the repo being inserted.
+    pub fn with_capacity(max_repos: usize) -> Self {
+        Self {
+            baselines: HashMap::new(),
+            max_repos: max_repos.max(1),
+        }
+    }
+
+    /// The learned baseline for a `registry/repo` key, if any.
+    pub fn get(&self, repo: &str) -> Option<&SigningBaseline> {
+        self.baselines.get(repo)
+    }
+
+    /// Number of distinct repositories with a learned baseline.
+    pub fn len(&self) -> usize {
+        self.baselines.len()
+    }
+
+    /// Whether no repository has a learned baseline yet.
+    pub fn is_empty(&self) -> bool {
+        self.baselines.is_empty()
+    }
+
+    /// All learned `(repo, baseline)` pairs — what JEF-262/JEF-264 read. Order is unspecified.
+    pub fn entries(&self) -> impl Iterator<Item = (&str, &SigningBaseline)> {
+        self.baselines.iter().map(|(k, v)| (k.as_str(), v))
+    }
+
+    /// Learn from one observed posture, keyed by the image's `registry/repo`. ONLY a `Signed`
+    /// posture updates a baseline (a not-signed / invalid / checking posture is left to the
+    /// drift work, JEF-264, and never creates or mutates a baseline here). Returns the repo key
+    /// when the baseline was created or changed (new identity/issuer, or `established` flipped),
+    /// so the caller can persist just that change; `None` when nothing changed.
+    ///
+    /// A new tag/digest under a repo with an existing baseline folds to the same key: it does
+    /// NOT create a new baseline (and, by construction, is not drift here).
+    pub fn observe(
+        &mut self,
+        image: &str,
+        posture: &SigningPosture,
+        now_ms: u64,
+    ) -> Option<String> {
+        // Anything without a verified signer teaches nothing.
+        let signer = posture.signer()?;
+        let repo = repo_key(image);
+
+        if let Some(existing) = self.baselines.get_mut(&repo) {
+            // `BTreeSet::insert` returns true only for a value not already present, so
+            // `changed` is set exactly when the baseline widened.
+            let mut changed = false;
+            changed |= existing.identities.insert(signer.identity.clone());
+            if let Some(issuer) = signer.issuer.as_ref() {
+                changed |= existing.issuers.insert(issuer.clone());
+            }
+            // Sticky: once established it stays established, even if `now_ms` is earlier than
+            // `first_seen_ms` (the saturating subtraction then yields 0, not an underflow).
+            let established = existing.established
+                || now_ms.saturating_sub(existing.first_seen_ms) >= ESTABLISH_AGE_MS;
+            if established != existing.established {
+                existing.established = established;
+                changed = true;
+            }
+            // Refreshed on every signed observation, changed or not: it only orders eviction.
+            existing.last_updated_ms = now_ms;
+            return if changed { Some(repo) } else { None };
+        }
+
+        // First time we've seen this repo signed: establish the baseline (cold-start, weak).
+        let mut identities = BTreeSet::new();
+        identities.insert(signer.identity.clone());
+        let mut issuers = BTreeSet::new();
+        if let Some(issuer) = signer.issuer.as_ref() {
+            issuers.insert(issuer.clone());
+        }
+        // Make room before inserting so the store never exceeds its repo cap.
+        self.evict_if_full();
+        self.baselines.insert(
+            repo.clone(),
+            SigningBaseline {
+                identities,
+                issuers,
+                first_seen_ms: now_ms,
+                // First sight is always cold-start (first_seen == now): weakest evidence.
+                established: false,
+                last_updated_ms: now_ms,
+            },
+        );
+        Some(repo)
+    }
+
+    /// Persist one repo's current baseline to the journal as a full-state line. A no-op if the
+    /// repo isn't tracked. Infallible from the caller's view (a disabled/unwritable journal is
+    /// itself a no-op).
+    pub fn persist(&self, journal: &DecisionJournal, repo: &str) {
+        if let Some(baseline) = self.baselines.get(repo) {
+            journal.record(baseline.to_decision(repo));
+        }
+    }
+
+    /// Re-append EVERY live repo's baseline as a fresh full-state line (compaction).
+    /// Called per pass so a live repo's line stays inside the journal's rotation window and is
+    /// never aged out — the durability guarantee that keeps an established baseline from
+    /// silently re-arming cold-start trust after enough journal churn. A no-op on an empty
+    /// store or a disabled journal.
+    pub fn compact(&self, journal: &DecisionJournal) {
+        if self.baselines.is_empty() || !journal.is_enabled() {
+            return;
+        }
+        journal.record_all(self.baselines.iter().map(|(repo, b)| b.to_decision(repo)));
+    }
+
+    /// Replay the durable journal's [`SigningBaseline`](Decision::SigningBaseline) lines into
+    /// the store on boot. Lines are folded in the order `journal.replay()` yields them, each
+    /// one replacing any earlier state for the same repo, so the last full-state line per repo
+    /// wins (compaction semantics). Lines with an empty `repo` are skipped.
+    /// NOTE(review): last-write-wins assumes `replay()` yields lines oldest-first — confirm.
+    ///
+    /// `established` is re-derived per line as "already established, OR the line's own
+    /// `at_ms` stamp is at least `ESTABLISH_AGE_MS` past `first_seen_ms`". The reference time
+    /// is the stamp on the journal line, NOT the current wall clock: a baseline that had
+    /// matured by the time its last line was written is restored established even if that
+    /// line still says `false`, but one that crossed the window only after its last line
+    /// (e.g. while the engine was down) is restored not-yet-established and flips on its next
+    /// signed `observe`.
+    ///
+    /// Returns the number of repos tracked once the replay finishes. For a store that was
+    /// empty beforehand (the boot path) that is the number of distinct repos restored, bounded
+    /// by the repo cap. A disabled/empty journal restores nothing.
+    pub fn restore(&mut self, journal: &DecisionJournal) -> usize {
+        for entry in journal.replay() {
+            if let Decision::SigningBaseline {
+                repo,
+                identities,
+                issuers,
+                first_seen_ms,
+                established,
+            } = entry.decision
+            {
+                // Every field is `#[serde(default)]`, so a line missing `repo` arrives as an
+                // empty key; there is nothing meaningful to restore under it.
+                if repo.is_empty() {
+                    continue;
+                }
+                // Age is measured at the line's timestamp; saturating so a `first_seen_ms`
+                // later than `at_ms` counts as age 0 rather than underflowing.
+                let matured =
+                    established || entry.at_ms.saturating_sub(first_seen_ms) >= ESTABLISH_AGE_MS;
+                self.upsert(
+                    SigningBaseline {
+                        identities: identities.into_iter().collect(),
+                        issuers: issuers.into_iter().collect(),
+                        first_seen_ms,
+                        established: matured,
+                        // The line's stamp doubles as the eviction-ordering timestamp.
+                        last_updated_ms: entry.at_ms,
+                    },
+                    repo,
+                );
+            }
+        }
+        self.baselines.len()
+    }
+
+    /// Insert-or-replace one repo's baseline (restore path), respecting the repo cap. An
+    /// existing key updates in place (no eviction); a new key over the cap triggers eviction.
+    fn upsert(&mut self, baseline: SigningBaseline, repo: String) {
+        let is_new_repo = !self.baselines.contains_key(&repo);
+        if is_new_repo {
+            self.evict_if_full();
+        }
+        self.baselines.insert(repo, baseline);
+    }
+
+    /// Drop one entry when the store has reached its repo cap, so the insert that follows
+    /// keeps the store bounded. The victim is a non-`established` entry (cheap to re-learn)
+    /// whenever one exists, otherwise an established one; within that group it is the entry
+    /// updated longest ago. Does nothing while the store is below the cap.
+    fn evict_if_full(&mut self) {
+        if self.baselines.len() < self.max_repos {
+            return;
+        }
+        // `false < true`, so ordering on `(established, last_updated_ms)` ranks every
+        // non-established entry ahead of every established one and breaks ties inside each
+        // group by age — one scan, same victim as filtering first and then falling back.
+        let victim = self
+            .baselines
+            .iter()
+            .min_by_key(|(_, b)| (b.established, b.last_updated_ms))
+            .map(|(repo, _)| repo.clone());
+        if let Some(victim) = victim {
+            self.baselines.remove(&victim);
+        }
+    }
+}
+
+#[cfg(test)]
+#[path = "signing_baseline_tests.rs"]
+mod tests;
diff --git a/engine/src/engine/state/signing_baseline_tests.rs b/engine/src/engine/state/signing_baseline_tests.rs
new file mode 100644
index 0000000..7868ab7
--- /dev/null
+++ b/engine/src/engine/state/signing_baseline_tests.rs
@@ -0,0 +1,408 @@
+//! Tests for the durable per-repository signing baseline (JEF-263). Kept in their own file
+//! per the repo's 1,000-line cap (CLAUDE.md); tests count toward the limit.
+
+use std::path::{Path, PathBuf};
+
+use super::*;
+use crate::policies::signature::Signer;
+
+/// A unique temp journal path per test (no temp-file crate), mirroring `journal.rs`'s helper.
+fn temp_path(tag: &str) -> PathBuf { + use std::sync::atomic::{AtomicU64, Ordering}; + static NONCE: AtomicU64 = AtomicU64::new(0); + let n = NONCE.fetch_add(1, Ordering::Relaxed); + std::env::temp_dir().join(format!( + "protector-baseline-{tag}-{}-{n}.jsonl", + std::process::id() + )) +} + +fn cleanup(path: &Path) { + let _ = std::fs::remove_file(path); + let mut rolled = path.as_os_str().to_owned(); + rolled.push(".1"); + let _ = std::fs::remove_file(PathBuf::from(rolled)); +} + +fn signed(identity: &str, issuer: Option<&str>) -> SigningPosture { + SigningPosture::Signed(Signer { + identity: identity.to_string(), + issuer: issuer.map(str::to_string), + }) +} + +const DAY_MS: u64 = 24 * 60 * 60 * 1000; + +#[test] +fn observing_a_signed_image_creates_a_repo_keyed_baseline() { + // Acceptance: a signed image updates the repo baseline (identities/issuers), keyed by + // registry/repo — not tag/digest. + let mut store = SigningBaselineStore::new(); + let changed = store.observe( + "ghcr.io/org/app:1", + &signed( + "https://github.com/org/app/.github/workflows/r.yaml@refs/tags/v1", + Some("https://token.actions.githubusercontent.com"), + ), + 1_000, + ); + assert_eq!(changed.as_deref(), Some("ghcr.io/org/app")); + let baseline = store.get("ghcr.io/org/app").expect("baseline learned"); + assert!( + baseline + .identities + .contains("https://github.com/org/app/.github/workflows/r.yaml@refs/tags/v1") + ); + assert!( + baseline + .issuers + .contains("https://token.actions.githubusercontent.com") + ); + assert_eq!(baseline.first_seen_ms, 1_000); + assert!( + !baseline.established, + "first sight is weak, not established" + ); +} + +#[test] +fn a_new_tag_or_digest_under_a_known_repo_is_not_a_new_baseline() { + // Acceptance: a new digest/tag under a repo with an existing baseline does NOT create a + // new baseline (and, by construction here, is not drift). 
+ let mut store = SigningBaselineStore::new(); + let id = "https://github.com/org/app/.github/workflows/r.yaml@refs/tags/v1"; + store.observe("ghcr.io/org/app:1", &signed(id, None), 1_000); + // Same signer, different tag AND a digest ref — both fold to ghcr.io/org/app. + let changed_tag = store.observe("ghcr.io/org/app:2", &signed(id, None), 2_000); + let changed_digest = store.observe( + "ghcr.io/org/app@sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + &signed(id, None), + 3_000, + ); + assert_eq!( + store.len(), + 1, + "one baseline for the repo, across tags/digests" + ); + assert_eq!(changed_tag, None, "same signer, new tag ⇒ nothing changed"); + assert_eq!( + changed_digest, None, + "same signer, digest ref ⇒ nothing changed" + ); + assert_eq!( + store.get("ghcr.io/org/app").unwrap().first_seen_ms, + 1_000, + "first_seen is unchanged by later tags" + ); +} + +#[test] +fn a_new_signer_under_a_known_repo_widens_the_same_baseline() { + let mut store = SigningBaselineStore::new(); + store.observe( + "ghcr.io/org/app:1", + &signed("id-a", Some("issuer-a")), + 1_000, + ); + let changed = store.observe( + "ghcr.io/org/app:1", + &signed("id-b", Some("issuer-b")), + 2_000, + ); + assert_eq!( + changed.as_deref(), + Some("ghcr.io/org/app"), + "a new signer is a change" + ); + let baseline = store.get("ghcr.io/org/app").unwrap(); + assert_eq!(baseline.identities.len(), 2); + assert_eq!(baseline.issuers.len(), 2); + assert_eq!(store.len(), 1, "still one repo baseline"); +} + +#[test] +fn a_freshly_learned_baseline_is_distinguishable_from_an_established_one() { + // Acceptance: `established` + `first_seen` separate weak (fresh) from strong (matured). + let mut store = SigningBaselineStore::new(); + store.observe("ghcr.io/org/app:1", &signed("id-a", None), 0); + assert!(!store.get("ghcr.io/org/app").unwrap().established); + // Re-observed just past the grace window ⇒ established, first_seen still the original. 
+ store.observe("ghcr.io/org/app:1", &signed("id-a", None), DAY_MS + 1); + let baseline = store.get("ghcr.io/org/app").unwrap(); + assert!(baseline.established, "matured past the grace window"); + assert_eq!( + baseline.first_seen_ms, 0, + "first_seen is the original observation" + ); +} + +#[test] +fn establishment_is_monotonic() { + // Once established, a later observation (even a clock that appears to go backwards) never + // un-establishes the baseline. + let mut store = SigningBaselineStore::new(); + store.observe("ghcr.io/org/app:1", &signed("id-a", None), 0); + store.observe("ghcr.io/org/app:1", &signed("id-a", None), DAY_MS + 1); + assert!(store.get("ghcr.io/org/app").unwrap().established); + store.observe("ghcr.io/org/app:1", &signed("id-a", None), 5); + assert!( + store.get("ghcr.io/org/app").unwrap().established, + "establishment never regresses" + ); +} + +#[test] +fn non_signed_postures_never_create_or_touch_a_baseline() { + // The store only learns from a verifying signature. A not-signed / invalid / checking + // posture is JEF-264's drift concern and must not create or mutate a baseline here. + let mut store = SigningBaselineStore::new(); + assert_eq!( + store.observe("ghcr.io/org/x:1", &SigningPosture::NotSigned, 1), + None + ); + assert_eq!( + store.observe("ghcr.io/org/y:1", &SigningPosture::InvalidSignature, 1), + None + ); + assert_eq!( + store.observe("ghcr.io/org/z:1", &SigningPosture::Checking, 1), + None + ); + assert!( + store.is_empty(), + "no baseline learned from a non-signed posture" + ); +} + +#[test] +fn baseline_survives_an_engine_restart_round_trip() { + // Acceptance: write + boot-replay. A baseline learned before a "restart" replays after it. 
+ let path = temp_path("roundtrip"); + { + let journal = DecisionJournal::open(&path); + let mut store = SigningBaselineStore::new(); + let repo = store + .observe( + "ghcr.io/org/app:1", + &signed("id-a", Some("issuer-a")), + 1_000, + ) + .expect("learned"); + store.persist(&journal, &repo); + } + // A fresh store on the same journal (the "post-restart" engine) replays it. + let reopened = DecisionJournal::open(&path); + let mut restored = SigningBaselineStore::new(); + let count = restored.restore(&reopened); + assert_eq!(count, 1); + let baseline = restored.get("ghcr.io/org/app").expect("restored"); + assert!(baseline.identities.contains("id-a")); + assert!(baseline.issuers.contains("issuer-a")); + assert_eq!(baseline.first_seen_ms, 1_000); + cleanup(&path); +} + +#[test] +fn last_write_wins_on_replay_across_repeated_lines() { + // Compaction writes a full-state line each time; replay must keep the LATEST per repo. + let path = temp_path("lastwrite"); + { + let journal = DecisionJournal::open(&path); + let mut store = SigningBaselineStore::new(); + store.observe("ghcr.io/org/app:1", &signed("id-a", None), 1_000); + store.compact(&journal); // line 1: {id-a} + store.observe("ghcr.io/org/app:1", &signed("id-b", None), 2_000); + store.compact(&journal); // line 2: {id-a, id-b} + } + let reopened = DecisionJournal::open(&path); + let mut restored = SigningBaselineStore::new(); + restored.restore(&reopened); + let baseline = restored.get("ghcr.io/org/app").unwrap(); + assert_eq!(baseline.identities.len(), 2, "the latest, widest line wins"); + cleanup(&path); +} + +#[test] +fn a_baseline_that_matured_while_down_restores_established() { + // Written fresh (established=false) with an old first_seen; the replay stamp is a day + // later, so restore recomputes it as established from wall-clock age. 
+ let path = temp_path("matured"); + let old_line = format!( + r#"{{"at_ms":{},"kind":"signing_baseline","repo":"ghcr.io/org/app","identities":["id-a"],"issuers":[],"first_seen_ms":0,"established":false}}"#, + DAY_MS + 5 + ); + std::fs::write(&path, format!("{old_line}\n")).unwrap(); + let journal = DecisionJournal::open(&path); + let mut store = SigningBaselineStore::new(); + store.restore(&journal); + assert!( + store.get("ghcr.io/org/app").unwrap().established, + "aged past the window by replay time ⇒ established" + ); + cleanup(&path); +} + +#[test] +fn a_forward_compatible_line_with_missing_fields_still_replays() { + // Forward-compat: every field is #[serde(default)], so a line missing optional fields + // (here: no issuers, no established) parses rather than breaking the whole replay. The absent fields are asserted to read back as an empty issuer set and `established == false`. + let path = temp_path("forwardcompat"); + let line = r#"{"at_ms":10,"kind":"signing_baseline","repo":"ghcr.io/org/app","identities":["id-a"],"first_seen_ms":10}"#; + std::fs::write(&path, format!("{line}\n")).unwrap(); + let journal = DecisionJournal::open(&path); + let mut store = SigningBaselineStore::new(); + assert_eq!(store.restore(&journal), 1); + let baseline = store.get("ghcr.io/org/app").unwrap(); + assert!(baseline.identities.contains("id-a")); + assert!(baseline.issuers.is_empty()); + assert!(!baseline.established); + cleanup(&path); +} + +#[test] +fn compaction_keeps_a_live_established_baseline_across_rotation() { + // Acceptance: rotation never drops a live repo's established baseline. Establish one repo, + // then churn the journal past its ~2x rotation window while compacting the store each + // "pass" — the established baseline must still replay afterwards. + let path = temp_path("compaction"); + let journal = DecisionJournal::open(&path); + let mut store = SigningBaselineStore::new(); + // Establish the repo we care about (first_seen old enough to be established).
+ store.observe( + "ghcr.io/org/keep:1", + &signed("id-keep", Some("issuer-keep")), + 0, + ); + store.observe( + "ghcr.io/org/keep:1", + &signed("id-keep", Some("issuer-keep")), + DAY_MS + 1, + ); + assert!(store.get("ghcr.io/org/keep").unwrap().established); + + // Churn the SHARED journal well past its ~2x rotation window with unrelated decisions, + // compacting the store each pass (the durability discipline the engine loop runs). + let fat = "z".repeat(1000); + // MAX_BYTES is 1 MiB; write well past 3x so multiple rotations occur (3300 records, each carrying the 1000-byte `fat` reason). + for i in 0..3300 { + journal.record(Decision::Revert { + cut: format!("cut-{i}"), + reason: fat.clone(), + }); + store.compact(&journal); + } + + // A post-restart store must still find the established baseline. + let reopened = DecisionJournal::open(&path); + let mut restored = SigningBaselineStore::new(); + restored.restore(&reopened); + let baseline = restored + .get("ghcr.io/org/keep") + .expect("the live established baseline survives rotation via compaction"); + assert!(baseline.established, "and it is still established"); + assert!(baseline.identities.contains("id-keep")); + cleanup(&path); +} + +#[test] +fn without_compaction_a_baseline_can_age_out_of_rotation() { + // The negative control that proves compaction is load-bearing: the SAME churn, but the + // baseline line is appended only ONCE (no re-compaction). Rotation ages it out.
+ let path = temp_path("nocompaction"); + let journal = DecisionJournal::open(&path); + let mut store = SigningBaselineStore::new(); + let repo = store + .observe("ghcr.io/org/keep:1", &signed("id-keep", None), 0) + .unwrap(); + store.persist(&journal, &repo); // appended once, then never again (the churn loop below never calls `compact`) + + let fat = "z".repeat(1000); + for i in 0..3300 { + journal.record(Decision::Revert { + cut: format!("cut-{i}"), + reason: fat.clone(), + }); + } + + let reopened = DecisionJournal::open(&path); + let mut restored = SigningBaselineStore::new(); + restored.restore(&reopened); + assert!( + restored.get("ghcr.io/org/keep").is_none(), + "without compaction the single baseline line ages out of the rotation window" + ); + cleanup(&path); +} + +#[test] +fn a_disabled_journal_is_in_memory_only_and_resets_on_restart() { + // Degraded mode is honest: no journal ⇒ the store works in-memory, persist/compact are + // no-ops, and a fresh store (a "restart") starts empty (re-learns from observation). No temp file is created: the journal comes from `DecisionJournal::disabled()`. + let journal = DecisionJournal::disabled(); + let mut store = SigningBaselineStore::new(); + let repo = store + .observe("ghcr.io/org/app:1", &signed("id-a", None), 1_000) + .unwrap(); + store.persist(&journal, &repo); + store.compact(&journal); + assert_eq!(store.len(), 1, "in-memory learning still works"); + + let mut post_restart = SigningBaselineStore::new(); + assert_eq!( + post_restart.restore(&journal), + 0, + "a disabled journal restores nothing — honest cold start" + ); + assert!(post_restart.is_empty()); +} + +#[test] +fn the_store_is_bounded_and_evicts_non_established_before_established() { + // Bounded state + defined eviction: at capacity, a non-established (cheap to re-learn) + // entry is dropped before an established one. + let mut store = SigningBaselineStore::with_capacity(2); + // An established baseline (matured) we want to keep.
+ store.observe("ghcr.io/org/keep:1", &signed("id-keep", None), 0); + store.observe("ghcr.io/org/keep:1", &signed("id-keep", None), DAY_MS + 1); + assert!(store.get("ghcr.io/org/keep").unwrap().established); + // A fresh (non-established) baseline fills the second slot; it is observed only once. + store.observe("ghcr.io/org/fresh:1", &signed("id-fresh", None), DAY_MS + 2); + assert_eq!(store.len(), 2); + // A third repo forces eviction — the non-established `fresh` goes, `keep` stays. + store.observe("ghcr.io/org/new:1", &signed("id-new", None), DAY_MS + 3); + assert_eq!(store.len(), 2, "bounded at capacity"); + assert!( + store.get("ghcr.io/org/keep").is_some(), + "established survives eviction" + ); + assert!( + store.get("ghcr.io/org/fresh").is_none(), + "non-established evicted first" + ); + assert!( + store.get("ghcr.io/org/new").is_some(), + "the new repo was admitted" + ); +} + +#[test] +fn repo_key_folds_tags_digests_and_host_variants() { + // The key discipline the whole baseline rests on: host canonicalization + tag/digest strip. + let mut store = SigningBaselineStore::new(); + let id = "id-a"; + // Uppercase host + tag, and a digest under the same repo, must fold to one key. + store.observe("GHCR.IO/org/app:1", &signed(id, None), 1); + store.observe( + "ghcr.io/org/app@sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + &signed(id, None), + 2, + ); + assert_eq!(store.len(), 1); + assert!(store.get("ghcr.io/org/app").is_some()); + // A registry host:port is preserved (only the trailing image tag is stripped). + store.observe("localhost:5000/team/svc:v2", &signed(id, None), 3); + assert!(store.get("localhost:5000/team/svc").is_some()); + // A bare Docker Hub shorthand folds to its repo (the lookup below uses the key `postgres`).
+ store.observe("postgres:16", &signed(id, None), 4); + assert!(store.get("postgres").is_some()); +} diff --git a/engine/src/policies/signature/mod.rs b/engine/src/policies/signature/mod.rs index 8e83800..bb2afce 100644 --- a/engine/src/policies/signature/mod.rs +++ b/engine/src/policies/signature/mod.rs @@ -255,6 +255,35 @@ fn normalize_registry_host(image: &str) -> String { } }
+/// Computes the canonical **repository** key of an image reference (JEF-263, ADR-0020).
+///
+/// The registry host is first canonicalized by [`normalize_registry_host`] — the same
+/// routine the gate uses — and then any `@digest` and the mutable `:tag` are removed, so
+/// every tag and digest published under one source collapses onto a single key. TOFU
+/// signing history is keyed by this value: it is learned per *repository*
+/// (`ghcr.io/org/app`), never per tag/digest, so a new tag under an established repo maps
+/// to the existing baseline rather than starting a new one or reading as drift.
+///
+/// A `:tag` is only looked for in the LAST path segment, which keeps a registry
+/// `host:port` (`localhost:5000/app`) intact. A ref without any `/` — a bare Docker Hub
+/// shorthand such as `postgres:16` — is cut at its last colon and folds to `postgres`.
+pub fn repo_key(image: &str) -> String {
+    let canonical = normalize_registry_host(image);
+    // The digest goes first: `@sha256:…` carries a colon of its own, which would otherwise
+    // be mistaken for a tag separator.
+    let base = match canonical.split_once('@') {
+        Some((head, _digest)) => head,
+        None => canonical.as_str(),
+    };
+    // Only the final path segment may carry a `:tag`; a colon before the last `/` is a
+    // registry `host:port` and belongs to the key.
+    let segment_start = base.rfind('/').map_or(0, |slash| slash + 1);
+    let key_end = base[segment_start..]
+        .rfind(':')
+        .map_or(base.len(), |colon| segment_start + colon);
+    base[..key_end].to_owned()
+}
+
 /// Canonicalize a single registry-host segment: lowercase, drop an explicit /// default port (`:443`/`:80`), and drop a FQDN trailing dot.
The port is split /// off the end first, then the trailing dot, so `ghcr.io.:443` reduces to