From 6cceb9a745fe703ae4660c6ad771558d9b9fef8f Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Sun, 7 Jun 2026 18:12:54 -0400 Subject: [PATCH 1/4] fix: correctness bug sweep + hardening tests across crawlers, patch, and vex Fixes a broad set of correctness, security, and atomicity bugs surfaced by a line-by-line review, each paired with regression tests: - crawlers: single-quote TOML parsing, case canonicalization, vendor/project gate ordering, PnP detection, NuGet legacy/local-mode gating, Maven skip- section boundaries, Python layout/metadata fallbacks - patch: path-escape guards (cargo/go redirect, rollback, sidecars), atomic writes for user manifests (go.mod, Cargo.toml, .cargo/config.toml, package.json, pyproject/requirements), bsdiff header validation, copy_tree symlink chmod, cow hardlink is_file guard, lock timeout overflow - vex: single-quote product detection, schema/verify hardening - api/client: fetch_binary auth error classification, token/slug validation - misc: purl subpath strip, manifest deterministic serialization, severity color ordering, cleanup_blobs orphan handling, pth_hook detection fixes Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/socket-patch-cli/src/args.rs | 82 +++++ crates/socket-patch-cli/src/commands/apply.rs | 40 ++- crates/socket-patch-cli/src/commands/get.rs | 95 ++++++ crates/socket-patch-cli/src/commands/list.rs | 32 +- .../socket-patch-cli/src/commands/lock_cli.rs | 39 ++- .../socket-patch-cli/src/commands/remove.rs | 12 +- .../socket-patch-cli/src/commands/repair.rs | 38 ++- .../socket-patch-cli/src/commands/rollback.rs | 307 +++++++++++++++-- crates/socket-patch-cli/src/commands/scan.rs | 134 +++++++- crates/socket-patch-cli/src/commands/setup.rs | 59 +++- .../socket-patch-cli/src/commands/unlock.rs | 42 +++ crates/socket-patch-cli/src/commands/vex.rs | 36 +- .../src/ecosystem_dispatch.rs | 155 +++++++++ crates/socket-patch-cli/src/json_envelope.rs | 85 +++++ crates/socket-patch-cli/src/lib.rs | 28 ++ 
crates/socket-patch-cli/src/output.rs | 49 ++- .../socket-patch-cli/tests/cli_parse_list.rs | 58 ++++ .../tests/in_process_variant_apply_failure.rs | 141 ++++++++ .../tests/remove_invariants.rs | 69 ++++ .../tests/repair_invariants.rs | 58 ++++ .../tests/setup_invariants.rs | 31 ++ .../tests/setup_matrix_pypi.rs | 70 ++++ crates/socket-patch-core/src/api/client.rs | 190 ++++++++--- .../src/cargo_setup/discover.rs | 189 ++++++++++- .../src/cargo_setup/update.rs | 125 ++++++- .../src/composer_setup/mod.rs | 132 +++++++- .../src/crawlers/cargo_crawler.rs | 116 ++++++- .../src/crawlers/composer_crawler.rs | 157 +++++++-- .../src/crawlers/deno_crawler.rs | 77 +++++ .../src/crawlers/go_crawler.rs | 154 +++++++++ .../src/crawlers/maven_crawler.rs | 197 +++++++++-- .../src/crawlers/npm_crawler.rs | 24 ++ .../src/crawlers/nuget_crawler.rs | 130 ++++++++ .../src/crawlers/pkg_managers.rs | 62 +++- .../src/crawlers/python_crawler.rs | 92 ++++- .../src/crawlers/ruby_crawler.rs | 87 +++++ .../socket-patch-core/src/crawlers/types.rs | 27 ++ crates/socket-patch-core/src/gem_setup/mod.rs | 82 +++++ .../socket-patch-core/src/gem_setup/update.rs | 31 ++ crates/socket-patch-core/src/go_setup/mod.rs | 77 ++++- .../socket-patch-core/src/hash/git_sha256.rs | 98 ++++++ .../src/manifest/operations.rs | 120 ++++++- .../socket-patch-core/src/manifest/schema.rs | 239 ++++++++++++- .../src/package_json/detect.rs | 86 ++++- .../src/package_json/find.rs | 104 +++++- .../src/package_json/update.rs | 121 ++++++- crates/socket-patch-core/src/patch/apply.rs | 123 ++++++- .../socket-patch-core/src/patch/apply_lock.rs | 81 ++++- .../src/patch/cargo_config.rs | 126 ++++++- .../src/patch/cargo_redirect.rs | 232 +++++++++++++ .../socket-patch-core/src/patch/copy_tree.rs | 190 ++++++++++- crates/socket-patch-core/src/patch/diff.rs | 124 ++++++- .../src/patch/go_mod_edit.rs | 115 ++++++- .../src/patch/go_redirect.rs | 315 +++++++++++++++++- crates/socket-patch-core/src/patch/package.rs | 191 ++++++++++- 
.../socket-patch-core/src/patch/rollback.rs | 101 ++++++ .../src/patch/sidecars/cargo.rs | 94 +++++- .../src/patch/sidecars/nuget.rs | 118 ++++++- .../socket-patch-core/src/pth_hook/detect.rs | 76 ++++- crates/socket-patch-core/src/pth_hook/edit.rs | 266 ++++++++++++++- crates/socket-patch-core/src/pth_hook/mod.rs | 3 +- .../src/utils/cleanup_blobs.rs | 72 +++- .../socket-patch-core/src/utils/env_compat.rs | 64 +++- crates/socket-patch-core/src/utils/fs.rs | 70 ++++ crates/socket-patch-core/src/utils/purl.rs | 94 +++++- .../socket-patch-core/src/utils/telemetry.rs | 187 ++++++++++- crates/socket-patch-core/src/vex/build.rs | 77 +++++ .../src/vex/conformance_tests.rs | 82 +++++ crates/socket-patch-core/src/vex/product.rs | 117 ++++++- crates/socket-patch-core/src/vex/schema.rs | 59 ++++ crates/socket-patch-core/src/vex/time.rs | 42 +++ crates/socket-patch-core/src/vex/verify.rs | 124 +++++++ .../binary_fetch_error_classification_e2e.rs | 138 ++++++++ .../tests/blob_fetcher_edges_e2e.rs | 264 +++++++++++++++ .../tests/crawler_npm_e2e.rs | 42 +++ crates/socket-patch-core/tests/diff_e2e.rs | 25 ++ crates/socket-patch-guard/src/lib.rs | 98 ++++++ 77 files changed, 7773 insertions(+), 314 deletions(-) create mode 100644 crates/socket-patch-core/tests/binary_fetch_error_classification_e2e.rs diff --git a/crates/socket-patch-cli/src/args.rs b/crates/socket-patch-cli/src/args.rs index c49734e..5b9e878 100644 --- a/crates/socket-patch-cli/src/args.rs +++ b/crates/socket-patch-cli/src/args.rs @@ -578,6 +578,88 @@ mod tests { ); } + /// `parse_supported_ecosystem` accepts every name this build compiles in + /// and returns it verbatim. 
+ #[test] + fn parse_supported_ecosystem_accepts_compiled_in_names() { + for e in Ecosystem::all() { + let name = e.cli_name(); + assert_eq!( + parse_supported_ecosystem(name), + Ok(name.to_string()), + "{name:?} is compiled in and must be accepted", + ); + } + } + + /// Unsupported / misspelled ecosystem names are rejected with a message + /// that names the offending token and lists the supported set. + #[test] + fn parse_supported_ecosystem_rejects_unknown_names() { + for bad in ["bogus", "NPM", "py-pi", ""] { + let err = parse_supported_ecosystem(bad) + .expect_err("unsupported ecosystem name must be rejected"); + assert!(err.contains(bad), "error should echo the bad token: {err:?}"); + assert!( + err.contains("supported:"), + "error should list the supported set: {err:?}", + ); + } + } + + /// End-to-end through clap: `--ecosystems` splits on commas, validates each + /// token, and rejects the whole parse if any token is unsupported. + #[test] + #[serial_test::serial] + fn ecosystems_flag_splits_and_validates() { + with_clean_socket_env(|| { + let cli = TestCli::try_parse_from(["socket-patch", "--ecosystems", "npm,pypi"]) + .expect("comma-separated supported ecosystems must parse"); + assert_eq!( + cli.common.ecosystems, + Some(vec!["npm".to_string(), "pypi".to_string()]), + ); + + // One bad token in the list aborts the whole parse. + assert!( + TestCli::try_parse_from(["socket-patch", "--ecosystems", "npm,bogus"]).is_err(), + "an unsupported token must fail the parse", + ); + }); + } + + /// Precedence contract: a CLI value wins over the env var for a string flag. 
+ #[test] + #[serial_test::serial] + fn cli_arg_overrides_env_var() { + with_clean_socket_env(|| { + std::env::set_var("SOCKET_MANIFEST_PATH", "from-env.json"); + let cli = + TestCli::try_parse_from(["socket-patch", "--manifest-path", "from-cli.json"]) + .unwrap(); + assert_eq!(cli.common.manifest_path, "from-cli.json"); + std::env::remove_var("SOCKET_MANIFEST_PATH"); + }); + } + + /// Precedence contract: the env var is honored when no CLI value is given, + /// and the clap-declared default applies when neither is set. + #[test] + #[serial_test::serial] + fn env_var_used_then_default_applies() { + with_clean_socket_env(|| { + std::env::set_var("SOCKET_MANIFEST_PATH", "from-env.json"); + let cli = TestCli::try_parse_from(["socket-patch"]).unwrap(); + assert_eq!(cli.common.manifest_path, "from-env.json"); + std::env::remove_var("SOCKET_MANIFEST_PATH"); + + let cli = TestCli::try_parse_from(["socket-patch"]).unwrap(); + assert_eq!(cli.common.manifest_path, DEFAULT_PATCH_MANIFEST_PATH); + assert_eq!(cli.common.download_mode, "diff"); + assert_eq!(cli.common.cwd, PathBuf::from(".")); + }); + } + /// `apply_env_toggles` mirrors `--debug` / `--no-telemetry` into the env /// vars core code reads directly, and is a no-op when the flags are off. /// `#[serial]` because it mutates process-global env state. diff --git a/crates/socket-patch-cli/src/commands/apply.rs b/crates/socket-patch-cli/src/commands/apply.rs index 858d551..1836f67 100644 --- a/crates/socket-patch-cli/src/commands/apply.rs +++ b/crates/socket-patch-cli/src/commands/apply.rs @@ -1235,6 +1235,25 @@ async fn apply_patches_inner( // hash-mismatch and were skipped above), so this // applies a single variant for them; Maven's coexisting // classifier jars each get patched. 
+ } else { + // A variant that reached apply IS the installed + // distribution, so a failure here is a real apply + // failure — flag it even if a *sibling* variant of the + // same base succeeds (Maven's coexisting classifier + // jars, or any base where `--force` attempts every + // variant). Mirrors the npm branch below and the + // rollback loop, which mark `has_errors` on every failed + // result; without this a partial multi-variant failure + // would leave a `failed` event in the envelope while the + // command still reported `success` / exit 0. + has_errors = true; + if !args.common.silent && !args.common.json { + eprintln!( + "Failed to patch {}: {}", + variant_purl, + result.error.as_deref().unwrap_or("unknown error") + ); + } } results.push(result); } @@ -1242,18 +1261,17 @@ async fn apply_patches_inner( if applied { applied_base_purls.insert(base_purl.clone()); } else { + // Nothing applied for this base. `has_errors` was already set + // per-variant above when a variant was attempted-but-failed; + // set it here too for the no-variant-attempted case so both + // paths fail the command. has_errors = true; - if !args.common.silent && !args.common.json { - if attempted { - // The installed variant was found but its patch could - // not be applied (e.g. a later file mismatched) — a - // genuine apply failure, not a missing package. - eprintln!( - "Failed to patch {base_purl}: the installed variant could not be patched" - ); - } else { - eprintln!("Failed to patch {base_purl}: no matching variant found"); - } + if !attempted && !args.common.silent && !args.common.json { + // No variant matched the installed distribution at all — + // the package on disk isn't any known release variant. + // (Attempted-but-failed variants already printed their own + // per-variant failure line above.) 
+ eprintln!("Failed to patch {base_purl}: no matching variant found"); } } } else { diff --git a/crates/socket-patch-cli/src/commands/get.rs b/crates/socket-patch-cli/src/commands/get.rs index f866a6d..e296341 100644 --- a/crates/socket-patch-cli/src/commands/get.rs +++ b/crates/socket-patch-cli/src/commands/get.rs @@ -103,6 +103,13 @@ pub(crate) fn max_vuln_severity( vulns .values() .max_by_key(|v| severity_rank(&v.severity)) + // `max_by_key` only yields `None` for an empty map; a non-empty + // map of exclusively unrecognized severities (all rank 0) would + // otherwise leak a garbage label like "" or "unknown". Drop it so + // the documented "every entry unrecognized → None" contract holds + // and `patch_event_metadata` omits `severity` rather than emitting + // a meaningless value. + .filter(|v| severity_rank(&v.severity) > 0) .map(|v| v.severity.clone()) } @@ -1928,6 +1935,94 @@ mod tests { assert_eq!(max_vuln_severity(&HashMap::new()), None); } + #[test] + fn max_vuln_severity_returns_none_when_all_unrecognized() { + // Non-empty map but every severity is off-canon (rank 0). Per the + // doc contract this must be `None` — NOT `Some("")`/`Some("unknown")`. + // Regression guard: `max_by_key` alone returns the element for any + // non-empty map, leaking a garbage severity label. + let mut vulns = HashMap::new(); + vulns.insert( + "GHSA-a".into(), + VulnerabilityResponse { + cves: Vec::new(), + summary: String::new(), + severity: "informational".into(), + description: String::new(), + }, + ); + vulns.insert( + "GHSA-b".into(), + VulnerabilityResponse { + cves: Vec::new(), + summary: String::new(), + severity: String::new(), + description: String::new(), + }, + ); + assert_eq!(max_vuln_severity(&vulns), None); + } + + #[test] + fn max_vuln_severity_recognized_wins_over_unrecognized() { + // A single recognized severity alongside unrecognized ones must + // surface — the rank-0 filter only suppresses the all-unrecognized + // case, never a real label. 
+ let mut vulns = HashMap::new(); + vulns.insert( + "GHSA-junk".into(), + VulnerabilityResponse { + cves: Vec::new(), + summary: String::new(), + severity: "unknown".into(), + description: String::new(), + }, + ); + vulns.insert( + "GHSA-real".into(), + VulnerabilityResponse { + cves: Vec::new(), + summary: String::new(), + severity: "low".into(), + description: String::new(), + }, + ); + assert_eq!(max_vuln_severity(&vulns).as_deref(), Some("low")); + } + + #[test] + fn patch_event_metadata_omits_severity_when_all_unrecognized() { + // The consumer-facing contract: a patch whose vulnerabilities all + // carry non-canonical severities must NOT emit a `severity` key + // (it would otherwise be `""`), while still listing the vulns. + let mut vulns = HashMap::new(); + vulns.insert( + "GHSA-aaaa-bbbb-cccc".into(), + VulnerabilityResponse { + cves: vec!["CVE-2024-0001".into()], + summary: "Something".into(), + severity: "informational".into(), + description: String::new(), + }, + ); + let patch = PatchResponse { + uuid: String::new(), + purl: String::new(), + published_at: "ts".into(), + files: HashMap::new(), + vulnerabilities: vulns, + description: "desc".into(), + license: "MIT".into(), + tier: "free".into(), + }; + let meta = patch_event_metadata(&patch); + assert!(meta.as_object().unwrap().get("severity").is_none()); + // The vulnerability itself is still surfaced (with its raw label). 
+ let vulns_out = meta["vulnerabilities"].as_array().unwrap(); + assert_eq!(vulns_out.len(), 1); + assert_eq!(vulns_out[0]["severity"], "informational"); + } + #[test] fn patch_event_metadata_includes_all_keys() { let mut vulns = HashMap::new(); diff --git a/crates/socket-patch-cli/src/commands/list.rs b/crates/socket-patch-cli/src/commands/list.rs index 00abb81..4d3d0e3 100644 --- a/crates/socket-patch-cli/src/commands/list.rs +++ b/crates/socket-patch-cli/src/commands/list.rs @@ -96,15 +96,14 @@ fn emit_error(args: &ListArgs, code: &str, message: String) { pub async fn run(args: ListArgs) -> i32 { let manifest_path = args.common.resolved_manifest_path(); - if tokio::fs::metadata(&manifest_path).await.is_err() { - emit_error( - &args, - "manifest_not_found", - format!("Manifest not found at {}", manifest_path.display()), - ); - return 1; - } - + // `read_manifest` is the single source of truth for the three error + // states: `Ok(None)` (file absent), `Err(InvalidData)` (present but + // unparseable), and any other `Err` (genuine I/O failure). We deliberately + // do NOT stat the path first: a `metadata` pre-check is both redundant and + // wrong — it reports *any* stat failure (e.g. an unreadable parent dir) as + // `manifest_not_found`, masking real I/O errors that owe a + // `manifest_unreadable`, and it opens a TOCTOU window where a file removed + // between the stat and the read lands in the wrong error arm. match read_manifest(&manifest_path).await { Ok(Some(manifest)) => { // Sort by PURL so both the JSON envelope and the human-readable @@ -170,11 +169,16 @@ pub async fn run(args: ListArgs) -> i32 { 0 } Ok(None) => { - // Defensive: `read_manifest` only returns `Ok(None)` for a - // missing file, which the metadata pre-check above already - // turned into `manifest_not_found`. Kept so a future loader - // change can't silently fall through without an envelope. 
- emit_error(&args, "manifest_invalid", "Invalid manifest".to_string()); + // `read_manifest` returns `Ok(None)` only when the file does not + // exist (its documented contract), so this is the missing-manifest + // path — `manifest_not_found`, NOT `manifest_invalid` (which means + // the file is present but corrupt). See CLI_CONTRACT.md error-code + // table. + emit_error( + &args, + "manifest_not_found", + format!("Manifest not found at {}", manifest_path.display()), + ); 1 } Err(e) => { diff --git a/crates/socket-patch-cli/src/commands/lock_cli.rs b/crates/socket-patch-cli/src/commands/lock_cli.rs index 9735cd9..77a7b67 100644 --- a/crates/socket-patch-cli/src/commands/lock_cli.rs +++ b/crates/socket-patch-cli/src/commands/lock_cli.rs @@ -104,7 +104,14 @@ pub fn acquire_or_emit( match acquire(socket_dir, Duration::ZERO) { Ok(guard) => drop(guard), Err(LockError::Held) => { - let msg = held_message(timeout); + // The probe above is a *non-blocking* try-once + // (`Duration::ZERO`), so report a zero wait. Threading + // the caller's `timeout` here would claim a "(waited …)" + // that never happened — the probe refuses a live holder + // immediately, it does not wait out the budget first. + // `break_probe_held_message` takes no timeout precisely so + // the wrong value can't be passed back in. + let msg = break_probe_held_message(); emit(command, json, silent, dry_run, "lock_held", &msg, Some(socket_dir)); return Err(1); } @@ -192,6 +199,16 @@ pub fn record_lock_broken(env: &mut Envelope, socket_dir: &Path) { env.record(lock_broken_event(socket_dir)); } +/// Contention message for the `--break-lock` pre-acquire probe. That +/// probe is hard-wired to a non-blocking try-once (`Duration::ZERO`), so +/// the message must never claim a wait, regardless of the caller's +/// `--lock-timeout`. Kept timeout-free on purpose: the call site cannot +/// thread the full budget back in and fabricate a "(waited …)" clause +/// for time that was never spent. 
+fn break_probe_held_message() -> String { + held_message(Duration::ZERO) +} + /// Human-readable description of a `lock_held` contention for the given /// wait budget. A zero budget means the historical non-blocking /// try-once, so we omit the "(waited …)" clause entirely. @@ -492,6 +509,26 @@ mod tests { assert!(!msg.contains("waited"), "zero budget should not claim a wait: {msg}"); } + /// Regression: the `--break-lock` pre-acquire probe is a non-blocking + /// try-once, so its `lock_held` refusal must NEVER claim a wait — even + /// when the caller passes a positive `--lock-timeout`. The earlier + /// code threaded the full `timeout` into the probe's message, so a + /// `--break-lock --lock-timeout 250ms` against a live holder reported + /// `(waited 250ms)` despite refusing immediately. The probe message is + /// now timeout-free by construction; this pins that it carries no wait + /// clause. + #[test] + fn break_probe_held_message_never_claims_a_wait() { + let msg = break_probe_held_message(); + assert!( + !msg.contains("waited"), + "break-lock probe refuses immediately and must not claim a wait: {msg}" + ); + // It is still the same identity sentence the rest of the code + // emits for contention, just without the trailing budget clause. + assert_eq!(msg, held_message(Duration::ZERO)); + } + /// The `--json` failure envelope (previously emitted only via /// `println!`, so untested) has the stable error shape downstream /// consumers pattern-match on: top-level `status: "error"` and diff --git a/crates/socket-patch-cli/src/commands/remove.rs b/crates/socket-patch-cli/src/commands/remove.rs index edb181c..d36ac29 100644 --- a/crates/socket-patch-cli/src/commands/remove.rs +++ b/crates/socket-patch-cli/src/commands/remove.rs @@ -295,8 +295,18 @@ pub async fn run(args: RemoveArgs) -> i32 { // is non-zero so the `rolledBack` count is still reported // even when no blobs happened to be swept (e.g. 
the removed // patch's afterHash blobs are still referenced elsewhere). + // + // Pushed directly rather than via `env.record`: this is a + // purl-less metadata carrier, not a removed manifest entry. + // The per-purl events above are the authoritative + // patch-removal count, so `summary.removed` must equal the + // number of entries deleted (`removed.len()`) — letting this + // carrier bump `removed` too would double-count, reporting + // e.g. `removed: 2` for a single-patch removal that happened + // to sweep an orphan blob. Consumers read the blob/rollback + // totals from `details`, never from `summary.removed`. if blobs_removed > 0 || rollback_count > 0 { - env.record( + env.events.push( PatchEvent::artifact(PatchAction::Removed).with_details(serde_json::json!({ "blobsRemoved": blobs_removed, "rolledBack": rollback_count, diff --git a/crates/socket-patch-cli/src/commands/repair.rs b/crates/socket-patch-cli/src/commands/repair.rs index 7895f01..b6b0306 100644 --- a/crates/socket-patch-cli/src/commands/repair.rs +++ b/crates/socket-patch-cli/src/commands/repair.rs @@ -166,6 +166,14 @@ pub(crate) async fn repair_inner( let download_mode = DownloadMode::parse(&args.common.download_mode).map_err(|e| e.to_string())?; + // `--silent` ("suppress non-error output") must mute the human-readable + // progress just like `--json` does — otherwise a silent repair still + // floods stdout with "Found N missing", "Downloading…", cleanup + // summaries and "Repair complete.". Gate every informational print on + // both, mirroring `get`/`apply`. (The JSON envelope is emitted by the + // caller, so nothing here depends on `json` alone.) 
+ let quiet = args.common.json || args.common.silent; + let mut downloaded_count = 0usize; let mut download_failed_count = 0usize; let mut blobs_cleaned = 0usize; @@ -193,7 +201,7 @@ pub(crate) async fn repair_inner( if !args.common.offline { if !missing_artifacts.is_empty() { - if !args.common.json { + if !quiet { println!( "Found {} missing {} artifact(s)", missing_artifacts.len(), @@ -202,7 +210,7 @@ pub(crate) async fn repair_inner( } if args.common.dry_run { - if !args.common.json { + if !quiet { println!("\nDry run - would download:"); for id in missing_artifacts.iter().take(10) { println!(" - {}...", &id[..12.min(id.len())]); @@ -212,7 +220,7 @@ pub(crate) async fn repair_inner( } } } else { - if !args.common.json { + if !quiet { println!("\nDownloading missing {}s...", download_mode.as_tag()); } let (client, _) = @@ -226,18 +234,18 @@ pub(crate) async fn repair_inner( fetch_missing_sources(&manifest, &sources, download_mode, &client, None).await; downloaded_count = fetch_result.downloaded; download_failed_count = fetch_result.failed; - if !args.common.json { + if !quiet { println!("{}", format_fetch_result(&fetch_result)); } } - } else if !args.common.json { + } else if !quiet { println!( "All {} artifacts are present locally.", download_mode.as_tag() ); } } else if !missing_artifacts.is_empty() { - if !args.common.json { + if !quiet { println!( "Warning: {} {} artifact(s) are missing (offline mode - not downloading)", missing_artifacts.len(), @@ -250,7 +258,7 @@ pub(crate) async fn repair_inner( println!(" ... and {} more", missing_artifacts.len() - 5); } } - } else if !args.common.json { + } else if !quiet { println!( "All {} artifacts are present locally.", download_mode.as_tag() @@ -259,7 +267,7 @@ pub(crate) async fn repair_inner( // Step 2: Clean up unused artifacts across all three directories. 
if !args.download_only { - if !args.common.json { + if !quiet { println!(); } match cleanup_unused_blobs(&manifest, &blobs_path, args.common.dry_run).await { @@ -267,7 +275,7 @@ pub(crate) async fn repair_inner( blobs_checked += cleanup_result.blobs_checked; blobs_cleaned += cleanup_result.blobs_removed; bytes_freed += cleanup_result.bytes_freed; - if !args.common.json { + if !quiet { if cleanup_result.blobs_checked == 0 { println!("No blobs directory found, nothing to clean up."); } else if cleanup_result.blobs_removed == 0 { @@ -281,7 +289,7 @@ pub(crate) async fn repair_inner( } } Err(e) => { - if !args.common.json { + if !quiet { eprintln!("Warning: blob cleanup failed: {e}"); } } @@ -293,7 +301,7 @@ pub(crate) async fn repair_inner( blobs_checked += cleanup_result.blobs_checked; blobs_cleaned += cleanup_result.blobs_removed; bytes_freed += cleanup_result.bytes_freed; - if !args.common.json && cleanup_result.blobs_removed > 0 { + if !quiet && cleanup_result.blobs_removed > 0 { println!( "{}", format_cleanup_result(&cleanup_result, args.common.dry_run) @@ -302,7 +310,7 @@ pub(crate) async fn repair_inner( } } Err(e) => { - if !args.common.json { + if !quiet { eprintln!("Warning: diff cleanup failed: {e}"); } } @@ -314,7 +322,7 @@ pub(crate) async fn repair_inner( blobs_checked += cleanup_result.blobs_checked; blobs_cleaned += cleanup_result.blobs_removed; bytes_freed += cleanup_result.bytes_freed; - if !args.common.json && cleanup_result.blobs_removed > 0 { + if !quiet && cleanup_result.blobs_removed > 0 { println!( "{}", format_cleanup_result(&cleanup_result, args.common.dry_run) @@ -323,14 +331,14 @@ pub(crate) async fn repair_inner( } } Err(e) => { - if !args.common.json { + if !quiet { eprintln!("Warning: package cleanup failed: {e}"); } } } } - if !args.common.dry_run && !args.common.json { + if !args.common.dry_run && !quiet { println!("\nRepair complete."); } diff --git a/crates/socket-patch-cli/src/commands/rollback.rs 
b/crates/socket-patch-cli/src/commands/rollback.rs index aff70b9..862ffc3 100644 --- a/crates/socket-patch-cli/src/commands/rollback.rs +++ b/crates/socket-patch-cli/src/commands/rollback.rs @@ -37,10 +37,11 @@ struct PatchToRollback { patch: PatchRecord, } -// ── local-cargo redirect helpers ───────────────────────────────────────────── -// Local cargo rolls back by dropping the project-local `[patch]` redirect + -// patched copy (no in-place restore, no before-blob). Inert stubs in a build -// without the `cargo` feature. +// ── local-redirect rollback helpers (cargo + go) ───────────────────────────── +// Local cargo/go roll back by dropping the project-local redirect (cargo's +// `[patch]` entry / go's `replace` directive) + the patched copy — no in-place +// restore, no before-blob. Each helper is an inert stub in a build without its +// respective feature. /// True for a cargo PURL in local mode (no `--global` / `--global-prefix`). #[cfg(feature = "cargo")] @@ -51,30 +52,51 @@ fn is_local_cargo(purl: &str, common: &GlobalArgs) -> bool { && Ecosystem::from_purl(purl) == Some(Ecosystem::Cargo) } -/// Copy of `manifest` with local-cargo PURLs removed — used for the -/// before-blob gate, which those PURLs never need (redirect rollback reads no -/// blobs). Avoids blocking an offline redirect rollback on absent blobs. -#[cfg(feature = "cargo")] -fn exclude_local_cargo(manifest: &PatchManifest, common: &GlobalArgs) -> PatchManifest { +/// True for a golang PURL in local mode (no `--global` / `--global-prefix`). +#[cfg(feature = "golang")] +fn is_local_go(purl: &str, common: &GlobalArgs) -> bool { + use socket_patch_core::crawlers::Ecosystem; + !common.global + && common.global_prefix.is_none() + && Ecosystem::from_purl(purl) == Some(Ecosystem::Golang) +} + +/// True when `purl` rolls back by dropping a project-local redirect (cargo or +/// go in local mode) rather than restoring bytes from a before-blob. 
The +/// before-blob gate uses this to skip those PURLs — they read no blobs, so a +/// missing before-blob must not block (or trigger a needless download for) an +/// offline redirect rollback. +fn is_local_redirect(purl: &str, common: &GlobalArgs) -> bool { + #[cfg(feature = "cargo")] + if is_local_cargo(purl, common) { + return true; + } + #[cfg(feature = "golang")] + if is_local_go(purl, common) { + return true; + } + let _ = (purl, common); + false +} + +/// Copy of `manifest` with local-redirect PURLs (cargo + go) removed — used for +/// the before-blob gate, which those PURLs never need. Avoids blocking an +/// offline redirect rollback on absent blobs. +fn exclude_local_redirects(manifest: &PatchManifest, common: &GlobalArgs) -> PatchManifest { PatchManifest { patches: manifest .patches .iter() - .filter(|(purl, _)| !is_local_cargo(purl, common)) + .filter(|(purl, _)| !is_local_redirect(purl, common)) .map(|(k, v)| (k.clone(), v.clone())) .collect(), setup: manifest.setup.clone(), } } -#[cfg(not(feature = "cargo"))] -fn exclude_local_cargo(manifest: &PatchManifest, _common: &GlobalArgs) -> PatchManifest { - manifest.clone() -} - /// Roll back a local-cargo redirect (drop the `[patch]` entry + copy), or -/// `None` if `purl` isn't a local-cargo target (caller falls back to in-place -/// rollback). +/// `None` if `purl` isn't a local-cargo target (caller falls back to the next +/// backend, ultimately in-place rollback). #[cfg(feature = "cargo")] async fn try_rollback_local_cargo( purl: &str, @@ -119,6 +141,50 @@ async fn try_rollback_local_cargo( None } +/// Roll back a local-go redirect (drop the `go.mod` `replace` directive + the +/// patched copy under `.socket/go-patches/`), or `None` if `purl` isn't a +/// local-go target (caller falls back to in-place rollback). The module cache +/// is left pristine by the redirect, so — exactly like cargo — there is no +/// before-blob to restore; mirrors apply's `try_local_go_apply`. 
Go has no +/// `vendor/` fallthrough (apply always redirects local go), so there is no +/// vendored discriminator here. +#[cfg(feature = "golang")] +async fn try_rollback_local_go( + purl: &str, + pkg_path: &Path, + patch: &PatchRecord, + common: &GlobalArgs, +) -> Option { + use socket_patch_core::patch::go_redirect::remove_go_redirect; + if !is_local_go(purl, common) { + return None; + } + let mut result = RollbackResult { + package_key: purl.to_string(), + package_path: pkg_path.display().to_string(), + success: true, + files_verified: Vec::new(), + files_rolled_back: patch.files.keys().cloned().collect(), + error: None, + }; + if let Err(e) = remove_go_redirect(purl, &common.cwd, common.dry_run).await { + result.success = false; + result.files_rolled_back.clear(); + result.error = Some(e.to_string()); + } + Some(result) +} + +#[cfg(not(feature = "golang"))] +async fn try_rollback_local_go( + _purl: &str, + _pkg_path: &Path, + _patch: &PatchRecord, + _common: &GlobalArgs, +) -> Option { + None +} + fn find_patches_to_rollback( manifest: &PatchManifest, identifier: Option<&str>, @@ -500,11 +566,11 @@ async fn rollback_patches_inner( setup: None, }; - // Check for missing beforeHash blobs. Local-cargo PURLs are excluded: - // their rollback just drops the `[patch]` redirect + copy and reads no - // blobs, so a missing before-blob must not block an offline redirect - // rollback. - let gate_manifest = exclude_local_cargo(&filtered_manifest, &args.common); + // Check for missing beforeHash blobs. Local-redirect PURLs (cargo + go) + // are excluded: their rollback just drops the project-local redirect + copy + // and reads no blobs, so a missing before-blob must not block an offline + // redirect rollback. 
+ let gate_manifest = exclude_local_redirects(&filtered_manifest, &args.common); let missing_blobs = get_missing_before_blobs(&gate_manifest, &blobs_path).await; if !missing_blobs.is_empty() { if args.common.offline { @@ -628,21 +694,25 @@ async fn rollback_patches_inner( None => continue, }; - // Local cargo drops the project-local redirect; everything else — - // npm/pypi, and cargo under --global — restores in place. In a - // build without the `cargo` feature this is an inert `None`. + // Local cargo/go drop the project-local redirect; everything else — + // npm/pypi, and cargo/go under --global/--global-prefix — restores + // in place. Without the respective feature the `try_rollback_local_*` + // helpers are inert `None`s. let result = match try_rollback_local_cargo(purl, pkg_path, patch, &args.common).await { Some(r) => r, - None => { - rollback_package_patch( - purl, - pkg_path, - &patch.files, - &blobs_path, - args.common.dry_run, - ) - .await - } + None => match try_rollback_local_go(purl, pkg_path, patch, &args.common).await { + Some(r) => r, + None => { + rollback_package_patch( + purl, + pkg_path, + &patch.files, + &blobs_path, + args.common.dry_run, + ) + .await + } + }, }; if !result.success { @@ -955,7 +1025,7 @@ mod tests { } /// Regression: a local-cargo before-hash that is absent on disk must NOT - /// count as missing once the manifest is run through `exclude_local_cargo` + /// count as missing once the manifest is run through `exclude_local_redirects` /// — for the initial gate or the post-download re-check. Before the fix /// the re-check used the full filtered manifest, so a present-npm + /// missing-cargo manifest still reported the cargo blob missing and @@ -991,11 +1061,174 @@ mod tests { // Gate manifest: the local-cargo PURL is excluded, so its before-blob // is not counted as missing. With the npm blob present, the gate (and // the re-check that now reuses it) reports nothing missing. 
- let gate = exclude_local_cargo(&manifest, &common); + let gate = exclude_local_redirects(&manifest, &common); let gate_missing = get_missing_before_blobs(&gate, blobs).await; assert!( gate_missing.is_empty(), "gate must exclude local-cargo before-blobs, got {gate_missing:?}" ); } + + /// Regression: local-GO redirects must be excluded from the before-blob + /// gate exactly like local-cargo. A go redirect drops the `go.mod` + /// `replace` directive + the patched copy and reads no before-blob, so a + /// missing before-blob must not abort (nor trigger a needless download for) + /// an offline local-go rollback. Before the fix only cargo was excluded, so + /// a local-go patch with an absent before-blob aborted the whole rollback + /// under `--offline`. + #[cfg(feature = "golang")] + #[tokio::test] + async fn gate_manifest_excludes_local_go_before_blobs_from_missing_check() { + let mut patches = HashMap::new(); + patches.insert( + "pkg:golang/github.com%2Fpkg%2Ferrors@0.9.1".to_string(), + record_with_file("uuid-go", "errors.go", "go_before"), + ); + patches.insert( + "pkg:npm/foo@1.0.0".to_string(), + record_with_file("uuid-npm", "index.js", "npm_before"), + ); + let manifest = PatchManifest { patches, setup: None }; + + // Local mode (no --global / --global-prefix). + let common = crate::args::GlobalArgs::default(); + assert!(!common.global && common.global_prefix.is_none()); + + // Blobs dir holds only the npm before-blob; the go one is absent. + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path(); + tokio::fs::write(blobs.join("npm_before"), b"x").await.unwrap(); + + // Full manifest: the go before-blob shows up as missing — exactly what + // the buggy (cargo-only) gate left in, spuriously aborting rollback. + let full_missing = get_missing_before_blobs(&manifest, blobs).await; + assert!(full_missing.contains("go_before")); + + // Gate manifest: the local-go PURL is excluded, so its before-blob is + // not counted as missing. 
With the npm blob present, the gate reports + // nothing missing. + let gate = exclude_local_redirects(&manifest, &common); + let gate_missing = get_missing_before_blobs(&gate, blobs).await; + assert!( + gate_missing.is_empty(), + "gate must exclude local-go before-blobs, got {gate_missing:?}" + ); + + // And `is_local_redirect` must classify the go PURL as a redirect in + // local mode but a global PURL as in-place (gate must keep the latter). + assert!(is_local_redirect("pkg:golang/github.com%2Fpkg%2Ferrors@0.9.1", &common)); + let global = crate::args::GlobalArgs { + global: true, + ..crate::args::GlobalArgs::default() + }; + assert!(!is_local_redirect("pkg:golang/github.com%2Fpkg%2Ferrors@0.9.1", &global)); + } + + /// Regression: rolling back a local-GO patch must DROP the project-local + /// redirect (the `go.mod` `replace` directive + the patched copy under + /// `.socket/go-patches/`), not fall through to in-place rollback. + /// + /// Before the fix, `rollback` only had a cargo redirect backend; a go PURL + /// fell through to `rollback_package_patch` against the pristine module + /// cache, every file verified `AlreadyOriginal`, and the redirect was left + /// active — a silent no-op that reported "already original" while the build + /// kept using the patched copy. + #[cfg(feature = "golang")] + #[tokio::test] + async fn try_rollback_local_go_drops_redirect_and_copy() { + use socket_patch_core::patch::go_mod_edit::{ + ensure_replace_entry, read_replace_entries, + }; + + const MODULE: &str = "github.com/foo/bar"; + const VERSION: &str = "v1.4.2"; + const PURL: &str = "pkg:golang/github.com/foo/bar@v1.4.2"; + + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + // A go.mod with a require directive (NOT socket-owned) plus the + // socket-owned replace directive a prior apply would have written. 
+ tokio::fs::write( + root.join("go.mod"), + "module myproj\n\ngo 1.21\n\nrequire github.com/foo/bar v1.4.2\n", + ) + .await + .unwrap(); + let changed = ensure_replace_entry(root, MODULE, VERSION, false) + .await + .unwrap(); + assert!(changed, "fixture must install a socket-owned replace"); + + // The patched copy the redirect points at. + let copy_dir = root.join(".socket/go-patches/github.com/foo/bar@v1.4.2"); + tokio::fs::create_dir_all(&copy_dir).await.unwrap(); + tokio::fs::write(copy_dir.join("errors.go"), b"// patched\n") + .await + .unwrap(); + + // Sanity: the redirect is in place before rollback. + assert!(read_replace_entries(root) + .await + .iter() + .any(|e| e.module == MODULE && e.socket_owned)); + + let patch = record_with_file("uuid-go", "errors.go", "go_before"); + let common = crate::args::GlobalArgs { + cwd: root.to_path_buf(), + ..crate::args::GlobalArgs::default() + }; + + // `pkg_path` is the (unused for go) pristine module-cache dir. + let result = try_rollback_local_go(PURL, root, &patch, &common) + .await + .expect("go PURL in local mode must be handled by the go backend"); + + assert!(result.success, "rollback failed: {:?}", result.error); + assert!( + result.files_rolled_back.contains(&"errors.go".to_string()), + "the patched file must be reported rolled back, got {:?}", + result.files_rolled_back + ); + + // The socket-owned replace directive is gone... + assert!( + read_replace_entries(root) + .await + .iter() + .all(|e| !(e.module == MODULE && e.socket_owned)), + "socket-owned replace directive must be dropped" + ); + // ...the require directive (user-authored) survives... + assert!(tokio::fs::read_to_string(root.join("go.mod")) + .await + .unwrap() + .contains("require github.com/foo/bar v1.4.2")); + // ...and the patched copy is removed.
+ assert!( + !copy_dir.exists(), + "patched copy under .socket/go-patches must be removed" + ); + } + + /// A go PURL under `--global` is an in-place module-cache rollback, NOT a + /// redirect — `try_rollback_local_go` must decline it so the caller falls + /// through to `rollback_package_patch`. + #[cfg(feature = "golang")] + #[tokio::test] + async fn try_rollback_local_go_declines_global() { + let patch = record_with_file("uuid-go", "errors.go", "go_before"); + let global = crate::args::GlobalArgs { + global: true, + ..crate::args::GlobalArgs::default() + }; + let result = try_rollback_local_go( + "pkg:golang/github.com/foo/bar@v1.4.2", + Path::new("/nonexistent"), + &patch, + &global, + ) + .await; + assert!(result.is_none(), "global go must not use the redirect backend"); + } } diff --git a/crates/socket-patch-cli/src/commands/scan.rs b/crates/socket-patch-cli/src/commands/scan.rs index 6ad5164..d3430b0 100644 --- a/crates/socket-patch-cli/src/commands/scan.rs +++ b/crates/socket-patch-cli/src/commands/scan.rs @@ -149,11 +149,21 @@ async fn preview_apply_gc( socket_dir: &Path, scanned_purls: &HashSet, ) -> GcSummary { - let manifest = match read_manifest(manifest_path).await { + let mut manifest = match read_manifest(manifest_path).await { Ok(Some(m)) => m, _ => return GcSummary::default(), }; let prunable = detect_prunable(&manifest, scanned_purls); + // Mirror `run_apply_gc`: drop the prunable entries from the manifest + // *before* computing orphans (no write — this is the preview). The + // cleanup helpers derive the "referenced" blob/archive set from the + // manifest they're handed, so leaving the prunable entries in place + // would keep their blobs marked as used and the preview would + // under-report `orphan*`/`bytesReclaimable` relative to what the real + // `--prune`/`--sync` run actually frees. 
+ for purl in &prunable { + manifest.patches.remove(purl); + } run_gc(&manifest, prunable, socket_dir, /*dry_run=*/true).await } @@ -1468,6 +1478,128 @@ mod tests { assert_eq!(out.len(), 2, "all variants of a gone package should prune"); } + // ---- preview_apply_gc / run_apply_gc parity ---------------------------- + // The dry-run preview MUST report the same orphan blobs/archives the real + // (wet) prune would remove. Both delete the prunable manifest entries + // first, then sweep; the cleanup helpers derive the "still referenced" + // blob set from the manifest they're handed, so a preview that swept + // against the un-pruned manifest would keep the prunable entries' blobs + // marked "used" and under-report `orphan*`/`bytesReclaimable`. + + /// Write a manifest holding a single entry that references one afterHash + /// blob, plant that blob on disk, and return `(manifest_path, socket_dir, + /// blob_path)`. + fn seed_manifest_with_blob( + tmp: &std::path::Path, + purl: &str, + after_hash: &str, + ) -> (std::path::PathBuf, std::path::PathBuf, std::path::PathBuf) { + let socket_dir = tmp.join(".socket"); + let blobs_dir = socket_dir.join("blobs"); + std::fs::create_dir_all(&blobs_dir).unwrap(); + let blob_path = blobs_dir.join(after_hash); + // Non-trivial size so `bytesReclaimable`/`bytesFreed` is observably > 0. 
+ std::fs::write(&blob_path, vec![0u8; 64]).unwrap(); + + let manifest_path = socket_dir.join("manifest.json"); + let manifest = serde_json::json!({ + "patches": { + purl: { + "uuid": "11111111-1111-4111-8111-111111111111", + "exportedAt": "2024-01-01T00:00:00Z", + "files": { + "package/index.js": { + "beforeHash": "0".repeat(64), + "afterHash": after_hash, + } + }, + "vulnerabilities": {}, + "description": "seed", + "license": "MIT", + "tier": "free", + } + } + }); + std::fs::write( + &manifest_path, + serde_json::to_string_pretty(&manifest).unwrap(), + ) + .unwrap(); + (manifest_path, socket_dir, blob_path) + } + + #[tokio::test] + async fn preview_apply_gc_reports_blobs_of_prunable_entry() { + // The package is not installed (empty scan), so its entry is prunable + // and its only blob is reclaimable. A correct PREVIEW must count that + // blob even though it is still referenced by the not-yet-pruned entry. + let tmp = tempfile::tempdir().unwrap(); + let after_hash = "a".repeat(64); + let (manifest_path, socket_dir, blob_path) = + seed_manifest_with_blob(tmp.path(), "pkg:npm/gone@1.0.0", &after_hash); + + let scanned: HashSet = HashSet::new(); + let preview = preview_apply_gc(&manifest_path, &socket_dir, &scanned).await; + + assert_eq!( + preview.pruned, + vec!["pkg:npm/gone@1.0.0".to_string()], + "preview must list the uninstalled entry as prunable" + ); + assert_eq!( + preview.blobs.blobs_removed, 1, + "preview must count the prunable entry's blob as an orphan \ + (regression: it was masked because the entry still referenced it)" + ); + assert!( + preview.total_bytes() > 0, + "bytesReclaimable must be > 0 when an orphan blob would be freed" + ); + // Preview is non-mutating: blob and manifest untouched. 
+ assert!(blob_path.exists(), "dry-run preview must not delete the blob"); + let m = read_manifest(&manifest_path).await.unwrap().unwrap(); + assert!( + m.patches.contains_key("pkg:npm/gone@1.0.0"), + "dry-run preview must not prune the manifest entry" + ); + } + + #[tokio::test] + async fn preview_and_apply_gc_agree_on_orphan_counts() { + // The preview's reclaimable counts must equal what the wet run frees. + let after_hash = "b".repeat(64); + + let tmp_preview = tempfile::tempdir().unwrap(); + let (mp_p, sd_p, blob_p) = + seed_manifest_with_blob(tmp_preview.path(), "pkg:npm/gone@1.0.0", &after_hash); + let scanned: HashSet = HashSet::new(); + let preview = preview_apply_gc(&mp_p, &sd_p, &scanned).await; + assert!(blob_p.exists(), "preview must not mutate"); + + let tmp_wet = tempfile::tempdir().unwrap(); + let (mp_w, sd_w, blob_w) = + seed_manifest_with_blob(tmp_wet.path(), "pkg:npm/gone@1.0.0", &after_hash); + let wet = run_apply_gc(&mp_w, &sd_w, &scanned).await; + + assert_eq!( + preview.blobs.blobs_removed, wet.blobs.blobs_removed, + "preview and wet run must agree on the orphan-blob count" + ); + assert_eq!( + preview.total_bytes(), + wet.total_bytes(), + "preview and wet run must agree on reclaimable bytes" + ); + assert_eq!(preview.pruned, wet.pruned, "prunable set must match"); + // The wet run actually removed the blob and pruned the entry. 
+ assert!(!blob_w.exists(), "wet run must delete the orphan blob"); + let m = read_manifest(&mp_w).await.unwrap().unwrap(); + assert!( + !m.patches.contains_key("pkg:npm/gone@1.0.0"), + "wet run must prune the entry" + ); + } + // ---- collect_vuln_ids -------------------------------------------------- /// Build a single-patch package whose patch carries the given CVE and diff --git a/crates/socket-patch-cli/src/commands/setup.rs b/crates/socket-patch-cli/src/commands/setup.rs index 69686fe..2170767 100644 --- a/crates/socket-patch-cli/src/commands/setup.rs +++ b/crates/socket-patch-cli/src/commands/setup.rs @@ -19,8 +19,8 @@ use socket_patch_core::package_json::update::{ UpdateStatus, }; use socket_patch_core::pth_hook::{ - add_hook_dependency, deps_contain_hook, detect_python_pm, remove_hook_dependency, ManifestKind, - PthEditResult, PthStatus, PythonPackageManager, + add_hook_dependency, deps_contain_hook, detect_python_pm, pyproject_contains_hook, + remove_hook_dependency, ManifestKind, PthEditResult, PthStatus, PythonPackageManager, }; use socket_patch_core::crawlers::CrawlerOptions; use socket_patch_core::manifest::operations::{read_manifest, write_manifest}; @@ -156,19 +156,27 @@ async fn discover(args: &SetupArgs, excludes: &[String]) -> Vec i32 { +/// Emit the shared "nothing found" result and exit code. `counts` carries the +/// per-command zero-valued summary fields (`check` → configured/needs/errors, +/// `remove` → removed/notConfigured/errors) so the `no_files` envelope keeps the +/// documented shape (CLI_CONTRACT "Setup command contract") instead of dropping +/// them — matching what the plain `setup` `no_files` path already emits. +fn report_no_files(args: &SetupArgs, status: &str, counts: &[(&str, i64)]) -> i32 { if args.common.json { + // `serde_json::Map` preserves insertion order (the crate enables + // `preserve_order`), so status → counts → files comes out in that order. 
+ let mut map = serde_json::Map::new(); + map.insert("status".to_string(), serde_json::json!(status)); + for (key, value) in counts { + map.insert((*key).to_string(), serde_json::json!(value)); + } + map.insert("files".to_string(), serde_json::json!([])); println!( "{}", - serde_json::to_string_pretty(&serde_json::json!({ - "status": status, - "files": [], - })) - .unwrap() + serde_json::to_string_pretty(&serde_json::Value::Object(map)).unwrap() ); } else { - println!("No package.json or Python project found"); + println!("No package.json, Python, Cargo, Go, Bundler, or Composer project found"); } 0 } @@ -323,9 +331,9 @@ pub(crate) async fn configured_ecosystems( // (e.g. `vex --ecosystems cargo` must still see a set-up python project). if is_python_project(&common.cwd).await { let pm = detect_python_pm(&common.cwd).await; - for (path, _) in choose_python_manifests(&common.cwd, pm).await { + for (path, kind) in choose_python_manifests(&common.cwd, pm).await { if let Ok(content) = tokio::fs::read_to_string(&path).await { - if deps_contain_hook(&content) { + if manifest_contains_hook(kind, &content) { set.insert(Ecosystem::Pypi); break; } @@ -386,6 +394,19 @@ const ECO_COMPOSER: &[&str] = &["composer", "php"]; // Python (.pth hook) helpers // ───────────────────────────────────────────────────────────────────────── +/// Is the hook dependency present in a Python manifest's content? Picks the +/// right detector for the manifest kind: `pyproject.toml` needs the *structural* +/// probe ([`pyproject_contains_hook`]) because the classic-Poetry form +/// (`socket-patch = { extras = ["hook"] }`) has no literal `socket-patch[hook]` +/// substring, so the textual probe would mis-report a configured project; +/// `requirements.txt` uses the textual line probe. 
+fn manifest_contains_hook(kind: ManifestKind, content: &str) -> bool { + match kind { + ManifestKind::Pyproject => pyproject_contains_hook(content), + ManifestKind::Requirements => deps_contain_hook(content), + } +} + /// A Python manifest `setup` will edit, plus the resolved package manager. struct PythonPlan { pm: PythonPackageManager, @@ -1290,7 +1311,7 @@ async fn run_check(args: &SetupArgs) -> i32 { for (path, kind) in &plan.manifests { let (state, err) = match tokio::fs::read_to_string(path).await { Ok(content) => { - if deps_contain_hook(&content) { + if manifest_contains_hook(*kind, &content) { (CheckState::Configured, None) } else { (CheckState::NeedsConfiguration, None) @@ -1319,7 +1340,11 @@ async fn run_check(args: &SetupArgs) -> i32 { append_patch_consistency_entries(&args.common, &mut entries).await; if entries.is_empty() { - return report_no_files(args, "no_files"); + return report_no_files( + args, + "no_files", + &[("configured", 0), ("needsConfiguration", 0), ("errors", 0)], + ); } let configured = entries.iter().filter(|(_, _, s, _)| *s == CheckState::Configured).count(); @@ -1423,7 +1448,11 @@ async fn run_remove(args: &SetupArgs) -> i32 { && !gem_preview.present && !composer_preview.present { - return report_no_files(args, "no_files"); + return report_no_files( + args, + "no_files", + &[("removed", 0), ("notConfigured", 0), ("errors", 0)], + ); } let cargo_present = cargo_preview.present; let go_present = go_preview.present; diff --git a/crates/socket-patch-cli/src/commands/unlock.rs b/crates/socket-patch-cli/src/commands/unlock.rs index e53c121..938fd38 100644 --- a/crates/socket-patch-cli/src/commands/unlock.rs +++ b/crates/socket-patch-cli/src/commands/unlock.rs @@ -240,6 +240,28 @@ mod tests { assert_eq!(code, 0); } + /// A stale lock *file* left on disk by a crashed run — with **no** + /// live OS holder — must read as `free` (exit 0), and a plain probe + /// must leave that file in place. 
Guards against a regression where + /// the verdict keys off `apply.lock` merely *existing* rather than + /// off a live advisory lock. (The e2e suite proves this via a + /// release-then-reprobe; this pins it at the unit level too.) + #[tokio::test] + async fn run_reports_free_when_stale_lock_file_present_but_not_held() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + std::fs::create_dir_all(&socket_dir).unwrap(); + // Leftover file, but nobody holds the OS lock. + std::fs::write(socket_dir.join("apply.lock"), b"").unwrap(); + + let code = run(args_in(dir.path(), false)).await; + assert_eq!(code, 0, "an unheld leftover lock file must read as free"); + assert!( + socket_dir.join("apply.lock").is_file(), + "a plain (no --release) probe must not delete the file" + ); + } + /// Active holder (via core `acquire`) → `unlock` reports /// `held`, exits 1, and the file remains on disk. #[tokio::test] @@ -295,6 +317,26 @@ mod tests { ); } + /// `--release` against a stale (unheld) leftover removes it and + /// exits 0 — the recovery path. Distinct from + /// `run_deletes_lock_file_when_release_and_free` only in intent + /// (post-crash leftover), but kept as a named guard so the + /// stale-file recovery contract is explicit. + #[tokio::test] + async fn run_release_removes_stale_unheld_lock_file() { + let dir = tempfile::tempdir().unwrap(); + let socket_dir = dir.path().join(".socket"); + std::fs::create_dir_all(&socket_dir).unwrap(); + std::fs::write(socket_dir.join("apply.lock"), b"crashed-run-leftover").unwrap(); + + let code = run(args_in(dir.path(), true)).await; + assert_eq!(code, 0); + assert!( + !socket_dir.join("apply.lock").exists(), + "--release must remove an unheld stale lock file" + ); + } + /// `--release` against a HELD lock refuses (exit 1), file stays. 
#[tokio::test] async fn run_refuses_release_when_held() { diff --git a/crates/socket-patch-cli/src/commands/vex.rs b/crates/socket-patch-cli/src/commands/vex.rs index 6ceacf6..575f587 100644 --- a/crates/socket-patch-cli/src/commands/vex.rs +++ b/crates/socket-patch-cli/src/commands/vex.rs @@ -239,13 +239,6 @@ pub async fn run(args: VexArgs) -> i32 { } } -/// Core VEX pipeline shared by the standalone `vex` command and the -/// embedded `apply`/`scan` `--vex` paths: resolve the product, verify the -/// manifest against disk (unless `no_verify`), build the OpenVEX document, -/// serialize, write (or print to stdout when `output` is `None`), and fire -/// telemetry. Returns a [`VexWriteSummary`] on success or a structured -/// [`VexGenError`] (with a stable code) on failure. All `track_vex_*` -/// telemetry is fired here so every caller reports consistently. /// Map a `setup.manual` entry to an `Ecosystem`. Accepts the canonical /// `cli_name` plus the friendly aliases `setup --exclude`/`--ecosystems` accept /// (`go`/`golang`, `python`/`pypi`, `ruby`/`gem`, `php`/`composer`). Names for @@ -275,6 +268,13 @@ fn ecosystem_from_manual_name(name: &str) -> Option { } } +/// Core VEX pipeline shared by the standalone `vex` command and the +/// embedded `apply`/`scan` `--vex` paths: resolve the product, verify the +/// manifest against disk (unless `no_verify`), build the OpenVEX document, +/// serialize, write (or print to stdout when `output` is `None`), and fire +/// telemetry. Returns a [`VexWriteSummary`] on success or a structured +/// [`VexGenError`] (with a stable code) on failure. All `track_vex_*` +/// telemetry is fired here so every caller reports consistently. 
pub(crate) async fn generate_vex( common: &GlobalArgs, params: &VexBuildParams, @@ -601,6 +601,28 @@ mod tests { assert_eq!(ecosystem_from_manual_name("deno"), Some(Ecosystem::Deno)); } + // Property 7 completeness, the reverse direction of the test above and + // future-proof: every ecosystem the build can classify a PURL for (i.e. + // every `Ecosystem::all()` variant) MUST round-trip through its canonical + // `cli_name` back to itself via `ecosystem_from_manual_name`. Otherwise a + // `manual`-declared patch for that ecosystem would be silently dropped from + // the VEX doc by the `retain` in `generate_vex`. Iterating `all()` (rather + // than hard-coding names) means adding a new ecosystem without wiring up its + // `manual` alias fails this test instead of shipping a silent drop. + #[test] + fn every_compiled_ecosystem_is_declarable_manual_via_cli_name() { + for &e in Ecosystem::all() { + assert_eq!( + ecosystem_from_manual_name(e.cli_name()), + Some(e), + "ecosystem {:?} (cli_name {:?}) is not reachable via ecosystem_from_manual_name — \ + its `manual`-declared patches would be silently dropped from VEX", + e, + e.cli_name(), + ); + } + } + #[derive(Parser)] struct Wrap { #[command(subcommand)] diff --git a/crates/socket-patch-cli/src/ecosystem_dispatch.rs b/crates/socket-patch-cli/src/ecosystem_dispatch.rs index b7feace..a2d6ade 100644 --- a/crates/socket-patch-cli/src/ecosystem_dispatch.rs +++ b/crates/socket-patch-cli/src/ecosystem_dispatch.rs @@ -880,4 +880,159 @@ mod tests { let map = partition_purls(&purls, Some(allowed.as_slice())); assert!(map.is_empty()); } + + // ---- dispatch_find orchestration (end-to-end via real crawlers) ------ + // + // The pure merge/override helpers above are covered in isolation. 
These + // exercise the full `dispatch_find` wiring — discover-paths → find_by_purls + // → unified `purl -> path` map — through the real npm crawler against a + // temp `node_modules`, so a regression in the macro plumbing (wrong + // crawler/path method, dropped result, swapped merge) is caught. + + use std::io::Write as _; + + /// Lay down `node_modules/<name>/package.json` under `root` with the + /// given version, returning the package directory the crawler should + /// resolve the PURL to. + fn write_npm_package(root: &std::path::Path, name: &str, version: &str) -> PathBuf { + let pkg_dir = root.join("node_modules").join(name); + std::fs::create_dir_all(&pkg_dir).unwrap(); + let mut f = std::fs::File::create(pkg_dir.join("package.json")).unwrap(); + write!(f, r#"{{"name":"{name}","version":"{version}"}}"#).unwrap(); + pkg_dir + } + + fn local_options(cwd: PathBuf) -> CrawlerOptions { + CrawlerOptions { + cwd, + global: false, + global_prefix: None, + batch_size: 100, + } + } + + #[tokio::test] + async fn find_packages_for_purls_maps_npm_purl_to_install_dir() { + let tmp = tempfile::tempdir().unwrap(); + let pkg_dir = write_npm_package(tmp.path(), "foo", "1.0.0"); + + let partitioned = partition_purls(&["pkg:npm/foo@1.0.0".to_string()], None); + let out = + find_packages_for_purls(&partitioned, &local_options(tmp.path().to_path_buf()), true) + .await; + + // The unified map must key the result by the exact PURL handed in + // (npm = passthrough + first-wins) and point at the install dir. + assert_eq!(out.get("pkg:npm/foo@1.0.0"), Some(&pkg_dir)); + } + + #[tokio::test] + async fn find_packages_for_purls_skips_version_mismatch() { + // The crawler only matches an installed dir whose version equals the + // PURL's; a mismatched version must yield no mapping (guards against + // the dispatch returning a path for the wrong release).
+ let tmp = tempfile::tempdir().unwrap(); + write_npm_package(tmp.path(), "foo", "2.0.0"); + + let partitioned = partition_purls(&["pkg:npm/foo@1.0.0".to_string()], None); + let out = + find_packages_for_purls(&partitioned, &local_options(tmp.path().to_path_buf()), true) + .await; + assert!(out.is_empty()); + } + + #[tokio::test] + async fn find_packages_for_rollback_keeps_full_npm_key() { + // Non-variant ecosystems use `merge_first_wins` even on the rollback + // path, so a qualified npm PURL must round-trip under its exact key + // (a regression that routed npm through `merge_qualified` would drop + // it, since the crawler echoes the verbatim PURL back). + let tmp = tempfile::tempdir().unwrap(); + let pkg_dir = write_npm_package(tmp.path(), "foo", "1.0.0"); + + let qualified = "pkg:npm/foo@1.0.0?vcs_url=git@github.com".to_string(); + let partitioned = partition_purls(std::slice::from_ref(&qualified), None); + let out = + find_packages_for_rollback(&partitioned, &local_options(tmp.path().to_path_buf()), true) + .await; + assert_eq!(out.get(&qualified), Some(&pkg_dir)); + } + + #[tokio::test] + async fn dispatch_find_empty_partition_yields_empty_map() { + let tmp = tempfile::tempdir().unwrap(); + let empty: HashMap> = HashMap::new(); + let opts = local_options(tmp.path().to_path_buf()); + assert!(find_packages_for_purls(&empty, &opts, true).await.is_empty()); + assert!(find_packages_for_rollback(&empty, &opts, true) + .await + .is_empty()); + } + + // ---- experimental Maven/NuGet runtime gates -------------------------- + // + // `crawl_all_ecosystems` only walks Maven / NuGet when the operator has + // opted in via `SOCKET_EXPERIMENTAL_*`. The gate's observable effect is + // whether the ecosystem appears in the returned per-ecosystem `counts` + // map at all: a crawled-but-empty ecosystem gets a `0` entry; a gated-off + // one gets no entry. That distinction lets us test the gate without a + // real Maven repo / NuGet cache fixture. 
+ + #[cfg(feature = "maven")] + #[tokio::test] + #[serial_test::serial(experimental_gate_env)] + async fn crawl_all_gates_maven_on_runtime_flag() { + let tmp = tempfile::tempdir().unwrap(); + let opts = local_options(tmp.path().to_path_buf()); + + std::env::remove_var("SOCKET_EXPERIMENTAL_MAVEN"); + let (_, counts) = crawl_all_ecosystems(&opts).await; + assert!( + !counts.contains_key(&Ecosystem::Maven), + "Maven must not be crawled when the experimental flag is unset" + ); + + std::env::set_var("SOCKET_EXPERIMENTAL_MAVEN", "1"); + let (_, counts) = crawl_all_ecosystems(&opts).await; + assert!( + counts.contains_key(&Ecosystem::Maven), + "Maven must be crawled once the experimental flag is set" + ); + std::env::remove_var("SOCKET_EXPERIMENTAL_MAVEN"); + } + + #[cfg(feature = "nuget")] + #[tokio::test] + #[serial_test::serial(experimental_gate_env)] + async fn crawl_all_gates_nuget_on_runtime_flag() { + let tmp = tempfile::tempdir().unwrap(); + let opts = local_options(tmp.path().to_path_buf()); + + std::env::remove_var("SOCKET_EXPERIMENTAL_NUGET"); + let (_, counts) = crawl_all_ecosystems(&opts).await; + assert!( + !counts.contains_key(&Ecosystem::Nuget), + "NuGet must not be crawled when the experimental flag is unset" + ); + + std::env::set_var("SOCKET_EXPERIMENTAL_NUGET", "1"); + let (_, counts) = crawl_all_ecosystems(&opts).await; + assert!( + counts.contains_key(&Ecosystem::Nuget), + "NuGet must be crawled once the experimental flag is set" + ); + std::env::remove_var("SOCKET_EXPERIMENTAL_NUGET"); + } + + /// The always-on ecosystems must appear in `counts` unconditionally — + /// guards against one being accidentally moved behind a runtime gate. 
+ #[tokio::test] + #[serial_test::serial(experimental_gate_env)] + async fn crawl_all_always_includes_core_ecosystems() { + let tmp = tempfile::tempdir().unwrap(); + let (_, counts) = crawl_all_ecosystems(&local_options(tmp.path().to_path_buf())).await; + assert!(counts.contains_key(&Ecosystem::Npm)); + assert!(counts.contains_key(&Ecosystem::Pypi)); + assert!(counts.contains_key(&Ecosystem::Gem)); + } } diff --git a/crates/socket-patch-cli/src/json_envelope.rs b/crates/socket-patch-cli/src/json_envelope.rs index 2bc5183..73fab72 100644 --- a/crates/socket-patch-cli/src/json_envelope.rs +++ b/crates/socket-patch-cli/src/json_envelope.rs @@ -789,4 +789,89 @@ mod tests { assert_eq!(v["events"][0]["details"]["tier"], "free"); assert_eq!(v["events"][0]["details"]["vulns"], serde_json::json!([1, 2])); } + + #[test] + fn failed_event_serializes_error_not_reason() { + // `with_error` is exercised by several tests, but they all assert + // only `status`/`summary` — none ever inspected the serialized + // event. Per CLI_CONTRACT.md a `failed` event carries `errorCode` + // + `error`; the human `reason` field is reserved for `skipped`. + // Pin both halves so a builder that mis-routed the message into + // `reason` (or dropped the routing tag) can't slip through. + let event = PatchEvent::new(PatchAction::Failed, "pkg:npm/bar@2.0.0") + .with_error("apply_failed", "hash mismatch after write"); + let v: serde_json::Value = + serde_json::from_str(&serde_json::to_string(&event).unwrap()).unwrap(); + let obj = v.as_object().unwrap(); + assert_eq!(obj["action"], "failed"); + assert_eq!(obj["errorCode"], "apply_failed"); + assert_eq!(obj["error"], "hash mismatch after write"); + // The Failed path must NOT populate `reason` — that key is the + // skipped/human channel and a consumer routing on its presence + // would misclassify the event. 
+ assert!(!obj.contains_key("reason")); + } + + #[test] + fn skipped_reason_does_not_leak_into_error_field() { + // Mirror of the above for `with_reason`: it sets `errorCode` + + // `reason` and must leave `error` unset, so a skip is never + // mistaken for a hard failure by a consumer keying on `error`. + let event = PatchEvent::new(PatchAction::Skipped, "pkg:npm/foo@1.0.0") + .with_reason("already_patched", "Files match afterHash"); + let v: serde_json::Value = + serde_json::from_str(&serde_json::to_string(&event).unwrap()).unwrap(); + let obj = v.as_object().unwrap(); + assert_eq!(obj["errorCode"], "already_patched"); + assert_eq!(obj["reason"], "Files match afterHash"); + assert!(!obj.contains_key("error")); + } + + #[test] + fn every_command_serializes_to_its_contract_tag() { + // `empty_envelope_has_stable_shape`/`special_statuses_*` only ever + // serialized `scan`/`remove`/`get`. Pin the full `Command` + // vocabulary (lowercase, no separators) so a renamed or reordered + // `rename_all` arm can't silently change what `command` a + // consumer routes on. + for (command, tag) in [ + (Command::Apply, "apply"), + (Command::Rollback, "rollback"), + (Command::Get, "get"), + (Command::Scan, "scan"), + (Command::List, "list"), + (Command::Remove, "remove"), + (Command::Repair, "repair"), + (Command::Setup, "setup"), + (Command::Unlock, "unlock"), + (Command::Vex, "vex"), + ] { + let serialized = serde_json::to_string(&command).unwrap(); + assert_eq!(serialized, format!("\"{tag}\""), "Command::{command:?}"); + } + } + + #[test] + fn recording_failed_overrides_success_like_status() { + // The exit-code contract treats any `failed` event as exit 1 + // ("Exit 1 when status is partialFailure (any events[*].action == + // \"failed\")"). `record` enforces that by escalating every + // non-Error status — including the success-like specials + // (`notFound`, `noManifest`, `paidRequired`) — to PartialFailure. + // Only a hard `Error` outranks it. 
Pin that so the auto-escalation + // can't regress to leaving a `failed` event under an exit-0 status. + for start in [Status::NotFound, Status::NoManifest, Status::PaidRequired] { + let mut env = Envelope::new(Command::Remove); + env.status = start; + env.record( + PatchEvent::new(PatchAction::Failed, "pkg:npm/bar@2.0.0") + .with_error("rollback_failed", "boom"), + ); + assert_eq!( + env.status, + Status::PartialFailure, + "{start:?} + failed event must escalate to partialFailure" + ); + } + } } diff --git a/crates/socket-patch-cli/src/lib.rs b/crates/socket-patch-cli/src/lib.rs index ff33859..e33b472 100644 --- a/crates/socket-patch-cli/src/lib.rs +++ b/crates/socket-patch-cli/src/lib.rs @@ -295,6 +295,34 @@ mod tests { } } + #[test] + fn fallback_forwards_value_bearing_flag_in_order() { + // The existing forwarding tests only use boolean flags, which don't + // consume the following token. A value-bearing flag (`--manifest-path + // `) exercises the splice ordering differently: an off-by-one in + // `extend_from_slice(&argv[1..])` would either drop the flag's value or + // shift it onto the wrong token. Passing the flag explicitly wins over + // its `SOCKET_MANIFEST_PATH` env fallback, so this holds regardless of + // ambient env. + let cli = parse_with_uuid_fallback(argv(&[ + "socket-patch", + UUID, + "--manifest-path", + "custom/forwarded.json", + ])) + .unwrap(); + match cli.command { + Commands::Get(args) => { + assert_eq!(args.identifier, UUID); + assert_eq!( + args.common.manifest_path, "custom/forwarded.json", + "the value-bearing flag and its argument must survive the rewrite in order" + ); + } + _ => panic!("expected Commands::Get"), + } + } + #[test] fn fallback_handles_no_args_without_panicking() { // Only the program name is present (argv.len() == 1). 
The diff --git a/crates/socket-patch-cli/src/output.rs b/crates/socket-patch-cli/src/output.rs index abc6847..1c84219 100644 --- a/crates/socket-patch-cli/src/output.rs +++ b/crates/socket-patch-cli/src/output.rs @@ -21,8 +21,8 @@ pub fn format_severity(s: &str, use_color: bool) -> String { return s.to_string(); } match s.to_lowercase().as_str() { - "critical" => format!("\x1b[31m{s}\x1b[0m"), - "high" => format!("\x1b[91m{s}\x1b[0m"), + "critical" => format!("\x1b[91m{s}\x1b[0m"), + "high" => format!("\x1b[31m{s}\x1b[0m"), "medium" => format!("\x1b[33m{s}\x1b[0m"), "low" => format!("\x1b[36m{s}\x1b[0m"), _ => s.to_string(), @@ -114,7 +114,7 @@ mod tests { assert!(out.starts_with("\x1b["), "expected ANSI prefix: {out:?}"); assert!(out.contains("critical"), "expected input verbatim: {out:?}"); assert!(out.ends_with("\x1b[0m"), "expected ANSI reset: {out:?}"); - assert!(out.contains("31"), "expected red code 31: {out:?}"); + assert!(out.contains("91"), "expected bright-red code 91: {out:?}"); } #[test] @@ -123,7 +123,7 @@ mod tests { assert!(out.starts_with("\x1b["), "expected ANSI prefix: {out:?}"); assert!(out.contains("high"), "expected input verbatim: {out:?}"); assert!(out.ends_with("\x1b[0m"), "expected ANSI reset: {out:?}"); - assert!(out.contains("91"), "expected bright-red code 91: {out:?}"); + assert!(out.contains("31"), "expected red code 31: {out:?}"); } #[test] @@ -150,7 +150,7 @@ mod tests { assert!(out.starts_with("\x1b["), "expected ANSI prefix: {out:?}"); assert!(out.contains("CRITICAL"), "expected input verbatim: {out:?}"); assert!(out.ends_with("\x1b[0m"), "expected ANSI reset: {out:?}"); - assert!(out.contains("31"), "expected red code 31: {out:?}"); + assert!(out.contains("91"), "expected bright-red code 91: {out:?}"); } #[test] @@ -159,7 +159,7 @@ mod tests { assert!(out.starts_with("\x1b["), "expected ANSI prefix: {out:?}"); assert!(out.contains("Critical"), "expected input verbatim: {out:?}"); assert!(out.ends_with("\x1b[0m"), "expected ANSI 
reset: {out:?}"); - assert!(out.contains("31"), "expected red code 31: {out:?}"); + assert!(out.contains("91"), "expected bright-red code 91: {out:?}"); } #[test] @@ -176,7 +176,7 @@ mod tests { assert!(out.starts_with("\x1b["), "expected ANSI prefix: {out:?}"); assert!(out.contains("HIGH"), "expected input verbatim: {out:?}"); assert!(out.ends_with("\x1b[0m"), "expected ANSI reset: {out:?}"); - assert!(out.contains("91"), "expected bright-red code 91: {out:?}"); + assert!(out.contains("31"), "expected red code 31: {out:?}"); } #[test] @@ -216,6 +216,41 @@ mod tests { assert_eq!(out, ""); } + #[test] + fn format_severity_full_color_ramp_is_exact() { + // Pin every known arm to its exact wrapper so an accidental palette + // edit is caught, not just "contains a digit". + assert_eq!(format_severity("critical", true), "\x1b[91mcritical\x1b[0m"); + assert_eq!(format_severity("high", true), "\x1b[31mhigh\x1b[0m"); + assert_eq!(format_severity("medium", true), "\x1b[33mmedium\x1b[0m"); + assert_eq!(format_severity("low", true), "\x1b[36mlow\x1b[0m"); + } + + #[test] + fn format_severity_critical_is_more_prominent_than_high() { + // Regression: `critical` is the worst severity and must render at + // least as loud as `high`. The ramp uses the high-intensity (9x) red + // for critical and the standard (3x) red for high; swapping them (the + // original bug) made `high` brighter than `critical`. + let crit = format_severity("critical", true); + let high = format_severity("high", true); + assert_ne!(crit, high, "critical and high must use distinct colors"); + assert!( + crit.contains("\x1b[91m"), + "critical must use high-intensity red 91: {crit:?}" + ); + assert!( + high.contains("\x1b[31m"), + "high must use standard red 31: {high:?}" + ); + // Guard the inversion directly: critical must not be wrapped in the + // duller standard-red code that belongs to `high`. 
+ assert!( + !crit.contains("\x1b[31m"), + "critical must not use the duller standard red reserved for high: {crit:?}" + ); + } + // ---- color ---- #[test] diff --git a/crates/socket-patch-cli/tests/cli_parse_list.rs b/crates/socket-patch-cli/tests/cli_parse_list.rs index 7af2c35..5f2efc2 100644 --- a/crates/socket-patch-cli/tests/cli_parse_list.rs +++ b/crates/socket-patch-cli/tests/cli_parse_list.rs @@ -366,6 +366,64 @@ fn empty_file_manifest_reports_manifest_invalid_via_binary() { assert_eq!(v["error"]["code"], "manifest_invalid", "got envelope: {v}"); } +#[test] +fn missing_manifest_under_valid_cwd_reports_manifest_not_found_via_binary() { + // The common missing-manifest case: cwd exists, but `.socket/manifest.json` + // does not. `read_manifest` returns `Ok(None)` here, which must surface as + // `manifest_not_found` — NOT `manifest_invalid`. (Regression: the `Ok(None)` + // arm previously hard-coded `manifest_invalid`, telling consumers a missing + // file was corrupt. It was masked by a now-removed metadata pre-check.) + let tmp = tempfile::tempdir().unwrap(); + let out = run_list_binary(tmp.path(), &["--json"]); + let v: serde_json::Value = + serde_json::from_str(String::from_utf8_lossy(&out.stdout).trim()) + .expect("stdout must be valid JSON envelope"); + assert_eq!(out.status.code(), Some(1), "missing manifest must exit 1"); + assert_eq!(v["status"], "error"); + assert_eq!( + v["error"]["code"], "manifest_not_found", + "missing manifest must be manifest_not_found, got envelope: {v}" + ); + let msg = v["error"]["message"].as_str().expect("error message"); + assert!( + msg.contains("Manifest not found"), + "message must name the missing manifest, got: {msg}" + ); +} + +#[test] +fn manifest_path_through_regular_file_reports_unreadable_via_binary() { + // A genuine I/O error reaching the manifest must be `manifest_unreadable`, + // never `manifest_not_found`. 
Here the manifest path is nested *under a + // regular file* (`/manifest.json`), so the OS rejects the read with + // ENOTDIR — an I/O error, not file-absence. + // + // Regression: `run()` used to stat the path with `tokio::fs::metadata` + // first and treat ANY stat failure as `manifest_not_found`, so this case + // (and an unreadable parent dir, etc.) was misreported as a missing file. + // Removing that pre-check lets `read_manifest`'s I/O error classify it + // correctly. + let tmp = tempfile::tempdir().unwrap(); + let blocker = tmp.path().join("not-a-dir"); + std::fs::write(&blocker, b"i am a regular file").unwrap(); + let manifest_path = blocker.join("manifest.json"); + + let out = run_list_binary( + tmp.path(), + &["--json", "--manifest-path", manifest_path.to_str().unwrap()], + ); + let v: serde_json::Value = + serde_json::from_str(String::from_utf8_lossy(&out.stdout).trim()) + .expect("stdout must be valid JSON envelope"); + assert_eq!(out.status.code(), Some(1), "I/O error must exit 1"); + assert_eq!(v["status"], "error"); + assert_eq!( + v["error"]["code"], "manifest_unreadable", + "a non-absence I/O error must be manifest_unreadable, not \ + manifest_not_found, got envelope: {v}" + ); +} + // --------------------------------------------------------------------------- // Subprocess content tests — the in-process run() tests above only assert the // exit code. 
run() prints the actual listing to stdout (which cannot be diff --git a/crates/socket-patch-cli/tests/in_process_variant_apply_failure.rs b/crates/socket-patch-cli/tests/in_process_variant_apply_failure.rs index 6ede453..0b22f8e 100644 --- a/crates/socket-patch-cli/tests/in_process_variant_apply_failure.rs +++ b/crates/socket-patch-cli/tests/in_process_variant_apply_failure.rs @@ -234,3 +234,144 @@ fn failed_installed_variant_is_not_also_reported_not_installed() { variant {purl}; the failed-apply variant was misreported as not installed: {out}" ); } + +/// Regression: a multi-variant base PURL where ONE variant applies cleanly +/// but a SIBLING variant fails must flip the command to a non-zero exit / +/// `partialFailure` — not silently report success because one variant +/// happened to apply. +/// +/// The apply variant branch tracks an `applied` flag and only flagged +/// `has_errors` when *no* variant applied. A successful sibling therefore +/// masked a failed variant: the JSON envelope carried a `failed` event yet +/// the command exited 0 with `status: success`. The npm branch and the +/// rollback loop both set `has_errors` on *every* failed result; this pins +/// the variant branch to the same contract. +/// +/// `--force` is the lever that makes every variant of the base get +/// attempted (it bypasses the per-variant first-file installed-distribution +/// check), so both variants reach `apply_package_patch`: one with a valid +/// `afterHash` blob (applies), one with a decoy blob that does not hash to +/// its `afterHash` (fails the pre-write hash check). 
+#[test] +fn partial_multi_variant_failure_fails_the_command() { + if find_python().is_none() { + println!("SKIP: python3 not on PATH"); + return; + } + + let tmp = tempfile::tempdir().expect("tempdir"); + let six_path = install_six(tmp.path()); + let original = std::fs::read(&six_path).expect("read six.py"); + let before_hash = git_sha256(&original); + + // Variant A: a genuine patch whose blob hashes to its declared + // `afterHash` → applies cleanly. + let mut patched_a = original.clone(); + patched_a.extend_from_slice(b"\n# PATCH-A\n"); + let after_hash_a = git_sha256(&patched_a); + + // Variant B: declares an `afterHash` for content the blob will NOT + // contain (the blob holds the unpatched original), so the pre-write + // hash check inside `apply_file_patch` fails → this variant fails. + let mut intended_b = original.clone(); + intended_b.extend_from_slice(b"\n# PATCH-B\n"); + let after_hash_b = git_sha256(&intended_b); + + let socket_dir = tmp.path().join(".socket"); + std::fs::create_dir_all(socket_dir.join("blobs")).expect("mk .socket/blobs"); + // A's blob is valid; B's blob is a decoy (original bytes under B's hash). 
+ std::fs::write(socket_dir.join("blobs").join(&after_hash_a), &patched_a) + .expect("write valid blob A"); + std::fs::write(socket_dir.join("blobs").join(&after_hash_b), &original) + .expect("write decoy blob B"); + + let base = format!("pkg:pypi/{PYPI_PACKAGE}@{PYPI_VERSION}"); + let variant_a = format!("{base}?artifact_id=six-{PYPI_VERSION}-py2.py3-none-any.whl"); + let variant_b = format!("{base}?artifact_id=six-{PYPI_VERSION}.tar.gz"); + let key_a = variant_a.clone(); + let key_b = variant_b.clone(); + let manifest = serde_json::json!({ + "patches": { + key_a: { + "uuid": UUID, + "exportedAt": "2024-01-01T00:00:00Z", + "files": { "six.py": { "beforeHash": before_hash, "afterHash": after_hash_a } }, + "vulnerabilities": {}, + "description": "variant A (applies)", + "license": "MIT", + "tier": "free" + }, + key_b: { + "uuid": UUID, + "exportedAt": "2024-01-01T00:00:00Z", + "files": { "six.py": { "beforeHash": before_hash, "afterHash": after_hash_b } }, + "vulnerabilities": {}, + "description": "variant B (fails)", + "license": "MIT", + "tier": "free" + } + } + }); + std::fs::write( + socket_dir.join("manifest.json"), + serde_json::to_vec_pretty(&manifest).unwrap(), + ) + .expect("write manifest"); + + let output = Command::new(binary()) + .args([ + "apply", + "--force", + "--offline", + "--ecosystems", + "pypi", + "--json", + "--cwd", + tmp.path().to_str().unwrap(), + ]) + .env_remove("SOCKET_API_TOKEN") + .env_remove("SOCKET_OFFLINE") + .env_remove("SOCKET_ECOSYSTEMS") + .env_remove("SOCKET_JSON") + .env_remove("SOCKET_FORCE") + .env_remove("SOCKET_CWD") + .env_remove("SOCKET_MANIFEST_PATH") + .output() + .expect("run socket-patch apply"); + let code = output.status.code().unwrap_or(-1); + let out = String::from_utf8_lossy(&output.stdout).to_string(); + + // The core regression: a failed sibling variant must fail the command. 
+ assert_eq!( + code, 1, + "a partial multi-variant failure must exit 1, not be masked by the \ + successful sibling; stdout: {out}" + ); + + let env: serde_json::Value = + serde_json::from_str(&out).unwrap_or_else(|e| panic!("envelope not JSON ({e}): {out}")); + let events = env["events"] + .as_array() + .unwrap_or_else(|| panic!("no events array in envelope: {out}")); + + // Prove the scenario was genuinely exercised: exactly one variant + // applied and exactly one failed (not a total failure). + let applied: Vec<&serde_json::Value> = + events.iter().filter(|e| e["action"] == "applied").collect(); + let failed: Vec<&serde_json::Value> = + events.iter().filter(|e| e["action"] == "failed").collect(); + assert_eq!( + applied.len(), + 1, + "expected exactly one applied variant: {out}" + ); + assert_eq!(failed.len(), 1, "expected exactly one failed variant: {out}"); + assert_eq!(applied[0]["purl"], serde_json::Value::String(variant_a)); + assert_eq!(failed[0]["purl"], serde_json::Value::String(variant_b)); + + // And the envelope itself must signal the partial failure. 
+ assert_eq!( + env["status"], "partialFailure", + "envelope status must reflect the partial failure: {out}" + ); +} diff --git a/crates/socket-patch-cli/tests/remove_invariants.rs b/crates/socket-patch-cli/tests/remove_invariants.rs index e86de1a..cc568db 100644 --- a/crates/socket-patch-cli/tests/remove_invariants.rs +++ b/crates/socket-patch-cli/tests/remove_invariants.rs @@ -299,6 +299,75 @@ fn remove_without_skip_rollback_fails_closed_and_keeps_manifest() { assert!(patches.contains_key("pkg:npm/__remove_test_b__@2.0.0")); } +// --------------------------------------------------------------------------- +// Blob-sweep artifact event must not inflate the removed count +// --------------------------------------------------------------------------- + +/// When `remove` sweeps an orphaned blob (or rolls files back) it appends a +/// purl-less, artifact-level `Removed` event carrying `details.blobsRemoved` / +/// `details.rolledBack`. That carrier is metadata — NOT a removed manifest +/// entry — so it must never bump `summary.removed`. +/// +/// Every other test passes `--skip-rollback` against a manifest whose afterHash +/// blobs aren't present on disk, so the cleanup phase sweeps nothing and the +/// carrier never fires — leaving this path completely uncovered. Here we stage +/// both patches' afterHash blobs in `.socket/blobs`, remove A, and force a +/// real one-blob sweep (A's afterHash blob becomes unreferenced; B's stays). +/// +/// The contract: exactly ONE manifest entry was deleted, so `summary.removed` +/// must be 1 — matching the single per-purl `removed` event — even though the +/// event stream also carries the artifact carrier reporting `blobsRemoved: 1`. +/// A regression that routes the carrier through the summary-bumping `record` +/// path would report `removed: 2` and flip this test red. +#[test] +fn remove_blob_sweep_does_not_inflate_removed_count() { + // afterHash values from TWO_PATCH_MANIFEST. 
+ const AFTER_A: &str = "1111111111111111111111111111111111111111111111111111111111111111"; + const AFTER_B: &str = "2222222222222222222222222222222222222222222222222222222222222222"; + + let tmp = tempfile::tempdir().expect("tempdir"); + let socket = make_socket_dir(tmp.path()); + let blobs = socket.join("blobs"); + std::fs::create_dir_all(&blobs).expect("create blobs dir"); + std::fs::write(blobs.join(AFTER_A), b"blob-a").expect("stage blob A"); + std::fs::write(blobs.join(AFTER_B), b"blob-b").expect("stage blob B"); + + let (code, stdout) = run_remove(tmp.path(), "pkg:npm/__remove_test_a__@1.0.0", &[]); + assert_eq!(code, 0, "remove must succeed; stdout=\n{stdout}"); + let v: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + assert_eq!(v["status"], "success"); + + // The crux: one entry removed → summary.removed == 1, NOT 2. + assert_eq!( + v["summary"]["removed"], 1, + "the blob-sweep carrier event must not inflate summary.removed; envelope={v}" + ); + + let events = v["events"].as_array().expect("events array"); + // Exactly one per-purl Removed event, naming A. + let removed_purls: Vec<&str> = events + .iter() + .filter(|e| e["action"] == "removed" && e["purl"].is_string()) + .map(|e| e["purl"].as_str().unwrap()) + .collect(); + assert_eq!(removed_purls, vec!["pkg:npm/__remove_test_a__@1.0.0"]); + + // The artifact carrier is still present (purl-less) and reports the sweep. + let carrier = events + .iter() + .find(|e| e["action"] == "removed" && e["purl"].is_null()) + .expect("artifact-level Removed carrier event must be present"); + assert_eq!( + carrier["details"]["blobsRemoved"], 1, + "exactly A's orphaned afterHash blob should be swept; carrier={carrier}" + ); + + // B's afterHash blob is still referenced, so it must survive on disk; + // A's must be gone. 
+ assert!(!blobs.join(AFTER_A).exists(), "A's orphaned blob must be swept"); + assert!(blobs.join(AFTER_B).exists(), "B's referenced blob must remain"); +} + // --------------------------------------------------------------------------- // Manifest-path override // --------------------------------------------------------------------------- diff --git a/crates/socket-patch-cli/tests/repair_invariants.rs b/crates/socket-patch-cli/tests/repair_invariants.rs index 4ccc90c..e6cff40 100644 --- a/crates/socket-patch-cli/tests/repair_invariants.rs +++ b/crates/socket-patch-cli/tests/repair_invariants.rs @@ -586,3 +586,61 @@ fn repair_honors_manifest_path_override() { assert_eq!(v["summary"]["removed"], 0); assert_eq!(v["summary"]["downloaded"], 0); } + +/// Regression: `--silent` ("Suppress non-error output") must mute the +/// human-readable progress that `repair` prints to stdout — "Found N +/// missing", "Downloading…", the cleanup summary and "Repair complete.". +/// +/// Before the fix every informational print in `repair_inner` was gated on +/// `--json` ALONE, so `repair --silent` (no `--json`) still flooded stdout, +/// contradicting the flag's contract (and `get`/`apply`, which gate on +/// `!json && !silent`). We run an offline repair that has real work to +/// report — an orphan blob to sweep — once silent and once not, and prove +/// the silent run emits NOTHING on stdout while the loud control does. +#[test] +fn repair_silent_suppresses_human_stdout() { + let tmp = tempfile::tempdir().expect("tempdir"); + let socket = make_socket_dir(tmp.path()); + // Keep the referenced blob (survives) plus an orphan (swept) so cleanup + // has something to announce in the non-silent control. + write_blob(&socket, REFERENCED_HASH, b"kept"); + let orphan = "deadbeef".repeat(8); // 64 hex chars, not referenced + write_blob(&socket, &orphan, b"orphan bytes"); + + // Loud control (offline, human mode): stdout must carry the summary. 
+ let loud = socket_cmd(tmp.path()) + .args(["repair", "--offline"]) + .output() + .expect("run socket-patch"); + assert_eq!(loud.status.code(), Some(0)); + let loud_out = String::from_utf8_lossy(&loud.stdout); + assert!( + loud_out.contains("Repair complete."), + "control: human repair must print progress; stdout=\n{loud_out}" + ); + + // Re-stage the orphan (the control swept it) so the silent run has the + // identical workload — only the flag differs. + write_blob(&socket, &orphan, b"orphan bytes"); + + let silent = socket_cmd(tmp.path()) + .args(["repair", "--offline", "--silent"]) + .output() + .expect("run socket-patch"); + assert_eq!( + silent.status.code(), + Some(0), + "silent repair must still succeed; stderr=\n{}", + String::from_utf8_lossy(&silent.stderr), + ); + let silent_out = String::from_utf8_lossy(&silent.stdout); + assert!( + silent_out.trim().is_empty(), + "--silent must suppress all human stdout; got:\n{silent_out}" + ); + // And the work still happened: the orphan was actually swept. + assert!( + !socket.join("blobs").join(&orphan).exists(), + "silent repair must still perform cleanup (orphan should be gone)" + ); +} diff --git a/crates/socket-patch-cli/tests/setup_invariants.rs b/crates/socket-patch-cli/tests/setup_invariants.rs index ae8ee1a..c4ea48a 100644 --- a/crates/socket-patch-cli/tests/setup_invariants.rs +++ b/crates/socket-patch-cli/tests/setup_invariants.rs @@ -530,6 +530,37 @@ fn setup_check_no_files_exits_zero() { assert_eq!(code, 0, "no files should still exit 0; stdout=\n{stdout}"); let v: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); assert_eq!(v["status"], "no_files"); + // The `no_files` envelope must keep the documented `--check` shape + // (CLI_CONTRACT "Setup command contract") — the summary counts are + // always-present, zero-valued fields, NOT dropped. A consumer reading + // `.needsConfiguration` must see 0, not null. 
+ assert_eq!(v["configured"], 0, "missing/`null` configured; stdout=\n{stdout}"); + assert_eq!( + v["needsConfiguration"], 0, + "missing/`null` needsConfiguration; stdout=\n{stdout}" + ); + assert_eq!(v["errors"], 0, "missing/`null` errors; stdout=\n{stdout}"); + assert!(v["files"].as_array().is_some_and(|a| a.is_empty())); +} + +#[test] +fn setup_remove_no_files_exits_zero_with_full_envelope() { + let tmp = tempfile::tempdir().expect("tempdir"); + let (code, stdout) = run_setup(tmp.path(), &["--remove", "--yes"]); + assert_eq!(code, 0, "no files should still exit 0; stdout=\n{stdout}"); + let v: serde_json::Value = serde_json::from_str(&stdout).expect("valid JSON"); + assert_eq!(v["status"], "no_files"); + // The `no_files` envelope must keep the documented `--remove` shape + // (removed/notConfigured/errors), present and zero — not dropped. This + // mirrors the plain-`setup` `no_files` envelope, which already carries its + // own counts; the `--remove`/`--check` variants must not diverge. + assert_eq!(v["removed"], 0, "missing/`null` removed; stdout=\n{stdout}"); + assert_eq!( + v["notConfigured"], 0, + "missing/`null` notConfigured; stdout=\n{stdout}" + ); + assert_eq!(v["errors"], 0, "missing/`null` errors; stdout=\n{stdout}"); + assert!(v["files"].as_array().is_some_and(|a| a.is_empty())); } #[test] diff --git a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs index c7b8e78..8d00af6 100644 --- a/crates/socket-patch-cli/tests/setup_matrix_pypi.rs +++ b/crates/socket-patch-cli/tests/setup_matrix_pypi.rs @@ -369,6 +369,76 @@ mod host_guard { "after remove the project must report needs_configuration again:\n{v}" ); } + + /// Regression: classic-Poetry projects. + /// + /// `setup` writes the hook into a Poetry manifest as the *structural* + /// `socket-patch = { version = "*", extras = ["hook"] }` — which has NO + /// literal `socket-patch[hook]` substring. 
A `setup --check` that probes + /// the manifest *textually* would therefore report a freshly-and-correctly + /// configured Poetry project as `needs_configuration` (exit 1), breaking + /// the setup→check round-trip. This guard pins the structural detection by + /// running the real binary against a hand-authored Poetry manifest in each + /// state. Fully hermetic: `--check` neither writes nor refreshes a lockfile. + #[test] + fn poetry_check_recognizes_structural_hook_host() { + // ── configured: the exact structural form `setup` emits ───────────── + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + let root_s = root.to_str().unwrap(); + std::fs::write( + root.join("pyproject.toml"), + "[tool.poetry]\nname = \"x\"\nversion = \"0.1.0\"\n\n\ + [tool.poetry.dependencies]\npython = \"^3.9\"\n\ + socket-patch = {version = \"*\", extras = [\"hook\"]}\n", + ) + .unwrap(); + + let (code, out, err) = run(root, &["setup", "--check", "--cwd", root_s, "--json"]); + assert_eq!( + code, 0, + "setup --check must PASS (exit 0) for a Poetry project carrying the \ + structural hook extra.\nstdout:\n{out}\nstderr:\n{err}" + ); + let v = parse_json(&out, "poetry check (configured)"); + assert_eq!( + json_str(&v, "status", "poetry check (configured)"), + "configured", + "structurally-configured Poetry project must report configured:\n{v}" + ); + assert_eq!( + json_str( + &pth_entry(&v, "poetry check (configured)"), + "status", + "poetry check (configured) pth" + ), + "configured", + "the pyproject pth entry must read configured:\n{v}" + ); + + // ── unconfigured: a plain socket-patch dep (no hook) is NOT enough ── + let tmp2 = tempfile::tempdir().unwrap(); + let root2 = tmp2.path(); + let root2_s = root2.to_str().unwrap(); + std::fs::write( + root2.join("pyproject.toml"), + "[tool.poetry]\nname = \"x\"\nversion = \"0.1.0\"\n\n\ + [tool.poetry.dependencies]\npython = \"^3.9\"\nsocket-patch = \"^3.3.0\"\n", + ) + .unwrap(); + let (code, out, err) = run(root2, 
&["setup", "--check", "--cwd", root2_s, "--json"]); + assert_eq!( + code, 1, + "setup --check must FAIL (exit 1) for a Poetry project whose \ + socket-patch dep carries no hook extra.\nstdout:\n{out}\nstderr:\n{err}" + ); + let v = parse_json(&out, "poetry check (unconfigured)"); + assert_eq!( + json_str(&v, "status", "poetry check (unconfigured)"), + "needs_configuration", + "a hook-less Poetry project must report needs_configuration:\n{v}" + ); + } } // ── Nested-workspace layouts (EXPECTED BASELINE GAP) ────────────────── diff --git a/crates/socket-patch-core/src/api/client.rs b/crates/socket-patch-core/src/api/client.rs index 5cb9a7a..1a4b978 100644 --- a/crates/socket-patch-core/src/api/client.rs +++ b/crates/socket-patch-core/src/api/client.rs @@ -168,40 +168,25 @@ impl ApiClient { ) -> Result, ApiError> { let status = resp.status(); - match status { - StatusCode::OK => { - let body = resp - .json::() - .await - .map_err(|e| ApiError::Parse(format!("Failed to parse response: {}", e)))?; - Ok(Some(body)) - } - StatusCode::NOT_FOUND => Ok(None), - StatusCode::UNAUTHORIZED => Err(ApiError::Unauthorized( - "Unauthorized: Invalid API token".into(), - )), - StatusCode::FORBIDDEN => { - let msg = if use_public_proxy { - "Forbidden: This patch is only available to paid subscribers. \ - Sign up at https://socket.dev to access paid patches." - } else { - "Forbidden: Access denied. This may be a paid patch or \ - you may not have access to this organization." - }; - Err(ApiError::Forbidden(msg.into())) - } - StatusCode::TOO_MANY_REQUESTS => Err(ApiError::RateLimited( - "Rate limit exceeded. 
Please try again later.".into(), - )), - _ => { - let text = resp.text().await.unwrap_or_default(); - Err(ApiError::Other(format!( - "API request failed with status {}: {}", - status.as_u16(), - text - ))) - } + if status == StatusCode::OK { + let body = resp + .json::() + .await + .map_err(|e| ApiError::Parse(format!("Failed to parse response: {}", e)))?; + return Ok(Some(body)); + } + if status == StatusCode::NOT_FOUND { + return Ok(None); } + if let Some(err) = classify_auth_error(status, use_public_proxy) { + return Err(err); + } + let text = resp.text().await.unwrap_or_default(); + Err(ApiError::Other(format!( + "API request failed with status {}: {}", + status.as_u16(), + text + ))) } // ── Public API methods ──────────────────────────────────────────── @@ -520,28 +505,36 @@ impl ApiClient { let status = resp.status(); - match status { - StatusCode::OK => { - let bytes = resp.bytes().await.map_err(|e| { - ApiError::Network(format!( - "Error reading {} body for {}: {}", - label, identifier, e - )) - })?; - Ok(Some(bytes.to_vec())) - } - StatusCode::NOT_FOUND => Ok(None), - _ => { - let text = resp.text().await.unwrap_or_default(); - Err(ApiError::Other(format!( - "Failed to fetch {} {}: status {} - {}", - label, - identifier, - status.as_u16(), - text, - ))) - } + if status == StatusCode::OK { + let bytes = resp.bytes().await.map_err(|e| { + ApiError::Network(format!( + "Error reading {} body for {}: {}", + label, identifier, e + )) + })?; + return Ok(Some(bytes.to_vec())); + } + if status == StatusCode::NOT_FOUND { + return Ok(None); } + // Classify 401/403/429 identically to the JSON transport path + // (`handle_json_response`). Without this an authenticated blob/diff/ + // package fetch that 401s/403s would surface as `ApiError::Other`, + // which `is_fallback_candidate` ignores — silently disabling the + // auth→proxy fallback for binary downloads. 
`use_auth` is the + // authenticated-endpoint flag, so `!use_auth` is the proxy case that + // drives the paid-patch wording. + if let Some(err) = classify_auth_error(status, !use_auth) { + return Err(err); + } + let text = resp.text().await.unwrap_or_default(); + Err(ApiError::Other(format!( + "Failed to fetch {} {}: status {} - {}", + label, + identifier, + status.as_u16(), + text, + ))) } } @@ -767,6 +760,41 @@ pub fn is_fallback_candidate(err: &ApiError) -> bool { matches!(err, ApiError::Unauthorized(_) | ApiError::Forbidden(_)) } +/// Map the well-known auth / rate-limit HTTP statuses (401 / 403 / 429) to +/// their tailored [`ApiError`] variant. Returns `None` for any other status, +/// leaving `OK` / `404` / fallthrough handling to the caller. +/// +/// Shared by both transport paths — the JSON [`ApiClient::handle_json_response`] +/// *and* the binary [`ApiClient::fetch_binary`] — so a 401/403 is classified +/// identically regardless of whether the body is JSON or octet-stream. This is +/// what [`is_fallback_candidate`] keys on to reroute auth→proxy: a binary +/// download that buried these statuses under [`ApiError::Other`] would silently +/// skip the fallback (and lose the operator-facing message). +/// +/// `use_public_proxy` selects the 403 wording (paid-subscriber hint vs. +/// org-access hint). +fn classify_auth_error(status: StatusCode, use_public_proxy: bool) -> Option { + match status { + StatusCode::UNAUTHORIZED => Some(ApiError::Unauthorized( + "Unauthorized: Invalid API token".into(), + )), + StatusCode::FORBIDDEN => { + let msg = if use_public_proxy { + "Forbidden: This patch is only available to paid subscribers. \ + Sign up at https://socket.dev to access paid patches." + } else { + "Forbidden: Access denied. This may be a paid patch or \ + you may not have access to this organization." + }; + Some(ApiError::Forbidden(msg.into())) + } + StatusCode::TOO_MANY_REQUESTS => Some(ApiError::RateLimited( + "Rate limit exceeded. 
Please try again later.".into(), + )), + _ => None, + } +} + /// Choose an org slug from the list returned by `/v0/organizations`. /// /// Returns an error when the list is empty, the sole slug when there is @@ -1424,6 +1452,60 @@ mod tests { ); } + // ── classify_auth_error: shared 401/403/429 classification ────────── + // + // Regression: `fetch_binary` used to fold *every* non-OK/404 status into + // `ApiError::Other`, so an authenticated blob/diff/package fetch that + // 401'd/403'd was never recognized by `is_fallback_candidate` and the + // auth→proxy fallback silently never fired. Both transport paths now route + // through this shared classifier; these pin its contract directly. + + #[test] + fn classify_auth_error_maps_401_to_unauthorized() { + let err = classify_auth_error(StatusCode::UNAUTHORIZED, false) + .expect("401 must classify"); + assert!(matches!(err, ApiError::Unauthorized(_))); + assert!(is_fallback_candidate(&err), "401 must drive the proxy fallback"); + } + + #[test] + fn classify_auth_error_maps_403_to_forbidden_with_proxy_wording() { + // Proxy path (use_public_proxy = true) → paid-subscriber hint. + let proxy = classify_auth_error(StatusCode::FORBIDDEN, true).expect("403 classifies"); + assert!(matches!(proxy, ApiError::Forbidden(_))); + assert!(is_fallback_candidate(&proxy), "403 must drive the proxy fallback"); + assert!( + proxy.to_string().contains("paid subscribers"), + "proxy 403 must carry the paid-subscriber hint; got: {proxy}" + ); + + // Authenticated path (use_public_proxy = false) → org-access wording. 
+ let auth = classify_auth_error(StatusCode::FORBIDDEN, false).expect("403 classifies"); + assert!( + auth.to_string().contains("organization"), + "authenticated 403 must carry the org-access wording; got: {auth}" + ); + } + + #[test] + fn classify_auth_error_maps_429_to_rate_limited() { + let err = classify_auth_error(StatusCode::TOO_MANY_REQUESTS, false) + .expect("429 must classify"); + assert!(matches!(err, ApiError::RateLimited(_))); + // Rate limits are intentionally *not* a fallback candidate — they + // surface as-is so the operator sees them. + assert!(!is_fallback_candidate(&err)); + } + + #[test] + fn classify_auth_error_returns_none_for_other_statuses() { + // OK / 404 / 5xx are handled by the caller, not this classifier. + assert!(classify_auth_error(StatusCode::OK, false).is_none()); + assert!(classify_auth_error(StatusCode::NOT_FOUND, false).is_none()); + assert!(classify_auth_error(StatusCode::INTERNAL_SERVER_ERROR, false).is_none()); + assert!(classify_auth_error(StatusCode::BAD_GATEWAY, true).is_none()); + } + #[test] fn looks_like_token_hash_recognizes_sri_prefixes() { assert!(looks_like_token_hash("sha256-abc")); diff --git a/crates/socket-patch-core/src/cargo_setup/discover.rs b/crates/socket-patch-core/src/cargo_setup/discover.rs index 05464ff..cb32533 100644 --- a/crates/socket-patch-core/src/cargo_setup/discover.rs +++ b/crates/socket-patch-core/src/cargo_setup/discover.rs @@ -45,9 +45,14 @@ pub async fn discover_cargo_project(cwd: &Path) -> Option { } // `[workspace] members = [...]` (with single-trailing-`*` glob support). + // Read via `as_table_like` so the equally-valid inline form + // `workspace = { members = [...] }` is honored too — otherwise its members + // are silently dropped even though `find_workspace_root` (which only checks + // `.is_some()`) still treats it as the workspace root. Mirrors the + // inline-aware `[dependencies]` handling in `update::is_guard_dep_present`. 
if let Some(arr) = doc .get("workspace") - .and_then(Item::as_table) + .and_then(Item::as_table_like) .and_then(|w| w.get("members")) .and_then(Item::as_array) { @@ -439,4 +444,186 @@ mod tests { let dir = tempfile::tempdir().unwrap(); assert!(discover_cargo_project(dir.path()).await.is_none()); } + + // A bare-path member that does not resolve to a `Cargo.toml` must be + // silently skipped without aborting discovery of the valid siblings. + #[tokio::test] + async fn test_nonexistent_bare_member_skipped() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write( + &root.join("Cargo.toml"), + "[workspace]\nmembers = [\"app\", \"ghost\", \"lib\"]\n", + ) + .await; + write( + &root.join("app/Cargo.toml"), + "[package]\nname=\"app\"\nversion=\"0.1.0\"\n", + ) + .await; + write( + &root.join("lib/Cargo.toml"), + "[package]\nname=\"lib\"\nversion=\"0.1.0\"\n", + ) + .await; + // `ghost` is listed but the directory has no Cargo.toml (it doesn't even + // exist) — Cargo would error, but `setup` must just skip it. + + let proj = discover_cargo_project(root).await.unwrap(); + assert!(proj.members.contains(&root.join("app/Cargo.toml"))); + assert!(proj.members.contains(&root.join("lib/Cargo.toml"))); + assert!( + !proj.members.iter().any(|m| m.to_string_lossy().contains("ghost")), + "unresolved member must not be added, got {:?}", + proj.members + ); + assert_eq!(proj.members.len(), 2); + } + + // Root `[package]` + recursive `crates/**`: the root manifest is a member + // (via `[package]`) and every nested crate is discovered, with no path + // appearing twice. 
+ #[tokio::test] + async fn test_recursive_glob_with_root_package_and_dedup() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write( + &root.join("Cargo.toml"), + "[package]\nname=\"root\"\nversion=\"0.1.0\"\n\n[workspace]\nmembers = [\"crates/**\"]\n", + ) + .await; + write( + &root.join("crates/a/Cargo.toml"), + "[package]\nname=\"a\"\nversion=\"0.1.0\"\n", + ) + .await; + write( + &root.join("crates/group/deep/Cargo.toml"), + "[package]\nname=\"deep\"\nversion=\"0.1.0\"\n", + ) + .await; + + let proj = discover_cargo_project(root).await.unwrap(); + assert!(proj.members.contains(&root.join("Cargo.toml"))); + assert!(proj.members.contains(&root.join("crates/a/Cargo.toml"))); + assert!(proj.members.contains(&root.join("crates/group/deep/Cargo.toml"))); + assert_eq!(proj.members.len(), 3, "no duplicates: {:?}", proj.members); + + // No path appears twice. + let mut sorted = proj.members.clone(); + sorted.sort(); + let deduped_len = { + let mut s = sorted.clone(); + s.dedup(); + s.len() + }; + assert_eq!(sorted.len(), deduped_len, "members contain a duplicate: {:?}", proj.members); + } + + // Single-level `*` at the workspace root finds direct crate dirs and ignores + // an immediate subdir that has no `Cargo.toml`. + #[tokio::test] + async fn test_single_level_star_at_root() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&root.join("Cargo.toml"), "[workspace]\nmembers = [\"*\"]\n").await; + write( + &root.join("alpha/Cargo.toml"), + "[package]\nname=\"alpha\"\nversion=\"0.1.0\"\n", + ) + .await; + write( + &root.join("beta/Cargo.toml"), + "[package]\nname=\"beta\"\nversion=\"0.1.0\"\n", + ) + .await; + // A non-crate dir at the same level is ignored. 
+ fs::create_dir_all(root.join("docs")).await.unwrap(); + + let proj = discover_cargo_project(root).await.unwrap(); + assert!(proj.members.contains(&root.join("alpha/Cargo.toml"))); + assert!(proj.members.contains(&root.join("beta/Cargo.toml"))); + assert_eq!(proj.members.len(), 2, "only the two crate dirs: {:?}", proj.members); + } + + // The `[workspace]`/`members` tables may be written as an inline table — + // `workspace = { members = [...] }` is valid TOML that Cargo (serde) + // accepts exactly like a `[workspace]` section. The reader must see through + // it via `as_table_like`, just as `is_guard_dep_present` does for inline + // `[dependencies]`. The old `as_table` gate returned None for the inline + // form, so every member was silently dropped (only the virtual-manifest + // fallback survived) — leaving the members unconfigured by `setup`, even + // though `find_workspace_root` still treats it as the workspace root. + #[tokio::test] + async fn test_inline_workspace_members_are_discovered() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + // Inline workspace table — NO `[package]`, so the only way to get real + // members is to read the inline `members` array. 
+ write( + &root.join("Cargo.toml"), + "workspace = { members = [\"crates/*\"] }\n", + ) + .await; + write( + &root.join("crates/a/Cargo.toml"), + "[package]\nname=\"a\"\nversion=\"0.1.0\"\n", + ) + .await; + write( + &root.join("crates/b/Cargo.toml"), + "[package]\nname=\"b\"\nversion=\"0.1.0\"\n", + ) + .await; + + let proj = discover_cargo_project(root).await.unwrap(); + assert_eq!(proj.root, root); + assert!( + proj.members.contains(&root.join("crates/a/Cargo.toml")), + "inline-workspace member `a` must be discovered, got {:?}", + proj.members + ); + assert!( + proj.members.contains(&root.join("crates/b/Cargo.toml")), + "inline-workspace member `b` must be discovered, got {:?}", + proj.members + ); + // Exactly the two real members — NOT the virtual-manifest fallback + // (which would wrongly list the root `Cargo.toml` alone). + assert_eq!( + proj.members.len(), + 2, + "must be the two inline members, not the virtual fallback: {:?}", + proj.members + ); + assert!(!proj.members.contains(&root.join("Cargo.toml"))); + } + + // An inline workspace table with an explicit (non-glob) member list must + // also resolve through the same `as_table_like` path. 
+ #[tokio::test] + async fn test_inline_workspace_explicit_members() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write( + &root.join("Cargo.toml"), + "workspace = { members = [\"app\", \"lib\"] }\n", + ) + .await; + write( + &root.join("app/Cargo.toml"), + "[package]\nname=\"app\"\nversion=\"0.1.0\"\n", + ) + .await; + write( + &root.join("lib/Cargo.toml"), + "[package]\nname=\"lib\"\nversion=\"0.1.0\"\n", + ) + .await; + + let proj = discover_cargo_project(root).await.unwrap(); + assert!(proj.members.contains(&root.join("app/Cargo.toml"))); + assert!(proj.members.contains(&root.join("lib/Cargo.toml"))); + assert_eq!(proj.members.len(), 2, "{:?}", proj.members); + } } diff --git a/crates/socket-patch-core/src/cargo_setup/update.rs b/crates/socket-patch-core/src/cargo_setup/update.rs index 148a1e8..e584808 100644 --- a/crates/socket-patch-core/src/cargo_setup/update.rs +++ b/crates/socket-patch-core/src/cargo_setup/update.rs @@ -129,6 +129,20 @@ fn guard_dep_add(content: &str, version: &str) -> Result, String> let mut doc = content .parse::() .map_err(|e| format!("Invalid Cargo.toml: {e}"))?; + // A *virtual* workspace manifest (`[workspace]` but no `[package]`) cannot + // carry a `[dependencies]` section — cargo rejects it with "this virtual + // manifest specifies a `dependencies` section, which is not allowed". Adding + // the guard here would corrupt the manifest, and there is no crate to build + // anyway (the guard belongs in each *member*). Refuse rather than write a + // file cargo can no longer parse. (Reachable via `discover`'s empty-members + // fallback, which hands the workspace root to `setup`.) 
+ if doc.contains_key("workspace") && !doc.contains_key("package") { + return Err( + "Cargo.toml is a virtual workspace manifest (no `[package]`); the guard \ + dependency belongs in each member crate, not the workspace root" + .to_string(), + ); + } let root = doc.as_table_mut(); let deps = ensure_table(root, "dependencies")?; if deps.contains_key(GUARD_CRATE) { @@ -192,15 +206,23 @@ mod tests { #[test] fn test_add_into_inline_dependencies_table() { - // `dependencies = { … }` is a valid (if uncommon) inline table. The - // reader (`is_guard_dep_present`) sees through it via `as_table_like`, - // so the writer must too — otherwise add errors on a valid manifest. - let toml = "[package]\nname = \"x\"\ndependencies = { serde = \"1\" }\n"; + // `dependencies = { … }` is a valid (if uncommon) *root-level* inline + // table. The reader (`is_guard_dep_present`) sees through it via + // `as_table_like`, so the writer must insert INTO it too — otherwise add + // would either error or fork a second `[dependencies]` (a duplicate key, + // which is invalid TOML). The `dependencies` key must be at the document + // root, NOT under `[package]` (where it would belong to `package.*` and + // the writer would never touch it — masking this very regression). + let toml = "dependencies = { serde = \"1\" }\n"; let out = guard_dep_add(toml, "3.3").unwrap().unwrap(); assert!(is_guard_dep_present(&out)); assert!(out.contains("serde = \"1\"")); + // Round-trips through a parser (proves it is not a duplicate-key file). let doc = out.parse::().unwrap(); assert_eq!(doc["dependencies"][GUARD_CRATE].as_str(), Some("3.3")); + // The guard lives in the SAME (inline) table as serde — there is exactly + // one `dependencies` key, still inline. 
+ assert!(doc["dependencies"].is_inline_table()); } #[test] @@ -269,6 +291,99 @@ mod tests { assert_eq!(res.status, CargoSetupStatus::AlreadyConfigured); } + #[test] + fn test_add_to_virtual_workspace_manifest_is_error() { + // A virtual manifest (`[workspace]`, no `[package]`) cannot hold a + // `[dependencies]` section — cargo refuses to parse it. `add` must NOT + // produce such a file; it errors instead so `setup` surfaces the problem + // rather than silently corrupting the workspace root. + let toml = "[workspace]\nmembers = [\"crates/*\"]\n"; + let err = guard_dep_add(toml, "3.3").unwrap_err(); + assert!( + err.contains("virtual workspace manifest"), + "expected a virtual-manifest error, got: {err}" + ); + // The async wrapper reports it as Error, not a (corrupting) Updated. + // (Covered indirectly; the pure transform is the contract.) + } + + #[test] + fn test_add_to_root_package_with_workspace_is_allowed() { + // A *root package* (`[package]` AND `[workspace]`) is a real crate and + // CAN carry `[dependencies]` — the virtual-manifest guard must not reject + // it. This is the common single-repo-with-root-crate layout. + let toml = "[package]\nname = \"root\"\nversion = \"0.1.0\"\n\n[workspace]\nmembers = [\"crates/*\"]\n"; + let out = guard_dep_add(toml, "3.3").unwrap().unwrap(); + assert!(is_guard_dep_present(&out)); + // The produced manifest still parses (no duplicate/invalid section). + assert!(out.parse::().is_ok()); + } + + #[test] + fn test_add_into_root_inline_does_not_fork_a_second_table() { + // Regression guard: inserting into a root-level inline `dependencies` + // must mutate THAT table, never append a separate `[dependencies]` + // header (which would be a duplicate key → unparseable). 
+ let toml = "dependencies = { serde = \"1\" }\n"; + let out = guard_dep_add(toml, "3.3").unwrap().unwrap(); + assert_eq!( + out.matches("dependencies").count(), + 1, + "must not fork a second dependencies table: {out}" + ); + assert!(out.parse::().is_ok(), "must stay valid TOML: {out}"); + } + + #[test] + fn test_add_then_remove_round_trips_byte_for_byte() { + // add into an existing `[dependencies]`, then remove, must restore the + // original manifest exactly (formatting + comments preserved). + let toml = "# top\n[package]\nname = \"x\"\n\n[dependencies]\nserde = \"1\" # json\n"; + let added = guard_dep_add(toml, "3.3").unwrap().unwrap(); + let removed = guard_dep_remove(&added).unwrap().unwrap(); + assert_eq!(removed, toml, "add→remove must round-trip byte-for-byte"); + } + + #[test] + fn test_dotted_guard_header_is_present_and_removable() { + // The guard pinned via a `[dependencies.socket-patch-guard]` section + // header (a sub-table) must be detected AND actually removed — not a + // silent no-op that leaves it behind. + let toml = "[dependencies.socket-patch-guard]\nversion = \"3.3\"\nfeatures = [\"x\"]\n"; + assert!(is_guard_dep_present(toml)); + // Idempotent add (already configured). + assert!(guard_dep_add(toml, "3.3").unwrap().is_none()); + let out = guard_dep_remove(toml).unwrap().unwrap(); + assert!(!is_guard_dep_present(&out), "dotted guard must be removed"); + } + + #[tokio::test] + async fn test_remove_dry_run_does_not_write() { + // The remove dry-run branch was previously untested. 
+ let dir = tempfile::tempdir().unwrap(); + let cargo = dir.path().join("Cargo.toml"); + let body = "[dependencies]\nsocket-patch-guard = \"3.3\"\nserde = \"1\"\n"; + tokio::fs::write(&cargo, body).await.unwrap(); + let res = remove_guard_dep(&cargo, true).await; + assert_eq!(res.status, CargoSetupStatus::Updated); + let on_disk = tokio::fs::read_to_string(&cargo).await.unwrap(); + assert_eq!(on_disk, body, "dry-run must not modify the file"); + } + + #[tokio::test] + async fn test_add_to_virtual_manifest_wrapper_reports_error_without_writing() { + // End-to-end: the async wrapper turns the virtual-manifest refusal into + // an Error result and leaves the file byte-for-byte unchanged. + let dir = tempfile::tempdir().unwrap(); + let cargo = dir.path().join("Cargo.toml"); + let body = "[workspace]\nmembers = [\"a\", \"b\"]\n"; + tokio::fs::write(&cargo, body).await.unwrap(); + let res = add_guard_dep(&cargo, "3.3", false).await; + assert_eq!(res.status, CargoSetupStatus::Error); + let on_disk = tokio::fs::read_to_string(&cargo).await.unwrap(); + assert_eq!(on_disk, body, "must not corrupt the virtual manifest"); + } + #[tokio::test] async fn test_add_dry_run_does_not_write() { let dir = tempfile::tempdir().unwrap(); @@ -285,3 +400,5 @@ mod tests { ); } } + + diff --git a/crates/socket-patch-core/src/composer_setup/mod.rs b/crates/socket-patch-core/src/composer_setup/mod.rs index 667d9c2..8aacac0 100644 --- a/crates/socket-patch-core/src/composer_setup/mod.rs +++ b/crates/socket-patch-core/src/composer_setup/mod.rs @@ -168,10 +168,18 @@ fn composer_add(content: &str) -> Result, String> { fn composer_remove(content: &str) -> Result, String> { let mut doc: Value = serde_json::from_str(content).map_err(|e| format!("Invalid composer.json: {e}"))?; - let root = match doc.as_object_mut() { - Some(r) => r, - None => return Ok(None), - }; + if !doc.is_object() { + return Err("Invalid composer.json: root is not a JSON object".to_string()); + } + // Mirror `composer_add`: a 
present-but-non-object `scripts` is malformed and + // must error, not be silently swallowed as a "nothing to remove" no-op. + if let Some(scripts) = doc.get("scripts") { + if !scripts.is_null() && !scripts.is_object() { + return Err("Invalid composer.json: \"scripts\" is not a JSON object".to_string()); + } + } + let root = doc.as_object_mut().unwrap(); + // An absent (or `null`) `scripts` is a legitimate no-op: nothing of ours. let scripts = match root.get_mut("scripts").and_then(Value::as_object_mut) { Some(s) => s, None => return Ok(None), @@ -442,4 +450,120 @@ mod tests { let dir = tempfile::tempdir().unwrap(); assert!(discover_composer_project(dir.path()).await.is_none()); } + + #[test] + fn test_remove_round_trip_with_other_user_scripts() { + // add then remove restores a composer.json that already had unrelated + // scripts, byte-for-byte (our two events are added and then pruned). + let inp = "{\n \"name\": \"x\",\n \"scripts\": {\n \"test\": \"phpunit\"\n }\n}\n"; + let added = composer_add(inp).unwrap().unwrap(); + let removed = composer_remove(&added).unwrap().unwrap(); + assert_eq!(removed, inp, "round-trip with user scripts"); + } + + #[test] + fn test_remove_non_object_root_is_error() { + // Regression: composer_remove must reject a malformed (non-object) root + // with an error, not silently report "nothing to remove" — matching + // composer_add and the npm `remove_package_json_content` contract. + let err = composer_remove("[1, 2, 3]").unwrap_err(); + assert!(err.contains("root is not a JSON object"), "got: {err}"); + assert!(composer_remove("\"just a string\"").is_err()); + assert!(composer_remove("42").is_err()); + } + + #[test] + fn test_remove_non_object_scripts_is_error() { + // Regression: a present-but-non-object `scripts` is malformed. `setup` + // (composer_add) errors on it; `setup --remove` must too, rather than + // silently swallowing it as a no-op success. 
+ let err = composer_remove("{\"scripts\": \"oops\"}").unwrap_err(); + assert!(err.contains("\"scripts\" is not a JSON object"), "got: {err}"); + assert!(composer_remove("{\"scripts\": 7}").is_err()); + assert!(composer_remove("{\"scripts\": [\"a\"]}").is_err()); + // add and remove agree on what counts as malformed. + assert!(composer_add("{\"scripts\": \"oops\"}").is_err()); + } + + #[test] + fn test_remove_absent_or_null_scripts_is_noop_not_error() { + // A genuinely absent or null `scripts` has nothing of ours: no-op, not + // an error (the malformed-input guard must not over-trigger). + assert!(composer_remove("{\"name\": \"x\"}").unwrap().is_none()); + assert!(composer_remove("{\"scripts\": null}").unwrap().is_none()); + } + + #[test] + fn test_exhaustive_invariants() { + let event_values = [ + None, + Some(format!("\"{APPLY_COMMAND}\"")), + Some("\"@php artisan\"".to_string()), + Some(format!("[\"{APPLY_COMMAND}\"]")), + Some("[\"@php artisan\"]".to_string()), + Some(format!("[\"@php artisan\",\"{APPLY_COMMAND}\"]")), + Some("[]".to_string()), + ]; + for a in &event_values { + for b in &event_values { + let mut parts = vec![]; + if let Some(v) = a { + parts.push(format!("\"post-install-cmd\":{v}")); + } + if let Some(v) = b { + parts.push(format!("\"post-update-cmd\":{v}")); + } + let json = format!("{{\"scripts\":{{{}}}}}", parts.join(",")); + + // add is idempotent + let after_add = match composer_add(&json).unwrap() { + Some(out) => { + assert!(is_hook_present(&out), "add changed but not present:\n{json}\n{out}"); + assert!( + composer_add(&out).unwrap().is_none(), + "add NOT idempotent:\n{json}\n{out}" + ); + out + } + None => json.clone(), + }; + + // after a full add, both events must carry our command + if composer_add(&json).unwrap().is_some() { + assert!(is_hook_present(&after_add)); + } + + // remove undoes add, and remove is idempotent + if let Some(rem) = composer_remove(&after_add).unwrap() { + assert!( + 
composer_remove(&rem).unwrap().is_none(), + "remove NOT idempotent:\n{after_add}\n{rem}" + ); + } + } + } + } + + #[test] + fn test_add_then_check_consistency() { + // For every input where add reports a change, is_hook_present must be true. + let inputs = [ + BASIC, + "{\"scripts\":{\"post-install-cmd\":\"@php artisan\"}}", + "{\"scripts\":{\"post-install-cmd\":[\"a\",\"b\"]}}", + "{\"scripts\":{}}", + "{\"scripts\":null}", + "{}", + ]; + for inp in inputs { + if let Some(out) = composer_add(inp).unwrap() { + assert!(is_hook_present(&out), "add changed but check false for {inp}\n{out}"); + // second add is a no-op + assert!(composer_add(&out).unwrap().is_none(), "not idempotent for {inp}"); + // remove undoes + let rem = composer_remove(&out).unwrap().unwrap(); + assert!(!is_hook_present(&rem), "remove left hook for {inp}\n{rem}"); + } + } + } } diff --git a/crates/socket-patch-core/src/crawlers/cargo_crawler.rs b/crates/socket-patch-core/src/crawlers/cargo_crawler.rs index f8555fd..449cdaa 100644 --- a/crates/socket-patch-core/src/crawlers/cargo_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/cargo_crawler.rs @@ -82,13 +82,28 @@ fn parse_table_header(line: &str) -> Option<&str> { } /// Extract a quoted string value from a `key = "value"` line. +/// +/// Handles both TOML string flavors that Cargo accepts for `name` / +/// `version`: basic strings (`"..."`) and literal strings (`'...'`). +/// A too-strict double-quote-only match would silently drop a crate +/// whose manifest uses single quotes — and in the vendor layout, where +/// the directory name carries no version, that crate would become +/// undiscoverable (and thus unpatchable). 
fn extract_string_value(line: &str, key: &str) -> Option { let rest = line.strip_prefix(key)?; let rest = rest.trim_start(); let rest = rest.strip_prefix('=')?; let rest = rest.trim_start(); - let rest = rest.strip_prefix('"')?; - let end = rest.find('"')?; + // The value must open with a quote of one kind; the matching close + // is the next quote of the *same* kind (literal strings have no + // escapes, and basic strings used for name/version never contain an + // escaped quote in practice). + let quote = match rest.chars().next()? { + c @ ('"' | '\'') => c, + _ => return None, + }; + let rest = &rest[1..]; + let end = rest.find(quote)?; Some(rest[..end].to_string()) } @@ -830,6 +845,103 @@ version = "fake" assert!(parse_cargo_toml_name_version(content).is_none()); } + // --- regression: single-quoted (literal) string values ------------- + + /// TOML literal strings use single quotes and are valid in a + /// `Cargo.toml`. The minimal parser must read `name`/`version` from + /// them just as it does from basic (double-quoted) strings. + #[test] + fn test_parse_cargo_toml_single_quoted_values() { + let content = "[package]\nname = 'serde'\nversion = '1.0.200'\n"; + let (name, version) = parse_cargo_toml_name_version(content).unwrap(); + assert_eq!(name, "serde"); + assert_eq!(version, "1.0.200"); + } + + /// A manifest may legally mix the two string flavors. + #[test] + fn test_parse_cargo_toml_mixed_quote_values() { + let content = "[package]\nname = 'tokio'\nversion = \"1.38.0\"\n"; + let (name, version) = parse_cargo_toml_name_version(content).unwrap(); + assert_eq!(name, "tokio"); + assert_eq!(version, "1.38.0"); + } + + /// A `#` inside the closing-quote pair is part of the value; a + /// trailing comment after the literal string is ignored. (The `'` + /// flavor must find its matching `'`, not a stray `"`.) 
+ #[test] + fn test_parse_cargo_toml_single_quoted_with_comment() { + let content = "[package]\nname = 'serde' # the lib\nversion = '1.0.200'\n"; + let (name, version) = parse_cargo_toml_name_version(content).unwrap(); + assert_eq!(name, "serde"); + assert_eq!(version, "1.0.200"); + } + + /// `version.workspace = true` must still short-circuit to `None` + /// regardless of the quote-handling change (no quotes are involved). + #[test] + fn test_parse_cargo_toml_workspace_still_none_after_quote_fix() { + let content = "[package]\nname = 'my-crate'\nversion.workspace = true\n"; + assert!(parse_cargo_toml_name_version(content).is_none()); + } + + /// End-to-end: a vendored crate whose `Cargo.toml` uses single-quoted + /// values must still be located by `find_by_purls`. The vendor + /// directory name (`serde`) carries no version, so the version can + /// only come from the manifest — this is the layout where the + /// double-quote-only bug made the crate undiscoverable. + #[tokio::test] + async fn test_find_by_purls_vendor_single_quoted_manifest() { + let dir = tempfile::tempdir().unwrap(); + let serde_dir = dir.path().join("serde"); + tokio::fs::create_dir_all(&serde_dir).await.unwrap(); + tokio::fs::write( + serde_dir.join("Cargo.toml"), + "[package]\nname = 'serde'\nversion = '1.0.200'\n", + ) + .await + .unwrap(); + + let crawler = CargoCrawler::new(); + let purls = vec!["pkg:cargo/serde@1.0.200".to_string()]; + let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap(); + + assert_eq!(result.len(), 1); + assert!(result.contains_key("pkg:cargo/serde@1.0.200")); + assert_eq!(result["pkg:cargo/serde@1.0.200"].version, "1.0.200"); + } + + /// End-to-end via `crawl_all`: a single-quoted registry manifest is + /// parsed from the manifest (not just the dir name), proving the + /// value is read rather than recovered by the dir-name fallback. 
+ #[tokio::test] + async fn test_crawl_all_single_quoted_manifest() { + let dir = tempfile::tempdir().unwrap(); + // Dir name deliberately disagrees with the manifest version so a + // pass can only come from reading the single-quoted manifest. + let crate_dir = dir.path().join("serde-9.9.9"); + tokio::fs::create_dir_all(&crate_dir).await.unwrap(); + tokio::fs::write( + crate_dir.join("Cargo.toml"), + "[package]\nname = 'serde'\nversion = '1.0.200'\n", + ) + .await + .unwrap(); + + let crawler = CargoCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + assert_eq!(packages[0].purl, "pkg:cargo/serde@1.0.200"); + } + // --- regression: dir-name version splitting ------------------------ /// A numeric pre-release segment (legal SemVer) must stay part of the diff --git a/crates/socket-patch-core/src/crawlers/composer_crawler.rs b/crates/socket-patch-core/src/crawlers/composer_crawler.rs index eba76df..3cb673a 100644 --- a/crates/socket-patch-core/src/crawlers/composer_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/composer_crawler.rs @@ -88,16 +88,28 @@ impl ComposerCrawler { // version (often `v6.4.1`); PURLs use the bare numeric // version, so normalize before building the PURL. let version = normalize_version(&entry.version).to_string(); - let purl = crate::utils::purl::build_composer_purl(namespace, name, &version); + + // Composer/Packagist treat package names + // case-insensitively and the canonical PURL is + // lowercase, but installed.json records the *pretty* + // (case-preserved) name. Lowercase the namespace/name + // for the PURL so it matches the canonical form Socket's + // catalog uses; the on-disk `path` keeps the original + // casing (Composer writes the vendor dir with the pretty + // name, which matters on case-sensitive filesystems). 
+ let ns_canon = namespace.to_ascii_lowercase(); + let name_canon = name.to_ascii_lowercase(); + let purl = + crate::utils::purl::build_composer_purl(&ns_canon, &name_canon, &version); if !seen.insert(purl.clone()) { continue; } packages.push(CrawledPackage { - name: name.to_string(), + name: name_canon, version, - namespace: Some(namespace.to_string()), + namespace: Some(ns_canon), purl, path: pkg_path, }); @@ -116,40 +128,60 @@ impl ComposerCrawler { ) -> Result, std::io::Error> { let mut result: HashMap = HashMap::new(); - // Build a name -> version lookup from installed.json + // Build a case-insensitive lookup from installed.json. Composer + // package names are case-insensitive and the canonical PURL is + // lowercase, but installed.json records the *pretty* (case-preserved) + // name and Composer writes the vendor directory with that same + // casing. Key the map by the lowercased name and carry the original + // name so the real on-disk path can be reconstructed even on + // case-sensitive filesystems. let entries = read_installed_json(vendor_path).await; - let installed: HashMap = - entries.into_iter().map(|e| (e.name, e.version)).collect(); + let installed: HashMap = entries + .into_iter() + .map(|e| (e.name.to_ascii_lowercase(), (e.name, e.version))) + .collect(); for purl in purls { if let Some(((namespace, name), version)) = crate::utils::purl::parse_composer_purl(purl) { - let full_name = format!("{namespace}/{name}"); - let pkg_dir = vendor_path.join(namespace).join(name); + let full_name = format!("{namespace}/{name}").to_ascii_lowercase(); - if !is_dir(&pkg_dir).await { + let Some((installed_name, installed_version)) = installed.get(&full_name) else { continue; - } + }; // Verify version matches installed.json. Compare on the // normalized version so a `v`-prefixed installed.json // version (`v6.4.1`) matches a bare PURL version (`6.4.1`) // and vice versa. 
- if let Some(installed_version) = installed.get(&full_name) { - if normalize_version(installed_version) == normalize_version(version) { - result.insert( - purl.clone(), - CrawledPackage { - name: name.to_string(), - version: version.to_string(), - namespace: Some(namespace.to_string()), - purl: purl.clone(), - path: pkg_dir, - }, - ); - } + if normalize_version(installed_version) != normalize_version(version) { + continue; } + + // Resolve the on-disk directory using the original casing + // recorded in installed.json, which is what Composer wrote to + // disk — the canonical (lowercase) PURL name would miss it on + // a case-sensitive filesystem. + let pkg_dir = match installed_name.split_once('/') { + Some((ns, n)) => vendor_path.join(ns).join(n), + None => continue, + }; + + if !is_dir(&pkg_dir).await { + continue; + } + + result.insert( + purl.clone(), + CrawledPackage { + name: name.to_ascii_lowercase(), + version: version.to_string(), + namespace: Some(namespace.to_ascii_lowercase()), + purl: purl.clone(), + path: pkg_dir, + }, + ); } } @@ -796,6 +828,85 @@ mod tests { assert_eq!(packages[0].purl, "pkg:composer/symfony/console@6.4.1"); } + #[tokio::test] + async fn test_crawl_all_canonicalizes_uppercase_name_to_lowercase_purl() { + // Composer/Packagist treat package names case-insensitively and the + // canonical PURL is lowercase, but installed.json records the pretty + // (case-preserved) name. crawl_all must emit a lowercase canonical + // PURL so it matches Socket's catalog — otherwise an uppercase pretty + // name silently produces an unmatchable PURL and the vuln is missed. 
+ let dir = tempfile::tempdir().unwrap(); + let vendor_dir = dir.path().join("vendor"); + + let composer_dir = vendor_dir.join("composer"); + tokio::fs::create_dir_all(&composer_dir).await.unwrap(); + tokio::fs::write( + composer_dir.join("installed.json"), + r#"{"packages": [{"name": "Foo/Bar", "version": "1.0.0"}]}"#, + ) + .await + .unwrap(); + // Composer writes the vendor directory using the pretty (case- + // preserved) name. + tokio::fs::create_dir_all(vendor_dir.join("Foo").join("Bar")) + .await + .unwrap(); + tokio::fs::write(dir.path().join("composer.json"), "{}") + .await + .unwrap(); + + let crawler = ComposerCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: None, + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1); + // PURL, name and namespace are the canonical lowercase form... + assert_eq!(packages[0].purl, "pkg:composer/foo/bar@1.0.0"); + assert_eq!(packages[0].name, "bar"); + assert_eq!(packages[0].namespace, Some("foo".to_string())); + // ...but the on-disk path keeps the original casing Composer wrote. + assert_eq!(packages[0].path, vendor_dir.join("Foo").join("Bar")); + } + + #[tokio::test] + async fn test_find_by_purls_canonical_purl_matches_case_preserved_install() { + // A canonical (lowercase) PURL must resolve a package whose + // installed.json name and on-disk directory carry uppercase letters. + // The lookup is case-insensitive and the on-disk path is rebuilt from + // the original installed.json casing so it resolves even on a + // case-sensitive filesystem. 
+ let dir = tempfile::tempdir().unwrap(); + let vendor_dir = dir.path().join("vendor"); + + let composer_dir = vendor_dir.join("composer"); + tokio::fs::create_dir_all(&composer_dir).await.unwrap(); + tokio::fs::write( + composer_dir.join("installed.json"), + r#"{"packages": [{"name": "Foo/Bar", "version": "1.0.0"}]}"#, + ) + .await + .unwrap(); + tokio::fs::create_dir_all(vendor_dir.join("Foo").join("Bar")) + .await + .unwrap(); + + let crawler = ComposerCrawler::new(); + let purls = vec!["pkg:composer/foo/bar@1.0.0".to_string()]; + let result = crawler.find_by_purls(&vendor_dir, &purls).await.unwrap(); + + assert_eq!(result.len(), 1); + let pkg = result.get("pkg:composer/foo/bar@1.0.0").unwrap(); + // The resolved path points at the real (case-preserved) directory. + assert_eq!(pkg.path, vendor_dir.join("Foo").join("Bar")); + assert_eq!(pkg.namespace, Some("foo".to_string())); + assert_eq!(pkg.name, "bar"); + } + #[tokio::test] async fn test_find_by_purls_version_mismatch() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/crawlers/deno_crawler.rs b/crates/socket-patch-core/src/crawlers/deno_crawler.rs index 2014fc1..6a275ef 100644 --- a/crates/socket-patch-core/src/crawlers/deno_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/deno_crawler.rs @@ -419,6 +419,83 @@ mod tests { assert_eq!(entry.namespace.as_deref(), Some("@std")); } + #[tokio::test] + async fn find_by_purls_skips_when_version_path_is_a_file() { + // Malformed layout: the `//` leaf is a + // regular file, not the expected version directory. The `is_dir` + // gate must reject it rather than emit a CrawledPackage whose + // `path` points at a non-directory. 
+ let tmp = tempfile::tempdir().unwrap(); + let name_dir = tmp.path().join("@std").join("path"); + tokio::fs::create_dir_all(&name_dir).await.unwrap(); + tokio::fs::write(name_dir.join("0.220.0"), b"not a dir") + .await + .unwrap(); + + let crawler = DenoCrawler; + let result = crawler + .find_by_purls(tmp.path(), &["pkg:jsr/@std/path@0.220.0".to_string()]) + .await + .unwrap(); + assert!( + result.is_empty(), + "a file at the version path must not resolve, got {result:?}" + ); + } + + #[tokio::test] + async fn scan_tolerates_malformed_tree_without_emitting_phantoms() { + // A grab-bag of malformed shapes that must all be skipped without + // panicking: an empty scope dir, a scoped package with no version + // dirs, and a non-`@` top-level dir holding a version-shaped tree. + let tmp = tempfile::tempdir().unwrap(); + // The one real package. + stage(tmp.path(), "@std", "path", "0.220.0").await; + // Empty scope dir — no name children. + tokio::fs::create_dir_all(tmp.path().join("@empty")) + .await + .unwrap(); + // Scoped package whose name dir has no version children. + tokio::fs::create_dir_all(tmp.path().join("@std").join("nover")) + .await + .unwrap(); + // Non-`@` top-level dir with an otherwise-valid-looking subtree. + tokio::fs::create_dir_all(tmp.path().join("bare").join("pkg").join("1.0.0")) + .await + .unwrap(); + + let mut seen = HashSet::new(); + let mut out = Vec::new(); + scan_jsr_cache(tmp.path(), &mut seen, &mut out).await; + + assert_eq!(out.len(), 1, "got {:?}", out); + assert_eq!(out[0].purl, "pkg:jsr/@std/path@0.220.0"); + } + + #[tokio::test] + #[serial_test::serial] + async fn crawl_all_local_without_marker_returns_empty() { + // crawl_all in LOCAL mode (no global / no prefix) must yield + // nothing when the cwd has no Deno project marker, even if a + // populated cache is reachable via DENO_DIR. Guards the + // project-marker gate wiring through crawl_all, not just + // get_jsr_cache_paths in isolation. 
+ let project = tempfile::tempdir().unwrap(); + let deno_home = tempfile::tempdir().unwrap(); + let jsr = deno_home.path().join("npm").join("jsr.io"); + stage(&jsr, "@std", "path", "0.220.0").await; + let _g = EnvGuard::set("DENO_DIR", deno_home.path().to_str().unwrap()); + + let crawler = DenoCrawler; + let opts = CrawlerOptions { + cwd: project.path().to_path_buf(), // no deno.json/.jsonc/.lock + global: false, + global_prefix: None, + batch_size: 100, + }; + assert!(crawler.crawl_all(&opts).await.is_empty()); + } + #[tokio::test] async fn find_by_purls_skips_absent_version_keeps_present() { let tmp = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/crawlers/go_crawler.rs b/crates/socket-patch-core/src/crawlers/go_crawler.rs index 3f5a9ca..8994198 100644 --- a/crates/socket-patch-core/src/crawlers/go_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/go_crawler.rs @@ -821,4 +821,158 @@ mod tests { assert_eq!(pkg.name, "bar"); assert_eq!(pkg.version, "v1.0.0-RC1"); } + + #[tokio::test] + async fn test_crawl_finds_v2_submodule_beside_v1() { + // A `/vN` major-version submodule lives at + // `/v2@/`, which forces a *plain* `` directory to + // exist alongside the versioned `@` leaf. The walk must + // descend into the plain `bar/` dir (no `@`) to reach `v2@v2.0.0` + // while still parsing the sibling `bar@v1.0.0` leaf — i.e. hitting + // a versioned directory must not abort the walk of its siblings. 
+ let dir = tempfile::tempdir().unwrap(); + + let v1 = dir + .path() + .join("github.com") + .join("foo") + .join("bar@v1.0.0"); + tokio::fs::create_dir_all(&v1).await.unwrap(); + + let v2 = dir + .path() + .join("github.com") + .join("foo") + .join("bar") + .join("v2@v2.0.0"); + tokio::fs::create_dir_all(&v2).await.unwrap(); + + let crawler = GoCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + let purls: HashSet<_> = packages.iter().map(|p| p.purl.as_str()).collect(); + assert_eq!(packages.len(), 2, "both v1 leaf and v2 submodule found"); + assert!(purls.contains("pkg:golang/github.com/foo/bar@v1.0.0")); + assert!(purls.contains("pkg:golang/github.com/foo/bar/v2@v2.0.0")); + } + + #[tokio::test] + async fn test_crawl_finds_multiple_versions_of_same_module() { + // Two versions of one module are distinct sibling directories and + // must both surface as separate packages (dedup keys on the full + // versioned PURL, not the module path). 
+ let dir = tempfile::tempdir().unwrap(); + + for v in ["gin@v1.9.0", "gin@v1.9.1"] { + let d = dir.path().join("github.com").join("gin-gonic").join(v); + tokio::fs::create_dir_all(&d).await.unwrap(); + } + + let crawler = GoCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + let purls: HashSet<_> = packages.iter().map(|p| p.purl.as_str()).collect(); + assert_eq!(packages.len(), 2); + assert!(purls.contains("pkg:golang/github.com/gin-gonic/gin@v1.9.0")); + assert!(purls.contains("pkg:golang/github.com/gin-gonic/gin@v1.9.1")); + } + + #[test] + fn test_parse_versioned_dir_empty_version_guard() { + // A dir name with a trailing `@` and no version (`foo@`) is + // malformed metadata: the empty-version guard must yield None + // rather than emit a package with an empty version that would + // build a dangling `pkg:golang/foo@` PURL. + let base = std::path::Path::new("/cache"); + let dir = std::path::Path::new("/cache/github.com/foo/bar@"); + let mut seen = HashSet::new(); + let crawler = GoCrawler; + let result = crawler.parse_versioned_dir(base, dir, "bar@", &mut seen); + assert!(result.is_none(), "empty version must yield None"); + } + + #[tokio::test] + async fn test_find_by_purls_qualified_purl_keys_by_input() { + // A PURL carrying `?` qualifiers must still resolve the on-disk + // dir (qualifiers stripped before parsing) AND be keyed in the + // result map by the *exact* input string the caller passed. 
+ let dir = tempfile::tempdir().unwrap(); + let module_dir = dir + .path() + .join("github.com") + .join("gin-gonic") + .join("gin@v1.9.1"); + tokio::fs::create_dir_all(&module_dir).await.unwrap(); + + let crawler = GoCrawler::new(); + let qualified = "pkg:golang/github.com/gin-gonic/gin@v1.9.1?type=module".to_string(); + let result = crawler + .find_by_purls(dir.path(), &[qualified.clone()]) + .await + .unwrap(); + + assert_eq!(result.len(), 1); + assert!(result.contains_key(&qualified)); + assert_eq!(result[&qualified].name, "gin"); + } + + #[tokio::test] + async fn test_find_by_purls_absent_returns_empty_ok() { + // No matching directory on disk → Ok(empty map), never an Err. + let dir = tempfile::tempdir().unwrap(); + let crawler = GoCrawler::new(); + let result = crawler + .find_by_purls( + dir.path(), + &["pkg:golang/github.com/none/here@v0.0.1".to_string()], + ) + .await + .unwrap(); + assert!(result.is_empty()); + } + + #[tokio::test] + async fn test_crawl_ignores_stray_file_with_at_sign() { + // Only directories are modules. A stray *file* whose name contains + // `@` at the cache root (e.g. a leftover lock/marker) must not be + // parsed into a ghost package. 
+ let dir = tempfile::tempdir().unwrap(); + + let real = dir + .path() + .join("github.com") + .join("gin-gonic") + .join("gin@v1.9.1"); + tokio::fs::create_dir_all(&real).await.unwrap(); + tokio::fs::write(dir.path().join("stray@v0.0.0"), b"junk") + .await + .unwrap(); + + let crawler = GoCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let packages = crawler.crawl_all(&options).await; + assert_eq!(packages.len(), 1, "the stray file must be ignored"); + assert_eq!( + packages[0].purl, + "pkg:golang/github.com/gin-gonic/gin@v1.9.1" + ); + } } diff --git a/crates/socket-patch-core/src/crawlers/maven_crawler.rs b/crates/socket-patch-core/src/crawlers/maven_crawler.rs index 8e26c59..0879244 100644 --- a/crates/socket-patch-core/src/crawlers/maven_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/maven_crawler.rs @@ -59,19 +59,59 @@ fn strip_comment_spans(line: &str, in_comment: &mut bool) -> String { } } -/// Does the opening tag for `element` on this line self-close -/// (e.g. `` or ``)? Such a tag opens -/// and closes in one shot and must not change the skip-section depth. -fn opening_tag_self_closes(line: &str, element: &str) -> bool { - let open = format!("<{element}"); - let Some(pos) = line.find(&open) else { - return false; - }; - let after = &line[pos + open.len()..]; - match after.find('>') { - Some(gt) => after[..gt].trim_end().ends_with('/'), - None => false, +/// Find the first *real* opening tag for `element` on this line and report +/// whether it self-closes (`Some(true)` for ``, `Some(false)` +/// for a plain `` or ``); `None` if there +/// is no opening tag at all. +/// +/// "Real" means there is a tag boundary immediately after the element name — +/// `>`, `/`, whitespace, or end-of-line. This is critical: a bare substring +/// match would prefix-match a *different* element such as `` as if +/// it opened ``. 
Because the corresponding close `` never +/// equals ``, that phantom open would never be matched by a close and +/// would leak the entire remainder of the document into the skip section, +/// dropping the project's real coordinates. +fn opening_tag(line: &str, element: &str) -> Option { + let needle = format!("<{element}"); + let mut from = 0; + while let Some(rel) = line[from..].find(&needle) { + let pos = from + rel; + let after = &line[pos + needle.len()..]; + match after.chars().next() { + // Tag name runs to the end of the line (attributes continue on the + // next line): a real, non-self-closing open. + None => return Some(false), + Some(c) if c == '>' || c == '/' || c.is_whitespace() => { + let self_closes = match after.find('>') { + Some(gt) => after[..gt].trim_end().ends_with('/'), + None => false, + }; + return Some(self_closes); + } + // Prefix match of a longer name (``): keep scanning for + // a genuine ``/``/`` later on the line. + _ => from = pos + needle.len(), + } } + None +} + +/// Does this line contain a *real* closing tag `` (tolerating +/// whitespace before `>`, e.g. ``)? The boundary `>` is required, so +/// `` is not treated as a close of `` — mirroring the +/// boundary discipline of [`opening_tag`]. +fn contains_closing_tag(line: &str, element: &str) -> bool { + let needle = format!("') { + return true; + } + from = pos + needle.len(); + } + false } /// Parse `groupId`, `artifactId`, and `version` from a POM XML file. @@ -113,11 +153,10 @@ pub fn parse_pom_group_artifact_version(content: &str) -> Option<(String, String // (``) leaves the depth unchanged; only a lone open // increments and a lone close decrements. 
for section in &skip_sections { - let open_tag = format!("<{section}"); - let close_tag = format!(""); - let has_open = trimmed.contains(&open_tag); - let has_close = trimmed.contains(&close_tag); - if has_open && !has_close && !opening_tag_self_closes(trimmed, section) { + let open = opening_tag(trimmed, section); + let has_open = open.is_some(); + let has_close = contains_closing_tag(trimmed, section); + if has_open && !has_close && open != Some(true) { skip_depth += 1; } else if has_close && !has_open { skip_depth = skip_depth.saturating_sub(1); @@ -130,14 +169,15 @@ pub fn parse_pom_group_artifact_version(content: &str) -> Option<(String, String // Track parent section (a self-closing `` carries no // coordinates, so it never opens a parent block). - if trimmed.contains("") { + if contains_closing_tag(trimmed, "parent") { in_parent = false; continue; } @@ -832,6 +872,121 @@ mod tests { assert_eq!(pkgs[0].namespace, Some("com.example".to_string())); } + #[test] + fn test_parse_pom_foreign_element_prefixed_with_skip_name() { + // REGRESSION: a top-level element whose name merely *starts with* a + // skip-section name (here `` vs the `build` skip section, + // and `` vs `modules`) must NOT open a skip section. + // + // The opening match was a bare substring (`` + // matched as an open; its close `` never equals ``, + // so the phantom open never balanced and `skip_depth` stayed >0 for the + // rest of the file — swallowing the project's real coordinates. + let content = r#" + com.example + ci-metadata + x + my-app + 1.0.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn test_parse_pom_foreign_prefixed_element_does_not_swallow_trailing_coords() { + // The decoy element appears BEFORE all coordinates, so if it wrongly + // opened a skip section every coordinate would be lost and parse would + // return None instead of the real package. 
+ let content = r#" + aggregator-notes + com.example + my-app + 2.5.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "2.5.0"); + } + + #[test] + fn test_parse_pom_skip_section_close_tag_with_whitespace() { + // XML permits whitespace before `>` in a closing tag (``). + // The exact `` match used to miss it, leaving `build` open and + // leaking the plugin's coordinates. The boundary-aware close handles it. + let content = r#" + com.example + my-app + + + + org.leak + leak-plugin + 9.9.9 + + + + 1.0.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "com.example"); + assert_eq!(a, "my-app"); + assert_eq!(v, "1.0.0"); + } + + #[test] + fn test_parse_pom_parent_block_with_foreign_prefixed_child() { + // A `` decoy must not be mistaken for opening the real + // `` block, and the real `` groupId must still be the + // fallback when the project omits its own groupId. + let content = r#" + https://example.com + + org.apache + apache + 30 + + commons-lang3 + 3.12.0 +"#; + let (g, a, v) = parse_pom_group_artifact_version(content).unwrap(); + assert_eq!(g, "org.apache"); + assert_eq!(a, "commons-lang3"); + assert_eq!(v, "3.12.0"); + } + + // ---- opening_tag / contains_closing_tag boundary tests ---- + + #[test] + fn test_opening_tag_boundary() { + // Real opening tags. + assert_eq!(opening_tag("", "build"), Some(false)); + assert_eq!(opening_tag(" ", "build"), Some(false)); + assert_eq!(opening_tag("", "build"), Some(true)); + assert_eq!(opening_tag("", "build"), Some(true)); + // Attribute list spilling onto the next line — name at end of line. + assert_eq!(opening_tag("", "build"), None); + assert_eq!(opening_tag("x", "modules"), None); + // Close tags are not opens. + assert_eq!(opening_tag("", "build"), None); + // A genuine open later on a line that starts with a decoy prefix. 
+ assert_eq!(opening_tag(" ", "build"), Some(false)); + } + + #[test] + fn test_contains_closing_tag_boundary() { + assert!(contains_closing_tag("", "build")); + assert!(contains_closing_tag("", "build")); // whitespace tolerated + assert!(contains_closing_tag("stuff more", "build")); + assert!(!contains_closing_tag("", "build")); // prefix decoy + assert!(!contains_closing_tag("", "build")); // open is not a close + } + // ---- extract_xml_value tests ---- #[test] diff --git a/crates/socket-patch-core/src/crawlers/npm_crawler.rs b/crates/socket-patch-core/src/crawlers/npm_crawler.rs index 85fd4fa..4d330e3 100644 --- a/crates/socket-patch-core/src/crawlers/npm_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/npm_crawler.rs @@ -780,6 +780,30 @@ mod tests { assert!(NpmCrawler::parse_purl_components("not-a-purl").is_none()); } + /// The `?qualifier` is stripped *before* `rfind('@')` splits the + /// version, so an `@` living inside a qualifier value + /// (`vcs_url=git@github.com:...`) must not be mistaken for the + /// version separator. Reordering those two steps would parse the + /// version as `github.com:...` and break apply/rollback for any + /// PURL whose qualifier carries an `@`. 
+ #[test] + fn test_parse_purl_components_qualifier_with_at_sign() { + let (ns, name, ver) = + NpmCrawler::parse_purl_components("pkg:npm/foo@1.0.0?vcs_url=git@github.com:x/y.git") + .unwrap(); + assert!(ns.is_none()); + assert_eq!(name, "foo"); + assert_eq!(ver, "1.0.0"); + + let (ns, name, ver) = NpmCrawler::parse_purl_components( + "pkg:npm/@types/node@20.0.0?maintainer=a@b.com", + ) + .unwrap(); + assert_eq!(ns.as_deref(), Some("@types")); + assert_eq!(name, "node"); + assert_eq!(ver, "20.0.0"); + } + #[tokio::test] async fn test_read_package_json_valid() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/crawlers/nuget_crawler.rs b/crates/socket-patch-core/src/crawlers/nuget_crawler.rs index c0d0b56..fa42deb 100644 --- a/crates/socket-patch-core/src/crawlers/nuget_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/nuget_crawler.rs @@ -249,6 +249,23 @@ impl NuGetCrawler { let ver_name = ver_entry.file_name(); let ver_str = ver_name.to_string_lossy(); + + // A global-cache name directory contains only *version* + // subdirectories, and a NuGet version always begins with a + // numeric major component (SemVer). A legacy + // `./` package, by contrast, contains content + // folders (`lib/`, `tools/`, `runtimes/`, `build/`, …), none + // of which start with a digit. Without this shape check, a + // legacy package whose content folder happens to verify (e.g. + // a `tools/lib/` tool package missing its top-level `.nuspec`) + // would be misread as a global-cache layout and emitted with a + // garbage `@` version (e.g. `pkg:nuget/Foo.1.0.0@tools`) + // — masking the real `pkg:nuget/Foo@1.0.0` the legacy branch + // would otherwise produce. 
+ if !ver_str.starts_with(|c: char| c.is_ascii_digit()) { + continue; + } + let ver_path = name_dir.join(&*ver_str); if self.verify_nuget_package(&ver_path).await { @@ -785,6 +802,119 @@ mod tests { assert!(super::is_dotnet_project(dir.path()).await); } + /// Regression: a well-formed legacy `./` package that + /// also ships a content folder containing a `lib/` (a common tool / + /// runtime layout, e.g. `tools/lib/`) must still be reported with its + /// real identity. Before the version-shape gate in + /// `scan_global_cache_package`, the content folder verified and was + /// mistaken for a version directory, so the package was emitted as a + /// garbage `pkg:nuget/Foo.1.0.0@tools` and the real + /// `pkg:nuget/Foo@1.0.0` (which the legacy branch would have produced) + /// was lost to the `continue`. + #[tokio::test] + async fn test_legacy_pkg_with_nested_lib_folder_is_not_misparsed() { + let dir = tempfile::tempdir().unwrap(); + + let pkg = dir.path().join("Foo.1.0.0"); + // Top-level marker — this is a valid legacy package. + tokio::fs::create_dir_all(pkg.join("lib")).await.unwrap(); + // A content folder that itself contains a lib/ dir. This is what + // tripped the old global-cache heuristic. + tokio::fs::create_dir_all(pkg.join("tools").join("lib")) + .await + .unwrap(); + + let crawler = NuGetCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let pkgs = crawler.crawl_all(&options).await; + let purls: Vec<&str> = pkgs.iter().map(|p| p.purl.as_str()).collect(); + assert_eq!( + purls, + vec!["pkg:nuget/Foo@1.0.0"], + "legacy package must report its real identity, not a content folder; got {pkgs:?}" + ); + } + + /// Regression companion: a *malformed* legacy package (no top-level + /// `lib/` or `.nuspec`, only a nested verifying content folder) must + /// yield nothing rather than a garbage `@` package. 
+ #[tokio::test] + async fn test_legacy_pkg_missing_marker_with_nested_lib_yields_nothing() { + let dir = tempfile::tempdir().unwrap(); + + let pkg = dir.path().join("Foo.1.0.0"); + tokio::fs::create_dir_all(pkg.join("tools").join("lib")) + .await + .unwrap(); + + let crawler = NuGetCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let pkgs = crawler.crawl_all(&options).await; + assert!( + pkgs.is_empty(), + "an unverifiable legacy dir must not emit a garbage version; got {pkgs:?}" + ); + } + + /// Guard the version-shape gate itself: a genuine global-cache package + /// (whose version dir starts with a digit) must still be discovered, + /// including multiple versions of the same id. + #[tokio::test] + async fn test_global_cache_multi_version_still_discovered() { + let dir = tempfile::tempdir().unwrap(); + + for v in ["13.0.1", "13.0.3"] { + let p = dir.path().join("newtonsoft.json").join(v); + tokio::fs::create_dir_all(p.join("lib")).await.unwrap(); + } + // A non-version sibling dir under the id (should be ignored, not + // emitted as `@tools`). 
+ tokio::fs::create_dir_all( + dir.path() + .join("newtonsoft.json") + .join("tools") + .join("lib"), + ) + .await + .unwrap(); + + let crawler = NuGetCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: Some(dir.path().to_path_buf()), + batch_size: 100, + }; + + let mut purls: Vec = crawler + .crawl_all(&options) + .await + .iter() + .map(|p| p.purl.clone()) + .collect(); + purls.sort_unstable(); + assert_eq!( + purls, + vec![ + "pkg:nuget/newtonsoft.json@13.0.1".to_string(), + "pkg:nuget/newtonsoft.json@13.0.3".to_string(), + ], + "both versions discovered, non-version sibling ignored" + ); + } + #[tokio::test] async fn test_nuget_home_env_var() { // Test that NUGET_PACKAGES env var is respected diff --git a/crates/socket-patch-core/src/crawlers/pkg_managers.rs b/crates/socket-patch-core/src/crawlers/pkg_managers.rs index 042faae..76e9e47 100644 --- a/crates/socket-patch-core/src/crawlers/pkg_managers.rs +++ b/crates/socket-patch-core/src/crawlers/pkg_managers.rs @@ -37,8 +37,9 @@ pub enum NpmPkgManager { /// yarn classic — `yarn.lock` present, real `node_modules/`, no /// PnP loader. Behaves like npm at the FS level. YarnClassic, - /// yarn-berry with Plug'n'Play (`.pnp.cjs` present). Packages - /// live inside `.yarn/cache/*.zip`. Apply must refuse. + /// yarn-berry with Plug'n'Play (`.pnp.cjs`, `.pnp.js`, or + /// `.pnp.loader.mjs` present). Packages live inside + /// `.yarn/cache/*.zip`. Apply must refuse. YarnBerryPnP, /// bun-managed project — `bun.lock` (text, current default) or /// `bun.lockb` (binary, legacy) at the project root. Bun @@ -58,7 +59,7 @@ pub enum NpmPkgManager { /// /// Precedence (first match wins): /// -/// 1. `.pnp.cjs` or `.pnp.loader.mjs` → yarn-berry PnP. +/// 1. `.pnp.cjs`, `.pnp.js`, or `.pnp.loader.mjs` → yarn-berry PnP. /// 2. `bun.lock` or `bun.lockb` (+ `node_modules/`) → bun. /// 3. `node_modules/.modules.yaml` or `node_modules/.pnpm/` → pnpm. /// 4. 
`yarn.lock` (without PnP markers) + `node_modules/` → yarn classic. @@ -71,8 +72,17 @@ pub enum NpmPkgManager { /// lockfile filename disambiguates cleanly. pub fn detect_npm_pkg_manager(project_root: &Path) -> NpmPkgManager { // 1. yarn-berry PnP — highest priority because it determines - // whether the npm crawler can find anything at all. - if project_root.join(".pnp.cjs").is_file() || project_root.join(".pnp.loader.mjs").is_file() { + // whether the npm crawler can find anything at all. Yarn 3+ + // emits `.pnp.cjs`; Yarn 2.x emitted `.pnp.js` (renamed to + // `.cjs` in 3.0 to dodge `"type": "module"` resolution); newer + // installs may also ship the ESM `.pnp.loader.mjs`. All three + // mean "packages aren't on disk" — refuse rather than silently + // fall through to Unknown (a Yarn 2 PnP tree has no + // `node_modules/`, so it would otherwise escape the refusal). + if project_root.join(".pnp.cjs").is_file() + || project_root.join(".pnp.js").is_file() + || project_root.join(".pnp.loader.mjs").is_file() + { return NpmPkgManager::YarnBerryPnP; } @@ -327,6 +337,48 @@ mod tests { ); } + /// Yarn 2.x (berry) emitted the PnP loader as `.pnp.js` — Yarn 3.0 + /// renamed it to `.pnp.cjs`. A Yarn 2 PnP tree has no + /// `node_modules/` on disk, so if `.pnp.js` isn't recognized the + /// project escapes the safety-critical refusal and silently + /// classifies as Unknown. Pin the legacy marker so the refusal + /// fires for Yarn 2 installs too. + #[test] + fn yarn_berry_pnp_via_legacy_pnp_js() { + let d = tempfile::tempdir().unwrap(); + std::fs::write(d.path().join(".pnp.js"), "").unwrap(); + assert_eq!( + detect_npm_pkg_manager(d.path()), + NpmPkgManager::YarnBerryPnP + ); + } + + /// The legacy `.pnp.js` marker must outrank bun as well — same + /// structural override as `.pnp.cjs`/`.pnp.loader.mjs`: packages + /// aren't on disk, so refuse regardless of a stray lockfile or an + /// installed `node_modules/`. 
+ #[test] + fn yarn_berry_legacy_pnp_js_priority_over_bun() { + let d = tempfile::tempdir().unwrap(); + std::fs::write(d.path().join(".pnp.js"), "").unwrap(); + std::fs::write(d.path().join("bun.lock"), "").unwrap(); + std::fs::create_dir_all(d.path().join("node_modules")).unwrap(); + assert_eq!( + detect_npm_pkg_manager(d.path()), + NpmPkgManager::YarnBerryPnP + ); + } + + /// Robustness: `.pnp.js` as a *directory* (not a regular file) must + /// not trip the PnP branch — the check is `.is_file()`. With no + /// other markers it falls through to Unknown. + #[test] + fn pnp_js_as_dir_does_not_trigger_pnp() { + let d = tempfile::tempdir().unwrap(); + std::fs::create_dir_all(d.path().join(".pnp.js")).unwrap(); + assert_eq!(detect_npm_pkg_manager(d.path()), NpmPkgManager::Unknown); + } + /// Layout assumption: detection is *install*-based, not /// lockfile-based, for npm. A lone `package-lock.json` with no /// installed `node_modules/` is a fresh checkout — there's nothing diff --git a/crates/socket-patch-core/src/crawlers/python_crawler.rs b/crates/socket-patch-core/src/crawlers/python_crawler.rs index c0ae64c..c8cb1a5 100644 --- a/crates/socket-patch-core/src/crawlers/python_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/python_crawler.rs @@ -148,7 +148,12 @@ fn parse_dist_info_dir_name(dir_name: &str) -> Option<(String, String)> { /// Find directories matching a path pattern with wildcard segments. /// /// Supported wildcards: -/// - `"python3.*"` — matches directory entries starting with `python3.` +/// - `"python3.*"` — matches the minor-versioned interpreter dirs +/// (`python3.11`, `python3.12`, …) AND the bare `python3` dir. +/// The bare form is what Debian/Ubuntu use for apt-installed system +/// modules (`/usr/lib/python3/dist-packages`); a `python3.`-prefix +/// test (requiring the dot) would silently skip it, hiding every +/// distro-packaged module from a crawler whose job is to patch them. 
/// - `"*"` — matches any directory entry /// /// All other segments are treated as literal path components. @@ -170,14 +175,17 @@ pub async fn find_python_dirs(base_path: &Path, segments: &[&str]) -> Vec Vec { .or_else(|_| std::env::var("USERPROFILE")) .unwrap_or_else(|_| "~".to_string()); - // Helper closure to scan base/lib/python3.*/[dist|site]-packages + // Helper closure to scan base/{lib,lib64}/python3.*/[dist|site]-packages. + // `lib64` is the multilib dir on RHEL/Fedora/SUSE where compiled + // (C-extension) packages land — pure-Python ones go to `lib`, so both + // hold real, distinct packages. Scanning only `lib` would miss every + // native package on those distros. async fn scan_well_known( base: &Path, pkg_type: &str, seen: &mut HashSet, results: &mut Vec, ) { - let matches = find_python_dirs(base, &["lib", "python3.*", pkg_type]).await; + let mut matches = find_python_dirs(base, &["lib", "python3.*", pkg_type]).await; + matches.extend(find_python_dirs(base, &["lib64", "python3.*", pkg_type]).await); for m in matches { let resolved = if m.is_absolute() { m @@ -991,6 +1004,74 @@ mod tests { assert!(buggy.is_empty()); } + /// Debian/Ubuntu apt-installed modules live in the BARE `python3` + /// interpreter dir (`/usr/lib/python3/dist-packages`), not a + /// minor-versioned one. The `python3.*` segment must match it; a + /// `python3.`-prefix test (requiring the dot) silently hid every + /// distro-packaged module — exactly the bug this guards. 
+ #[tokio::test] + async fn test_find_python_dirs_matches_bare_python3() { + let dir = tempfile::tempdir().unwrap(); + let dist = dir + .path() + .join("lib") + .join("python3") + .join("dist-packages"); + tokio::fs::create_dir_all(&dist).await.unwrap(); + + let results = find_python_dirs(dir.path(), &["lib", "python3.*", "dist-packages"]).await; + assert_eq!(results, vec![dist]); + } + + /// The bare-`python3` arm must be an EXACT match, not a loose prefix: + /// `python3` and `python3.12` are interpreters, but `python3foo` / + /// `python311` are not and must be ignored so the `python3.*` segment + /// never over-matches an unrelated directory. + #[tokio::test] + async fn test_find_python_dirs_bare_python3_exact_not_prefix() { + let dir = tempfile::tempdir().unwrap(); + let lib = dir.path().join("lib"); + for v in ["python3", "python3.12", "python3foo", "python311"] { + tokio::fs::create_dir_all(lib.join(v).join("site-packages")) + .await + .unwrap(); + } + + let results = find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await; + let mut got: Vec = results + .iter() + .map(|p| { + p.parent() + .unwrap() + .file_name() + .unwrap() + .to_string_lossy() + .into_owned() + }) + .collect(); + got.sort(); + // Only the real interpreter dirs — `python3` and `python3.12`. + assert_eq!(got, vec!["python3", "python3.12"]); + } + + /// `lib` and `lib64` coexist on RHEL/Fedora/SUSE and hold distinct + /// packages (pure-Python vs compiled). `scan_well_known` scans both; + /// the `lib64` segment is a plain literal, so this proves the matcher + /// reaches a `lib64/python3.X/site-packages` tree at all. 
+ #[tokio::test] + async fn test_find_python_dirs_lib64_layout() { + let dir = tempfile::tempdir().unwrap(); + let sp = dir + .path() + .join("lib64") + .join("python3.11") + .join("site-packages"); + tokio::fs::create_dir_all(&sp).await.unwrap(); + + let results = find_python_dirs(dir.path(), &["lib64", "python3.*", "site-packages"]).await; + assert_eq!(results, vec![sp]); + } + #[tokio::test] async fn test_find_python_dirs_literal() { let dir = tempfile::tempdir().unwrap(); @@ -1171,3 +1252,4 @@ mod tests { assert!(!result.contains_key("pkg:pypi/flask@3.0.0")); } } + diff --git a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs index d8d4397..1d14762 100644 --- a/crates/socket-patch-core/src/crawlers/ruby_crawler.rs +++ b/crates/socket-patch-core/src/crawlers/ruby_crawler.rs @@ -750,6 +750,93 @@ mod tests { assert_eq!(purls, HashSet::from(["pkg:gem/rails@7.1.0"])); } + /// A requested version that is *longer* than what is installed must + /// not resolve. The prefix scan keys on `--`, so a + /// requested `1.0.0` must reject both a plain `foo-1.0/` and a + /// platform `foo-1.0-x86_64-linux/` (installed version `1.0`). Guards + /// against a future change that compares versions bidirectionally. + #[tokio::test] + async fn find_by_purls_rejects_longer_requested_version() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::create_dir_all(dir.path().join("foo-1.0").join("lib")) + .await + .unwrap(); + tokio::fs::create_dir_all(dir.path().join("foo-1.0-x86_64-linux").join("lib")) + .await + .unwrap(); + let crawler = RubyCrawler::new(); + let result = crawler + .find_by_purls(dir.path(), &["pkg:gem/foo@1.0.0".to_string()]) + .await + .unwrap(); + assert!( + result.is_empty(), + "1.0.0 must not match installed 1.0 dirs: {result:?}" + ); + } + + /// The exact-match arm of `locate_gem_dir` must *verify gem content*, + /// not merely accept that `-/` exists on disk. 
When the + /// exact dir is present but empty (no `lib/`, no `.gemspec` — a + /// malformed/partial install), resolution must fall through to a valid + /// platform sibling rather than returning the hollow exact dir. + #[tokio::test] + async fn locate_gem_dir_skips_invalid_exact_for_valid_platform() { + let dir = tempfile::tempdir().unwrap(); + // Exact dir exists but is hollow — not a real gem. + tokio::fs::create_dir_all(dir.path().join("nokogiri-1.16.5")) + .await + .unwrap(); + // Valid platform sibling. + let plat = dir.path().join("nokogiri-1.16.5-x86_64-linux"); + tokio::fs::create_dir_all(plat.join("lib")).await.unwrap(); + + let crawler = RubyCrawler::new(); + let result = crawler + .find_by_purls(dir.path(), &["pkg:gem/nokogiri@1.16.5".to_string()]) + .await + .unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result.get("pkg:gem/nokogiri@1.16.5").unwrap().path, plat); + } + + /// `parse_gem_env_output` is the pure parser for `gem env ` + /// stdout: empty/whitespace-only input yields `None` (gem absent or no + /// path), and surrounding whitespace/newlines are trimmed off a real + /// path so it joins cleanly with `gems/`. + #[test] + fn parse_gem_env_output_contract() { + assert_eq!(parse_gem_env_output(""), None); + assert_eq!(parse_gem_env_output(" \n\t "), None); + assert_eq!( + parse_gem_env_output(" /usr/lib/ruby/gems/3.2.0\n"), + Some("/usr/lib/ruby/gems/3.2.0".to_string()) + ); + } + + /// Local mode must not walk the global gem store for a non-Ruby + /// project: with no `vendor/bundle/ruby/` and neither `Gemfile` nor + /// `Gemfile.lock` present, `get_gem_paths` returns empty (it never even + /// shells out to `gem env`). This pins the project-detection gate that + /// keeps a JS/Python checkout from being scanned as Ruby. + #[tokio::test] + async fn get_gem_paths_empty_for_non_ruby_project() { + let dir = tempfile::tempdir().unwrap(); + // A decoy non-Ruby file; no Gemfile, no vendor/bundle/ruby. 
+ tokio::fs::write(dir.path().join("package.json"), b"{}") + .await + .unwrap(); + let crawler = RubyCrawler::new(); + let options = CrawlerOptions { + cwd: dir.path().to_path_buf(), + global: false, + global_prefix: None, + batch_size: 100, + }; + let paths = crawler.get_gem_paths(&options).await.unwrap(); + assert!(paths.is_empty(), "non-Ruby project must yield no gem paths: {paths:?}"); + } + /// Gem names with embedded underscores/digits and multi-dash names /// must keep their full name; the version starts at the first /// dash-then-digit boundary. diff --git a/crates/socket-patch-core/src/crawlers/types.rs b/crates/socket-patch-core/src/crawlers/types.rs index 08e904a..acf3572 100644 --- a/crates/socket-patch-core/src/crawlers/types.rs +++ b/crates/socket-patch-core/src/crawlers/types.rs @@ -544,6 +544,33 @@ mod tests { ); } + /// Every enabled ecosystem must have a *unique* `cli_name`: the + /// `--ecosystems` flag parses these tokens, so two ecosystems sharing + /// one token would make the flag ambiguous and silently route or drop + /// packages. A copy-paste in the `cli_name` match arm is exactly the + /// kind of regression this guards. + #[test] + fn test_all_cli_names_unique() { + let mut seen = std::collections::HashSet::new(); + for eco in Ecosystem::all() { + assert!( + seen.insert(eco.cli_name()), + "duplicate cli_name {:?}", + eco.cli_name() + ); + } + } + + /// `all()` is a hand-maintained list parallel to the enum; an accidental + /// duplicate entry would inflate counts and double-crawl. Pin uniqueness. + #[test] + fn test_all_has_no_duplicate_variants() { + let mut seen = std::collections::HashSet::new(); + for eco in Ecosystem::all() { + assert!(seen.insert(*eco), "duplicate variant {:?} in all()", eco); + } + } + /// The documented default batch size is 100. A regression to 0 would /// reintroduce the batch-size-0 division/panic class of bug seen in /// the scan path, so pin the contract here at the source of truth. 
diff --git a/crates/socket-patch-core/src/gem_setup/mod.rs b/crates/socket-patch-core/src/gem_setup/mod.rs index bfb07cd..bd33d0a 100644 --- a/crates/socket-patch-core/src/gem_setup/mod.rs +++ b/crates/socket-patch-core/src/gem_setup/mod.rs @@ -345,4 +345,86 @@ mod tests { assert_eq!(r.status, GemSetupStatus::AlreadyConfigured); assert!(plugins_rb_path(root).exists(), "user file must be left alone"); } + + #[tokio::test] + async fn test_add_plugin_files_writes_each_template_to_its_own_path() { + // Guards the path↔content mapping: plugins.rb must get PLUGINS_RB and the + // gemspec must get GEMSPEC (not swapped). A swap would leave each file + // failing its own `starts_with(GENERATED_MARKER)` content expectations. + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + add_plugin_files(root, false).await; + assert_eq!( + fs::read_to_string(plugins_rb_path(root)).await.unwrap(), + PLUGINS_RB, + "plugins.rb must receive the plugins.rb template" + ); + assert_eq!( + fs::read_to_string(gemspec_path(root)).await.unwrap(), + GEMSPEC, + "gemspec must receive the gemspec template" + ); + } + + #[tokio::test] + async fn test_add_plugin_files_rewrites_stale_content() { + // A drifted (hand-edited or older-version) plugins.rb must be re-synced to + // the current template, and the call must report `Updated` — not silently + // accept the stale bytes as already-configured. + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&plugins_rb_path(root), "# Code generated by stale\nold body\n").await; + write(&gemspec_path(root), GEMSPEC).await; + let r = add_plugin_files(root, false).await; + assert_eq!(r.status, GemSetupStatus::Updated, "stale plugins.rb is re-synced"); + assert_eq!( + fs::read_to_string(plugins_rb_path(root)).await.unwrap(), + PLUGINS_RB + ); + } + + #[tokio::test] + async fn test_add_plugin_files_syncs_only_the_drifted_file() { + // plugins.rb already matches; only the gemspec drifted. 
The call rewrites + // the gemspec, reports Updated, and leaves the matching plugins.rb intact. + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&plugins_rb_path(root), PLUGINS_RB).await; + write(&gemspec_path(root), "# drifted gemspec\n").await; + let r = add_plugin_files(root, false).await; + assert_eq!(r.status, GemSetupStatus::Updated); + assert_eq!(fs::read_to_string(gemspec_path(root)).await.unwrap(), GEMSPEC); + assert_eq!( + fs::read_to_string(plugins_rb_path(root)).await.unwrap(), + PLUGINS_RB + ); + } + + #[tokio::test] + async fn test_remove_plugin_files_dry_run_keeps_files() { + // Dry-run remove reports the change but must not delete anything. + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + add_plugin_files(root, false).await; + let r = remove_plugin_files(root, true).await; + assert_eq!(r.status, GemSetupStatus::Updated, "dry-run reports the removal"); + assert!( + plugin_files_present(root).await, + "dry-run remove must not delete the plugin files" + ); + } + + #[tokio::test] + async fn test_plugin_files_present_requires_both() { + // The "configured" signal must demand BOTH files, not either one. + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&plugins_rb_path(root), PLUGINS_RB).await; + assert!( + !plugin_files_present(root).await, + "plugins.rb alone is not 'configured' — the gemspec is required too" + ); + write(&gemspec_path(root), GEMSPEC).await; + assert!(plugin_files_present(root).await); + } } diff --git a/crates/socket-patch-core/src/gem_setup/update.rs b/crates/socket-patch-core/src/gem_setup/update.rs index ee35688..b186e49 100644 --- a/crates/socket-patch-core/src/gem_setup/update.rs +++ b/crates/socket-patch-core/src/gem_setup/update.rs @@ -261,6 +261,37 @@ mod tests { ); } + #[test] + fn test_remove_preserves_user_gems_added_below_the_block() { + // Real-world flow: setup appends the block, then the user adds more + // gems AFTER it. 
`remove` must excise exactly our "\n" and leave + // the user's later additions intact with clean formatting — never strip + // a user line or glue two lines together. + let added = gemfile_add(GEMFILE).unwrap(); + let user_edited = format!("{added}gem 'extra', '2.0'\n"); + assert!(is_plugin_directive_present(&user_edited)); + assert_eq!( + gemfile_remove(&user_edited).unwrap(), + format!("{GEMFILE}gem 'extra', '2.0'\n"), + "only our block is removed; the user's later gems survive verbatim" + ); + } + + #[test] + fn test_round_trips_crlf_content_byte_for_byte() { + // A Windows-authored Gemfile uses CRLF line endings. add appends an + // LF-delimited block; remove must still restore the original CRLF bytes + // exactly (the separator/block we strip is our own LF, not the user's). + let crlf = "source 'https://rubygems.org'\r\ngem 'colorize', '1.1.0'\r\n"; + let added = gemfile_add(crlf).unwrap(); + assert!(is_plugin_directive_present(&added)); + assert_eq!( + gemfile_remove(&added).unwrap(), + crlf, + "CRLF user content restored byte-for-byte" + ); + } + #[test] fn test_closing_marker_alone_is_not_detected_as_present() { // The "<<<" closing line must not satisfy the ">>>" opening marker. diff --git a/crates/socket-patch-core/src/go_setup/mod.rs b/crates/socket-patch-core/src/go_setup/mod.rs index 2a3d992..f57774f 100644 --- a/crates/socket-patch-core/src/go_setup/mod.rs +++ b/crates/socket-patch-core/src/go_setup/mod.rs @@ -272,6 +272,7 @@ fn find_main_dirs_inner<'a>( } else if ft.is_file() && name.ends_with(".go") && !name.ends_with("_test.go") + && !is_skipped_go_file(&name) && file_is_package_main(&entry.path()).await { has_main = true; @@ -298,6 +299,16 @@ fn is_skipped_dir(name: &str) -> bool { || name.starts_with('_') } +/// True if the go tool itself ignores this `.go` file by name. Files whose names +/// begin with `.` or `_` are excluded from the build by `go/build` (the same +/// convention that hides `.`/`_`-prefixed directories). 
A `_gen.go` declaring +/// `package main` inside a library package must NOT make that dir look like a +/// main dir — doing so would drop a conflicting `package main` import file and +/// break the build (the file-level twin of the `//go:build ignore` guard). +fn is_skipped_go_file(name: &str) -> bool { + name.starts_with('.') || name.starts_with('_') +} + /// True if a `.go` file's package clause is `package main` AND the file is not /// excluded from the build by an `ignore` build constraint. The `ignore` tag is /// the conventional marker for files the toolchain never compiles (e.g. `go run @@ -308,7 +319,12 @@ async fn file_is_package_main(path: &Path) -> bool { let Ok(content) = fs::read_to_string(path).await else { return false; }; - if has_ignore_build_tag(&content) { + // Go permits a leading UTF-8 BOM (U+FEFF) as the first code point of a source + // file. Strip it before parsing, or the package clause would read as the + // token `"\u{feff}package"` and a real `main` package would be missed — a + // fail-open: the dir gets no guard import. + let content = content.strip_prefix('\u{feff}').unwrap_or(&content); + if has_ignore_build_tag(content) { return false; } // The package clause is the first non-blank, non-comment line. Strip BOTH @@ -318,7 +334,7 @@ async fn file_is_package_main(path: &Path) -> bool { // `package main` import file into a non-main dir and break the build with // two conflicting package clauses. We also must stop AT the package clause, // not scan the whole file, for the same reason. 
- let cleaned = strip_go_comments(&content); + let cleaned = strip_go_comments(content); for line in cleaned.lines() { let t = line.trim(); if t.is_empty() { @@ -567,6 +583,63 @@ mod tests { ); } + #[tokio::test] + async fn test_underscore_and_dot_prefixed_main_file_is_not_a_main_dir() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&root.join("go.mod"), "module example.com/app\n\ngo 1.21\n").await; + // A real library package dir that also holds `_`/`.`-prefixed files that + // declare `package main`. The go tool IGNORES files whose names begin + // with `.` or `_`, so this dir's package is `lib`, NOT main — we must not + // drop a `package main` import file here (it would conflict and break the + // build). Twin of the `//go:build ignore` exclusion. + write(&root.join("pkg/lib.go"), "package lib\n").await; + write( + &root.join("pkg/_gen.go"), + "package main\n\nfunc main() {}\n", + ) + .await; + write( + &root.join("pkg/.hidden.go"), + "package main\n\nfunc main() {}\n", + ) + .await; + + let dirs = find_main_package_dirs(root).await; + assert!( + !dirs.contains(&root.join("pkg")), + "`_`/`.`-prefixed main file must not make pkg/ a main dir: {dirs:?}" + ); + } + + #[tokio::test] + async fn test_bom_prefixed_main_file_is_detected() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + write(&root.join("go.mod"), "module example.com/app\n\ngo 1.21\n").await; + // Go permits a leading UTF-8 BOM. A real `package main` file that starts + // with one must still be detected — a false negative is fail-open (the + // guard import is never wired for this binary). 
+ write( + &root.join("cmd/app/main.go"), + "\u{feff}package main\n\nfunc main() {}\n", + ) + .await; + let dirs = find_main_package_dirs(root).await; + assert!( + dirs.contains(&root.join("cmd/app")), + "BOM-prefixed `package main` must still be detected: {dirs:?}" + ); + } + + #[test] + fn test_is_skipped_go_file() { + assert!(is_skipped_go_file("_gen.go")); + assert!(is_skipped_go_file(".hidden.go")); + assert!(!is_skipped_go_file("main.go")); + assert!(!is_skipped_go_file("gen_main.go")); // underscore not at start + } + #[tokio::test] async fn test_block_comment_package_main_is_not_a_main_dir() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/hash/git_sha256.rs b/crates/socket-patch-core/src/hash/git_sha256.rs index 73d6646..0604820 100644 --- a/crates/socket-patch-core/src/hash/git_sha256.rs +++ b/crates/socket-patch-core/src/hash/git_sha256.rs @@ -24,6 +24,10 @@ pub fn compute_git_sha256_from_bytes(data: &[u8]) -> String { /// would correspond to no real Git object. Rather than silently return a /// corrupt hash, this function reports an [`io::Error`] when the byte count /// disagrees with `size`. +/// +/// To avoid draining an arbitrarily large (or slow/unbounded) stream once the +/// hash is already known to be invalid, the loop bails out as soon as the bytes +/// read exceed `size`; it does not keep reading just to report a larger total. pub async fn compute_git_sha256_from_reader( size: u64, mut reader: R, @@ -41,6 +45,17 @@ pub async fn compute_git_sha256_from_reader( } hasher.update(&buf[..n]); total += n as u64; + if total > size { + // The stream already yielded more bytes than declared, so the hash + // can never match a real Git object. Stop now rather than draining + // the (possibly unbounded) remainder just to report a bigger total. 
+ return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "git sha256: declared size {size} is smaller than the stream (read at least {total} bytes)" + ), + )); + } } if total != size { @@ -223,6 +238,51 @@ mod tests { assert_eq!(sync_hash, async_hash); } + /// An effectively endless reader that records how many bytes it has served. + /// Used to prove that the over-size path does not drain the whole stream + /// once it knows the declared size is already exceeded. + struct EndlessReader { + served: std::sync::Arc, + } + + impl tokio::io::AsyncRead for EndlessReader { + fn poll_read( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> std::task::Poll> { + let n = buf.remaining(); + // Fill the buffer with arbitrary non-EOF data. + let chunk = vec![0xABu8; n]; + buf.put_slice(&chunk); + self.served + .fetch_add(n as u64, std::sync::atomic::Ordering::SeqCst); + std::task::Poll::Ready(Ok(())) + } + } + + /// With a tiny declared size against an endless stream, the loop must error + /// out promptly rather than reading without bound. We allow it to overshoot + /// by at most one internal buffer (8192 bytes) before noticing. + #[tokio::test] + async fn test_async_reader_oversize_bails_without_draining() { + let served = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0)); + let reader = EndlessReader { + served: served.clone(), + }; + + let result = compute_git_sha256_from_reader(10, reader).await; + let err = result.expect_err("endless stream vs tiny size must error"); + assert_eq!(err.kind(), io::ErrorKind::InvalidData); + + // It must have stopped after detecting the overshoot, not kept reading. 
+ let total_served = served.load(std::sync::atomic::Ordering::SeqCst); + assert!( + total_served <= 8192, + "reader was drained for {total_served} bytes; should bail within one buffer" + ); + } + /// A zero-length stream with a correctly-declared size of 0 must hash to /// the canonical Git empty-blob id, matching the byte-slice path. #[tokio::test] @@ -231,4 +291,42 @@ mod tests { let async_hash = compute_git_sha256_from_reader(0, cursor).await.unwrap(); assert_eq!(async_hash, compute_git_sha256_from_bytes(b"")); } + + /// Pin the *reader* path directly to real Git SHA256 output rather than + /// only transitively via `compute_git_sha256_from_bytes`. A regression in + /// how the reader builds its `blob \0` header (wrong keyword, missing + /// NUL, size off-by-one) would slip past the reader-vs-bytes equality tests + /// if the bytes path regressed identically; this anchors it independently. + #[tokio::test] + async fn test_async_reader_known_answer_vectors() { + // `printf 'blob 0\0' | shasum -a 256` + let empty = tokio::io::BufReader::new(&b""[..]); + assert_eq!( + compute_git_sha256_from_reader(0, empty).await.unwrap(), + "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813", + ); + // `printf 'blob 13\0Hello, World!' | shasum -a 256` + let body = b"Hello, World!"; + let cursor = tokio::io::BufReader::new(&body[..]); + assert_eq!( + compute_git_sha256_from_reader(body.len() as u64, cursor) + .await + .unwrap(), + "e118a058f018dda253bb692320c940091b15e4f19067e12fff110606a111f5da", + ); + } + + /// The error path must trigger on the *first* over-size byte: a stream that + /// yields exactly `size` bytes and then one more must be rejected, not + /// accepted on a boundary. Guards the strict `>` (vs `>=`) comparison and + /// the placement of the check after the total bookkeeping. 
+ #[tokio::test] + async fn test_async_reader_one_byte_over_errors() { + let content = b"exactly-this-many-bytes"; + let cursor = tokio::io::BufReader::new(&content[..]); + // Declare one fewer byte than the stream actually holds. + let result = compute_git_sha256_from_reader(content.len() as u64 - 1, cursor).await; + let err = result.expect_err("one byte over declared size must error"); + assert_eq!(err.kind(), io::ErrorKind::InvalidData); + } } diff --git a/crates/socket-patch-core/src/manifest/operations.rs b/crates/socket-patch-core/src/manifest/operations.rs index f7be299..d7381db 100644 --- a/crates/socket-patch-core/src/manifest/operations.rs +++ b/crates/socket-patch-core/src/manifest/operations.rs @@ -81,13 +81,65 @@ pub async fn read_manifest( } /// Write a manifest to the filesystem with pretty-printed JSON. +/// +/// The write is atomic: the JSON is staged in a sibling temp file, fsync'd, +/// then renamed over `path`. A bare `tokio::fs::write` would truncate the +/// existing manifest up front and stream the bytes in place, so a crash (or +/// ENOSPC) mid-write leaves a half-written file on disk. That matters here +/// because [`read_manifest`] treats malformed JSON as a hard `InvalidData` +/// error -- a torn manifest would brick every subsequent command +/// (apply/list/remove/rollback/repair) rather than degrading gracefully. +/// Staging + rename guarantees readers only ever observe the old or the new +/// manifest, never a partial one. 
pub async fn write_manifest( path: impl AsRef, manifest: &PatchManifest, ) -> Result<(), std::io::Error> { + let path = path.as_ref(); let content = serde_json::to_string_pretty(manifest) .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - tokio::fs::write(path, content).await + + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "manifest.json".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content.as_bytes()).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = tokio::fs::rename(&stage, path).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + + // Durability: `sync_all` flushed the file's data, but the rename only + // updated the parent directory entry. fsync the directory so the rename + // itself survives a crash. Unix only; best-effort, since a directory we + // can't open for fsync must not fail an otherwise-successful write. + #[cfg(unix)] + { + if let Ok(dir) = tokio::fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) } #[cfg(test)] @@ -372,6 +424,72 @@ mod tests { assert_eq!(read_back.patches.len(), 2); } + // Regression: write_manifest must be atomic -- it stages a temp file and + // renames it over the target. After a successful write, no `.socket-stage-*` + // litter may remain in the directory (a leaked stage file would accumulate + // and could be mistaken for a manifest by directory walkers). 
+ #[tokio::test] + async fn test_write_manifest_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest.json"); + + let manifest = create_test_manifest(); + write_manifest(&path, &manifest).await.unwrap(); + // Overwrite a second time to exercise the rename-over-existing path. + write_manifest(&path, &manifest).await.unwrap(); + + let mut entries = tokio::fs::read_dir(dir.path()).await.unwrap(); + while let Some(entry) = entries.next_entry().await.unwrap() { + let name = entry.file_name(); + let name = name.to_string_lossy(); + assert!( + !name.starts_with(".socket-stage-"), + "atomic write must not leave a staging file behind, found {name}" + ); + } + // Final file must be a single, fully-readable manifest. + assert_eq!(read_manifest(&path).await.unwrap().unwrap(), manifest); + } + + // Regression: a failed write_manifest must NOT clobber an existing, valid + // manifest. Because the new content is staged in a temp file and only + // rename()d over the target on success, a write that fails before the + // rename (here: the target's parent directory does not exist, so even + // staging fails) leaves any prior manifest untouched. This is the property + // that prevents a half-written manifest from bricking later commands. + #[tokio::test] + async fn test_write_manifest_failure_preserves_existing() { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("manifest.json"); + + // Establish a valid, on-disk manifest. + let original = create_test_manifest(); + write_manifest(&path, &original).await.unwrap(); + + // A write that fails before the rename: target's parent dir is missing, + // so staging the temp file (create_new in the missing parent) errors. 
+ let bad = dir.path().join("does-not-exist").join("manifest.json"); + let mut other = create_test_manifest(); + other.patches.clear(); // a different payload, so we'd notice a clobber + let result = write_manifest(&bad, &other).await; + assert!(result.is_err(), "writing into a missing dir must fail"); + + // The pre-existing manifest is untouched (atomicity: nothing is mutated + // unless the staged write fully succeeds and renames into place). + assert_eq!(read_manifest(&path).await.unwrap().unwrap(), original); + + // No stage litter leaked into the dir alongside the good manifest. + let mut entries = tokio::fs::read_dir(dir.path()).await.unwrap(); + while let Some(entry) = entries.next_entry().await.unwrap() { + let name = entry.file_name(); + let name = name.to_string_lossy(); + assert!( + !name.starts_with(".socket-stage-"), + "a failed write must not leave stage litter, found {name}" + ); + } + } + #[test] fn test_resolve_manifest_path_relative_joins_cwd() { let cwd = Path::new("/tmp/proj"); diff --git a/crates/socket-patch-core/src/manifest/schema.rs b/crates/socket-patch-core/src/manifest/schema.rs index 97a2234..afb57cd 100644 --- a/crates/socket-patch-core/src/manifest/schema.rs +++ b/crates/socket-patch-core/src/manifest/schema.rs @@ -1,5 +1,21 @@ -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; +use serde::{Deserialize, Serialize, Serializer}; +use std::collections::{BTreeMap, HashMap}; + +/// Serialize a `HashMap` with its keys in sorted order so the emitted JSON is +/// deterministic across runs. The manifest is persisted as `.socket/manifest.json` +/// and committed to git; `HashMap`'s randomized iteration order would otherwise +/// re-shuffle the keys on every write, producing spurious diffs and merge +/// conflicts. This mirrors the `BTreeMap` choice in `vex::schema`, which the +/// project made for the same "easier diffing across runs" reason. 
The public +/// field type stays `HashMap` (so callers and deserialization are unaffected); +/// only the on-the-wire ordering is pinned. +fn serialize_sorted(map: &HashMap, serializer: S) -> Result +where + S: Serializer, + V: Serialize, +{ + map.iter().collect::>().serialize(serializer) +} /// Information about a vulnerability fixed by a patch. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -25,8 +41,10 @@ pub struct PatchRecord { pub uuid: String, pub exported_at: String, /// Maps relative file path -> hash info. + #[serde(serialize_with = "serialize_sorted")] pub files: HashMap, /// Maps vulnerability ID (e.g., "GHSA-...") -> vulnerability info. + #[serde(serialize_with = "serialize_sorted")] pub vulnerabilities: HashMap, pub description: String, pub license: String, @@ -58,17 +76,28 @@ impl SetupConfig { } } +/// Whether the optional `setup` block should be omitted from the serialized +/// manifest. It's omitted both when absent (`None`) *and* when present but +/// carrying no state (`Some` of an empty [`SetupConfig`]) — the two are +/// logically identical ("no setup state"), so collapsing them keeps the +/// on-disk `.socket/manifest.json` byte-stable regardless of which in-memory +/// representation produced it. +fn setup_is_absent(setup: &Option) -> bool { + setup.as_ref().map_or(true, SetupConfig::is_empty) +} + /// The top-level patch manifest structure. /// Stored as `.socket/manifest.json`. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct PatchManifest { /// Maps package PURL (e.g., "pkg:npm/lodash@4.17.21") -> patch record. + #[serde(serialize_with = "serialize_sorted")] pub patches: HashMap, /// Optional persisted `setup` state (e.g. excluded workspace members). /// Absent on manifests that predate / don't use it (serde default), and /// omitted from the serialized form when empty so existing manifests are /// byte-stable. 
- #[serde(default, skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "setup_is_absent")] pub setup: Option, } @@ -344,6 +373,210 @@ mod tests { .is_empty()); } + // ── Regression: deterministic, sorted serialization ── + // + // The manifest is persisted as `.socket/manifest.json` and committed to git. + // The maps are `HashMap`s, whose iteration order is randomized per instance, + // so a naive derive would emit keys in arbitrary order and churn the file on + // every write. `serialize_sorted` pins the keys to sorted order. These tests + // guard that contract (and would fail if the `serialize_with` attribute were + // dropped, surfacing the non-deterministic order). + + // Top-level `patches` keys (PURLs) must be emitted in sorted order, no matter + // what order they were inserted in. + #[test] + fn test_manifest_patches_serialize_in_sorted_order() { + let mk = |uuid: &str| PatchRecord { + uuid: uuid.to_string(), + exported_at: "2024-01-01T00:00:00Z".to_string(), + files: HashMap::new(), + vulnerabilities: HashMap::new(), + description: "d".to_string(), + license: "MIT".to_string(), + tier: "free".to_string(), + }; + + // Insert in deliberately reverse-sorted order. 
+ let mut patches = HashMap::new(); + patches.insert("pkg:npm/zzz@1.0.0".to_string(), mk("u-z")); + patches.insert("pkg:npm/mmm@1.0.0".to_string(), mk("u-m")); + patches.insert("pkg:npm/aaa@1.0.0".to_string(), mk("u-a")); + let manifest = PatchManifest { + patches, + setup: None, + }; + + let json = serde_json::to_string(&manifest).unwrap(); + let a = json.find("pkg:npm/aaa@1.0.0").unwrap(); + let m = json.find("pkg:npm/mmm@1.0.0").unwrap(); + let z = json.find("pkg:npm/zzz@1.0.0").unwrap(); + assert!( + a < m && m < z, + "patches must serialize in sorted key order, got: {json}" + ); + } + + // Serialization must be byte-stable: two distinct HashMaps (which may have + // different internal iteration orders) holding the same logical content must + // produce identical JSON. Re-inserting in a different order proves the output + // doesn't depend on HashMap iteration order. + #[test] + fn test_manifest_serialization_is_byte_stable() { + let mk = |uuid: &str| { + let mut files = HashMap::new(); + files.insert( + "package/z.js".to_string(), + PatchFileInfo { + before_hash: "b1".to_string(), + after_hash: "a1".to_string(), + }, + ); + files.insert( + "package/a.js".to_string(), + PatchFileInfo { + before_hash: "b2".to_string(), + after_hash: "a2".to_string(), + }, + ); + let mut vulns = HashMap::new(); + vulns.insert( + "GHSA-zzzz".to_string(), + VulnerabilityInfo { + cves: vec![], + summary: "s".to_string(), + severity: "low".to_string(), + description: "d".to_string(), + }, + ); + vulns.insert( + "GHSA-aaaa".to_string(), + VulnerabilityInfo { + cves: vec![], + summary: "s".to_string(), + severity: "low".to_string(), + description: "d".to_string(), + }, + ); + PatchRecord { + uuid: uuid.to_string(), + exported_at: "2024-01-01T00:00:00Z".to_string(), + files, + vulnerabilities: vulns, + description: "d".to_string(), + license: "MIT".to_string(), + tier: "free".to_string(), + } + }; + + // Two manifests with the same content but opposite patch-insertion order. 
+ let mut p1 = HashMap::new(); + p1.insert("pkg:npm/aaa@1.0.0".to_string(), mk("u-a")); + p1.insert("pkg:npm/zzz@1.0.0".to_string(), mk("u-z")); + let m1 = PatchManifest { + patches: p1, + setup: None, + }; + + let mut p2 = HashMap::new(); + p2.insert("pkg:npm/zzz@1.0.0".to_string(), mk("u-z")); + p2.insert("pkg:npm/aaa@1.0.0".to_string(), mk("u-a")); + let m2 = PatchManifest { + patches: p2, + setup: None, + }; + + assert_eq!( + serde_json::to_string_pretty(&m1).unwrap(), + serde_json::to_string_pretty(&m2).unwrap(), + "manifest JSON must be byte-stable regardless of HashMap order" + ); + + // And the nested `files` / `vulnerabilities` keys must themselves be sorted. + let json = serde_json::to_string(&m1).unwrap(); + assert!(json.find("package/a.js").unwrap() < json.find("package/z.js").unwrap()); + assert!(json.find("GHSA-aaaa").unwrap() < json.find("GHSA-zzzz").unwrap()); + } + + // ── Regression: the optional `setup` block is omitted when it carries no + // state ── + // + // The field doc promises `setup` is "omitted from the serialized form when + // empty so existing manifests are byte-stable." Before the fix, the skip + // predicate was `Option::is_none`, so a `Some` of an empty `SetupConfig` + // (which a load of `"setup": {}` produces, and which the also-then-dead + // `SetupConfig::is_empty` was written to detect) leaked a spurious + // `"setup":{}` key, breaking that contract. + + // A `Some` of an empty config must serialize byte-identically to `None`: + // no `setup` key at all. 
+ #[test] + fn test_empty_setup_some_serializes_identically_to_none() { + let with_none = PatchManifest { + patches: HashMap::new(), + setup: None, + }; + let with_empty_some = PatchManifest { + patches: HashMap::new(), + setup: Some(SetupConfig::default()), + }; + + let none_json = serde_json::to_string_pretty(&with_none).unwrap(); + let empty_some_json = serde_json::to_string_pretty(&with_empty_some).unwrap(); + + assert!( + !none_json.contains("setup"), + "a None setup must not emit a `setup` key, got: {none_json}" + ); + assert!( + !empty_some_json.contains("setup"), + "a Some(empty) setup must also be omitted (byte-stability), got: {empty_some_json}" + ); + assert_eq!( + none_json, empty_some_json, + "None and Some(empty) setup must serialize byte-identically" + ); + } + + // A manifest deserialized from a literal `"setup": {}` must re-serialize + // without the empty block (the normalization the byte-stability contract + // depends on). + #[test] + fn test_loaded_empty_setup_object_is_dropped_on_reserialize() { + let json = r#"{ "patches": {}, "setup": {} }"#; + let manifest: PatchManifest = serde_json::from_str(json).unwrap(); + // The empty object parses into a (logically empty) config... + assert!(manifest.setup.as_ref().map_or(true, SetupConfig::is_empty)); + // ...but must not survive into the serialized form. + let reserialized = serde_json::to_string(&manifest).unwrap(); + assert!( + !reserialized.contains("setup"), + "an empty `setup` block must be dropped on re-serialize, got: {reserialized}" + ); + } + + // A *non-empty* setup block must still round-trip in full — the fix must + // omit only the empty case, never drop real state. 
+ #[test] + fn test_populated_setup_roundtrips() { + let manifest = PatchManifest { + patches: HashMap::new(), + setup: Some(SetupConfig { + exclude: vec!["crates/member-a".to_string()], + manual: vec!["pypi".to_string()], + }), + }; + let json = serde_json::to_string(&manifest).unwrap(); + assert!(json.contains("\"setup\""), "populated setup must be emitted"); + assert!(json.contains("crates/member-a")); + assert!(json.contains("pypi")); + + let reparsed: PatchManifest = serde_json::from_str(&json).unwrap(); + assert_eq!(manifest, reparsed, "populated setup must round-trip exactly"); + let setup = reparsed.setup.unwrap(); + assert_eq!(setup.exclude, vec!["crates/member-a".to_string()]); + assert_eq!(setup.manual, vec!["pypi".to_string()]); + } + // A manifest missing the top-level `patches` key must be rejected (the TS // schema requires it; `{}` is not a valid manifest). #[test] diff --git a/crates/socket-patch-core/src/package_json/detect.rs b/crates/socket-patch-core/src/package_json/detect.rs index 3719c73..977c82d 100644 --- a/crates/socket-patch-core/src/package_json/detect.rs +++ b/crates/socket-patch-core/src/package_json/detect.rs @@ -174,15 +174,23 @@ pub fn remove_socket_patch_from_script(script: &str) -> (bool, Option) { } let segments: Vec<&str> = trimmed.split(" && ").collect(); + + // `changed` must reflect whether a *socket-patch* segment was removed — not + // whether `kept` is merely shorter than `segments`. Filtering also drops + // empty segments, so keying `changed` off `kept.len() != segments.len()` + // would falsely report a removal for a patch-free script that merely + // contained a stray empty segment (e.g. a double `" && "` separator), + // violating this function's documented `(false, ..)`/`(true, ..)` contract. 
+ let had_patch = segments.iter().any(|s| script_is_configured(s.trim())); + let kept: Vec<&str> = segments .iter() .map(|s| s.trim()) .filter(|s| !s.is_empty() && !script_is_configured(s)) .collect(); - if kept.len() == segments.len() { - // Nothing matched a socket-patch pattern (and no empty segments) — - // unchanged. + if !had_patch { + // No socket-patch pattern present — leave the script as-is. return (false, Some(trimmed.to_string())); } @@ -752,6 +760,47 @@ mod tests { assert_eq!(new, None); } + #[test] + fn test_remove_script_empty_segment_no_patch_is_unchanged() { + // Regression: a patch-free script with a stray empty segment (double + // `" && "`) must report `changed == false`. Keying `changed` off + // `kept.len() != segments.len()` previously returned `(true, ..)` here, + // violating the documented contract — `(true, ..)` means a socket-patch + // segment was removed, which did not happen. + let (changed, new) = remove_socket_patch_from_script("echo a && && echo b"); + assert!(!changed, "no socket-patch present, must not report a removal"); + assert_eq!(new.as_deref(), Some("echo a && && echo b")); + } + + #[test] + fn test_remove_script_patch_in_middle_keeps_siblings() { + let (changed, new) = + remove_socket_patch_from_script("echo a && socket-patch apply && echo b"); + assert!(changed); + assert_eq!(new.as_deref(), Some("echo a && echo b")); + } + + #[test] + fn test_remove_script_multiple_patch_segments() { + // Defensive: more than one socket-patch invocation, all removed. + let (changed, new) = remove_socket_patch_from_script( + "socket-patch apply && build && npx @socketsecurity/socket-patch apply", + ); + assert!(changed); + assert_eq!(new.as_deref(), Some("build")); + } + + #[test] + fn test_remove_script_pnpm_command() { + // The pnpm canonical command must be recognized and stripped (it + // contains the "socket-patch apply" pattern). 
+ let (changed, new) = remove_socket_patch_from_script( + "pnpm dlx @socketsecurity/socket-patch apply --silent --ecosystems npm && echo hi", + ); + assert!(changed); + assert_eq!(new.as_deref(), Some("echo hi")); + } + // ── remove_package_json_object ────────────────────────────────── #[test] @@ -785,6 +834,20 @@ mod tests { assert_eq!(pkg["scripts"]["postinstall"], "echo hi"); } + #[test] + fn test_remove_object_noop_when_empty_segment_no_patch() { + // Regression: a patch-free script whose only oddity is a stray empty + // segment must be a no-op — neither reported modified nor rewritten. + let mut pkg: serde_json::Value = serde_json::json!({ + "name": "test", + "scripts": { "postinstall": "echo a && && echo b" } + }); + let status = remove_package_json_object(&mut pkg); + assert!(!status.modified); + // The original (untouched) value must be preserved, empty segment and all. + assert_eq!(pkg["scripts"]["postinstall"], "echo a && && echo b"); + } + #[test] fn test_remove_object_noop_when_not_configured() { let mut pkg: serde_json::Value = serde_json::json!({ @@ -824,6 +887,23 @@ mod tests { assert!(!modified2); } + #[test] + fn test_remove_content_roundtrip_pnpm() { + // update (pnpm) then remove must fully revert to a no-socket-patch state. + let original = r#"{"name":"x","scripts":{"build":"tsc"}}"#; + let (_, updated, ..) 
= + update_package_json_content(original, PackageManager::Pnpm).unwrap(); + assert!(updated.contains("pnpm dlx @socketsecurity/socket-patch apply")); + + let (modified, removed, _) = remove_package_json_content(&updated).unwrap(); + assert!(modified); + assert!(!removed.contains("socket-patch")); + let parsed: serde_json::Value = serde_json::from_str(&removed).unwrap(); + assert_eq!(parsed["scripts"]["build"], "tsc"); + assert!(parsed["scripts"].get("postinstall").is_none()); + assert!(parsed["scripts"].get("dependencies").is_none()); + } + #[test] fn test_remove_content_invalid_json_errors() { assert!(remove_package_json_content("not json").is_err()); diff --git a/crates/socket-patch-core/src/package_json/find.rs b/crates/socket-patch-core/src/package_json/find.rs index f1d1f63..fc90f14 100644 --- a/crates/socket-patch-core/src/package_json/find.rs +++ b/crates/socket-patch-core/src/package_json/find.rs @@ -201,6 +201,14 @@ fn parse_yaml_list_value(raw: &str) -> String { } } + // A list item that is *only* a comment (`- # foo`) has no scalar value. + // The inline-comment scan below starts at index 1 (a `#` is a comment only + // when preceded by whitespace), so a leading `#` would otherwise survive as + // a bogus `"# foo"` pattern. Skip it here. + if s.starts_with('#') { + return String::new(); + } + // Unquoted scalar: a `#` preceded by whitespace begins an inline comment. let bytes = s.as_bytes(); let comment_start = @@ -316,11 +324,16 @@ async fn search_one_level(dir: &Path, results: &mut Vec) { }; while let Ok(Some(entry)) = entries.next_entry().await { - let ft = match entry.file_type().await { - Ok(ft) => ft, - Err(_) => continue, - }; - if !ft.is_dir() { + let path = entry.path(); + // A single-level `dir/*` glob follows a symlinked direct member, the + // way npm/pnpm (and our cargo `glob_dir`) resolve a workspace member + // that is itself a symlink. 
`entry.file_type()` reports the *link's* + // own type — `is_dir() == false` — so it would silently drop such a + // member; stat the path instead so the link is followed. (The + // recursive `**` searchers below deliberately do NOT follow symlinks, + // to avoid loops/escapes — there a symlink's `is_dir() == false` is the + // desired skip.) + if !fs::metadata(&path).await.map(|m| m.is_dir()).unwrap_or(false) { continue; } // A `dir/*` pattern must not pick up node_modules/hidden/output dirs as @@ -328,7 +341,7 @@ async fn search_one_level(dir: &Path, results: &mut Vec) { if is_ignored_dir(&entry.file_name().to_string_lossy()) { continue; } - let pkg_json = entry.path().join("package.json"); + let pkg_json = path.join("package.json"); if fs::metadata(&pkg_json).await.is_ok() { results.push(pkg_json); } @@ -797,6 +810,15 @@ mod tests { ); } + #[test] + fn test_parse_pnpm_comment_only_list_item_skipped() { + // A `- # comment` item is a YAML null (the value is just a comment) and + // must NOT become a literal `"# comment"` workspace pattern. Previously + // the inline-comment scan started at index 1, so a leading `#` survived. + let yaml = "packages:\n - # only a comment\n - real/*"; + assert_eq!(parse_pnpm_workspace_patterns(yaml), vec!["real/*"]); + } + #[test] fn test_parse_pnpm_quoted_value_keeps_hash() { // A `#` inside quotes is part of the value, not a comment. @@ -924,6 +946,76 @@ mod tests { assert_eq!(workspace_count, 1); } + #[cfg(unix)] + #[tokio::test] + async fn test_find_star_glob_follows_symlinked_member() { + // Regression: a single-level `packages/*` glob must follow a workspace + // member that is itself a symlink (npm/pnpm and our cargo `glob_dir` + // both resolve such members). `entry.file_type()` reports the link as a + // non-directory, so the old gate silently dropped it and `setup` never + // patched the package. 
+ let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["packages/*"]}"#, + ) + .await + .unwrap(); + // The real member lives outside `packages/`; `packages/a` links to it. + let real = dir.path().join("real"); + fs::create_dir_all(&real).await.unwrap(); + fs::write(real.join("package.json"), r#"{"name":"a"}"#) + .await + .unwrap(); + fs::create_dir_all(dir.path().join("packages")).await.unwrap(); + std::os::unix::fs::symlink(&real, dir.path().join("packages").join("a")).unwrap(); + + let result = find_package_json_files(dir.path()).await; + let workspace_count = result.files.iter().filter(|f| f.is_workspace).count(); + assert_eq!( + workspace_count, 1, + "symlinked workspace member must be discovered: {:?}", + result.files.iter().map(|f| &f.path).collect::>() + ); + } + + #[cfg(unix)] + #[tokio::test] + async fn test_find_double_glob_does_not_follow_symlinks() { + // The asymmetric counterpart: a recursive `apps/**` glob must NOT follow + // symlinks — a loop back to an ancestor would recurse forever and an + // escaping link would let `setup` edit an out-of-tree manifest. Only the + // real on-disk member is discovered. + let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("package.json"), + r#"{"workspaces": ["apps/**"]}"#, + ) + .await + .unwrap(); + let real = dir.path().join("apps").join("web"); + fs::create_dir_all(&real).await.unwrap(); + fs::write(real.join("package.json"), r#"{"name":"web"}"#) + .await + .unwrap(); + // A loop symlink back to the repo root and an escape symlink to an + // out-of-tree package — neither must be traversed. 
+ std::os::unix::fs::symlink(dir.path(), dir.path().join("apps").join("loop")).unwrap(); + let outside = tempfile::tempdir().unwrap(); + fs::write(outside.path().join("package.json"), r#"{"name":"escape"}"#) + .await + .unwrap(); + std::os::unix::fs::symlink(outside.path(), dir.path().join("apps").join("escape")).unwrap(); + + let result = find_package_json_files(dir.path()).await; + let workspace_count = result.files.iter().filter(|f| f.is_workspace).count(); + assert_eq!( + workspace_count, 1, + "only the real member must be found; symlinks not followed: {:?}", + result.files.iter().map(|f| &f.path).collect::>() + ); + } + // ── detect_package_manager ────────────────────────────────────── #[tokio::test] diff --git a/crates/socket-patch-core/src/package_json/update.rs b/crates/socket-patch-core/src/package_json/update.rs index f535de1..ac00b96 100644 --- a/crates/socket-patch-core/src/package_json/update.rs +++ b/crates/socket-patch-core/src/package_json/update.rs @@ -6,6 +6,57 @@ use super::detect::{ PackageManager, }; +/// Atomically write `content` to `path`. +/// +/// A bare `fs::write` truncates the target before writing, so a crash, power +/// loss, or interrupted process mid-write would leave the user's +/// `package.json` truncated or empty — destroying the file we only meant to +/// append two scripts to. Instead we write to a sibling stage file, fsync it, +/// then rename over the target (rename is atomic on the same filesystem) so the +/// reader only ever sees either the old bytes or the complete new bytes. Mirrors the +/// hardened writer in `manifest/operations.rs`. 
+async fn atomic_write(path: &Path, content: &str) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "package.json".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content.as_bytes()).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = tokio::fs::rename(&stage, path).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + + // The rename only updated the parent directory entry; fsync the directory + // so the rename itself survives a crash. Best-effort, Unix only. + #[cfg(unix)] + { + if let Ok(dir) = tokio::fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) +} + /// Result of updating a single package.json. 
#[derive(Debug, Clone)] pub struct UpdateResult { @@ -76,7 +127,7 @@ pub async fn update_package_json( } if !dry_run { - if let Err(e) = fs::write(package_json_path, &new_content).await { + if let Err(e) = atomic_write(package_json_path, &new_content).await { return UpdateResult { path: path_str, status: UpdateStatus::Error, @@ -171,7 +222,7 @@ pub async fn remove_package_json(package_json_path: &Path, dry_run: bool) -> Rem } if !dry_run { - if let Err(e) = fs::write(package_json_path, &new_content).await { + if let Err(e) = atomic_write(package_json_path, &new_content).await { return RemoveResult { path: path_str, status: RemoveStatus::Error, @@ -445,6 +496,72 @@ mod tests { assert_eq!(fs::read_to_string(&pkg).await.unwrap(), original); } + /// After a successful (non-dry-run) write the staged temp file must be + /// renamed into place, never left behind. A leaked `.socket-stage-*` + /// sibling would signal the atomic write didn't complete its rename. + async fn count_stage_litter(dir: &Path) -> usize { + let mut rd = fs::read_dir(dir).await.unwrap(); + let mut n = 0; + while let Some(entry) = rd.next_entry().await.unwrap() { + if entry + .file_name() + .to_string_lossy() + .starts_with(".socket-stage-") + { + n += 1; + } + } + n + } + + #[tokio::test] + async fn test_update_atomic_write_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write(&pkg, r#"{"name":"x","scripts":{"build":"tsc"}}"#) + .await + .unwrap(); + let result = update_package_json(&pkg, false, PackageManager::Npm).await; + assert_eq!(result.status, UpdateStatus::Updated); + // The write must have gone through stage+rename and cleaned up. + assert_eq!(count_stage_litter(dir.path()).await, 0); + // And produced valid, fully-written JSON (not a truncated stage). 
+ let content = fs::read_to_string(&pkg).await.unwrap(); + let parsed: serde_json::Value = serde_json::from_str(&content).unwrap(); + assert!(parsed["scripts"]["postinstall"].is_string()); + assert!(parsed["scripts"]["dependencies"].is_string()); + } + + #[tokio::test] + async fn test_remove_atomic_write_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write(&pkg, r#"{"name":"x","scripts":{"build":"tsc"}}"#) + .await + .unwrap(); + update_package_json(&pkg, false, PackageManager::Npm).await; + + let result = remove_package_json(&pkg, false).await; + assert_eq!(result.status, RemoveStatus::Removed); + assert_eq!(count_stage_litter(dir.path()).await, 0); + let content = fs::read_to_string(&pkg).await.unwrap(); + let parsed: serde_json::Value = serde_json::from_str(&content).unwrap(); + assert_eq!(parsed["scripts"]["build"], "tsc"); + assert!(!content.contains("socket-patch")); + } + + /// A dry-run must never create a stage file either — it does no I/O at all. + #[tokio::test] + async fn test_update_dry_run_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + let pkg = dir.path().join("package.json"); + fs::write(&pkg, r#"{"name":"x","scripts":{"build":"tsc"}}"#) + .await + .unwrap(); + update_package_json(&pkg, true, PackageManager::Npm).await; + assert_eq!(count_stage_litter(dir.path()).await, 0); + } + // ── remove_package_json ───────────────────────────────────────── #[tokio::test] diff --git a/crates/socket-patch-core/src/patch/apply.rs b/crates/socket-patch-core/src/patch/apply.rs index 3fd4249..4ff6904 100644 --- a/crates/socket-patch-core/src/patch/apply.rs +++ b/crates/socket-patch-core/src/patch/apply.rs @@ -362,8 +362,23 @@ pub async fn apply_file_patch( let existing_meta = tokio::fs::metadata(&filepath).await.ok(); // Create parent directories if needed (e.g., new files added by a patch). 
+ // + // `create_dir_all` needs write permission on the FIRST existing + // ancestor of `parent` to materialize the missing chain. Go's module + // cache (and some Nix/Bazel layouts) mark package directories + // read-only (0o555), so a patch that adds a file under a not-yet- + // existing subdir would fail here with EACCES — and the + // `DirWriteGuard` below can't help, because it relaxes the immediate + // parent, which does not exist yet. Temporarily grant owner-write on + // the nearest existing ancestor for the duration of the mkdir, then + // restore it exactly. (When `parent` already exists this ancestor IS + // `parent`; the guard relax+restore is then a harmless wash before the + // dedicated `DirWriteGuard` below re-relaxes it for the write.) if let Some(parent) = filepath.parent() { - tokio::fs::create_dir_all(parent).await?; + let mkdir_guard = DirWriteGuard::acquire(nearest_existing_ancestor(parent).await).await; + let mkdir_result = tokio::fs::create_dir_all(parent).await; + mkdir_guard.restore().await; + mkdir_result?; } // The atomic stage+rename below — and the copy-on-write break, which @@ -471,6 +486,23 @@ impl DirWriteGuard { } } +/// Walk up from `path` and return the first ancestor that exists on +/// disk. Used to find the directory whose write bit must be relaxed so +/// `create_dir_all` can materialize a missing subdir chain. Returns +/// `None` only if not even the filesystem root resolves (effectively +/// never), in which case the caller's `DirWriteGuard::acquire(None)` is a +/// no-op and `create_dir_all` proceeds unguarded. +async fn nearest_existing_ancestor(path: &Path) -> Option<&Path> { + let mut cur = Some(path); + while let Some(p) = cur { + if tokio::fs::metadata(p).await.is_ok() { + return Some(p); + } + cur = p.parent(); + } + None +} + /// Write `content` to `target` atomically via stage + rename. 
/// /// Two-phase commit: @@ -2199,6 +2231,95 @@ mod tests { assert_eq!(on_disk, original); } + /// New file in a NEW subdirectory inside a read-only package + /// directory. Go's module cache marks directories 0o555; a patch that + /// adds a file under a not-yet-existing subdir must still apply. + /// Regression: `create_dir_all` ran before any directory-permission + /// relaxation, so the mkdir failed with EACCES and the patch could not + /// be applied at all. The directory's mode must be restored afterward. + #[cfg(unix)] + #[tokio::test] + async fn test_apply_file_patch_new_file_in_new_subdir_of_readonly_dir() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let patched = b"brand new nested\n"; + let patched_hash = compute_git_sha256_from_bytes(patched); + // Deeply nested: forces create_dir_all to build several levels + // starting from the read-only package root. + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + apply_file_patch(dir.path(), "a/b/c/new.js", patched, &patched_hash) + .await + .expect("apply must succeed creating a subdir chain in a read-only pkg dir"); + + let path = dir.path().join("a/b/c/new.js"); + assert_eq!(tokio::fs::read(&path).await.unwrap(), patched); + // New file still defaults to read-only. + assert_eq!( + tokio::fs::metadata(&path).await.unwrap().permissions().mode() & 0o7777, + 0o444 + ); + // The pre-existing read-only package root is restored exactly. + assert_eq!( + tokio::fs::metadata(dir.path()).await.unwrap().permissions().mode() & 0o7777, + 0o555, + "package root mode must be restored after the mkdir" + ); + // No stage litter at the root. 
+ let mut entries = tokio::fs::read_dir(dir.path()).await.unwrap(); + while let Some(entry) = entries.next_entry().await.unwrap() { + let name = entry.file_name().to_string_lossy().to_string(); + assert!(!name.starts_with(".socket-stage-"), "stage leaked: {name}"); + } + + // Re-grant write so the TempDir can clean itself up. + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + } + + /// New file under an EXISTING read-only subdirectory (not the root). + /// The immediate parent already exists and is 0o555; the dedicated + /// `DirWriteGuard` must relax it for the stage+rename and restore it. + #[cfg(unix)] + #[tokio::test] + async fn test_apply_file_patch_new_file_in_existing_readonly_subdir() { + use std::os::unix::fs::PermissionsExt; + let dir = tempfile::tempdir().unwrap(); + let sub = dir.path().join("sub"); + tokio::fs::create_dir_all(&sub).await.unwrap(); + let patched = b"nested\n"; + let patched_hash = compute_git_sha256_from_bytes(patched); + + // Lock the subdir (and root) read-only. 
+ tokio::fs::set_permissions(&sub, std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o555)) + .await + .unwrap(); + + apply_file_patch(dir.path(), "sub/new.js", patched, &patched_hash) + .await + .expect("apply must succeed in an existing read-only subdir"); + + assert_eq!(tokio::fs::read(sub.join("new.js")).await.unwrap(), patched); + assert_eq!( + tokio::fs::metadata(&sub).await.unwrap().permissions().mode() & 0o7777, + 0o555, + "existing subdir mode must be restored" + ); + + tokio::fs::set_permissions(dir.path(), std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + tokio::fs::set_permissions(&sub, std::fs::Permissions::from_mode(0o755)) + .await + .unwrap(); + } + #[test] fn test_applied_via_as_tag() { assert_eq!(AppliedVia::Package.as_tag(), "package"); diff --git a/crates/socket-patch-core/src/patch/apply_lock.rs b/crates/socket-patch-core/src/patch/apply_lock.rs index 33cc079..9acefd7 100644 --- a/crates/socket-patch-core/src/patch/apply_lock.rs +++ b/crates/socket-patch-core/src/patch/apply_lock.rs @@ -87,7 +87,15 @@ pub fn acquire(socket_dir: &Path, timeout: Duration) -> Result return Ok(LockGuard { _file: file }), @@ -100,14 +108,22 @@ pub fn acquire(socket_dir: &Path, timeout: Duration) -> Result { let now = Instant::now(); - if now >= deadline { + // A `None` deadline (timeout overflowed `Instant`) never + // elapses; otherwise give up once the budget is spent. + if deadline.is_some_and(|d| now >= d) { return Err(LockError::Held); } // Never sleep past the deadline: a sub-100 ms budget - // must not be rounded up to a full 100 ms wait. The - // remaining slice is always > 0 here (now < deadline). - let remaining = deadline - now; - std::thread::sleep(remaining.min(Duration::from_millis(100))); + // must not be rounded up to a full 100 ms wait. 
When + // there is a deadline the remaining slice is always > 0 + // here (now < deadline); with no deadline, just use the + // full 100 ms quantum. + let cap = Duration::from_millis(100); + let sleep_for = match deadline { + Some(d) => (d - now).min(cap), + None => cap, + }; + std::thread::sleep(sleep_for); } Err(source) => { return Err(LockError::Io { @@ -256,6 +272,59 @@ mod tests { ); } + /// Regression: a near-infinite, user-supplied timeout must not + /// panic the process. `--lock-timeout` / `SOCKET_LOCK_TIMEOUT` is a + /// raw `u64` of seconds, so `Duration::from_secs(u64::MAX)` reaches + /// `acquire`. `Instant::now() + that` overflows and aborts; the + /// `checked_add` deadline turns it into an indefinite wait instead. + /// When the lock is free, acquisition still succeeds immediately. + #[test] + fn overflowing_timeout_does_not_panic_when_free() { + let dir = tempfile::tempdir().unwrap(); + // Would panic ("overflow when adding duration to instant") under + // the old `Instant::now() + timeout`. + let guard = acquire(dir.path(), Duration::from_secs(u64::MAX)).unwrap(); + assert!(dir.path().join("apply.lock").is_file()); + drop(guard); + } + + /// Regression companion: with an overflowing (effectively infinite) + /// timeout AND a contended lock, `acquire` must *wait* — not panic + /// and not give up — and then succeed once the holder releases. + /// Proves both the no-overflow-panic fix and that a `None` deadline + /// never spuriously elapses into `Held`. + #[test] + fn overflowing_timeout_waits_then_acquires_on_release() { + use std::sync::Arc; + + let dir = Arc::new(tempfile::tempdir().unwrap()); + let held = acquire(dir.path(), Duration::ZERO).unwrap(); + + // Release the lock a little while after the waiter starts. + let dir2 = Arc::clone(&dir); + let releaser = std::thread::spawn(move || { + std::thread::sleep(Duration::from_millis(150)); + drop(held); // releases the OS lock + // Keep the tempdir alive until the waiter has acquired. 
+ std::thread::sleep(Duration::from_millis(200)); + drop(dir2); + }); + + // u64::MAX seconds == astronomically large; under the bug this + // panics before ever sleeping. With the fix it waits indefinitely + // and acquires once `held` drops above. + let start = Instant::now(); + let guard = acquire(dir.path(), Duration::from_secs(u64::MAX)).unwrap(); + let waited = start.elapsed(); + assert!( + waited >= Duration::from_millis(100), + "should have waited for the holder to release, waited {:?}", + waited + ); + drop(guard); + releaser.join().unwrap(); + } + /// The retry loop must not overshoot the deadline by a full sleep /// quantum. A 150 ms budget should resolve well under the old /// fixed-100 ms-sleep worst case (~200 ms) — the final sleep is diff --git a/crates/socket-patch-core/src/patch/cargo_config.rs b/crates/socket-patch-core/src/patch/cargo_config.rs index 9c4b5d0..a89848f 100644 --- a/crates/socket-patch-core/src/patch/cargo_config.rs +++ b/crates/socket-patch-core/src/patch/cargo_config.rs @@ -186,7 +186,7 @@ async fn edit_config( .await .map_err(|e| format!("create {}: {e}", parent.display()))?; } - fs::write(&path, new) + atomic_write(&path, new.as_bytes()) .await .map_err(|e| format!("write {}: {e}", path.display()))?; } @@ -196,6 +196,59 @@ async fn edit_config( } } +/// Atomically commit `content` to `path` via stage + fsync + rename. +/// +/// `.cargo/config.toml` is a *user-owned* file — it can hold `[build]`, +/// `[net]`, credentials-adjacent settings, and comments alongside our +/// `[patch]` / `[env]` entries. A bare `fs::write` truncates the target before +/// writing, so a crash, power loss, or `ENOSPC` mid-write would leave the +/// user's config truncated or empty, destroying content we only meant to add +/// two lines to. Instead we write a sibling stage file, fsync it, then rename +/// over the target (atomic on the same filesystem), so a reader/recovering +/// process only ever sees the complete old or the complete new bytes. 
Mirrors +/// the hardened writers in `patch/apply.rs` and `package_json/update.rs`. +async fn atomic_write(path: &Path, content: &[u8]) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "config.toml".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content).await { + let _ = fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = fs::rename(&stage, path).await { + let _ = fs::remove_file(&stage).await; + return Err(e); + } + + // The rename only updated the parent directory entry; fsync the directory + // so the rename itself survives a crash. Best-effort, Unix only. + #[cfg(unix)] + { + if let Ok(dir) = fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) +} + // ── pure transforms ────────────────────────────────────────────────────────── /// True if a `[patch]` `path` value lies under `.socket/cargo-patches/`. @@ -640,6 +693,77 @@ mod tests { assert!(!body.contains("SOCKET_PATCH_ROOT")); } + // ── atomic-commit: stage+rename leaves no litter, never truncates ──────── + /// List the non-hidden-temp entries left under `.cargo/` after a commit. The + /// atomic writer stages a `.socket-stage-*` sibling and renames it over the + /// target; if any stage file survives, the commit aborted mid-flight (or the + /// rename was actually a copy) — both are litter the user would have to clean. 
+ async fn stage_litter(cargo_dir: &Path) -> Vec<String> { + let mut names = Vec::new(); + let mut rd = fs::read_dir(cargo_dir).await.unwrap(); + while let Some(e) = rd.next_entry().await.unwrap() { + let n = e.file_name().to_string_lossy().into_owned(); + if n.contains("socket-stage") { + names.push(n); + } + } + names + } + + #[tokio::test] + async fn test_commit_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + assert!(ensure_patch_entry(dir.path(), "cfg-if", "1.0.0", false) + .await + .unwrap()); + let cargo_dir = dir.path().join(".cargo"); + assert!( + stage_litter(&cargo_dir).await.is_empty(), + "create-path commit must rename the stage file away, not leave it" + ); + // A second, mutating upsert (version bump) must also clean up after itself. + assert!(ensure_patch_entry(dir.path(), "cfg-if", "1.0.1", false) + .await + .unwrap()); + assert!( + stage_litter(&cargo_dir).await.is_empty(), + "overwrite-path commit must rename the stage file away, not leave it" + ); + } + + #[tokio::test] + async fn test_commit_overwrites_existing_user_config_in_place() { + // The dangerous case the atomic writer protects: an existing user config + // we must edit in place. A non-atomic truncate-then-write would risk + // leaving this empty on a crash; here we assert the user content survives + // and the new entry lands, with no stage file left behind.
+ let dir = tempfile::tempdir().unwrap(); + let cargo_dir = dir.path().join(".cargo"); + fs::create_dir_all(&cargo_dir).await.unwrap(); + fs::write( + cargo_dir.join("config.toml"), + "# user comment\n[build]\njobs = 7\n\n[net]\nretry = 5\n", + ) + .await + .unwrap(); + + assert!(ensure_patch_entry(dir.path(), "cfg-if", "1.0.0", false) + .await + .unwrap()); + + let body = fs::read_to_string(cargo_dir.join("config.toml")) + .await + .unwrap(); + assert!(body.contains("# user comment"), "comment preserved"); + assert!(body.contains("jobs = 7"), "[build] preserved"); + assert!(body.contains("retry = 5"), "[net] preserved"); + assert!(body.contains("cfg-if"), "our entry was added"); + assert!( + stage_litter(&cargo_dir).await.is_empty(), + "in-place overwrite must not leave a stage file" + ); + } + #[tokio::test] async fn test_drop_env_root_keeps_nonempty_cargo_dir() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/patch/cargo_redirect.rs b/crates/socket-patch-core/src/patch/cargo_redirect.rs index 1308840..7877dd3 100644 --- a/crates/socket-patch-core/src/patch/cargo_redirect.rs +++ b/crates/socket-patch-core/src/patch/cargo_redirect.rs @@ -161,6 +161,23 @@ fn copy_dir_for(project_root: &Path, name: &str, version: &str) -> PathBuf { .join(format!("{name}-{version}")) } +/// A crate `name` / `version` keys the on-disk copy dir +/// (`.socket/cargo-patches/<name>-<version>/`) and the `[patch]` path, so it +/// must be a single safe path segment. A component containing a path separator +/// or `..` would let a tampered manifest PURL escape `.socket/cargo-patches/` +/// and make `apply` copy + write the patched tree (or `rollback` delete a tree) +/// at an arbitrary filesystem location outside the project. Cargo crate names +/// are `[A-Za-z0-9_-]` and versions are semver, so neither can legitimately +/// contain any of these — reject them fail-closed before touching the disk.
+fn is_safe_redirect_component(s: &str) -> bool { + !s.is_empty() + && s != "." + && s != ".." + && !s.contains('/') + && !s.contains('\\') + && !s.contains('\0') +} + /// Materialise a project-local patched copy and wire up the `[patch]` redirect. /// /// * `pristine_src` — the pristine registry/vendor source dir (the crawler's @@ -182,6 +199,23 @@ pub async fn apply_cargo_redirect( dry_run: bool, force: bool, ) -> ApplyResult { + // SECURITY: refuse coordinates that would escape `.socket/cargo-patches/`. + // A `..`/separator in `name` or `version` (a tampered manifest PURL) would + // otherwise make `fresh_copy` + the apply pipeline write the patched tree to + // an arbitrary location. Fail-closed before any disk access. + if !is_safe_redirect_component(name) || !is_safe_redirect_component(version) { + return synthesized_result( + purl, + Path::new(""), + Vec::new(), + false, + Some(format!( + "refusing cargo redirect for unsafe coordinates `{name}`/`{version}` \ + (a path separator or `..` would escape .socket/cargo-patches/)" + )), + ); + } + let copy_dir = copy_dir_for(project_root, name, version); // A redirect with no files to patch is meaningless: no-op success, no @@ -267,6 +301,16 @@ pub async fn remove_cargo_redirect( ) })?; + // SECURITY: the copy dir is `.socket/cargo-patches/<name>-<version>/` and is + // about to be `remove_tree`d. An unsafe `name`/`version` (`..`/separator) + // would target a tree outside the project for deletion — refuse it.
+ if !is_safe_redirect_component(name) || !is_safe_redirect_component(version) { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("refusing to remove cargo redirect for unsafe coordinates: {purl}"), + )); + } + cargo_config::drop_patch_entry(project_root, name, dry_run) .await .map_err(std::io::Error::other)?; @@ -362,6 +406,12 @@ pub async fn verify_cargo_redirect_state( let Some(record) = manifest.patches.get(purl) else { continue; }; + // SECURITY: skip coordinates that would resolve the copy dir outside + // `.socket/cargo-patches/` (a tampered manifest); never stat/hash files + // outside the project tree during an audit. Mirrors the apply guard. + if !is_safe_redirect_component(name) || !is_safe_redirect_component(version) { + continue; + } // Vendored crates are patched in place, not redirected, so they have // no copy/entry by design — skip them. The crawler stores vendored // crates under `/vendor/` in either `-/` or bare @@ -1089,6 +1139,188 @@ mod tests { .any(|d| matches!(d, Drift::OrphanEntry { .. }))); } + // ── filesystem-safety: coordinate traversal ────────────────────────── + + /// SECURITY regression: a tampered manifest PURL with `..` in the crate name + /// must NOT let `apply` copy + write the patched tree outside + /// `.socket/cargo-patches/`. Before the guard this returned success and + /// materialised the copy at `/../escape-1.0.0`. + #[tokio::test] + async fn test_apply_rejects_traversal_crate_name() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + let escaped = root.parent().unwrap().join("escape-1.0.0"); + // Make sure a stale copy from a prior run can't mask the assertion. 
+ let _ = remove_tree(&escaped).await; + + let result = apply_cargo_redirect( + "pkg:cargo/../../../escape@1.0.0", + "../../../escape", + "1.0.0", + &pristine, + root, + &files, + &sources, + None, + false, + false, + ) + .await; + + assert!(!result.success, "traversal coordinates must be refused"); + assert!( + result.error.as_deref().unwrap_or("").contains("unsafe"), + "error should explain the refusal: {:?}", + result.error + ); + assert!( + !escaped.exists(), + "no copy may be written outside .socket/cargo-patches/ (found {})", + escaped.display() + ); + // No config entry was written either. + assert!(cargo_config::read_patch_entries(root).await.is_empty()); + let _ = remove_tree(&escaped).await; // belt-and-suspenders cleanup + } + + /// A `version` carrying a separator is equally rejected (keys the copy dir). + #[tokio::test] + async fn test_apply_rejects_traversal_version() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + let result = apply_cargo_redirect( + "pkg:cargo/cfg-if@../../../evil", + "cfg-if", + "../../../evil", + &pristine, + root, + &files, + &sources, + None, + false, + false, + ) + .await; + assert!(!result.success); + assert!(!root.join(".cargo/config.toml").exists()); + } + + /// SECURITY regression: `remove` must refuse unsafe coordinates rather than + /// `remove_tree` a directory outside the project. + #[tokio::test] + async fn test_remove_rejects_traversal() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + // A precious directory a sibling of the project root. 
+ let precious = root.parent().unwrap().join("precious-1.0.0"); + tokio::fs::create_dir_all(&precious).await.unwrap(); + tokio::fs::write(precious.join("keep.txt"), b"keep") + .await + .unwrap(); + + let err = remove_cargo_redirect("pkg:cargo/../../../precious@1.0.0", root, false) + .await + .unwrap_err(); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); + assert!( + precious.exists() && precious.join("keep.txt").exists(), + "remove must not delete a tree outside the project" + ); + tokio::fs::remove_dir_all(&precious).await.unwrap(); + } + + // ── scenario coverage ──────────────────────────────────────────────── + + #[tokio::test] + async fn test_reconcile_dry_run_does_not_mutate() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + apply_cargo_redirect( + "pkg:cargo/cfg-if@1.0.0", "cfg-if", "1.0.0", &pristine, root, &files, &sources, None, + false, false, + ) + .await; + let cfg_before = tokio::fs::read_to_string(root.join(".cargo/config.toml")) + .await + .unwrap(); + + let desired: HashSet<String> = HashSet::new(); + let removed = reconcile_cargo_redirects(root, &desired, true).await; + assert!(removed.contains(&"pkg:cargo/cfg-if@1.0.0".to_string())); + // dry-run must NOT delete the copy or rewrite config. + assert!(root.join(".socket/cargo-patches/cfg-if-1.0.0").exists()); + let cfg_after = tokio::fs::read_to_string(root.join(".cargo/config.toml")) + .await + .unwrap(); + assert_eq!(cfg_before, cfg_after, "dry-run reconcile must not edit config"); + } + + #[tokio::test] + async fn test_version_bump_refreshes_entry() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + apply_cargo_redirect( + "pkg:cargo/cfg-if@1.0.0", "cfg-if", "1.0.0", &pristine, root, &files, &sources, None, + false, false, + ) + .await; + // Apply a NEW version (same crate). Build a fresh pristine for 1.0.1.
+ let result = apply_cargo_redirect( + "pkg:cargo/cfg-if@1.0.1", "cfg-if", "1.0.1", &pristine, root, &files, &sources, None, + false, false, + ) + .await; + assert!(result.success, "{:?}", result.error); + let entries = cargo_config::read_patch_entries(root).await; + assert_eq!( + entries["cfg-if"].path.as_deref(), + Some(".socket/cargo-patches/cfg-if-1.0.1"), + "entry must point at the bumped version" + ); + assert!(root.join(".socket/cargo-patches/cfg-if-1.0.1").exists()); + } + + #[tokio::test] + async fn test_realistic_cargo_lock_with_header() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + apply_cargo_redirect( + "pkg:cargo/cfg-if@1.0.0", "cfg-if", "1.0.0", &pristine, root, &files, &sources, None, + false, false, + ) + .await; + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:cargo/cfg-if@1.0.0".to_string(), + crate::manifest::schema::PatchRecord { + uuid: "u".into(), + exported_at: "t".into(), + files: files.clone(), + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + let desired: HashSet<String> = ["pkg:cargo/cfg-if@1.0.0".to_string()].into_iter().collect(); + // Realistic lock: version header + source/checksum fields + dup version. + tokio::fs::write( + root.join("Cargo.lock"), + "version = 3\n\n[[package]]\nname = \"cfg-if\"\nversion = \"1.0.0\"\nsource = \"registry+https://github.com/rust-lang/crates.io-index\"\nchecksum = \"abc\"\n\n[[package]]\nname = \"cfg-if\"\nversion = \"0.1.10\"\nsource = \"registry+https://github.com/rust-lang/crates.io-index\"\nchecksum = \"def\"\n", + ) + .await + .unwrap(); + // patched 1.0.0 is among resolved versions → clean.
+ assert!(verify_cargo_redirect_state(root, &manifest, &desired) + .await + .is_ok()); + } + #[tokio::test] async fn test_empty_files_is_noop() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/patch/copy_tree.rs b/crates/socket-patch-core/src/patch/copy_tree.rs index b171565..8f2b72f 100644 --- a/crates/socket-patch-core/src/patch/copy_tree.rs +++ b/crates/socket-patch-core/src/patch/copy_tree.rs @@ -76,11 +76,15 @@ pub(crate) fn force_remove_dir_all(dir: &Path) -> std::io::Result<()> { { use std::os::unix::fs::PermissionsExt; for entry in walkdir::WalkDir::new(dir).into_iter().flatten() { - let mode = if entry.file_type().is_dir() { - 0o755 - } else { - 0o644 - }; + let ft = entry.file_type(); + // Never chmod a symlink: `set_permissions` follows the link + // and would mutate its *target's* mode — which may live + // outside the tree. A symlink is unlinked via the write bit + // on its (relaxed) parent dir; its own mode is irrelevant. + if ft.is_symlink() { + continue; + } + let mode = if ft.is_dir() { 0o755 } else { 0o644 }; let _ = std::fs::set_permissions( entry.path(), std::fs::Permissions::from_mode(mode), @@ -99,3 +103,179 @@ pub(crate) async fn remove_tree(dir: &Path) -> std::io::Result<()> { .await .map_err(|e| std::io::Error::other(e.to_string()))? 
} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + #[cfg(unix)] + use std::os::unix::fs::PermissionsExt; + + #[tokio::test] + async fn copies_nested_and_empty_dirs() { + let src = tempfile::tempdir().unwrap(); + let dst = tempfile::tempdir().unwrap(); + let d = dst.path().join("copy"); + fs::create_dir_all(src.path().join("a/b")).unwrap(); + fs::create_dir_all(src.path().join("empty")).unwrap(); + fs::write(src.path().join("a/b/file.txt"), b"hello").unwrap(); + fs::write(src.path().join("top.txt"), b"top").unwrap(); + + fresh_copy(src.path(), &d, None).await.unwrap(); + + assert_eq!(fs::read(d.join("a/b/file.txt")).unwrap(), b"hello"); + assert_eq!(fs::read(d.join("top.txt")).unwrap(), b"top"); + assert!(d.join("empty").is_dir(), "empty dir not preserved"); + } + + #[tokio::test] + async fn skips_named_file_at_any_depth() { + let src = tempfile::tempdir().unwrap(); + let dst = tempfile::tempdir().unwrap(); + let d = dst.path().join("copy"); + fs::create_dir_all(src.path().join("sub")).unwrap(); + fs::write(src.path().join(".cargo-checksum.json"), b"{}").unwrap(); + fs::write(src.path().join("sub/.cargo-checksum.json"), b"{}").unwrap(); + fs::write(src.path().join("sub/keep.rs"), b"code").unwrap(); + + fresh_copy(src.path(), &d, Some(".cargo-checksum.json")).await.unwrap(); + + assert!(!d.join(".cargo-checksum.json").exists()); + assert!(!d.join("sub/.cargo-checksum.json").exists()); + assert!(d.join("sub/keep.rs").exists()); + } + + #[cfg(unix)] + #[tokio::test] + async fn skips_symlinks() { + let src = tempfile::tempdir().unwrap(); + let dst = tempfile::tempdir().unwrap(); + let d = dst.path().join("copy"); + fs::write(src.path().join("real.txt"), b"x").unwrap(); + std::os::unix::fs::symlink("real.txt", src.path().join("link.txt")).unwrap(); + // symlink to outside dir + std::os::unix::fs::symlink("/etc/passwd", src.path().join("escape")).unwrap(); + + fresh_copy(src.path(), &d, None).await.unwrap(); + + assert!(d.join("real.txt").exists()); + 
assert!(!d.join("link.txt").exists(), "symlink should be skipped"); + assert!(!d.join("escape").exists(), "escaping symlink should be skipped"); + } + + #[cfg(unix)] + #[tokio::test] + async fn remove_tree_handles_readonly_files_and_dirs() { + let base = tempfile::tempdir().unwrap(); + let root = base.path().join("tree"); + fs::create_dir_all(root.join("ro_dir/inner")).unwrap(); + fs::write(root.join("ro_dir/inner/f.txt"), b"x").unwrap(); + fs::write(root.join("ro_dir/g.txt"), b"y").unwrap(); + // Make files read-only then dirs read-only (bottom-up). + fs::set_permissions(root.join("ro_dir/inner/f.txt"), fs::Permissions::from_mode(0o444)).unwrap(); + fs::set_permissions(root.join("ro_dir/g.txt"), fs::Permissions::from_mode(0o444)).unwrap(); + fs::set_permissions(root.join("ro_dir/inner"), fs::Permissions::from_mode(0o555)).unwrap(); + fs::set_permissions(root.join("ro_dir"), fs::Permissions::from_mode(0o555)).unwrap(); + + remove_tree(&root).await.unwrap(); + assert!(!root.exists(), "read-only tree should be fully removed"); + } + + #[cfg(unix)] + #[tokio::test] + async fn remove_tree_handles_no_execute_dirs() { + let base = tempfile::tempdir().unwrap(); + let root = base.path().join("tree"); + fs::create_dir_all(root.join("d")).unwrap(); + fs::write(root.join("d/f.txt"), b"x").unwrap(); + // 0o444: read but NO execute -> cannot descend without relax + fs::set_permissions(root.join("d"), fs::Permissions::from_mode(0o444)).unwrap(); + + remove_tree(&root).await.unwrap(); + assert!(!root.exists(), "no-execute dir tree should be removed"); + } + + #[tokio::test] + async fn fresh_copy_overwrites_existing_dst() { + let src = tempfile::tempdir().unwrap(); + let dst = tempfile::tempdir().unwrap(); + let d = dst.path().join("copy"); + fs::create_dir_all(&d).unwrap(); + fs::write(d.join("stale.txt"), b"old").unwrap(); + fs::write(src.path().join("new.txt"), b"new").unwrap(); + + fresh_copy(src.path(), &d, None).await.unwrap(); + + assert!(!d.join("stale.txt").exists(), 
"stale file should be gone"); + assert!(d.join("new.txt").exists()); + } + + #[cfg(unix)] + #[tokio::test] + async fn fresh_copy_dirs_are_writable_even_from_readonly_source() { + let src = tempfile::tempdir().unwrap(); + let dst = tempfile::tempdir().unwrap(); + let d = dst.path().join("copy"); + fs::create_dir_all(src.path().join("ro")).unwrap(); + fs::write(src.path().join("ro/f.txt"), b"x").unwrap(); + fs::set_permissions(src.path().join("ro/f.txt"), fs::Permissions::from_mode(0o444)).unwrap(); + fs::set_permissions(src.path().join("ro"), fs::Permissions::from_mode(0o555)).unwrap(); + + fresh_copy(src.path(), &d, None).await.unwrap(); + + let dir_mode = fs::metadata(d.join("ro")).unwrap().permissions().mode() & 0o777; + assert!(dir_mode & 0o200 != 0, "copied dir should be writable, got {:o}", dir_mode); + // cleanup readonly src + fs::set_permissions(src.path().join("ro"), fs::Permissions::from_mode(0o755)).unwrap(); + } + + #[cfg(unix)] + #[tokio::test] + async fn remove_tree_does_not_follow_symlink_out_of_tree() { + // Safety: removing a tree must never delete the symlink *target*. + let base = tempfile::tempdir().unwrap(); + let outside = base.path().join("outside.txt"); + fs::write(&outside, b"precious").unwrap(); + let root = base.path().join("tree"); + fs::create_dir_all(&root).unwrap(); + std::os::unix::fs::symlink(&outside, root.join("link")).unwrap(); + + remove_tree(&root).await.unwrap(); + assert!(!root.exists()); + assert!(outside.exists(), "symlink target outside tree must survive"); + assert_eq!(fs::read(&outside).unwrap(), b"precious"); + } + + /// Regression: the perm-relax retry in [`force_remove_dir_all`] must not + /// chmod *through* a symlink. `set_permissions` follows links, so a symlink + /// entry would silently mutate its target's mode — which can live outside + /// the tree. (Copy trees are symlink-free today, but this is a general + /// pub(crate) helper and the safety property must hold regardless.) 
+ #[cfg(unix)] + #[tokio::test] + async fn relax_loop_must_not_chmod_external_symlink_target() { + let base = tempfile::tempdir().unwrap(); + // An external precious file with restrictive perms. + let outside = base.path().join("secret.txt"); + fs::write(&outside, b"secret").unwrap(); + fs::set_permissions(&outside, fs::Permissions::from_mode(0o600)).unwrap(); + + // A tree whose FIRST remove_dir_all will FAIL (read-only dir) so the + // perm-relax retry path runs, and which contains a symlink to `outside`. + let root = base.path().join("tree"); + fs::create_dir_all(&root).unwrap(); + std::os::unix::fs::symlink(&outside, root.join("link")).unwrap(); + fs::write(root.join("f.txt"), b"x").unwrap(); + fs::set_permissions(root.join("f.txt"), fs::Permissions::from_mode(0o444)).unwrap(); + // Read-only (no write) dir -> first remove_dir_all fails -> relax runs. + fs::set_permissions(&root, fs::Permissions::from_mode(0o555)).unwrap(); + + remove_tree(&root).await.unwrap(); + + let mode = fs::metadata(&outside).unwrap().permissions().mode() & 0o777; + assert_eq!(mode, 0o600, "external symlink target perms were changed to {:o}", mode); + assert!(outside.exists()); + } +} + diff --git a/crates/socket-patch-core/src/patch/diff.rs b/crates/socket-patch-core/src/patch/diff.rs index 47d873a..bd68e08 100644 --- a/crates/socket-patch-core/src/patch/diff.rs +++ b/crates/socket-patch-core/src/patch/diff.rs @@ -10,10 +10,11 @@ use qbsdiff::Bspatch; /// Upper bound on how many bytes we pre-reserve for the patched output. /// /// `Bspatch::hint_target_size()` returns the target size read verbatim from -/// the bsdiff header (bytes 24..32). qbsdiff's parser validates the control -/// and delta block lengths against the actual payload but never validates -/// this field — so a malformed or hostile delta can claim an arbitrary -/// target size (up to `i64::MAX`) while carrying only a few bytes of data. 
+/// the bsdiff header (bytes 24..32) and never validates it — so a malformed or +/// hostile delta can claim an arbitrary target size (up to `i64::MAX`) while +/// carrying only a few bytes of data. (qbsdiff's `> patch.len()` check on the +/// control/diff block lengths is itself bypassable via integer overflow; see +/// [`validate_bsdiff_header`].) /// /// Feeding that value straight into `Vec::with_capacity` lets a tiny delta /// request a multi-exabyte reservation, which either panics with "capacity @@ -27,11 +28,66 @@ use qbsdiff::Bspatch; /// reallocations for legitimately large files. const MAX_PREALLOC_BYTES: u64 = 64 * 1024 * 1024; // 64 MiB +/// Decode a bsdiff "offtin" integer (8 little-endian bytes, sign-magnitude). +/// +/// This mirrors `qbsdiff`'s private `decode_int`: the top bit of the most +/// significant byte is a sign flag, not part of a two's-complement value. +fn decode_offtin(b: &[u8; 8]) -> i64 { + let x = u64::from_le_bytes(*b); + if x >> 63 == 0 || x == 1 << 63 { + x as i64 + } else { + ((x & ((1u64 << 63) - 1)) as i64).wrapping_neg() + } +} + +/// Reject bsdiff headers that would make `qbsdiff::Bspatch::new` panic. +/// +/// `qbsdiff`'s parser reads the compressed control- and diff-block lengths +/// from header bytes 8..16 and 16..24 with the sign-magnitude decoder above, +/// casts them to `u64`, then guards with `32 + csize + dsize > patch.len()` +/// using *wrapping* `u64` arithmetic before doing `split_at(csize)`. A header +/// whose length field has the sign bit set decodes to a "negative" value whose +/// `as u64` is enormous: the sum wraps back below `patch.len()`, slips past the +/// guard, and then either the addition overflows (debug builds) or +/// `split_at(huge)` indexes out of bounds (release builds) — a hard panic on +/// attacker-controlled input. +/// +/// We pre-validate with checked arithmetic so `apply_diff` always surfaces a +/// recoverable `io::Error` instead. 
Malformed-but-not-overflowing headers +/// (bad magic, too short) are left for `Bspatch::new` to report so the error +/// text stays consistent with the upstream parser. +fn validate_bsdiff_header(delta: &[u8]) -> Result<(), std::io::Error> { + // Defer the "too short / bad magic" cases to qbsdiff's own error. + if delta.len() < 32 || &delta[..8] != b"BSDIFF40" { + return Ok(()); + } + let csize = decode_offtin(delta[8..16].try_into().expect("8 bytes")); + let dsize = decode_offtin(delta[16..24].try_into().expect("8 bytes")); + let lengths_ok = csize >= 0 + && dsize >= 0 + && 32u64 + .checked_add(csize as u64) + .and_then(|s| s.checked_add(dsize as u64)) + .is_some_and(|needed| needed <= delta.len() as u64); + if lengths_ok { + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "bsdiff header: block lengths are negative or exceed the payload", + )) + } +} + /// Apply a bsdiff delta to `before` and return the resulting bytes. /// /// Returns an `std::io::Error` when the delta is malformed or applying it /// fails (for example, the delta was produced from a different source). pub fn apply_diff(before: &[u8], delta: &[u8]) -> Result<Vec<u8>, std::io::Error> { + // Guard the header before handing it to qbsdiff: a forged block-length + // field would otherwise panic its parser (see `validate_bsdiff_header`). + validate_bsdiff_header(delta)?; let patcher = Bspatch::new(delta)?; // Clamp the attacker-controlled size hint: a corrupt/hostile header must // not be able to turn a small delta into a process-killing allocation. @@ -139,6 +195,66 @@ mod tests { ); } + #[test] + fn test_apply_diff_forged_negative_block_length_does_not_panic() { + // Regression: qbsdiff's `parse` reads the control/diff block lengths + // (header bytes 8..16 and 16..24) via a sign-magnitude decoder, casts + // them to `u64`, and checks `32 + csize + dsize > patch.len()` with + // *wrapping* arithmetic before doing `split_at(csize)`.
A header whose + // csize field has the high bit set decodes to a "negative" length whose + // `as u64` is enormous; the sum wraps back under `patch.len()`, slips + // past the guard, and then `split_at(huge)` panics (or the add itself + // panics in debug builds). `apply_diff` must reject such a header as a + // normal `io::Error`, upholding its never-panic-on-bad-input contract. + let before = b"the quick brown fox jumps over the lazy dog"; + let after = b"the quick brown cat jumps over the lazy dog"; + let mut forged = make_delta(before, after); + assert!(forged.len() >= 32, "delta must contain a full header"); + // Sign-magnitude encoding of -16: magnitude 16 with the sign bit set. + let neg: u64 = 16u64 | (1u64 << 63); + forged[8..16].copy_from_slice(&neg.to_le_bytes()); + + let result = apply_diff(before, &forged); + assert!( + result.is_err(), + "a forged negative block length must error, not panic" + ); + } + + #[test] + fn test_apply_diff_forged_negative_diff_block_length_does_not_panic() { + // Same class of bug as the csize case above, but via the diff-block + // length field (header bytes 16..24). Both feed `split_at` after the + // wrapping-overflow guard, so both must be rejected up front. + let before = b"alpha beta gamma delta epsilon zeta eta theta"; + let after = b"alpha beta gamma DELTA epsilon zeta eta theta"; + let mut forged = make_delta(before, after); + assert!(forged.len() >= 32, "delta must contain a full header"); + let neg: u64 = 8u64 | (1u64 << 63); + forged[16..24].copy_from_slice(&neg.to_le_bytes()); + + let result = apply_diff(before, &forged); + assert!( + result.is_err(), + "a forged negative diff-block length must error, not panic" + ); + } + + #[test] + fn test_validate_bsdiff_header_accepts_real_delta() { + // The guard must be transparent to honest deltas: a freshly built + // delta has well-formed, in-bounds block lengths and must pass. 
+ let before = b"the quick brown fox jumps over the lazy dog"; + let after = b"the quick brown cat jumps over the lazy dog"; + let delta = make_delta(before, after); + validate_bsdiff_header(&delta).expect("honest header must validate"); + // ...and short / bad-magic inputs are deferred to Bspatch::new, so the + // guard returns Ok for them rather than masking the canonical error. + validate_bsdiff_header(b"too short").expect("short input deferred"); + validate_bsdiff_header(b"NOTBSDIFF.........................") + .expect("bad magic deferred"); + } + #[test] fn test_apply_diff_capacity_hint_is_clamped() { // Pin the clamp itself so the bound can't silently regress back to an diff --git a/crates/socket-patch-core/src/patch/go_mod_edit.rs b/crates/socket-patch-core/src/patch/go_mod_edit.rs index f40935c..b1635f7 100644 --- a/crates/socket-patch-core/src/patch/go_mod_edit.rs +++ b/crates/socket-patch-core/src/patch/go_mod_edit.rs @@ -123,7 +123,7 @@ async fn edit_go_mod( None => Ok(false), Some(new) => { if !dry_run { - fs::write(&path, new) + atomic_write(&path, new.as_bytes()) .await .map_err(|e| format!("write {}: {e}", path.display()))?; } @@ -132,6 +132,60 @@ async fn edit_go_mod( } } +/// Atomically commit `content` to `path` via stage + fsync + rename. +/// +/// A `go.mod` is a *user-owned* file that **defines the module** and carries +/// the user's own `require`/`exclude`/`retract`/`replace` directives and +/// comments alongside our socket `replace`. A bare `fs::write` truncates the +/// target before writing, so a crash, power loss, or `ENOSPC` mid-write would +/// leave `go.mod` truncated or empty — a corrupted manifest that no longer +/// builds, when we only meant to add or refresh one line. Instead we stage a +/// sibling file, fsync it, then rename over the target (atomic on the same +/// filesystem), so a reader/recovering process only ever sees the complete old +/// or the complete new bytes. 
Mirrors the hardened writers in +/// `patch/cargo_config.rs`, `patch/apply.rs`, and `package_json/update.rs`. +async fn atomic_write(path: &Path, content: &[u8]) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "go.mod".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content).await { + let _ = fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = fs::rename(&stage, path).await { + let _ = fs::remove_file(&stage).await; + return Err(e); + } + + // The rename only updated the parent directory entry; fsync the directory + // so the rename itself survives a crash. Best-effort, Unix only. + #[cfg(unix)] + { + if let Ok(dir) = fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) +} + // ── parsing ──────────────────────────────────────────────────────────────── /// Strip a trailing `// …` line comment. Module paths and our `./…` targets @@ -722,6 +776,65 @@ replace ( ); } + // ── atomic commit: stage+rename leaves no litter, never truncates ──────── + /// A real write must rename its `.socket-stage-*` sibling over `go.mod` and + /// leave nothing behind — a leftover stage file (or, worse, a half-written + /// truncated `go.mod`) is exactly the corruption the atomic writer exists to + /// prevent. Mirrors the litter guard in `patch/cargo_config.rs`. 
+ #[tokio::test] + async fn test_ensure_leaves_no_stage_litter() { + let dir = tempfile::tempdir().unwrap(); + fs::write( + dir.path().join("go.mod"), + "module example.com/app\n\ngo 1.21\n\nrequire github.com/foo/bar v1.4.2\n", + ) + .await + .unwrap(); + + assert!(ensure_replace_entry(dir.path(), "github.com/foo/bar", "v1.4.2", false) + .await + .unwrap()); + + // Only go.mod should remain in the project root. + let mut names: Vec = std::fs::read_dir(dir.path()) + .unwrap() + .map(|e| e.unwrap().file_name().to_string_lossy().into_owned()) + .collect(); + names.sort(); + assert_eq!(names, vec!["go.mod".to_string()], "no stage-file litter"); + assert!( + !names.iter().any(|n| n.starts_with(".socket-stage-")), + "stage file must be renamed away, not left behind" + ); + } + + /// An overwrite must replace the whole file in one atomic step while + /// preserving every unrelated byte (module line, `go` line, `require`s, the + /// user's own `replace`, and comments) — the writer stages full new content + /// and renames, never truncates-in-place. + #[tokio::test] + async fn test_ensure_overwrite_preserves_unrelated_content_on_disk() { + let dir = tempfile::tempdir().unwrap(); + let original = "module example.com/app\n\ngo 1.21\n\n// keep me\nrequire github.com/foo/bar v1.4.2\n\nreplace example.com/other v2.0.0 => ../other-fork\n"; + fs::write(dir.path().join("go.mod"), original).await.unwrap(); + + assert!(ensure_replace_entry(dir.path(), "github.com/foo/bar", "v1.4.2", false) + .await + .unwrap()); + + let on_disk = fs::read_to_string(dir.path().join("go.mod")).await.unwrap(); + // Our directive landed… + assert!(on_disk.contains( + "replace github.com/foo/bar v1.4.2 => ./.socket/go-patches/github.com/foo/bar@v1.4.2" + )); + // …and nothing the user authored was lost. 
/// Characters that never occur in a real Go module path or version and that are
/// dangerous in the places the coordinates end up:
///
/// * `\` and NUL — path-separator / C-string truncation ambiguity;
/// * `:` — on Windows a drive-relative component such as `C:evil` makes
///   `Path::join` discard the base directory, escaping `.socket/go-patches/`;
/// * whitespace and control characters — the coordinates are interpolated into
///   `go.mod` text (`module <module>`, `replace <module> <version> => …`), so an
///   embedded newline or space would let a tampered PURL inject or split
///   directives.
fn is_forbidden_coord_char(c: char) -> bool {
    c == '\\' || c == ':' || c.is_whitespace() || c.is_control()
}

/// SECURITY: the `module` + `version` pair keys the on-disk copy dir
/// (`.socket/go-patches/<module>@<version>/`) and the `replace` target path, so
/// a tampered manifest PURL must not be able to make them escape
/// `.socket/go-patches/`. A `..`/`.` segment, an absolute path, a drive prefix,
/// or a backslash/NUL would otherwise let `apply` copy + write the patched tree
/// (or `rollback` delete a tree) at an arbitrary filesystem location outside
/// the project.
///
/// Unlike a cargo crate name, a Go module path legitimately contains `/`
/// separators (`github.com/foo/bar`), so it is validated **per segment** rather
/// than rejecting all separators. A real Go module path never contains a
/// `..`/`.` segment, a backslash, a colon, whitespace, or a control character,
/// so fail-closed rejection is safe.
fn is_safe_redirect_module(module: &str) -> bool {
    !module.is_empty()
        && !module.starts_with('/')
        && !module.chars().any(is_forbidden_coord_char)
        && module
            .split('/')
            .all(|seg| !seg.is_empty() && seg != "." && seg != "..")
}

/// A Go module version (e.g. `v1.4.2`, `v0.0.0-2006…-abcdef`) is a single path
/// segment — no separators, no `..`, and none of the characters rejected by
/// [`is_forbidden_coord_char`]. Mirrors the cargo redirect guard.
fn is_safe_redirect_version(version: &str) -> bool {
    !version.is_empty()
        && version != "."
        && version != ".."
        && !version.contains('/')
        && !version.chars().any(is_forbidden_coord_char)
}

/// True iff both coordinates are safe to key an on-disk copy dir / `replace`
/// path. Reject fail-closed before any disk access.
fn are_safe_redirect_coords(module: &str, version: &str) -> bool {
    is_safe_redirect_module(module) && is_safe_redirect_version(version)
}
+ if !are_safe_redirect_coords(module, version) { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("refusing to remove go redirect for unsafe coordinates: {purl}"), + )); + } + go_mod_edit::drop_replace_entry(project_root, module, dry_run) .await .map_err(std::io::Error::other)?; @@ -322,6 +387,13 @@ pub async fn verify_go_redirect_state( continue; }; + // SECURITY: skip coordinates that would resolve the copy dir outside + // `.socket/go-patches/` (a tampered manifest); never stat/hash files + // outside the project tree during an audit. Mirrors the apply guard. + if !are_safe_redirect_coords(module, version) { + continue; + } + // go.mod `require` cross-check: if the graph resolves this module to a // version that is NOT the patched one, the version-pinned `replace` is // unused and the build links the unpatched module — a silent-stale hole @@ -429,12 +501,65 @@ async fn redirect_in_sync( /// Synthesize Go's minimal `go.mod` (`module `) in the copy iff it has /// none — required for a `replace` target derived from a pre-modules package. +/// +/// The copy under `.socket/go-patches/` is a *committed artifact* that the build +/// redirects to, so its `go.mod` is committed to the repo. Write it atomically +/// (stage + fsync + rename) rather than with a bare truncating `fs::write`: a +/// crash / power loss / `ENOSPC` mid-write would otherwise commit a torn or +/// empty `go.mod`. A reader (a concurrent `go build`, or the file landing in a +/// commit) then only ever sees the complete file, never a half-written one. async fn ensure_module_go_mod(copy_dir: &Path, module: &str) -> std::io::Result<()> { let go_mod = copy_dir.join("go.mod"); if tokio::fs::metadata(&go_mod).await.is_ok() { return Ok(()); } - tokio::fs::write(&go_mod, format!("module {module}\n")).await + atomic_write(&go_mod, format!("module {module}\n").as_bytes()).await +} + +/// Atomically commit `content` to `path` via stage + fsync + rename. 
Mirrors the +/// hardened writers in [`crate::patch::cargo_config`] / +/// [`crate::patch::go_mod_edit`]: a reader/recovering process only ever sees the +/// complete old or complete new bytes, never a truncated intermediate. +async fn atomic_write(path: &Path, content: &[u8]) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "go.mod".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = tokio::fs::rename(&stage, path).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + + // The rename only updated the parent directory entry; fsync the directory so + // the rename itself survives a crash. Best-effort, Unix only. + #[cfg(unix)] + { + if let Ok(dir) = tokio::fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) } fn synthesized_result( @@ -715,6 +840,41 @@ mod tests { ); } + #[tokio::test] + async fn test_synthesized_go_mod_is_atomic_no_litter() { + // The synthesized go.mod must be committed atomically: after apply the + // copy dir holds the real go.mod with the full `module …` line and NO + // leftover `.socket-stage-*` sibling (a torn/empty go.mod or a stage-file + // litter would be exactly the corruption the atomic writer prevents). + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + // Pre-modules package → synthesis path is exercised. 
+ tokio::fs::remove_file(pristine.join("go.mod")).await.unwrap(); + let sources = PatchSources::blobs_only(&blobs); + + let result = apply_go_redirect( + PURL, MODULE, VERSION, &pristine, root, &files, &sources, None, false, false, + ) + .await; + assert!(result.success, "apply failed: {:?}", result.error); + + let copy = root.join(".socket/go-patches/github.com/foo/bar@v1.4.2"); + assert_eq!( + tokio::fs::read_to_string(copy.join("go.mod")).await.unwrap(), + "module github.com/foo/bar\n", + "synthesized go.mod must be the complete module line, never torn/empty" + ); + // No stage-file litter anywhere in the copy dir. + let mut rd = tokio::fs::read_dir(©).await.unwrap(); + while let Ok(Some(e)) = rd.next_entry().await { + let name = e.file_name().to_string_lossy().into_owned(); + assert!( + !name.starts_with(".socket-stage-"), + "stage file must be renamed away, found litter: {name}" + ); + } + } + #[tokio::test] async fn test_remove_drops_directive_and_copy() { let (dir, blobs, pristine, files, _after) = fixture().await; @@ -880,6 +1040,159 @@ mod tests { assert!(read_replace_entries(root).await.is_empty()); } + // ── filesystem-safety: coordinate traversal ────────────────────────── + + #[test] + fn test_safe_redirect_coords() { + // Legitimate multi-segment module + semver-ish version. + assert!(are_safe_redirect_coords("github.com/foo/bar", "v1.4.2")); + assert!(are_safe_redirect_coords("gopkg.in/inf.v0", "v0.9.1")); + assert!(are_safe_redirect_coords( + "github.com/foo/bar/v2", + "v2.0.0-20210101000000-abcdef123456" + )); + // Traversal / escape attempts in the module. 
+ assert!(!are_safe_redirect_coords("../../../etc", "v1.0.0")); + assert!(!are_safe_redirect_coords("github.com/../../../etc", "v1.0.0")); + assert!(!are_safe_redirect_coords("/abs/path", "v1.0.0")); + assert!(!are_safe_redirect_coords("github.com//bar", "v1.0.0")); // empty segment + assert!(!are_safe_redirect_coords("foo/./bar", "v1.0.0")); + assert!(!are_safe_redirect_coords("foo\\bar", "v1.0.0")); + assert!(!are_safe_redirect_coords("", "v1.0.0")); + // Traversal / separators in the version. + assert!(!are_safe_redirect_coords("github.com/foo/bar", "../../../evil")); + assert!(!are_safe_redirect_coords("github.com/foo/bar", "v1/0/0")); + assert!(!are_safe_redirect_coords("github.com/foo/bar", "..")); + assert!(!are_safe_redirect_coords("github.com/foo/bar", "")); + } + + /// SECURITY regression: a tampered manifest PURL with `..` in the module path + /// must NOT let `apply` copy + write the patched tree outside + /// `.socket/go-patches/`. Without the guard `copy_dir_for` would resolve to + /// `/.socket/go-patches/../../../escape@v1.0.0` and `fresh_copy` + /// would materialise it there. 
+ #[tokio::test] + async fn test_apply_rejects_traversal_module() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let sources = PatchSources::blobs_only(&blobs); + let escaped = root.parent().unwrap().join("escape@v1.0.0"); + let _ = remove_tree(&escaped).await; // clear any stale copy + + let result = apply_go_redirect( + "pkg:golang/../../../escape@v1.0.0", + "../../../escape", + "v1.0.0", + &pristine, + root, + &files, + &sources, + None, + false, + false, + ) + .await; + + assert!(!result.success, "traversal coordinates must be refused"); + assert!( + result.error.as_deref().unwrap_or("").contains("unsafe"), + "error should explain the refusal: {:?}", + result.error + ); + assert!( + !escaped.exists(), + "no copy may be written outside .socket/go-patches/ (found {})", + escaped.display() + ); + // go.mod was never touched (no replace directive added). + assert!(read_replace_entries(root).await.is_empty()); + let _ = remove_tree(&escaped).await; + } + + /// A `version` carrying a separator is equally rejected (it keys the copy dir + /// and the `replace` path). + #[tokio::test] + async fn test_apply_rejects_traversal_version() { + let (dir, blobs, pristine, files, _after) = fixture().await; + let root = dir.path(); + let gomod_before = tokio::fs::read_to_string(root.join("go.mod")).await.unwrap(); + let sources = PatchSources::blobs_only(&blobs); + let result = apply_go_redirect( + "pkg:golang/github.com/foo/bar@../../../evil", + MODULE, + "../../../evil", + &pristine, + root, + &files, + &sources, + None, + false, + false, + ) + .await; + assert!(!result.success); + // go.mod is byte-unchanged. + assert_eq!( + tokio::fs::read_to_string(root.join("go.mod")).await.unwrap(), + gomod_before + ); + } + + /// SECURITY regression: `remove` must refuse unsafe coordinates rather than + /// `remove_tree` a directory outside the project. 
+ #[tokio::test] + async fn test_remove_rejects_traversal() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + tokio::fs::write(root.join("go.mod"), "module m\n\ngo 1.21\n").await.unwrap(); + // A precious directory that is a sibling of the project root. + let precious = root.parent().unwrap().join("precious@v1.0.0"); + tokio::fs::create_dir_all(&precious).await.unwrap(); + tokio::fs::write(precious.join("keep.txt"), b"keep").await.unwrap(); + + let err = remove_go_redirect("pkg:golang/../../../precious@v1.0.0", root, false) + .await + .unwrap_err(); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); + assert!( + precious.exists() && precious.join("keep.txt").exists(), + "remove must not delete a tree outside the project" + ); + tokio::fs::remove_dir_all(&precious).await.unwrap(); + } + + /// SECURITY regression: an audit must not stat/hash files outside the tree + /// for an unsafe coordinate — it is skipped, not chased through `..`. + #[tokio::test] + async fn test_verify_skips_unsafe_coords() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path(); + tokio::fs::write(root.join("go.mod"), "module m\n\ngo 1.21\n").await.unwrap(); + + let unsafe_purl = "pkg:golang/../../../escape@v1.0.0"; + let mut manifest = PatchManifest::new(); + let mut files = HashMap::new(); + files.insert( + "package/x.go".to_string(), + PatchFileInfo { before_hash: "b".into(), after_hash: "a".into() }, + ); + manifest.patches.insert( + unsafe_purl.to_string(), + crate::manifest::schema::PatchRecord { + uuid: "u".into(), + exported_at: "t".into(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + let desired: HashSet = [unsafe_purl.to_string()].into_iter().collect(); + // The unsafe coord is silently skipped → no drift (and no escape-stat). 
+ assert!(verify_go_redirect_state(root, &manifest, &desired).await.is_ok()); + } + #[test] fn test_collect_copy_modules_reconstructs_nested_purl() { // Pure-ish check of the path→PURL reconstruction via build_golang_purl. diff --git a/crates/socket-patch-core/src/patch/package.rs b/crates/socket-patch-core/src/patch/package.rs index f25f925..7113df7 100644 --- a/crates/socket-patch-core/src/patch/package.rs +++ b/crates/socket-patch-core/src/patch/package.rs @@ -105,22 +105,55 @@ pub fn read_archive_to_map(archive_path: &Path) -> Result { + saw_normal_component = true; + true + } + Component::CurDir => true, + _ => false, + }); + if normalized.is_empty() + || normalized.as_bytes().contains(&0) || leading_separator - || normalized_path - .components() - .any(|c| matches!(c, std::path::Component::ParentDir)) + || normalized_path.is_absolute() + || !all_components_safe + || !saw_normal_component { return Err(ArchiveError::UnsafePath(path_str)); } @@ -366,6 +399,144 @@ mod tests { assert!(matches!(err, ArchiveError::UnsafePath(_))); } + #[test] + fn test_read_archive_rejects_empty_normalized_path() { + // A raw regular-file entry named `package/` normalizes to "" — which + // resolves to the package directory itself (`pkg_path.join("")` == + // `pkg_path`). Such an entry must be rejected, not handed downstream + // as a writable "file". + let dir = tempfile::tempdir().unwrap(); + let archive = dir.path().join("arc.tar.gz"); + write_raw_archive(&archive, b"package/", b"evil"); + + let err = read_archive_to_map(&archive).unwrap_err(); + assert!( + matches!(err, ArchiveError::UnsafePath(_)), + "empty normalized path must be rejected, got {err:?}" + ); + } + + #[test] + fn test_read_archive_rejects_curdir_only_path() { + // `.` (and `./`) name no file — they collapse to the package + // directory. They have a CurDir component but no Normal component, + // so the "must contain a real segment" rule must reject them. 
/// Build one 512-byte ustar header block for `name`/`typeflag`/`size`.
///
/// Layout written (all other bytes stay NUL):
/// * `0..100`   — `name`, NUL-padded;
/// * `100..108` — mode `0000644\0`;
/// * `124..136` — `size` as 11 octal digits + NUL;
/// * `136..148` — mtime, all-zero octal + NUL;
/// * `148..156` — checksum: six octal digits, NUL, space;
/// * `156`      — `typeflag`;
/// * `257..265` — magic `ustar\0` + version `00`.
///
/// # Panics
///
/// Panics if `name` is longer than the 100-byte name field or if `size` does
/// not fit in 11 octal digits. Failing loudly keeps a test fixture from
/// silently describing a different (truncated) path than the test intended.
fn ustar_block(name: &[u8], typeflag: u8, size: u64) -> [u8; 512] {
    assert!(
        name.len() <= 100,
        "ustar name field holds at most 100 bytes, got {}",
        name.len()
    );
    // 11 octal digits encode values below 8^11 == 2^33.
    assert!(
        size < (1u64 << 33),
        "size {size} does not fit the 11-digit octal ustar size field"
    );

    let mut block = [0u8; 512];
    block[..name.len()].copy_from_slice(name);
    block[100..108].copy_from_slice(b"0000644\0");
    block[124..135].copy_from_slice(format!("{size:011o}").as_bytes());
    // block[135] is already the NUL terminator of the size field.
    block[136..147].copy_from_slice(b"00000000000");
    // block[147] is already the NUL terminator of the mtime field.
    block[156] = typeflag;
    block[257..263].copy_from_slice(b"ustar\0");
    block[263..265].copy_from_slice(b"00");

    // The checksum is the byte sum of the header with the checksum field
    // itself counted as eight ASCII spaces.
    block[148..156].fill(b' ');
    let checksum: u32 = block.iter().map(|&b| u32::from(b)).sum();
    block[148..156].copy_from_slice(format!("{checksum:06o}\0 ").as_bytes());
    block
}
+ let mut lname = long_name.to_vec(); + lname.push(0); + tar_bytes.extend_from_slice(&ustar_block(b"././@LongLink", b'L', lname.len() as u64)); + tar_bytes.extend_from_slice(&lname); + let pad = (512 - (lname.len() % 512)) % 512; + tar_bytes.extend(std::iter::repeat_n(0u8, pad)); + // The real entry. Its own name field is a harmless placeholder; the + // preceding long-name entry overrides it. + tar_bytes.extend_from_slice(&ustar_block(b"placeholder", b'0', data.len() as u64)); + tar_bytes.extend_from_slice(data); + let pad = if data.is_empty() { + 0 + } else { + (512 - (data.len() % 512)) % 512 + }; + tar_bytes.extend(std::iter::repeat_n(0u8, pad)); + tar_bytes.extend([0u8; 1024]); + + let file = std::fs::File::create(path).unwrap(); + let mut gz = GzEncoder::new(file, Compression::default()); + gz.write_all(&tar_bytes).unwrap(); + gz.finish().unwrap(); + } + + #[test] + fn test_read_archive_rejects_nul_byte_path() { + // A plain ustar name field is NUL-terminated, so an embedded NUL can + // only reach the validator through a GNU long-name entry. `safe\0evil` + // is a single path component (no `/`, no `..`, not absolute) — so it + // is ONLY rejectable by the explicit NUL guard, which mirrors + // `is_safe_relative_subpath` in apply.rs. Refuse the OsStr/C-string + // truncation ambiguity outright. + let dir = tempfile::tempdir().unwrap(); + let archive = dir.path().join("arc.tar.gz"); + write_gnu_longname_archive(&archive, b"safe\0evil.txt", b"evil"); + + let err = read_archive_to_map(&archive).unwrap_err(); + assert!( + matches!(err, ArchiveError::UnsafePath(_)), + "embedded-NUL long-name path must be rejected, got {err:?}" + ); + } + + #[test] + fn test_read_archive_accepts_gnu_longname_without_nul() { + // Sanity check that the long-name machinery itself works (so the NUL + // test above isn't vacuously passing because long names are dropped). 
+ let dir = tempfile::tempdir().unwrap(); + let archive = dir.path().join("arc.tar.gz"); + let long = format!("package/{}.js", "a".repeat(120)); + write_gnu_longname_archive(&archive, long.as_bytes(), b"ok"); + + let map = read_archive_to_map(&archive).unwrap(); + assert_eq!(map.len(), 1); + assert_eq!(map.values().next().map(|v| v.as_slice()), Some(&b"ok"[..])); + } + + #[test] + fn test_read_archive_accepts_curdir_prefixed_real_path() { + // The hardening must NOT over-reject: a leading `./` in front of a + // real segment is a legitimate relative path and must still pass. + // Use the raw writer so the literal `./` reaches the validator (the + // tar `Builder` would otherwise normalize the prefix away). + let dir = tempfile::tempdir().unwrap(); + let archive = dir.path().join("arc.tar.gz"); + write_raw_archive(&archive, b"./lib/util.js", b"ok"); + + let map = read_archive_to_map(&archive).unwrap(); + // The entry survives validation (the `./` segment is preserved in the + // key, matching the existing non-canonicalizing behavior). + assert_eq!(map.len(), 1, "curdir-prefixed real path must be accepted"); + assert_eq!(map.values().next().map(|v| v.as_slice()), Some(&b"ok"[..])); + } + #[test] fn test_read_archive_skips_non_regular_entries() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/patch/rollback.rs b/crates/socket-patch-core/src/patch/rollback.rs index 1c583ac..12b5c31 100644 --- a/crates/socket-patch-core/src/patch/rollback.rs +++ b/crates/socket-patch-core/src/patch/rollback.rs @@ -69,6 +69,21 @@ pub async fn verify_file_rollback( blobs_path: &Path, ) -> VerifyRollbackResult { let normalized = normalize_file_path(file_name); + // SECURITY: never resolve a key that escapes the package directory. + // A poisoned `.socket/manifest.json` key like `../../home/u/.bashrc` + // or `/etc/cron.d/x` must not be hashed, restored, or (for new files) + // deleted. 
Mirror the apply path's guard — returning a blocking status + // aborts the whole package rollback before the delete loop runs. + if !crate::patch::apply::is_safe_relative_subpath(normalized) { + return VerifyRollbackResult { + file: file_name.to_string(), + status: VerifyRollbackStatus::NotFound, + message: Some("Unsafe patch path (escapes package directory)".to_string()), + current_hash: None, + expected_hash: None, + target_hash: None, + }; + } let filepath = pkg_path.join(normalized); let is_new_file = file_info.before_hash.is_empty(); @@ -302,6 +317,20 @@ pub async fn rollback_package_patch( // New files (empty beforeHash): delete instead of restoring. if file_info.before_hash.is_empty() { let normalized = normalize_file_path(file_name); + // SECURITY: this delete path constructs the target itself and + // does NOT go through `apply_file_patch`, so it must enforce the + // same path-escape guard. Without it a poisoned manifest entry + // (empty beforeHash + a `../../`/absolute key) would unlink an + // arbitrary file outside the package directory. Verify already + // blocks such keys, but defense-in-depth: never trust an + // unvalidated key at the syscall. + if !crate::patch::apply::is_safe_relative_subpath(normalized) { + result.error = Some(format!( + "Unsafe patch path (escapes package directory): {}", + file_name + )); + return result; + } let filepath = pkg_path.join(normalized); // Unlinking a directory entry requires write permission on the // *parent directory*, not the file. Go's module cache marks @@ -969,6 +998,78 @@ mod tests { ); } + /// SECURITY (verify path-escape guard): a manifest key that escapes + /// the package directory must be refused at verification — never + /// hashed or stat'd through `pkg_path.join`. Returns a blocking + /// status (not Ready/AlreadyOriginal) so the package rollback aborts. + /// Regression: verify joined the raw key with no safety check, the + /// same hole the apply path closes with `is_safe_relative_subpath`. 
+ #[tokio::test] + async fn test_verify_file_rollback_rejects_path_escape() { + let pkg_dir = tempfile::tempdir().unwrap(); + let blobs_dir = tempfile::tempdir().unwrap(); + + let file_info = PatchFileInfo { + before_hash: "aaa".to_string(), + after_hash: "bbb".to_string(), + }; + + for escape in ["package/../../escape.js", "../escape.js", "/etc/passwd"] { + let result = + verify_file_rollback(pkg_dir.path(), escape, &file_info, blobs_dir.path()).await; + assert_ne!(result.status, VerifyRollbackStatus::Ready, "key: {escape}"); + assert_ne!( + result.status, + VerifyRollbackStatus::AlreadyOriginal, + "key: {escape}" + ); + assert!(result.message.unwrap().contains("Unsafe patch path")); + } + } + + /// SECURITY (new-file delete path-escape): the new-file deletion + /// branch builds the path itself and calls `remove_file` directly, + /// bypassing `apply_file_patch`'s guard. A poisoned manifest with an + /// empty `beforeHash` and an escaping key must NOT unlink a file + /// outside the package dir. Regression: the bare `remove_file` would + /// delete an arbitrary host file. + #[tokio::test] + async fn test_rollback_package_patch_new_file_path_escape_blocked() { + let root = tempfile::tempdir().unwrap(); + let pkg_dir = root.path().join("pkg"); + let blobs_dir = root.path().join("blobs"); + tokio::fs::create_dir_all(&pkg_dir).await.unwrap(); + tokio::fs::create_dir_all(&blobs_dir).await.unwrap(); + + // A sentinel file OUTSIDE the package directory that must survive. + let sentinel_content = b"do not delete me\n"; + let sentinel = root.path().join("sentinel.txt"); + tokio::fs::write(&sentinel, sentinel_content).await.unwrap(); + + let mut files = HashMap::new(); + files.insert( + // Empty beforeHash => "new file", delete branch. afterHash matches + // the sentinel so a missing guard would let the delete through. 
+ "package/../sentinel.txt".to_string(), + PatchFileInfo { + before_hash: String::new(), + after_hash: compute_git_sha256_from_bytes(sentinel_content), + }, + ); + + let result = + rollback_package_patch("pkg:npm/test@1.0.0", &pkg_dir, &files, &blobs_dir, false).await; + + assert!(!result.success, "escaping delete must be refused"); + assert!(result.files_rolled_back.is_empty()); + // The out-of-tree sentinel must be untouched. + assert_eq!( + tokio::fs::read(&sentinel).await.unwrap(), + sentinel_content, + "rollback must not delete a file outside the package directory" + ); + } + /// New-file rollback (empty `beforeHash`): the file the patch added /// is deleted when its content still matches `afterHash`. #[tokio::test] diff --git a/crates/socket-patch-core/src/patch/sidecars/cargo.rs b/crates/socket-patch-core/src/patch/sidecars/cargo.rs index 9ae4857..378325d 100644 --- a/crates/socket-patch-core/src/patch/sidecars/cargo.rs +++ b/crates/socket-patch-core/src/patch/sidecars/cargo.rs @@ -32,7 +32,7 @@ use serde_json::{Map, Value}; use sha2::{Digest, Sha256}; use crate::hash::git_sha256::compute_git_sha256_from_bytes; -use crate::patch::apply::{apply_file_patch, normalize_file_path}; +use crate::patch::apply::{apply_file_patch, is_safe_relative_subpath, normalize_file_path}; use super::{SidecarError, SidecarFile, SidecarFileAction, SidecarPayload}; @@ -153,6 +153,28 @@ async fn update_entries( ) -> Result<(), SidecarError> { for file_name in patched { let normalized = normalize_file_path(file_name).to_string(); + + // SECURITY (fail closed): `normalized` is joined to `pkg_path` and + // both read (to hash) and used as a `.cargo-checksum.json` key. An + // escaping key (`../../etc/passwd`, an absolute path) would make us + // hash an arbitrary out-of-tree file and embed its digest under a + // bogus key in the committed checksum — an info leak that also + // corrupts the checksum so cargo can no longer verify the crate. 
+ // The apply *write* path (`apply_file_patch`) already refuses these, + // but `fixup` is `pub(crate)` and reached directly via `dispatch_fixup` + // and tests, so the *read* path must guard itself too. Mirror apply's + // `InvalidData` refusal rather than silently skipping — an escaping + // key never names a legitimate patch target. + if !is_safe_relative_subpath(&normalized) { + return Err(SidecarError::Io { + path: file_name.clone(), + source: std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("Unsafe patch path (escapes package directory): {file_name}"), + ), + }); + } + let on_disk = pkg_path.join(&normalized); let hash = sha256_file(&on_disk) .await @@ -461,6 +483,76 @@ mod tests { ); } + /// Security regression (path escape via `..`): a poisoned patch + /// entry whose key walks out of the package dir must be refused — + /// NOT hashed and embedded under an escaping key in the committed + /// checksum. Before the guard, `sha256_file` read the out-of-tree + /// target and `update_entries` inserted `../secret.txt` into the + /// `files` map (info leak + checksum corruption). + #[tokio::test] + async fn refuses_dotdot_escape_path() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path().join("pkg"); + tokio::fs::create_dir_all(&pkg).await.unwrap(); + + // A secret living OUTSIDE the package dir, reachable only via `..`. 
+ let secret = d.path().join("secret.txt"); + tokio::fs::write(&secret, b"top secret bytes").await.unwrap(); + + let starting = serde_json::json!({ + "files": { "Cargo.toml": "ff".repeat(32) }, + "package": "x", + }); + let checksum = pkg.join(CHECKSUM_FILE); + let original = serde_json::to_string_pretty(&starting).unwrap(); + tokio::fs::write(&checksum, &original).await.unwrap(); + + let err = fixup(&pkg, &["../secret.txt".to_string()]) + .await + .unwrap_err(); + match err { + SidecarError::Io { path, source } => { + assert!(path.contains("secret.txt"), "error must name the bad key"); + assert_eq!(source.kind(), std::io::ErrorKind::InvalidData); + } + other => panic!("expected InvalidData Io error, got {other:?}"), + } + + // The checksum file must be untouched — no escaping key, no leaked + // hash of the secret. + let after = tokio::fs::read_to_string(&checksum).await.unwrap(); + assert_eq!(after, original, "checksum must not be rewritten on refusal"); + assert!( + !after.contains(&expected_sha256(b"top secret bytes")), + "the out-of-tree secret's hash must never be embedded" + ); + } + + /// Security regression (absolute-path escape): `Path::join` discards + /// the base when the key is absolute, so an absolute key would hash + /// an arbitrary system file. Must be refused exactly like `..`. + #[tokio::test] + async fn refuses_absolute_escape_path() { + let d = tempfile::tempdir().unwrap(); + let pkg = d.path(); + let starting = serde_json::json!({ + "files": { "Cargo.toml": "ff".repeat(32) }, + "package": "x", + }); + tokio::fs::write( + pkg.join(CHECKSUM_FILE), + serde_json::to_string_pretty(&starting).unwrap(), + ) + .await + .unwrap(); + + let err = fixup(pkg, &["/etc/hosts".to_string()]).await.unwrap_err(); + assert!(matches!( + err, + SidecarError::Io { source, .. } if source.kind() == std::io::ErrorKind::InvalidData + )); + } + /// Atomicity hygiene: the stage+rename commit must leave no /// `.socket-stage-*` litter in the package directory. 
#[tokio::test] diff --git a/crates/socket-patch-core/src/patch/sidecars/nuget.rs b/crates/socket-patch-core/src/patch/sidecars/nuget.rs index d0d1b76..eeea1f0 100644 --- a/crates/socket-patch-core/src/patch/sidecars/nuget.rs +++ b/crates/socket-patch-core/src/patch/sidecars/nuget.rs @@ -104,16 +104,35 @@ pub(crate) async fn fixup(pkg_path: &Path) -> Result, Sid /// junk left over from corrupt installs) without an implicit-else /// arm that coverage can never reach on filesystems that reject /// non-UTF-8 bytes at creation time (APFS). +/// +/// The name match alone is not sufficient: a *directory* (or socket, +/// FIFO, …) whose name happens to end in `.nupkg.sha512` is not a +/// content-signing marker, and treating it as one emits a spurious +/// "package may be flagged as tampered" advisory that misleads +/// operators. We therefore require the entry to resolve to a regular +/// file. The check follows symlinks (`fs::metadata`, not the +/// non-following `DirEntry::file_type`) so a marker that ships as a +/// symlink to a real `.sha512` still counts — fail-closed against the +/// directory false-positive, not fail-open against a symlinked marker +/// (the symlink-drop trap the npm/cargo crawlers were bitten by). async fn has_signed_marker(pkg_path: &Path) -> bool { let mut entries = match tokio::fs::read_dir(pkg_path).await { Ok(rd) => rd, Err(_) => return false, }; while let Ok(Some(entry)) = entries.next_entry().await { - if entry + if !entry .file_name() .as_encoded_bytes() .ends_with(b".nupkg.sha512") + { + continue; + } + // Name matches — confirm it's a regular file before believing it. + if tokio::fs::metadata(entry.path()) + .await + .map(|m| m.is_file()) + .unwrap_or(false) { return true; } @@ -219,6 +238,103 @@ mod tests { ); } + /// Regression (directory false-positive): a *directory* whose name + /// ends in `.nupkg.sha512` is NOT a content-signing marker. 
Before + /// the `is_file` guard, `has_signed_marker` matched on name alone + /// and emitted a spurious "package may be flagged as tampered" + /// advisory for it — misleading an operator into thinking an + /// unsigned package was signed. There's no metadata here either, so + /// the correct outcome is a clean `None`. + #[tokio::test] + async fn directory_named_like_marker_is_not_a_signature() { + let d = tempfile::tempdir().unwrap(); + // A directory — not a file — bearing the marker suffix. + tokio::fs::create_dir(d.path().join("weird.nupkg.sha512")) + .await + .unwrap(); + + let out = fixup(d.path()).await.unwrap(); + assert!( + out.is_none(), + "a directory named *.nupkg.sha512 must not be treated as a signing marker" + ); + } + + /// A directory matching the marker name must not even flip the + /// advisory when there IS metadata to delete: the file entry is + /// present, but the advisory stays absent. + #[tokio::test] + async fn marker_dir_with_metadata_deletes_without_advisory() { + let d = tempfile::tempdir().unwrap(); + tokio::fs::write(d.path().join(METADATA_FILE), b"{}") + .await + .unwrap(); + tokio::fs::create_dir(d.path().join("pkg.1.0.0.nupkg.sha512")) + .await + .unwrap(); + + let payload = fixup(d.path()).await.unwrap().expect("metadata existed"); + assert_eq!(payload.files.len(), 1); + assert_eq!(payload.files[0].action, SidecarFileAction::Deleted); + assert!( + payload.advisory.is_none(), + "a directory marker must not raise the signed-package advisory" + ); + } + + /// A marker shipped as a *symlink to a real `.sha512` file* must + /// still count — the `is_file` guard follows symlinks, so it does + /// not fail open the way the non-following `DirEntry::file_type` + /// would have (the symlink-drop trap the crawlers were bitten by). + #[cfg(unix)] + #[tokio::test] + async fn symlinked_marker_still_counts_as_signed() { + let d = tempfile::tempdir().unwrap(); + // The real sha512 lives elsewhere; the package dir only has a + // symlink to it. 
+ let real = d.path().join("real.sha512"); + tokio::fs::write(&real, b"hash").await.unwrap(); + tokio::fs::symlink(&real, d.path().join("pkg.1.0.0.nupkg.sha512")) + .await + .unwrap(); + + let payload = fixup(d.path()) + .await + .unwrap() + .expect("symlinked signature marker must surface an advisory"); + assert!(payload.files.is_empty()); + let adv = payload.advisory.expect("expected advisory"); + assert_eq!(adv.code, SidecarAdvisoryCode::NugetSignedPackageTampered); + } + + /// Deleting `.nupkg.metadata` must leave the `.nupkg.sha512` + /// signature sibling on disk — we only neutralize the recomputable + /// metadata hash, never the archive-level signature (which we + /// cannot honestly fix and only advise on). Pins that the unlink + /// targets exactly the metadata file and nothing else. + #[tokio::test] + async fn delete_does_not_remove_signature_sibling() { + let d = tempfile::tempdir().unwrap(); + tokio::fs::write(d.path().join(METADATA_FILE), b"{}") + .await + .unwrap(); + let sig = d.path().join("pkg.1.0.0.nupkg.sha512"); + tokio::fs::write(&sig, b"hash").await.unwrap(); + + fixup(d.path()).await.unwrap(); + + assert!( + tokio::fs::metadata(d.path().join(METADATA_FILE)) + .await + .is_err(), + "metadata must be gone" + ); + assert!( + tokio::fs::metadata(&sig).await.is_ok(), + "the .nupkg.sha512 signature sibling must be left untouched" + ); + } + /// Signed package WITH metadata: the typed payload now carries /// BOTH the file entry and the advisory — the lossy collapse /// from the old design is fixed. 
diff --git a/crates/socket-patch-core/src/pth_hook/detect.rs b/crates/socket-patch-core/src/pth_hook/detect.rs index 9756f2f..ba8d40b 100644 --- a/crates/socket-patch-core/src/pth_hook/detect.rs +++ b/crates/socket-patch-core/src/pth_hook/detect.rs @@ -90,12 +90,19 @@ pub async fn detect_python_pm(cwd: &Path) -> PythonPackageManager { fn has_table(content: &str, prefix: &str) -> bool { content.lines().any(|line| { let l = line.trim(); - if let Some(rest) = l.strip_prefix('[') { - let header = rest.trim_start_matches('[').trim_end_matches(']'); - header == prefix || header.starts_with(&format!("{prefix}.")) - } else { - false - } + let Some(rest) = l.strip_prefix('[') else { + return false; + }; + // Tolerate array-of-tables (`[[..]]`) by dropping a second opening + // bracket, then take everything up to the closing `]` so a trailing + // inline comment (`[tool.uv] # note`) or interior padding + // (`[ tool.uv ]`) — both valid TOML — doesn't defeat the match. + let rest = rest.trim_start_matches('['); + let Some(end) = rest.find(']') else { + return false; + }; + let header = rest[..end].trim(); + header == prefix || header.starts_with(&format!("{prefix}.")) }) } @@ -103,10 +110,19 @@ fn has_table(content: &str, prefix: &str) -> bool { /// form. Space- and case-insensitive so `socket-patch [hook]` / `Socket-Patch` /// are recognised. pub fn deps_contain_hook(text: &str) -> bool { - let normalized: String = text.to_lowercase().chars().filter(|c| !c.is_whitespace()).collect(); - HOOK_MARKERS - .iter() - .any(|m| normalized.contains(&m.to_lowercase())) + // Normalize per line: drop intra-line whitespace so `socket-patch [hook]` + // matches, but keep line boundaries intact. Stripping newlines too would + // glue adjacent specs together (this is called on whole-file content by + // `setup`'s state probe), turning a trailing `socket-patch` plus a following + // `[hook]` into a phantom marker — a false positive. 
+ text.lines().any(|line| { + let normalized: String = line + .to_lowercase() + .chars() + .filter(|c| !c.is_whitespace()) + .collect(); + HOOK_MARKERS.iter().any(|m| normalized.contains(*m)) + }) } /// True if a single PEP 508 dependency spec is the hook dependency. @@ -135,6 +151,31 @@ mod tests { assert!(!deps_contain_hook("")); } + #[test] + fn test_deps_contain_hook_no_cross_line_glue() { + // `deps_contain_hook` is run on whole-file content by the setup state + // probe. Two unrelated specs on adjacent lines must NOT be glued into + // a phantom `socket-patch[hook]` marker. + let requirements = "socket-patch\n[hook]\nrequests\n"; + assert!(!deps_contain_hook(requirements)); + + // A wrapped TOML dependency array around a plain socket-patch dep also + // must not synthesize the marker across line breaks. + let pyproject = "dependencies = [\n \"socket-patch\",\n]\nextras = [\"hook\"]\n"; + assert!(!deps_contain_hook(pyproject)); + } + + #[test] + fn test_deps_contain_hook_real_marker_in_multiline() { + // The genuine hook spec on its own line within whole-file content is + // still detected (intra-line spaces tolerated). + let requirements = "requests==2.31.0\nsocket-patch [hook]\nflask\n"; + assert!(deps_contain_hook(requirements)); + let pyproject = + "dependencies = [\n \"requests\",\n \"socket-patch[hook]>=3.3.0\",\n]\n"; + assert!(deps_contain_hook(pyproject)); + } + #[test] fn test_has_table() { let toml = "[tool.poetry]\nname='x'\n[tool.poetry.dependencies]\n"; @@ -145,6 +186,21 @@ mod tests { assert!(!has_table("name = \"tool.poetry helper\"\n", "tool.poetry")); } + #[test] + fn test_has_table_trailing_comment_and_padding() { + // A trailing inline comment after the header is valid TOML and must + // not defeat detection (previously `trim_end_matches(']')` left the + // comment glued to the header). 
+ assert!(has_table("[tool.uv] # the uv table\n", "tool.uv")); + assert!(has_table("[tool.uv.sources] # comment\n", "tool.uv")); + // Interior padding inside the brackets is also valid TOML. + assert!(has_table("[ tool.pdm ]\n", "tool.pdm")); + // Array-of-tables form, with a comment, still resolves the namespace. + assert!(has_table("[[tool.poetry.source]] # extra\n", "tool.poetry")); + // A sibling prefix must still not match (no spurious widening). + assert!(!has_table("[tool.uvicorn] # web\n", "tool.uv")); + } + #[tokio::test] async fn test_detect_uv_by_lock() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/pth_hook/edit.rs b/crates/socket-patch-core/src/pth_hook/edit.rs index 11c89cc..ab2236a 100644 --- a/crates/socket-patch-core/src/pth_hook/edit.rs +++ b/crates/socket-patch-core/src/pth_hook/edit.rs @@ -18,6 +18,58 @@ use toml_edit::{Array, DocumentMut, InlineTable, Item, Table, Value}; use super::detect::{deps_contain_hook, spec_is_hook, HOOK_DEP}; +/// Atomically write `content` to `path`. +/// +/// A bare `fs::write` truncates the target before writing, so a crash, power +/// loss, or interrupted process mid-write would leave the user's hand-authored +/// `pyproject.toml` / `requirements.txt` (with its comments, formatting, and +/// other dependencies) truncated or empty — destroying the file we only meant +/// to add one dependency line to. Instead we write to a sibling stage file, +/// fsync it, then rename over the target (rename is atomic on the same +/// filesystem) so a reader only ever sees either the old bytes or the complete new +/// bytes. Mirrors the hardened writer in `package_json::update`.
+async fn atomic_write(path: &Path, content: &str) -> std::io::Result<()> { + let parent = path.parent().unwrap_or_else(|| Path::new(".")); + let stem = path + .file_name() + .map(|n| n.to_string_lossy().into_owned()) + .unwrap_or_else(|| "manifest".to_string()); + let stage = parent.join(format!(".socket-stage-{}-{}", stem, uuid::Uuid::new_v4())); + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&stage) + .await?; + + use tokio::io::AsyncWriteExt; + if let Err(e) = file.write_all(content.as_bytes()).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + if let Err(e) = file.sync_all().await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + drop(file); + + if let Err(e) = tokio::fs::rename(&stage, path).await { + let _ = tokio::fs::remove_file(&stage).await; + return Err(e); + } + + // The rename only updated the parent directory entry; fsync the directory + // so the rename itself survives a crash. Best-effort, Unix only. + #[cfg(unix)] + { + if let Ok(dir) = tokio::fs::File::open(parent).await { + let _ = dir.sync_all().await; + } + } + + Ok(()) +} + /// Which manifest format a path is. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ManifestKind { @@ -84,7 +136,7 @@ pub async fn add_hook_dependency(path: &Path, kind: ManifestKind, dry_run: bool) Ok(None) => PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured), Ok(Some(new_content)) => { if !dry_run { - if let Err(e) = fs::write(path, &new_content).await { + if let Err(e) = atomic_write(path, &new_content).await { return PthEditResult::err(path, kind, e.to_string()); } } @@ -119,7 +171,7 @@ pub async fn remove_hook_dependency( Ok(None) => PthEditResult::ok(path, kind, PthStatus::AlreadyConfigured), Ok(Some(new_content)) => { if !dry_run { - if let Err(e) = fs::write(path, &new_content).await { + if let Err(e) = atomic_write(path, &new_content).await { return PthEditResult::err(path, kind, e.to_string()); } } @@ -372,6 +424,64 @@ fn item_has_hook_extra(item: &Item) -> bool { .unwrap_or(false) } +/// True if a parsed `pyproject.toml` already declares the hook dependency in any +/// form `setup` could have written: a PEP 621 `[project].dependencies` entry, a +/// classic-Poetry `socket-patch` dep carrying the `hook` extra, or a legacy bare +/// `socket-patch-hook` key. +/// +/// This is the structural counterpart to the textual +/// [`super::detect::deps_contain_hook`]. It exists because `poetry_add` writes +/// the hook as `socket-patch = { version = "*", extras = ["hook"] }`, which has +/// no literal `socket-patch[hook]` substring — so the textual probe reports a +/// freshly-and-correctly-configured classic-Poetry project as *unconfigured*. +/// The `setup --check` / state probes must use this for `pyproject.toml` so a +/// round-trip (setup → check) is consistent. Falls back to the textual check on +/// unparseable TOML (best effort rather than a hard failure). 
+pub fn pyproject_contains_hook(content: &str) -> bool { + let doc = match content.parse::() { + Ok(d) => d, + Err(_) => return deps_contain_hook(content), + }; + + // PEP 621 `[project].dependencies` (the textual `socket-patch[hook]` spec, + // or the bare `socket-patch-hook` wheel). + let in_pep621 = doc + .get("project") + .and_then(Item::as_table) + .and_then(|p| p.get("dependencies")) + .and_then(Item::as_array) + .map(|deps| { + deps.iter() + .any(|v| v.as_str().map(spec_is_hook).unwrap_or(false)) + }) + .unwrap_or(false); + if in_pep621 { + return true; + } + + // Classic Poetry `[tool.poetry.dependencies]`: a bare `socket-patch-hook` + // key, or a `socket-patch` dep carrying the `hook` extra. + if let Some(deps) = doc + .get("tool") + .and_then(Item::as_table) + .and_then(|t| t.get("poetry")) + .and_then(Item::as_table) + .and_then(|p| p.get("dependencies")) + .and_then(Item::as_table) + { + if deps.contains_key("socket-patch-hook") { + return true; + } + if let Some(item) = deps.get("socket-patch") { + if item_has_hook_extra(item) { + return true; + } + } + } + + false +} + #[cfg(test)] mod tests { use super::*; @@ -589,4 +699,156 @@ mod tests { assert_eq!(res.status, PthStatus::Updated); assert!(!req.exists(), "dry-run must not create the file"); } + + // ── atomic-write contract (no truncation / no stage litter) ────── + // + // The edit must go through stage+fsync+rename, never a bare truncating + // write, so a crash can't leave the user's hand-authored manifest empty. + // A leaked `.socket-stage-*` sibling would mean the rename didn't complete. 
+ + async fn count_stage_litter(dir: &Path) -> usize { + let mut rd = tokio::fs::read_dir(dir).await.unwrap(); + let mut n = 0; + while let Some(entry) = rd.next_entry().await.unwrap() { + if entry + .file_name() + .to_string_lossy() + .starts_with(".socket-stage-") + { + n += 1; + } + } + n + } + + #[tokio::test] + async fn test_add_pyproject_atomic_no_litter_and_intact() { + let dir = tempfile::tempdir().unwrap(); + let py = dir.path().join("pyproject.toml"); + let original = "[build-system]\nrequires = [\"setuptools\"]\n\n[project]\nname = \"x\"\ndependencies = [\"requests\"]\n"; + tokio::fs::write(&py, original).await.unwrap(); + + let res = add_hook_dependency(&py, ManifestKind::Pyproject, false).await; + assert_eq!(res.status, PthStatus::Updated); + + // No half-written stage file left behind. + assert_eq!(count_stage_litter(dir.path()).await, 0); + // The file is fully written, valid TOML, and preserved prior content. + let body = tokio::fs::read_to_string(&py).await.unwrap(); + let doc = body.parse::().unwrap(); + assert!(body.contains("[build-system]")); + let deps = doc["project"]["dependencies"].as_array().unwrap(); + assert!(deps.iter().any(|v| v.as_str() == Some("requests"))); + assert!(deps.iter().any(|v| v.as_str() == Some("socket-patch[hook]"))); + } + + #[tokio::test] + async fn test_remove_requirements_atomic_no_litter() { + let dir = tempfile::tempdir().unwrap(); + let req = dir.path().join("requirements.txt"); + tokio::fs::write(&req, "requests\nsocket-patch[hook]\n") + .await + .unwrap(); + + let res = remove_hook_dependency(&req, ManifestKind::Requirements, false).await; + assert_eq!(res.status, PthStatus::Updated); + assert_eq!(count_stage_litter(dir.path()).await, 0); + assert_eq!( + tokio::fs::read_to_string(&req).await.unwrap(), + "requests\n" + ); + } + + // ── structural hook detection (pyproject_contains_hook) ────────── + // + // The `setup --check` probe must agree with what `setup` wrote. 
The classic + // Poetry form has no `socket-patch[hook]` substring, so the textual probe + // alone mis-reports a configured project as needing configuration. + + #[test] + fn test_pyproject_contains_hook_poetry_form_roundtrips() { + // Regression: poetry_add writes the structural `extras = ["hook"]` form; + // the textual probe can't see it, but the structural one must. + let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\npython = \"^3.9\"\n"; + let out = pyproject_add(toml).unwrap().unwrap(); + assert!( + pyproject_contains_hook(&out), + "structural probe must see the poetry extras form:\n{out}" + ); + // This is precisely why the structural probe is needed: the textual one + // (used for requirements.txt) cannot detect the poetry form. + assert!( + !deps_contain_hook(&out), + "textual probe is (by design) blind to the poetry form; if this \ + ever becomes true the structural probe may be redundant:\n{out}" + ); + } + + #[test] + fn test_pyproject_contains_hook_pep621_and_wheel() { + // PEP 621 array, extra spelling. + assert!(pyproject_contains_hook( + "[project]\ndependencies = [\"requests\", \"socket-patch[hook]>=3.3.0\"]\n" + )); + // PEP 621 array, bare wheel spelling. + assert!(pyproject_contains_hook( + "[project]\ndependencies = [\"socket-patch-hook\"]\n" + )); + // Poetry bare-wheel key. + assert!(pyproject_contains_hook( + "[tool.poetry.dependencies]\nsocket-patch-hook = \"*\"\n" + )); + } + + #[test] + fn test_pyproject_contains_hook_negative() { + // A plain socket-patch dep (CLI only, no hook) is NOT the hook — in + // either surface. + assert!(!pyproject_contains_hook( + "[project]\ndependencies = [\"socket-patch>=3.3.0\"]\n" + )); + assert!(!pyproject_contains_hook( + "[tool.poetry.dependencies]\nsocket-patch = \"^3.3.0\"\n" + )); + // A socket-patch dep carrying some *other* extra is not the hook. 
+ assert!(!pyproject_contains_hook( + "[tool.poetry.dependencies]\nsocket-patch = {version = \"*\", extras = [\"cli\"]}\n" + )); + // Empty / unrelated. + assert!(!pyproject_contains_hook("[project]\nname = \"x\"\n")); + } + + #[test] + fn test_pyproject_contains_hook_malformed_falls_back_to_textual() { + // Unparseable TOML: fall back to the textual probe rather than hard-fail. + assert!(pyproject_contains_hook("this = = not toml [[[ socket-patch[hook]")); + assert!(!pyproject_contains_hook("this = = not toml [[[ requests")); + } + + #[test] + fn test_pyproject_contains_hook_after_remove_is_false() { + // Round-trip: add then remove → structural probe reports not-configured. + let toml = "[tool.poetry]\nname = \"x\"\n\n[tool.poetry.dependencies]\nsocket-patch = \"^3.3.0\"\n"; + let added = pyproject_add(toml).unwrap().unwrap(); + assert!(pyproject_contains_hook(&added)); + let removed = pyproject_remove(&added).unwrap().unwrap(); + assert!( + !pyproject_contains_hook(&removed), + "after remove the hook must be gone:\n{removed}" + ); + } + + #[tokio::test] + async fn test_dry_run_does_no_io_for_pyproject() { + let dir = tempfile::tempdir().unwrap(); + let py = dir.path().join("pyproject.toml"); + let original = "[project]\nname = \"x\"\ndependencies = [\"requests\"]\n"; + tokio::fs::write(&py, original).await.unwrap(); + + let res = add_hook_dependency(&py, ManifestKind::Pyproject, true).await; + assert_eq!(res.status, PthStatus::Updated); + // Dry-run must neither stage nor mutate the original. 
+ assert_eq!(count_stage_litter(dir.path()).await, 0); + assert_eq!(tokio::fs::read_to_string(&py).await.unwrap(), original); + } } diff --git a/crates/socket-patch-core/src/pth_hook/mod.rs b/crates/socket-patch-core/src/pth_hook/mod.rs index 1fbf627..84802b4 100644 --- a/crates/socket-patch-core/src/pth_hook/mod.rs +++ b/crates/socket-patch-core/src/pth_hook/mod.rs @@ -21,5 +21,6 @@ pub mod edit; pub use detect::{deps_contain_hook, detect_python_pm, PythonPackageManager, HOOK_DEP}; pub use edit::{ - add_hook_dependency, remove_hook_dependency, ManifestKind, PthEditResult, PthStatus, + add_hook_dependency, pyproject_contains_hook, remove_hook_dependency, ManifestKind, + PthEditResult, PthStatus, }; diff --git a/crates/socket-patch-core/src/utils/cleanup_blobs.rs b/crates/socket-patch-core/src/utils/cleanup_blobs.rs index 93883d3..309a858 100644 --- a/crates/socket-patch-core/src/utils/cleanup_blobs.rs +++ b/crates/socket-patch-core/src/utils/cleanup_blobs.rs @@ -103,10 +103,16 @@ pub async fn cleanup_unused_archives( ) -> Result { let used_uuids: HashSet = manifest.patches.values().map(|r| r.uuid.clone()).collect(); cleanup_dir(archives_dir, dry_run, |name| { - // Strip the .tar.gz suffix to recover the UUID; if it doesn't - // end in .tar.gz, treat the entry as orphaned (not "used"). - let uuid_part = name.strip_suffix(".tar.gz").unwrap_or(name); - used_uuids.contains(uuid_part) + // Strip the .tar.gz suffix to recover the UUID. A file that does + // not end in .tar.gz is never a valid archive, so it is always an + // orphan -- even if its bare name happens to equal a manifest UUID + // (e.g. a stray `{uuid}` file with no extension). Returning false + // here keeps that contract: only well-formed `{uuid}.tar.gz` files + // whose UUID is referenced are kept.
+ match name.strip_suffix(".tar.gz") { + Some(uuid_part) => used_uuids.contains(uuid_part), + None => false, + } }) .await } @@ -504,6 +510,64 @@ mod tests { assert!(result.removed_blobs.contains(&"stray.txt".to_string())); } + #[tokio::test] + async fn test_cleanup_archives_removes_bare_uuid_without_extension() { + // Regression: a stray file whose *bare* name equals a referenced + // manifest UUID but lacks the `.tar.gz` extension is NOT a valid + // archive and must be removed as an orphan. The previous + // `strip_suffix(..).unwrap_or(name)` form fell back to matching the + // whole filename against the UUID set and incorrectly KEPT it. + let dir = tempfile::tempdir().unwrap(); + let archives = dir.path().join("packages"); + tokio::fs::create_dir_all(&archives).await.unwrap(); + + let manifest = create_test_manifest(); + // Bare UUID, no extension -- must be treated as an orphan. + tokio::fs::write(archives.join(TEST_UUID), b"not an archive") + .await + .unwrap(); + // The legitimate archive for the same UUID must survive. + tokio::fs::write(archives.join(format!("{TEST_UUID}.tar.gz")), b"keep") + .await + .unwrap(); + + let result = cleanup_unused_archives(&manifest, &archives, false) + .await + .unwrap(); + + assert_eq!(result.blobs_removed, 1); + assert!(result.removed_blobs.contains(&TEST_UUID.to_string())); + assert!(tokio::fs::metadata(archives.join(TEST_UUID)).await.is_err()); + assert!( + tokio::fs::metadata(archives.join(format!("{TEST_UUID}.tar.gz"))) + .await + .is_ok() + ); + } + + #[tokio::test] + async fn test_cleanup_archives_removes_wrong_suffix_with_uuid_stem() { + // A file named `{uuid}.tar.gz.bak` (or any non-`.tar.gz` suffix) does + // not end in `.tar.gz`, so it is an orphan regardless of its stem.
+ let dir = tempfile::tempdir().unwrap(); + let archives = dir.path().join("packages"); + tokio::fs::create_dir_all(&archives).await.unwrap(); + + let manifest = create_test_manifest(); + tokio::fs::write(archives.join(format!("{TEST_UUID}.tar.gz.bak")), b"junk") + .await + .unwrap(); + + let result = cleanup_unused_archives(&manifest, &archives, false) + .await + .unwrap(); + + assert_eq!(result.blobs_removed, 1); + assert!(result + .removed_blobs + .contains(&format!("{TEST_UUID}.tar.gz.bak"))); + } + #[tokio::test] async fn test_cleanup_archives_nonexistent_dir() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/utils/env_compat.rs b/crates/socket-patch-core/src/utils/env_compat.rs index 1026519..32cea68 100644 --- a/crates/socket-patch-core/src/utils/env_compat.rs +++ b/crates/socket-patch-core/src/utils/env_compat.rs @@ -89,7 +89,14 @@ pub const LEGACY_ENV_RENAMES: &[(&str, &str)] = &[ /// The warning fires unconditionally — even under `--silent` / `--json` /// — so the transition signal isn't swallowed in CI logs. pub fn promote_legacy_env_vars() { - for (new_name, legacy_name) in LEGACY_ENV_RENAMES { + promote_renames(LEGACY_ENV_RENAMES); +} + +/// Core of [`promote_legacy_env_vars`], parameterized over the rename table so +/// it can be exercised in tests with isolated env-var names (the real names are +/// read concurrently by other tests in this binary). +fn promote_renames(renames: &[(&'static str, &'static str)]) { + for &(new_name, legacy_name) in renames { let new_already_set = std::env::var(new_name) .ok() .filter(|v| !v.is_empty()) @@ -197,4 +204,59 @@ mod tests { std::env::remove_var(NEW); std::env::remove_var(LEGACY); } + + /// `promote_renames` copies a set legacy value over to the unset new name, + /// so downstream readers (clap `env =`, core code) only need the new name. 
+ #[test] + fn promote_copies_legacy_to_new_when_new_unset() { + const NEW: &str = "SOCKET_TEST_PROMOTE_COPY_NEW"; + const LEGACY: &str = "SOCKET_TEST_PROMOTE_COPY_NEW_PATCH"; + std::env::remove_var(NEW); + std::env::set_var(LEGACY, "legacy-value"); + promote_renames(&[(NEW, LEGACY)]); + assert_eq!(std::env::var(NEW).ok().as_deref(), Some("legacy-value")); + std::env::remove_var(NEW); + std::env::remove_var(LEGACY); + } + + /// A non-empty new value must win: promote must not clobber it with the + /// legacy value. + #[test] + fn promote_does_not_clobber_existing_new() { + const NEW: &str = "SOCKET_TEST_PROMOTE_KEEP_NEW"; + const LEGACY: &str = "SOCKET_TEST_PROMOTE_KEEP_NEW_PATCH"; + std::env::set_var(NEW, "new-value"); + std::env::set_var(LEGACY, "legacy-value"); + promote_renames(&[(NEW, LEGACY)]); + assert_eq!(std::env::var(NEW).ok().as_deref(), Some("new-value")); + std::env::remove_var(NEW); + std::env::remove_var(LEGACY); + } + + /// An empty new value counts as unset, so the legacy value is promoted in + /// over it — mirroring `read_env_with_legacy`'s empty-is-unset rule. + #[test] + fn promote_treats_empty_new_as_unset() { + const NEW: &str = "SOCKET_TEST_PROMOTE_EMPTY_NEW"; + const LEGACY: &str = "SOCKET_TEST_PROMOTE_EMPTY_NEW_PATCH"; + std::env::set_var(NEW, ""); + std::env::set_var(LEGACY, "legacy-value"); + promote_renames(&[(NEW, LEGACY)]); + assert_eq!(std::env::var(NEW).ok().as_deref(), Some("legacy-value")); + std::env::remove_var(NEW); + std::env::remove_var(LEGACY); + } + + /// An empty legacy value is not promoted (empty == unset on the legacy + /// side too), leaving the new name untouched. 
+ #[test] + fn promote_ignores_empty_legacy() { + const NEW: &str = "SOCKET_TEST_PROMOTE_EMPTY_LEGACY_NEW"; + const LEGACY: &str = "SOCKET_TEST_PROMOTE_EMPTY_LEGACY_NEW_PATCH"; + std::env::remove_var(NEW); + std::env::set_var(LEGACY, ""); + promote_renames(&[(NEW, LEGACY)]); + assert_eq!(std::env::var(NEW).ok(), None); + std::env::remove_var(LEGACY); + } } diff --git a/crates/socket-patch-core/src/utils/fs.rs b/crates/socket-patch-core/src/utils/fs.rs index 56432aa..08da9bf 100644 --- a/crates/socket-patch-core/src/utils/fs.rs +++ b/crates/socket-patch-core/src/utils/fs.rs @@ -190,6 +190,76 @@ mod tests { } } + /// Regression: `list_dir_entries` must hit the `read_dir` Err arm + /// when handed a path that is a regular file (not a directory) and + /// return an empty vec rather than panic. Crawlers routinely probe + /// candidate paths that may turn out to be files (e.g. a stray + /// `node_modules` that is actually a file), and rely on this + /// fail-soft behavior. + #[tokio::test] + async fn list_dir_entries_on_a_file_returns_empty() { + let tmp = tempfile::tempdir().unwrap(); + let file = tmp.path().join("not_a_dir"); + tokio::fs::write(&file, b"x").await.unwrap(); + let entries = list_dir_entries(&file).await; + assert!( + entries.is_empty(), + "read_dir on a regular file must yield no entries" + ); + } + + /// Regression: `entry_is_dir` must resolve a *chain* of symlinks, + /// not just a single hop. `link_a -> link_b -> real_dir` has to + /// report `true`; otherwise a crawler walking through indirection + /// (common in pnpm/virtualenv layouts) would silently skip the + /// package directory. 
+ #[cfg(unix)] + #[tokio::test] + async fn entry_is_dir_follows_symlink_chain() { + let tmp = tempfile::tempdir().unwrap(); + let real_dir = tmp.path().join("real_dir"); + tokio::fs::create_dir(&real_dir).await.unwrap(); + let link_b = tmp.path().join("link_b"); + tokio::fs::symlink(&real_dir, &link_b).await.unwrap(); + // link_a points at link_b, which points at real_dir. + tokio::fs::symlink(&link_b, tmp.path().join("link_a")) + .await + .unwrap(); + + let link = list_dir_entries(tmp.path()) + .await + .into_iter() + .find(|e| e.file_name().to_string_lossy() == "link_a") + .expect("chained symlink entry present"); + assert!( + entry_is_dir(&link).await, + "a chain of symlinks ending at a directory must resolve to is_dir = true" + ); + } + + /// `entry_file_type` reports the plain kinds (dir / file) faithfully + /// when no symlink is involved — it only diverges from + /// `entry_is_dir` on links. + #[tokio::test] + async fn entry_file_type_reports_plain_dir_and_file() { + let tmp = tempfile::tempdir().unwrap(); + tokio::fs::create_dir(tmp.path().join("d")).await.unwrap(); + tokio::fs::write(tmp.path().join("f"), b"x").await.unwrap(); + for entry in list_dir_entries(tmp.path()).await { + let name = entry.file_name().to_string_lossy().to_string(); + let ft = entry_file_type(&entry).await.expect("file_type available"); + match name.as_str() { + "d" => { + assert!(ft.is_dir() && !ft.is_symlink(), "d is a plain dir"); + } + "f" => { + assert!(ft.is_file() && !ft.is_symlink(), "f is a plain file"); + } + other => panic!("unexpected entry: {other}"), + } + } + } + /// `entry_file_type` is the symlink-aware counterpart: it reports /// the link itself (`is_symlink`), never the resolved target. 
#[cfg(unix)] diff --git a/crates/socket-patch-core/src/utils/purl.rs b/crates/socket-patch-core/src/utils/purl.rs index d2fccca..6ea0e80 100644 --- a/crates/socket-patch-core/src/utils/purl.rs +++ b/crates/socket-patch-core/src/utils/purl.rs @@ -1,8 +1,18 @@ -/// Strip query string qualifiers from a PURL. +/// Strip the trailing `?qualifiers` and `#subpath` components from a PURL, +/// leaving the canonical `pkg:type/namespace/name@version` base. /// -/// e.g., `"pkg:pypi/requests@2.28.0?artifact_id=abc"` -> `"pkg:pypi/requests@2.28.0"` +/// The PURL grammar is `pkg:type/ns/name@version?qualifiers#subpath`, so a +/// subpath can appear *with or without* a preceding qualifier. Cutting only +/// at `?` would let a bare `#subpath` (no qualifier) leak into the base — +/// corrupting the version when the result is later split on `@`, and +/// breaking the grouping/matching keys callers build from it (two PURLs for +/// the same `name@version` differing only by subpath must collapse to one +/// base). So we cut at whichever of `?`/`#` comes first. +/// +/// e.g. `"pkg:pypi/requests@2.28.0?artifact_id=abc"` -> `"pkg:pypi/requests@2.28.0"` +/// and `"pkg:golang/github.com/foo/bar@v1.0.0#cmd/tool"` -> `"pkg:golang/github.com/foo/bar@v1.0.0"` pub fn strip_purl_qualifiers(purl: &str) -> &str { - match purl.find('?') { + match purl.find(['?', '#']) { Some(idx) => &purl[..idx], None => purl, } @@ -242,7 +252,11 @@ pub fn purl_matches_identifier(manifest_key: &str, identifier: &str) -> bool { if identifier.contains('?') { manifest_key == identifier } else { - strip_purl_qualifiers(manifest_key) == identifier + // Base identifier: compare bases. Strip both sides so a subpath + // (`#...`) carried by either the key or the identifier doesn't + // defeat the match — `strip_purl_qualifiers(identifier)` is a no-op + // for a plain base PURL, so existing behaviour is unchanged. 
+ strip_purl_qualifiers(manifest_key) == strip_purl_qualifiers(identifier) } } @@ -673,4 +687,76 @@ mod tests { "pkg:gem/nokogiri@1.16.5" )); } + + // --- Regression: PURL subpath (`#...`) handling ------------------------- + // + // The PURL grammar is `pkg:type/ns/name@version?qualifiers#subpath`. A + // subpath can appear *without* a preceding qualifier, so stripping only + // at `?` lets it leak into the base — which then corrupts the version + // (split on `@`) and breaks every grouping/matching key built from it. + + #[test] + fn test_strip_subpath_without_qualifier() { + // No `?`, but a trailing `#subpath` must still be removed. + assert_eq!( + strip_purl_qualifiers("pkg:golang/github.com/foo/bar@v1.0.0#cmd/tool"), + "pkg:golang/github.com/foo/bar@v1.0.0" + ); + } + + #[test] + fn test_strip_qualifier_and_subpath_together() { + // Cutting at the first of `?`/`#` removes both components at once. + assert_eq!( + strip_purl_qualifiers("pkg:pypi/requests@2.28.0?artifact_id=abc#dist/info"), + "pkg:pypi/requests@2.28.0" + ); + } + + #[test] + fn test_parse_pypi_subpath_not_folded_into_version() { + // The `#dist` must not bleed into the parsed version. + assert_eq!( + parse_pypi_purl("pkg:pypi/requests@2.28.0#dist"), + Some(("requests", "2.28.0")) + ); + } + + #[cfg(feature = "golang")] + #[test] + fn test_parse_golang_subpath_stripped() { + // Go subpaths point at a sub-package of the same module; the parsed + // version must remain clean. + assert_eq!( + parse_golang_purl("pkg:golang/github.com/gin-gonic/gin@v1.9.1#middleware"), + Some(("github.com/gin-gonic/gin", "v1.9.1")) + ); + } + + #[test] + fn test_purl_matches_identifier_base_id_matches_subpath_bearing_key() { + // A manifest key carrying a subpath must still match its own base + // identifier — they describe the same package@version. 
+ assert!(purl_matches_identifier( + "pkg:golang/github.com/foo/bar@v1.0.0#cmd/tool", + "pkg:golang/github.com/foo/bar@v1.0.0" + )); + // ...but a different version still must not match. + assert!(!purl_matches_identifier( + "pkg:golang/github.com/foo/bar@v2.0.0#cmd/tool", + "pkg:golang/github.com/foo/bar@v1.0.0" + )); + } + + // --- Regression: name must not absorb the version separator ------------- + + #[test] + fn test_parse_multiple_at_takes_last_as_version_separator() { + // `rfind('@')` (not `find`) ensures the *last* `@` splits the + // version, so a name/path that itself contained an `@` keeps it. + assert_eq!( + parse_pypi_purl("pkg:pypi/weird@name@1.0.0"), + Some(("weird@name", "1.0.0")) + ); + } } diff --git a/crates/socket-patch-core/src/utils/telemetry.rs b/crates/socket-patch-core/src/utils/telemetry.rs index c59bae0..b36a590 100644 --- a/crates/socket-patch-core/src/utils/telemetry.rs +++ b/crates/socket-patch-core/src/utils/telemetry.rs @@ -268,27 +268,47 @@ fn days_to_ymd(days: u64) -> (u64, u64, u64) { // Send event // --------------------------------------------------------------------------- -/// Send a telemetry event to the API. +/// Decide which endpoint a telemetry event goes to, and whether to attach +/// the bearer token. /// -/// This is fire-and-forget: errors are logged in debug mode but never -/// propagated. Uses `reqwest` with a 5-second timeout. -async fn send_telemetry_event( - event: &PatchTelemetryEvent, - api_token: Option<&str>, - org_slug: Option<&str>, -) { - let (url, use_auth) = match (api_token, org_slug) { +/// The authenticated `/v0/orgs//telemetry` endpoint is used only when +/// BOTH a non-empty token and a non-empty org slug are present. An empty +/// string is treated as absent: a `Some("")` slug would otherwise build a +/// malformed `/v0/orgs//telemetry` URL and a `Some("")` token an empty +/// `Bearer ` header. 
This mirrors the empty-slug guard in +/// `get_api_client_from_env`, keeping the contract robust even if a caller +/// hands us blank values directly. +fn resolve_telemetry_endpoint(api_token: Option<&str>, org_slug: Option<&str>) -> (String, bool) { + let token = api_token.filter(|t| !t.is_empty()); + let slug = org_slug.filter(|s| !s.is_empty()); + + match (token, slug) { (Some(_token), Some(slug)) => { let api_url = std::env::var("SOCKET_API_URL") - .unwrap_or_else(|_| DEFAULT_SOCKET_API_URL.to_string()); + .ok() + .filter(|u| !u.is_empty()) + .unwrap_or_else(|| DEFAULT_SOCKET_API_URL.to_string()); (format!("{api_url}/v0/orgs/{slug}/telemetry"), true) } _ => { let proxy_url = read_env_with_legacy("SOCKET_PROXY_URL", "SOCKET_PATCH_PROXY_URL") + .filter(|u| !u.is_empty()) .unwrap_or_else(|| DEFAULT_PATCH_API_PROXY_URL.to_string()); (format!("{proxy_url}/patch/telemetry"), false) } - }; + } +} + +/// Send a telemetry event to the API. +/// +/// This is fire-and-forget: errors are logged in debug mode but never +/// propagated. Uses `reqwest` with a 5-second timeout. +async fn send_telemetry_event( + event: &PatchTelemetryEvent, + api_token: Option<&str>, + org_slug: Option<&str>, +) { + let (url, use_auth) = resolve_telemetry_endpoint(api_token, org_slug); debug_log(&format!("Sending telemetry to {url}")); @@ -1061,4 +1081,149 @@ mod tests { let (y, m, d) = days_to_ymd(19723); assert_eq!((y, m, d), (2024, 1, 1)); } + + /// Independent brute-force civil-date counter used to cross-check + /// `days_to_ymd` (Howard Hinnant's algorithm) without sharing any of its + /// arithmetic — so a regression in either is caught. 
+ fn brute_days_to_ymd(days: u64) -> (u64, u64, u64) { + fn is_leap(y: u64) -> bool { + (y % 4 == 0 && y % 100 != 0) || y % 400 == 0 + } + let mut rem = days; + let mut y = 1970u64; + loop { + let year_len = if is_leap(y) { 366 } else { 365 }; + if rem < year_len { + break; + } + rem -= year_len; + y += 1; + } + let months = [ + 31, + if is_leap(y) { 29 } else { 28 }, + 31, + 30, + 31, + 30, + 31, + 31, + 30, + 31, + 30, + 31, + ]; + let mut m = 0usize; + while rem >= months[m] { + rem -= months[m]; + m += 1; + } + (y, (m + 1) as u64, rem + 1) + } + + /// Spot-check the trickiest civil-date edges: leap day, the day after, + /// non-leap century boundaries (1900/2100/2200/2300 — divisible by 100 but + /// not 400, so NOT leap) and leap centuries (2000/2400), plus year/month + /// rollovers. Each is computed by hand to anchor the value. + #[test] + fn test_days_to_ymd_edge_dates() { + // 2000-02-29 (leap century) and the day after. + assert_eq!(brute_days_to_ymd(11016), (2000, 2, 29)); // anchor the oracle + assert_eq!(days_to_ymd(11016), (2000, 2, 29)); + assert_eq!(days_to_ymd(11017), (2000, 3, 1)); + + // 2100-02-28 must NOT be followed by Feb 29 — 2100 is not a leap year. + let feb28_2100 = brute_days_to_ymd(47540); + assert_eq!(feb28_2100, (2100, 2, 28)); + assert_eq!(days_to_ymd(47540), (2100, 2, 28)); + assert_eq!(days_to_ymd(47541), (2100, 3, 1)); + + // Year/month rollover: Dec 31 -> Jan 1. + assert_eq!(days_to_ymd(19722), (2023, 12, 31)); + assert_eq!(days_to_ymd(19723), (2024, 1, 1)); + } + + /// Exhaustive cross-check against the independent counter across ~1265 + /// years, covering every leap rule and century boundary through 3235. + #[test] + fn test_days_to_ymd_matches_brute_force() { + for days in 0..462_000u64 { + assert_eq!( + days_to_ymd(days), + brute_days_to_ymd(days), + "mismatch at day {days}" + ); + } + } + + /// The time-of-day split in `chrono_now_iso` must carve a within-day + /// second offset into the right h/m/s buckets. 
We reconstruct the exact + /// arithmetic for a known offset (23:59:59 on day 0 = epoch) by parsing + /// the rendered prefix, since the live timestamp can't be pinned. + #[test] + fn test_chrono_now_iso_components_well_formed() { + let ts = chrono_now_iso(); + // YYYY-MM-DDTHH:MM:SS.mmmZ — validate every field range, not just shape. + let (date, rest) = ts.split_once('T').expect("has T separator"); + let parts: Vec<&str> = date.split('-').collect(); + assert_eq!(parts.len(), 3); + let (year, month, day): (u64, u64, u64) = ( + parts[0].parse().unwrap(), + parts[1].parse().unwrap(), + parts[2].parse().unwrap(), + ); + assert!((2026..=2100).contains(&year), "year {year} out of range"); + assert!((1..=12).contains(&month), "month {month} out of range"); + assert!((1..=31).contains(&day), "day {day} out of range"); + + let time = rest.strip_suffix('Z').expect("ends with Z"); + let (hms, millis) = time.split_once('.').expect("has millis"); + let hms_parts: Vec<&str> = hms.split(':').collect(); + assert_eq!(hms_parts.len(), 3); + let h: u64 = hms_parts[0].parse().unwrap(); + let m: u64 = hms_parts[1].parse().unwrap(); + let s: u64 = hms_parts[2].parse().unwrap(); + assert!(h < 24, "hour {h} out of range"); + assert!(m < 60, "minute {m} out of range"); + assert!(s < 60, "second {s} out of range"); + assert_eq!(millis.len(), 3); + assert!(millis.parse::().unwrap() < 1000); + } + + /// Endpoint selection must use the authenticated org route only when both + /// a non-empty token and non-empty slug are present; blank values fall + /// back to the public proxy (no `/v0/orgs//telemetry`, no `Bearer `). + #[test] + fn test_resolve_telemetry_endpoint_auth_and_proxy() { + let (url, auth) = resolve_telemetry_endpoint(Some("tok"), Some("acme")); + assert!(auth, "token + slug should authenticate"); + assert!(url.contains("/v0/orgs/acme/telemetry"), "got {url}"); + assert!(!url.contains("/orgs//"), "no empty slug segment: {url}"); + + // Missing slug -> proxy. 
+ let (url, auth) = resolve_telemetry_endpoint(Some("tok"), None); + assert!(!auth); + assert!(url.ends_with("/patch/telemetry"), "got {url}"); + + // Missing token -> proxy. + let (_url, auth) = resolve_telemetry_endpoint(None, Some("acme")); + assert!(!auth); + } + + /// Regression: an empty-string token or slug must be treated as absent, + /// not spliced into the URL/header. Guards the `/v0/orgs//telemetry` + /// malformed-URL class that bit the API client. + #[test] + fn test_resolve_telemetry_endpoint_empty_strings_fall_back() { + let (url, auth) = resolve_telemetry_endpoint(Some("tok"), Some("")); + assert!(!auth, "empty slug must not authenticate"); + assert!(!url.contains("/orgs//"), "empty slug leaked into URL: {url}"); + assert!(url.ends_with("/patch/telemetry"), "got {url}"); + + let (_url, auth) = resolve_telemetry_endpoint(Some(""), Some("acme")); + assert!(!auth, "empty token must not authenticate"); + + let (_url, auth) = resolve_telemetry_endpoint(Some(""), Some("")); + assert!(!auth); + } } diff --git a/crates/socket-patch-core/src/vex/build.rs b/crates/socket-patch-core/src/vex/build.rs index 381523c..e935935 100644 --- a/crates/socket-patch-core/src/vex/build.rs +++ b/crates/socket-patch-core/src/vex/build.rs @@ -608,6 +608,83 @@ mod tests { } } + /// Every applied patch lacking a vulnerability record → `None`. + /// Distinct from `applied_patch_with_zero_vulnerabilities_emits_no_statement` + /// (which mixes a with-vuln patch in): here the *entire* applied set + /// is vuln-free, so `grouped` stays empty and the builder must + /// short-circuit to `None` rather than emit a statement-less document. 
+ #[test] + fn all_applied_patches_vuln_free_returns_none() { + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/a@1.0.0".to_string(), + record("u1", vec![]), + ); + manifest.patches.insert( + "pkg:npm/b@2.0.0".to_string(), + record("u2", vec![]), + ); + let doc = build_document( + &manifest, + &[ + "pkg:npm/a@1.0.0".to_string(), + "pkg:npm/b@2.0.0".to_string(), + ], + &opts(), + ); + assert!(doc.is_none(), "no vuln records anywhere → None, not an empty doc"); + } + + /// Order-independence: the `statements` payload is fully determined + /// by the *logical* manifest content, NOT by `HashMap` iteration + /// order. `build_is_deterministic_modulo_timestamps` only re-iterates + /// the *same* manifest (so it sees the same order twice) — it proves + /// purity, not order-independence. Here we build two manifests whose + /// patches/vulns/cves are inserted in opposite orders and assert the + /// stripped documents are byte-identical, pinning the sort-based + /// determinism the transpose relies on. + #[test] + fn output_is_independent_of_manifest_insertion_order() { + let strip = |mut d: Document| -> Document { + d.timestamp = String::new(); + for s in d.statements.iter_mut() { + s.timestamp = None; + } + d + }; + + // Manifest A: forward insertion order. + let mut a = PatchManifest::new(); + a.patches.insert( + "pkg:npm/aaa@1.0.0".to_string(), + record("u-a", vec![("GHSA-shared", vec!["CVE-1", "CVE-2"])]), + ); + a.patches.insert( + "pkg:npm/zzz@9.0.0".to_string(), + record("u-z", vec![("GHSA-shared", vec!["CVE-3"]), ("GHSA-only-z", vec!["CVE-9"])]), + ); + + // Manifest B: same logical content, reversed insertion order + // (and reversed cve order) to force a different iteration order. 
+ let mut b = PatchManifest::new(); + b.patches.insert( + "pkg:npm/zzz@9.0.0".to_string(), + record("u-z", vec![("GHSA-only-z", vec!["CVE-9"]), ("GHSA-shared", vec!["CVE-3"])]), + ); + b.patches.insert( + "pkg:npm/aaa@1.0.0".to_string(), + record("u-a", vec![("GHSA-shared", vec!["CVE-2", "CVE-1"])]), + ); + + let applied = vec![ + "pkg:npm/aaa@1.0.0".to_string(), + "pkg:npm/zzz@9.0.0".to_string(), + ]; + let da = strip(build_document(&a, &applied, &opts()).unwrap()); + let db = strip(build_document(&b, &applied, &opts()).unwrap()); + assert_eq!(da, db, "output must not depend on manifest insertion order"); + } + /// Subcomponent IDs are sorted within a merged statement. Pin /// this so downstream tools can rely on stable diff output. #[test] diff --git a/crates/socket-patch-core/src/vex/conformance_tests.rs b/crates/socket-patch-core/src/vex/conformance_tests.rs index 19d2562..9472b16 100644 --- a/crates/socket-patch-core/src/vex/conformance_tests.rs +++ b/crates/socket-patch-core/src/vex/conformance_tests.rs @@ -362,6 +362,21 @@ fn document_timestamp_is_rfc3339_z_form() { assert_eq!(&doc.timestamp[10..11], "T"); assert_eq!(&doc.timestamp[13..14], ":"); assert_eq!(&doc.timestamp[16..17], ":"); + // Separators alone are not enough — a regression that emitted + // `20X4-..` with correct punctuation would slip through. Assert the + // numeric fields actually parse into plausible calendar ranges. 
+ let year: u32 = doc.timestamp[0..4].parse().expect("year digits"); + let month: u32 = doc.timestamp[5..7].parse().expect("month digits"); + let day: u32 = doc.timestamp[8..10].parse().expect("day digits"); + let hour: u32 = doc.timestamp[11..13].parse().expect("hour digits"); + let minute: u32 = doc.timestamp[14..16].parse().expect("minute digits"); + let second: u32 = doc.timestamp[17..19].parse().expect("second digits"); + assert!((1970..3000).contains(&year), "year out of range: {year}"); + assert!((1..=12).contains(&month), "month out of range: {month}"); + assert!((1..=31).contains(&day), "day out of range: {day}"); + assert!(hour < 24, "hour out of range: {hour}"); + assert!(minute < 60, "minute out of range: {minute}"); + assert!(second < 60, "second out of range: {second}"); } // ── 8. Document revision counter ──────────────────────────────── @@ -374,6 +389,22 @@ fn newly_built_document_starts_at_version_1() { assert_eq!(doc.version, 1); } +#[test] +fn document_version_serializes_as_a_json_number_not_string() { + // Regression: the struct field is `u32`, but a future `#[serde]` + // attribute (or a switch to `String`) could emit `"version": "1"`. + // OpenVEX validators (vexctl/Grype) require an integer here, so pin + // the JSON *type* — `doc.version == 1` (test above) can't catch a + // numeric-string drift since serde would still round-trip it. + let v = serde_json::to_value(sample_doc()).unwrap(); + assert!( + v["version"].is_u64(), + "version must serialize as a JSON number, got {:?}", + v["version"] + ); + assert_eq!(v["version"].as_u64(), Some(1)); +} + // ── 9. Full round-trip with every optional field populated ────── #[test] @@ -620,3 +651,54 @@ fn statement_level_id_renders_under_at_sign() { assert!(!obj.contains_key("@id"), "absent statement id must omit @id"); assert!(!obj.contains_key("id")); } + +// ── 17. 
One statement per vulnerability id (grouping invariant) ── + +#[test] +fn no_two_statements_share_a_vulnerability_name() { + // The builder's transpose groups by vuln id, so a well-formed doc + // never emits two statements for the same vulnerability — merging + // collapses them into one (with all PURLs as subcomponents). Pin + // that at the document layer: `sample_doc` carries two *distinct* + // vulns (GHSA-aaaa / GHSA-bbbb) and `merged_doc` collapses a shared + // one (GHSA-shared) to a single statement. A grouping regression + // (e.g. keying on purl+vuln instead of vuln) would surface as a + // duplicate name here. + for doc in [sample_doc(), merged_doc()] { + let mut seen = std::collections::HashSet::new(); + for st in &doc.statements { + assert!( + seen.insert(st.vulnerability.name.clone()), + "duplicate vulnerability name {:?} across statements", + st.vulnerability.name + ); + } + } + // Non-vacuity: the two fixtures exercise both the multi-statement + // (distinct vulns) and the single-merged-statement shapes. + assert_eq!(sample_doc().statements.len(), 2); + assert_eq!(merged_doc().statements.len(), 1); +} + +// ── 18. Fixtures stay non-vacuous (guards the tests above) ────── + +#[test] +fn fixtures_carry_subcomponents_so_at_id_walks_have_teeth() { + // Several tests above (#2 `@`-prefix walk, #6 non-emptiness) only + // reach the subcomponent assertions when the fixture actually + // produces subcomponents. If a future fixture edit dropped them, + // those tests would pass vacuously. Pin the precondition directly. 
+ for st in &sample_doc().statements { + for p in &st.products { + assert!( + !p.subcomponents.is_empty(), + "sample_doc product must carry >=1 subcomponent" + ); + } + } + let merged = merged_doc(); + assert!( + merged.statements[0].products[0].subcomponents.len() >= 2, + "merged_doc must produce a product with >=2 subcomponents" + ); +} diff --git a/crates/socket-patch-core/src/vex/product.rs b/crates/socket-patch-core/src/vex/product.rs index d53a268..fa8f6bd 100644 --- a/crates/socket-patch-core/src/vex/product.rs +++ b/crates/socket-patch-core/src/vex/product.rs @@ -320,9 +320,12 @@ fn split_remote_host_path(url: &str) -> Option<(&str, &str)> { None } -/// Parse ` = ""`. Returns `None` if the key doesn't match, -/// the value isn't a double-quoted string literal, or the value is -/// empty. Inline-table forms like `version = { workspace = true }` +/// Parse ` = ""` or ` = ''`. Returns `None` if +/// the key doesn't match, the value isn't a quoted string literal, or +/// the value is empty. TOML permits BOTH double-quoted basic strings +/// and single-quoted literal strings, so we accept either delimiter and +/// terminate at the matching closing quote. Inline-table forms like +/// `version = { workspace = true }` and bare values like `version = 42` /// fail this check and are skipped by the caller. fn parse_toml_string_kv(line: &str, key: &str) -> Option { let eq = line.find('=')?; @@ -331,8 +334,13 @@ fn parse_toml_string_kv(line: &str, key: &str) -> Option { return None; } let rhs = rhs[1..].trim(); // drop the leading '=' and surrounding ws - let stripped = rhs.strip_prefix('"')?; - let end = stripped.find('"')?; + // The value must open with a string delimiter; match it to its twin. + // `'` is a literal string (no escapes), `"` a basic string — for our + // purposes (names/versions, which never contain escaped quotes) the + // first matching delimiter terminates the value in both cases. 
+ let quote = rhs.chars().next().filter(|c| *c == '"' || *c == '\'')?; + let stripped = &rhs[quote.len_utf8()..]; + let end = stripped.find(quote)?; let value = &stripped[..end]; if value.is_empty() { None @@ -1123,6 +1131,105 @@ mod tests { /// When multiple manifests are present but NONE parse, there is no /// product to surface and therefore no "using X" warning to emit /// (it would name a manifest that wasn't actually used). + // ── Regression: TOML single-quoted (literal) string values ──────── + // TOML permits `key = 'value'` (literal strings) as well as + // `key = "value"`. The scanner previously only accepted the + // double-quoted form, so a manifest written with single quotes + // (common with cargo-edit / hand-edited files) yielded None and + // product detection silently failed. Mirrors the cargo-crawler + // single-quote fix. + + /// `parse_toml_string_kv`: single-quoted literal value is accepted. + #[test] + fn parse_toml_kv_accepts_single_quoted_value() { + assert_eq!( + parse_toml_string_kv("name = 'serde'", "name").as_deref(), + Some("serde") + ); + } + + /// `parse_toml_string_kv`: empty single-quoted value → None, same as + /// the empty double-quoted case. + #[test] + fn parse_toml_kv_single_quoted_empty_is_none() { + assert!(parse_toml_string_kv("name = ''", "name").is_none()); + } + + /// `parse_toml_string_kv`: a single-quoted literal string keeps any + /// embedded double quotes verbatim (literal strings don't process + /// escapes), and a leading `'` must NOT terminate on a `"`. + #[test] + fn parse_toml_kv_single_quoted_preserves_inner_double_quote() { + assert_eq!( + parse_toml_string_kv(r#"name = 'he said "hi"'"#, "name").as_deref(), + Some(r#"he said "hi""#) + ); + } + + /// `parse_toml_string_kv`: an unterminated single-quoted value → None + /// (matches the double-quoted unterminated behaviour). 
+ #[test] + fn parse_toml_kv_single_quoted_unterminated_is_none() { + assert!(parse_toml_string_kv("name = 'no-close", "name").is_none()); + } + + /// `scan_toml_section`: a section using single-quoted name/version is + /// parsed end-to-end. + #[test] + fn scan_toml_section_handles_single_quoted_values() { + let toml = "[package]\nname = 'my-rust'\nversion = '2.0.0'\n"; + let (n, v) = scan_toml_section(toml, "package").unwrap(); + assert_eq!(n, "my-rust"); + assert_eq!(v, "2.0.0"); + } + + /// `scan_toml_section`: mixed quoting (single name, double version) + /// works — each value is matched to its own delimiter. + #[test] + fn scan_toml_section_handles_mixed_quoting() { + let toml = "[package]\nname = 'mixed'\nversion = \"3.1.4\"\n"; + let (n, v) = scan_toml_section(toml, "package").unwrap(); + assert_eq!(n, "mixed"); + assert_eq!(v, "3.1.4"); + } + + /// End-to-end: a `Cargo.toml` with single-quoted name/version still + /// produces a cargo PURL (previously returned None). + #[tokio::test] + async fn detect_cargo_toml_single_quoted() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("Cargo.toml"), + "[package]\nname = 'my-rust'\nversion = '2.0.0'\nedition = '2021'\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:cargo/my-rust@2.0.0")); + } + + /// End-to-end: a single-quoted `[project]` pyproject still produces a + /// PyPI PURL. + #[tokio::test] + async fn detect_pyproject_single_quoted() { + let dir = tempfile::tempdir().unwrap(); + tokio::fs::write( + dir.path().join("pyproject.toml"), + "[project]\nname = 'my-pylib'\nversion = '0.4.0'\n", + ) + .await + .unwrap(); + let r = detect_product(dir.path()).await; + assert_eq!(r.purl.as_deref(), Some("pkg:pypi/my-pylib@0.4.0")); + } + + /// Regression guard: a bare (unquoted) numeric value is still + /// rejected — the quote-detection must not accept non-string scalars. 
+ #[test] + fn parse_toml_kv_bare_number_still_rejected() { + assert!(parse_toml_string_kv("version = 42", "version").is_none()); + } + #[tokio::test] async fn multi_manifest_all_unparseable_emits_no_warning() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/socket-patch-core/src/vex/schema.rs b/crates/socket-patch-core/src/vex/schema.rs index 1d6405a..4773c0b 100644 --- a/crates/socket-patch-core/src/vex/schema.rs +++ b/crates/socket-patch-core/src/vex/schema.rs @@ -736,4 +736,63 @@ mod tests { ); } } + + /// Document-level multi-word key `last_updated` must stay snake_case + /// too. `Document` has no `rename_all`, so this guards against a + /// future `rename_all = "camelCase"` slipping in (ser/de would stay + /// symmetric, so the round-trip tests can't catch it). + #[test] + fn document_multiword_keys_emit_in_snake_case() { + let mut doc = empty_doc(); + doc.last_updated = Some("2024-02-01T00:00:00Z".to_string()); + let v = serde_json::to_value(&doc).unwrap(); + let obj = v.as_object().unwrap(); + assert!(obj.contains_key("last_updated"), "missing snake_case key"); + assert!( + !obj.contains_key("lastUpdated"), + "camelCase last_updated must never be emitted" + ); + } + + /// An unknown `status` literal must fail to parse even when it's + /// nested inside an otherwise-valid full document — not just when + /// the bare `Status` enum is deserialized in isolation. Pins that + /// the enum's strictness survives composition into `Statement`. 
+ #[test] + fn document_with_unknown_status_literal_is_rejected() { + let bad = r#"{ + "@context": "https://openvex.dev/ns/v0.2.0", + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": 1, + "statements": [ + { + "vulnerability": {"name": "GHSA-x"}, + "products": [{"@id": "pkg:npm/app@1.0.0"}], + "status": "totally_made_up" + } + ] + }"#; + let r: Result = serde_json::from_str(bad); + assert!(r.is_err(), "unknown nested status literal must fail to parse"); + } + + /// A document `version` supplied as a JSON string (`"1"`) must be + /// rejected — the field is `u32` and OpenVEX validators require a + /// JSON number. Guards against a producer/consumer drift where the + /// counter is quoted. + #[test] + fn document_version_as_json_string_is_rejected() { + let bad = r#"{ + "@context": "https://openvex.dev/ns/v0.2.0", + "@id": "urn:uuid:1", + "author": "Socket", + "timestamp": "2024-01-01T00:00:00Z", + "version": "1", + "statements": [] + }"#; + let r: Result = serde_json::from_str(bad); + assert!(r.is_err(), "string-typed version must fail to parse"); + } } diff --git a/crates/socket-patch-core/src/vex/time.rs b/crates/socket-patch-core/src/vex/time.rs index 096661a..150ab81 100644 --- a/crates/socket-patch-core/src/vex/time.rs +++ b/crates/socket-patch-core/src/vex/time.rs @@ -302,6 +302,48 @@ mod tests { assert!(s.ends_with('Z'), "output must still end with Z"); } + /// Plain within-month day carry (not a month/year boundary): + /// 2024-05-24 23:59:59 → 2024-05-25 00:00:00. The other boundary + /// tests only cross at month edges; this pins the day increment in + /// the middle of a month together with the day→00:00:00 time reset. 
+ #[test] + fn within_month_day_carry() { + assert_eq!( + format_unix_secs_rfc3339(1_716_595_199), + "2024-05-24T23:59:59Z" + ); + assert_eq!( + format_unix_secs_rfc3339(1_716_595_200), + "2024-05-25T00:00:00Z" + ); + } + + /// RFC 3339 UTC strings with fixed-width zero-padded fields sort + /// lexicographically in chronological order. Sweep ~50 years at a + /// ~1.7-day stride and assert each output is strictly greater than + /// the previous one. This is an oracle-free guard: any regression + /// that scrambles a field, drops zero-padding, or miscomputes a + /// carry would break monotonicity even where this file's other + /// tests don't have an exact expected string. + #[test] + fn outputs_sort_in_chronological_order() { + const STRIDE: u64 = 147_853; // ~1.71 days, coprime-ish with day/year + let mut prev = format_unix_secs_rfc3339(0); + let mut secs = STRIDE; + // 0 .. ~50 years. + while secs < 1_600_000_000 { + let cur = format_unix_secs_rfc3339(secs); + assert!( + cur > prev, + "non-monotonic at secs={secs}: {prev:?} !< {cur:?}" + ); + // Every output must keep the canonical 20-char shape. + assert_eq!(cur.len(), 20, "bad width at secs={secs}: {cur:?}"); + prev = cur; + secs += STRIDE; + } + } + /// `now_rfc3339` must produce a string that round-trips through /// our own `format_unix_secs_rfc3339` — i.e. the year/month/day /// fields are within plausible ranges (years 1970..3000, months diff --git a/crates/socket-patch-core/src/vex/verify.rs b/crates/socket-patch-core/src/vex/verify.rs index 86bcce9..2a83594 100644 --- a/crates/socket-patch-core/src/vex/verify.rs +++ b/crates/socket-patch-core/src/vex/verify.rs @@ -645,6 +645,130 @@ mod tests { assert_eq!(out.failed[0].reason, "hash_mismatch"); } + /// SECURITY: a path-escaping manifest key (`../evil.js`) must NEVER + /// be attested as applied — even when the out-of-tree file it points + /// at happens to hash to the record's `afterHash`. 
`verify_file_patch` + /// fail-closes on the `is_safe_relative_subpath` guard *before* reading + /// anything, so a poisoned manifest cannot launder an arbitrary + /// on-disk file into a `not_affected` VEX attestation. + #[tokio::test] + async fn path_escaping_key_is_never_applied() { + let root = tempfile::tempdir().unwrap(); + let pkg_dir = root.path().join("pkg"); + tokio::fs::create_dir(&pkg_dir).await.unwrap(); + + // An out-of-tree file whose content matches the after_hash we + // will claim. If the guard were missing, verification would read + // this and wrongly report the patch as applied. + let out_of_tree = b"out-of-tree-content"; + let hash = compute_git_sha256_from_bytes(out_of_tree); + tokio::fs::write(root.path().join("evil.js"), out_of_tree) + .await + .unwrap(); + + let mut files = HashMap::new(); + files.insert( + "../evil.js".to_string(), + PatchFileInfo { + before_hash: "aaaa".to_string(), + after_hash: hash, // matches the out-of-tree file + }, + ); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + PatchRecord { + uuid: "u".to_string(), + exported_at: String::new(), + files, + vulnerabilities: HashMap::new(), + description: String::new(), + license: String::new(), + tier: String::new(), + }, + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.clone()); + + let out = applied_patches(&manifest, &paths).await; + assert!( + out.applied.is_empty(), + "a path-escaping key must never be attested as applied" + ); + assert_eq!(out.failed.len(), 1); + assert_eq!(out.failed[0].reason, "file_not_found"); + } + + /// A directory sitting where the manifest expects a file is reported + /// as `file_not_found`, not applied — `verify_file_patch` rejects + /// non-regular files (the hashing step refuses to read a directory). 
+ #[tokio::test] + async fn directory_at_file_path_is_not_applied() { + let pkg_dir = tempfile::tempdir().unwrap(); + // Create a directory named "index.js" where a file is expected. + tokio::fs::create_dir(pkg_dir.path().join("index.js")) + .await + .unwrap(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/x@1.0.0".to_string(), + record_with_one_file( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + ), + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/x@1.0.0".to_string(), pkg_dir.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed.len(), 1); + assert_eq!(out.failed[0].reason, "file_not_found"); + } + + /// Two independently failing PURLs each produce exactly one + /// `FailedPatch` — the failed bucket accumulates across PURLs (one + /// failure per PURL, not collapsed or duplicated). + #[tokio::test] + async fn multiple_failing_purls_each_recorded() { + // bad1: file present at wrong content → hash_mismatch. + let bad1 = tempfile::tempdir().unwrap(); + tokio::fs::write(bad1.path().join("index.js"), b"wrong") + .await + .unwrap(); + // bad2: file absent → file_not_found. 
+ let bad2 = tempfile::tempdir().unwrap(); + + let mut manifest = PatchManifest::new(); + manifest.patches.insert( + "pkg:npm/bad1@1.0.0".to_string(), + record_with_one_file( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + ), + ); + manifest.patches.insert( + "pkg:npm/bad2@1.0.0".to_string(), + record_with_one_file( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + ), + ); + + let mut paths = HashMap::new(); + paths.insert("pkg:npm/bad1@1.0.0".to_string(), bad1.path().to_path_buf()); + paths.insert("pkg:npm/bad2@1.0.0".to_string(), bad2.path().to_path_buf()); + + let out = applied_patches(&manifest, &paths).await; + assert!(out.applied.is_empty()); + assert_eq!(out.failed.len(), 2, "one FailedPatch per failing PURL"); + + let mut reasons: Vec<&str> = out.failed.iter().map(|f| f.reason.as_str()).collect(); + reasons.sort_unstable(); + assert_eq!(reasons, vec!["file_not_found", "hash_mismatch"]); + } + /// At most ONE `FailedPatch` is recorded per PURL even when several /// files would fail — `verify_patch_record` returns on the first /// failure. Two distinct failing files, single failure recorded. diff --git a/crates/socket-patch-core/tests/binary_fetch_error_classification_e2e.rs b/crates/socket-patch-core/tests/binary_fetch_error_classification_e2e.rs new file mode 100644 index 0000000..02656ae --- /dev/null +++ b/crates/socket-patch-core/tests/binary_fetch_error_classification_e2e.rs @@ -0,0 +1,138 @@ +//! Regression: the binary transport path (`fetch_blob` / `fetch_diff` / +//! `fetch_package`, all sharing `fetch_binary`) must classify authenticated +//! 401 / 403 / 429 responses the same way the JSON path does. +//! +//! Before the fix, `fetch_binary` collapsed every non-OK/404 status into +//! `ApiError::Other`. That defeated `is_fallback_candidate` (which keys on +//! `Unauthorized` / `Forbidden`) so a stale/revoked token blocked binary +//! 
downloads instead of rerouting to the public proxy, and the tailored +//! 401/403/429 operator messages were lost. +//! +//! These tests drive the *authenticated* `fetch_binary` branch (token + org +//! slug, not public-proxy) against a mock server, so they exercise exactly the +//! endpoint that can legitimately return those statuses. + +use socket_patch_core::api::client::{ + is_fallback_candidate, ApiClient, ApiClientOptions, ApiError, +}; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, ResponseTemplate}; + +/// A 64-hex SHA-256 the validator accepts, so the request actually reaches the +/// transport (and the mock) rather than short-circuiting on bad input. +const VALID_HASH: &str = "abcdef0123456789abcdef0123456789abcdef0123456789abcdef0123456789"; + +fn authed_client(api_url: &str) -> ApiClient { + ApiClient::new(ApiClientOptions { + api_url: api_url.to_string(), + api_token: Some("sktsec_token_placeholder_api".to_string()), + use_public_proxy: false, + org_slug: Some("my-org".to_string()), + }) +} + +#[tokio::test] +async fn fetch_blob_401_classifies_as_unauthorized_and_is_fallback_candidate() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/my-org/patches/blob/{VALID_HASH}"))) + .respond_with(ResponseTemplate::new(401)) + .mount(&server) + .await; + + let client = authed_client(&server.uri()); + let err = client + .fetch_blob(VALID_HASH) + .await + .expect_err("401 must surface as an error"); + + assert!( + matches!(err, ApiError::Unauthorized(_)), + "binary 401 must be Unauthorized, not Other; got: {err:?}" + ); + assert!( + is_fallback_candidate(&err), + "a binary 401 must be eligible for the auth→proxy fallback" + ); +} + +#[tokio::test] +async fn fetch_blob_403_classifies_as_forbidden_and_is_fallback_candidate() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/my-org/patches/blob/{VALID_HASH}"))) + 
.respond_with(ResponseTemplate::new(403)) + .mount(&server) + .await; + + let client = authed_client(&server.uri()); + let err = client + .fetch_blob(VALID_HASH) + .await + .expect_err("403 must surface as an error"); + + assert!( + matches!(err, ApiError::Forbidden(_)), + "binary 403 must be Forbidden, not Other; got: {err:?}" + ); + assert!( + is_fallback_candidate(&err), + "a binary 403 must be eligible for the auth→proxy fallback" + ); + // Authenticated path → org-access wording, not the proxy paid-subscriber hint. + assert!( + err.to_string().contains("organization"), + "authenticated 403 must carry the org-access message; got: {err}" + ); +} + +#[tokio::test] +async fn fetch_blob_429_classifies_as_rate_limited_and_not_fallback() { + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/my-org/patches/blob/{VALID_HASH}"))) + .respond_with(ResponseTemplate::new(429)) + .mount(&server) + .await; + + let client = authed_client(&server.uri()); + let err = client + .fetch_blob(VALID_HASH) + .await + .expect_err("429 must surface as an error"); + + assert!( + matches!(err, ApiError::RateLimited(_)), + "binary 429 must be RateLimited, not Other; got: {err:?}" + ); + // Rate limits surface as-is — never rerouted to the proxy. + assert!(!is_fallback_candidate(&err)); +} + +#[tokio::test] +async fn fetch_blob_500_still_classifies_as_other() { + // Genuine server errors must keep flowing through to `Other` with the + // status code embedded — the fix must not over-classify. 
+ let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path(format!("/v0/orgs/my-org/patches/blob/{VALID_HASH}"))) + .respond_with(ResponseTemplate::new(500).set_body_string("boom")) + .mount(&server) + .await; + + let client = authed_client(&server.uri()); + let err = client + .fetch_blob(VALID_HASH) + .await + .expect_err("500 must surface as an error"); + + match &err { + ApiError::Other(msg) => { + assert!(msg.contains("500"), "Other must embed the status; got: {msg}"); + assert!(msg.contains("boom"), "Other must embed the body; got: {msg}"); + } + other => panic!("500 must be Other; got: {other:?}"), + } + // An unclassified server error is never rerouted to the proxy. + assert!(!is_fallback_candidate(&err)); +} diff --git a/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs b/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs index 1ac8d1f..4906ade 100644 --- a/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs +++ b/crates/socket-patch-core/tests/blob_fetcher_edges_e2e.rs @@ -458,6 +458,270 @@ async fn fetch_missing_blobs_rejects_content_hash_mismatch_and_writes_nothing() ); } +/// `fetch_blob` returning `Ok(None)` (a 404 from the server) is recorded +/// as a failure with the "not found" message, and writes no file. The +/// closed-port tests can only reach the transport-error arm, never this +/// "server answered, but with 404" arm. 
+#[tokio::test] +async fn fetch_missing_blobs_records_404_as_not_found() { + let hash = "a".repeat(64); + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_matcher(format!("/patch/blob/{hash}"))) + .respond_with(ResponseTemplate::new(404)) + .expect(1) + .mount(&server) + .await; + + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path().join("blobs"); + std::fs::create_dir(&blobs).unwrap(); + let manifest = manifest_with_after_hashes(&[&hash]); + let client = proxy_client(&server.uri()); + + let result = fetch_missing_blobs(&manifest, &blobs, &client, None).await; + assert_eq!(result.total, 1); + assert_eq!(result.downloaded, 0); + assert_eq!(result.failed, 1); + assert!(result.results[0] + .error + .as_deref() + .unwrap() + .contains("not found")); + assert!(!blobs.join(&hash).exists(), "a 404 must not leave a file"); + assert_eq!(dir_entry_count(&blobs), 0); +} + +/// A manifest whose `afterHash` is uppercase hex must still be accepted +/// when the server serves byte-for-byte correct content (whose computed +/// git-sha256 is lowercase). Exercises the case-insensitive verification +/// end to end — a case-sensitive comparison would wrongly reject it. +#[tokio::test] +async fn fetch_missing_blobs_accepts_uppercase_manifest_hash() { + let content = b"content addressed by an uppercase manifest hash"; + let hash_lower = compute_git_sha256_from_bytes(content); + let hash_upper = hash_lower.to_ascii_uppercase(); + assert_ne!(hash_lower, hash_upper, "fixture: hash must have hex letters"); + + let server = MockServer::start().await; + // The request path carries the manifest's (uppercase) hash verbatim. 
+ Mock::given(method("GET")) + .and(path_matcher(format!("/patch/blob/{hash_upper}"))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(content.to_vec())) + .expect(1) + .mount(&server) + .await; + + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path().join("blobs"); + std::fs::create_dir(&blobs).unwrap(); + let manifest = manifest_with_after_hashes(&[&hash_upper]); + let client = proxy_client(&server.uri()); + + let result = fetch_missing_blobs(&manifest, &blobs, &client, None).await; + assert_eq!(result.downloaded, 1, "uppercase-hash content must be accepted"); + assert_eq!(result.failed, 0); + assert_eq!(std::fs::read(blobs.join(&hash_upper)).unwrap(), content); +} + +// ── Archive (diff / package) download path ────────────────────────── +// +// `fetch_missing_archives_inner` (driven via `fetch_missing_sources` in +// Diff / Package mode) is otherwise only reached on the closed-port +// transport-error arm. These drive the success-write, 404, and +// progress-callback arms against a mock proxy. Archives are uuid-named +// and have no content hash, so the only integrity guarantee is the atomic +// write — assert no staging litter survives. + +/// Build a manifest carrying a set of patch UUIDs (each as its own PURL). 
+fn manifest_with_uuids(uuids: &[&str]) -> PatchManifest { + let mut patches = HashMap::new(); + for (i, uuid) in uuids.iter().enumerate() { + patches.insert( + format!("pkg:npm/test-{i}@1.0.0"), + PatchRecord { + uuid: (*uuid).to_string(), + exported_at: "2024-01-01T00:00:00Z".to_string(), + files: HashMap::new(), + vulnerabilities: HashMap::new(), + description: "test".to_string(), + license: "MIT".to_string(), + tier: "free".to_string(), + }, + ); + } + PatchManifest { patches, setup: None } +} + +#[tokio::test] +async fn fetch_missing_sources_diff_downloads_and_writes_archive() { + let uuid = "11111111-1111-4111-8111-111111111111"; + let archive_bytes = b"\x1f\x8b\x08 fake-but-opaque tar.gz payload"; + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_matcher(format!("/patch/diff/{uuid}"))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(archive_bytes.to_vec())) + .expect(1) + .mount(&server) + .await; + + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path().join("blobs"); + let diffs = tmp.path().join("diffs"); + std::fs::create_dir(&blobs).unwrap(); + std::fs::create_dir(&diffs).unwrap(); + let sources = PatchSources { + blobs_path: &blobs, + packages_path: None, + diffs_path: Some(&diffs), + }; + let manifest = manifest_with_uuids(&[uuid]); + let client = proxy_client(&server.uri()); + + let result = + fetch_missing_sources(&manifest, &sources, DownloadMode::Diff, &client, None).await; + assert_eq!(result.total, 1); + assert_eq!(result.downloaded, 1, "diff archive must be downloaded"); + assert_eq!(result.failed, 0); + // The result's `hash` field carries the UUID for archive modes. + assert_eq!(result.results[0].hash, uuid); + // Written under `.tar.gz`, byte-for-byte, with no staging litter. 
+ assert_eq!( + std::fs::read(diffs.join(format!("{uuid}.tar.gz"))).unwrap(), + archive_bytes + ); + let names: Vec = std::fs::read_dir(&diffs) + .unwrap() + .map(|e| e.unwrap().file_name().to_string_lossy().into_owned()) + .collect(); + assert_eq!(names, vec![format!("{uuid}.tar.gz")], "no temp files: {names:?}"); + // A re-run finds the archive present and short-circuits (no second GET; + // the mock's `.expect(1)` would trip on a second request). + let again = + fetch_missing_sources(&manifest, &sources, DownloadMode::Diff, &client, None).await; + assert_eq!(again.total, 0, "already-present archive → nothing to do"); +} + +#[tokio::test] +async fn fetch_missing_sources_package_downloads_via_package_endpoint() { + // Distinct from the diff test: Package mode must hit `/patch/package/` + // and write into the packages dir, proving the kind→endpoint→dir wiring + // isn't crossed. + let uuid = "22222222-2222-4222-8222-222222222222"; + let archive_bytes = b"package archive bytes"; + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_matcher(format!("/patch/package/{uuid}"))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(archive_bytes.to_vec())) + .expect(1) + .mount(&server) + .await; + + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path().join("blobs"); + let packages = tmp.path().join("packages"); + std::fs::create_dir(&blobs).unwrap(); + std::fs::create_dir(&packages).unwrap(); + let sources = PatchSources { + blobs_path: &blobs, + packages_path: Some(&packages), + diffs_path: None, + }; + let manifest = manifest_with_uuids(&[uuid]); + let client = proxy_client(&server.uri()); + + let result = + fetch_missing_sources(&manifest, &sources, DownloadMode::Package, &client, None).await; + assert_eq!(result.downloaded, 1); + assert_eq!(result.failed, 0); + assert_eq!( + std::fs::read(packages.join(format!("{uuid}.tar.gz"))).unwrap(), + archive_bytes + ); +} + +#[tokio::test] +async fn 
fetch_missing_sources_diff_404_is_failure_with_kind_message() { + let uuid = "33333333-3333-4333-8333-333333333333"; + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_matcher(format!("/patch/diff/{uuid}"))) + .respond_with(ResponseTemplate::new(404)) + .expect(1) + .mount(&server) + .await; + + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path().join("blobs"); + let diffs = tmp.path().join("diffs"); + std::fs::create_dir(&blobs).unwrap(); + std::fs::create_dir(&diffs).unwrap(); + let sources = PatchSources { + blobs_path: &blobs, + packages_path: None, + diffs_path: Some(&diffs), + }; + let manifest = manifest_with_uuids(&[uuid]); + let client = proxy_client(&server.uri()); + + let result = + fetch_missing_sources(&manifest, &sources, DownloadMode::Diff, &client, None).await; + assert_eq!(result.total, 1); + assert_eq!(result.downloaded, 0); + assert_eq!(result.failed, 1); + let err = result.results[0].error.as_deref().unwrap(); + assert!(err.contains("Diff"), "message should name the kind: {err}"); + assert!(err.contains("not found"), "message should say not found: {err}"); + // Nothing written for a 404. + assert_eq!(dir_entry_count(&diffs), 0); +} + +/// The progress callback fires once per downloaded archive with a 1-based +/// index and the correct total. 
+#[tokio::test] +async fn fetch_missing_sources_diff_invokes_progress_callback() { + use std::sync::Mutex; + let uuid = "44444444-4444-4444-8444-444444444444"; + + let server = MockServer::start().await; + Mock::given(method("GET")) + .and(path_matcher(format!("/patch/diff/{uuid}"))) + .respond_with(ResponseTemplate::new(200).set_body_bytes(b"x".to_vec())) + .mount(&server) + .await; + + let tmp = tempfile::tempdir().unwrap(); + let blobs = tmp.path().join("blobs"); + let diffs = tmp.path().join("diffs"); + std::fs::create_dir(&blobs).unwrap(); + std::fs::create_dir(&diffs).unwrap(); + let sources = PatchSources { + blobs_path: &blobs, + packages_path: None, + diffs_path: Some(&diffs), + }; + let manifest = manifest_with_uuids(&[uuid]); + let client = proxy_client(&server.uri()); + + let calls: std::sync::Arc>> = + std::sync::Arc::new(Mutex::new(Vec::new())); + let calls_cb = calls.clone(); + let cb: socket_patch_core::api::blob_fetcher::OnProgress = + Box::new(move |h: &str, idx: usize, total: usize| { + calls_cb.lock().unwrap().push((h.to_string(), idx, total)); + }); + + let _ = + fetch_missing_sources(&manifest, &sources, DownloadMode::Diff, &client, Some(&cb)).await; + + let recorded = calls.lock().unwrap().clone(); + assert_eq!(recorded, vec![(uuid.to_string(), 1, 1)]); +} + /// `get_missing_blobs` against a manifest that lists no patches /// returns the empty set. 
Covers the early-return inside the /// function — the existing apply tests always stage at least one diff --git a/crates/socket-patch-core/tests/crawler_npm_e2e.rs b/crates/socket-patch-core/tests/crawler_npm_e2e.rs index e37f07c..0e54644 100644 --- a/crates/socket-patch-core/tests/crawler_npm_e2e.rs +++ b/crates/socket-patch-core/tests/crawler_npm_e2e.rs @@ -509,6 +509,48 @@ async fn find_by_purls_resolves_qualified_purl_keyed_by_input() { assert_eq!(pkg.purl, qualified); } +/// Regression: a qualifier value that itself contains an `@` +/// (`?vcs_url=git@github.com:...`) must NOT corrupt version parsing. +/// `parse_purl_components` strips the `?qualifier` *before* it calls +/// `rfind('@')` to split name from version. If those two steps were +/// reordered, `rfind('@')` would latch onto the `@` inside `git@github` +/// and parse a bogus version (`github.com:...`), so the package would +/// fail to match its on-disk `1.0.0` and silently drop out of +/// apply/rollback. The existing qualified-PURL tests only use +/// qualifiers WITHOUT an `@`, so they cannot catch a strip-order +/// regression — this pins it. 
+#[tokio::test] +async fn find_by_purls_qualifier_containing_at_does_not_corrupt_version() { + let tmp = tempfile::tempdir().unwrap(); + let nm = tmp.path().join("node_modules"); + stage_npm_pkg(&nm, "foo", "1.0.0").await; + stage_npm_pkg(&nm, "@types/node", "20.0.0").await; + + let crawler = NpmCrawler; + let unscoped_q = "pkg:npm/foo@1.0.0?vcs_url=git@github.com:x/y.git".to_string(); + let scoped_q = "pkg:npm/@types/node@20.0.0?maintainer=a@b.com".to_string(); + let result = crawler + .find_by_purls(&nm, &[unscoped_q.clone(), scoped_q.clone()]) + .await + .unwrap(); + + assert_eq!(result.len(), 2, "both @-bearing qualifiers must resolve"); + let foo = result + .get(&unscoped_q) + .expect("@-in-qualifier unscoped PURL must resolve to foo@1.0.0"); + assert_eq!(foo.name, "foo"); + assert_eq!(foo.version, "1.0.0"); + assert_eq!(foo.purl, unscoped_q); + + let node = result + .get(&scoped_q) + .expect("@-in-qualifier scoped PURL must resolve to @types/node@20.0.0"); + assert_eq!(node.namespace.as_deref(), Some("@types")); + assert_eq!(node.name, "node"); + assert_eq!(node.version, "20.0.0"); + assert_eq!(node.purl, scoped_q); +} + /// PURL with no `@` (no version separator) must be rejected via the /// `rfind('@')?` arm (line 707). #[tokio::test] diff --git a/crates/socket-patch-core/tests/diff_e2e.rs b/crates/socket-patch-core/tests/diff_e2e.rs index d54c4ed..96e6235 100644 --- a/crates/socket-patch-core/tests/diff_e2e.rs +++ b/crates/socket-patch-core/tests/diff_e2e.rs @@ -159,3 +159,28 @@ fn forged_max_u64_header_is_safe() { .expect("clamped apply must succeed on a max-size forged hint"); assert_eq!(result, after, "max-size forged hint must not corrupt output"); } + +/// Security regression (mirrors the lib's +/// `test_apply_diff_forged_negative_block_length_does_not_panic`): the +/// compressed control/diff block lengths in header bytes 8..24 are decoded +/// with a sign-magnitude scheme. 
A field with the sign bit set decodes to a +/// "negative" length whose `as u64` is enormous; qbsdiff's only guard +/// (`32 + csize + dsize > patch.len()`) uses *wrapping* arithmetic, so the sum +/// wraps back in-bounds and the subsequent `split_at` panics on +/// attacker-controlled input. `apply_diff` must reject it as a plain error. +#[test] +fn forged_negative_block_length_does_not_panic() { + let before = b"the quick brown fox jumps over the lazy dog"; + let after = b"the quick brown cat jumps over the lazy dog"; + let mut forged = make_delta(before, after); + assert!(forged.len() >= 32, "delta must contain a full header"); + // Sign-magnitude encoding of a negative control-block length (bytes 8..16). + let neg: u64 = 16u64 | (1u64 << 63); + forged[8..16].copy_from_slice(&neg.to_le_bytes()); + + let result = apply_diff(before, &forged); + assert!( + result.is_err(), + "a forged negative block length must error, not panic the process" + ); +} diff --git a/crates/socket-patch-guard/src/lib.rs b/crates/socket-patch-guard/src/lib.rs index 272305f..cfec2d5 100644 --- a/crates/socket-patch-guard/src/lib.rs +++ b/crates/socket-patch-guard/src/lib.rs @@ -69,6 +69,52 @@ mod tests { .any(|k| k == "cargo:rerun-if-changed=/proj/.socket/manifest.json")); } + /// The heal rewrites `cargo-patches/`, so the guard must NOT watch its own + /// output (that would re-run on every build); and it watches the resolved + /// `Cargo.lock`, not `Cargo.toml`. Pins these against an over-eager edit. 
+ #[test] + fn rerun_keys_watch_inputs_not_outputs() { + let keys = rerun_keys("/proj"); + assert!( + !keys.iter().any(|k| k.contains("cargo-patches")), + "must not watch the heal's own output dir (would loop): {keys:?}" + ); + assert!( + !keys.iter().any(|k| k.ends_with("/Cargo.toml")), + "watches the resolved lockfile, not the manifest: {keys:?}" + ); + } + + #[test] + fn rerun_keys_also_watches_bin_env_and_has_no_extras() { + // The guard reads SOCKET_PATCH_BIN too, so a change to it must re-run the + // probe. The original test only `any`-checked 3 of the 4 keys, so dropping + // this one would have slipped through — pin it explicitly + pin the count. + let keys = rerun_keys("/proj"); + assert!( + keys.iter() + .any(|k| k == "cargo:rerun-if-env-changed=SOCKET_PATCH_BIN"), + "{keys:?}" + ); + assert_eq!(keys.len(), 4, "unexpected rerun key set: {keys:?}"); + } + + #[test] + fn check_is_read_only_and_apply_heals() { + // The single safety-critical difference between the probe and the heal is + // `--check` (read-only audit) vs no `--check` (mutating regenerate). Pin + // that the probe carries it and the heal does NOT — swapping them would + // either never heal or mutate during the read-only verify. + assert!(check_args("/proj").iter().any(|a| a == "--check")); + assert!(!apply_args("/proj").iter().any(|a| a == "--check")); + // Both must stay cargo-scoped and offline regardless. + for args in [check_args("/proj"), apply_args("/proj")] { + assert!(args.iter().any(|a| a == "--offline"), "{args:?}"); + assert!(args.windows(2).any(|w| w == ["--ecosystems", "cargo"]), "{args:?}"); + assert!(args.windows(2).any(|w| w == ["--cwd", "/proj"]), "{args:?}"); + } + } + #[test] fn apply_args_are_offline_cargo_scoped() { assert_eq!( @@ -100,6 +146,18 @@ mod tests { ); } + /// The probe and heal must differ by EXACTLY `--check` — same ecosystem + /// scope, offline flag, and cwd. 
Complements `check_is_read_only_and_apply_heals` + /// (which checks presence) by pinning that nothing else diverges. + #[test] + fn probe_and_heal_differ_only_by_check() { + let probe_without_check: Vec = check_args("/proj") + .into_iter() + .filter(|a| a != "--check") + .collect(); + assert_eq!(probe_without_check, apply_args("/proj")); + } + // ── single fail-closed mode: decide_initial ────────────────────── #[test] fn decide_initial_in_sync_proceeds() { @@ -139,4 +197,44 @@ mod tests { let m = fail_message_after_heal(&Probe::ProbeError("boom".to_string()), ""); assert!(m.contains("could not run") && m.contains("boom"), "{m}"); } + + #[test] + fn probe_error_message_is_consistent_initial_and_after_heal() { + // A CLI that can't run must produce the SAME diagnostic whether it fails + // the initial probe or the re-probe after a heal — both route through the + // one helper. Guards against the two messages drifting apart. + let initial = match decide_initial(&Probe::ProbeError("zap".to_string())) { + Action::Fail(m) => m, + other => panic!("probe error must fail-closed, got {other:?}"), + }; + let after_heal = fail_message_after_heal(&Probe::ProbeError("zap".to_string()), ""); + assert_eq!(initial, after_heal); + } + + #[test] + fn after_heal_drift_omits_detail_when_blank() { + // A blank / whitespace-only detail must not produce a dangling "detail:" + // line with nothing after it. + let m = fail_message_after_heal(&Probe::Drift, " \n "); + assert!(m.contains("could NOT be reconciled"), "{m}"); + assert!(!m.contains("detail:"), "blank detail must be dropped: {m}"); + } + + #[test] + fn after_heal_in_sync_ignores_detail() { + // The "regenerated, re-run" path describes a successful heal; probe output + // (relevant only to the unrecoverable Drift case) must not leak into it. 
+ let m = fail_message_after_heal(&Probe::InSync, "stale copy of foo@1.2.3"); + assert!(!m.contains("stale copy of foo@1.2.3"), "{m}"); + } + + #[test] + fn after_heal_drift_trims_surrounding_whitespace_from_detail() { + // Non-blank detail is surfaced on its own line, trimmed — no trailing + // blank after "detail:" and no leading indentation from the CLI output. + let m = fail_message_after_heal(&Probe::Drift, " cargo: drift on serde \n"); + assert!(m.contains("\n detail: cargo: drift on serde"), "{m}"); + assert!(!m.contains("detail: "), "leading whitespace must be trimmed: {m}"); + assert!(!m.ends_with(' ') && !m.ends_with('\n'), "trailing whitespace: {m:?}"); + } } From dfe7bbcded491a40e670a6bf16fe5d359a72b5bb Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Mon, 8 Jun 2026 12:52:59 -0400 Subject: [PATCH 2/4] fix(ci): green the test/clippy jobs broken by the sweep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three CI-blocking fixes surfaced by the failing checks (clippy + test on all OSes + coverage + test-release): - clippy: `map_or(true, ..)` -> `is_none_or(..)` in manifest/schema.rs (clippy 1.93 `unnecessary_map_or`, denied via `-D warnings`). - rollback_dispatch_branch_golang: the sweep added a project-local go redirect rollback backend. In local mode go rolls back by dropping the `replace` redirect and leaves the module cache pristine, so it never restores cache bytes (verified: local mode reports success without touching the file; global mode genuinely restores). The dispatch test's byte-restore contract only holds on the in-place/global path — the go analog of the cargo test's `vendor/` in-place layout — so drive that one fixture in `--global` mode. - output_helpers_e2e: the sweep's severity-colour-inversion fix flipped critical->bright-red(91)/high->red(31) and updated the in-crate unit tests, but this integration file still asserted the old swapped codes. 
cargo test is fail-fast, so it aborted on the golang failure before ever reaching this binary in CI; surfaced via a local `--no-fail-fast` run. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../tests/ecosystem_dispatch_e2e.rs | 43 +++++++++++++++---- .../tests/output_helpers_e2e.rs | 16 ++++--- .../socket-patch-core/src/manifest/schema.rs | 2 +- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs b/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs index aeb84b4..66ed204 100644 --- a/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs +++ b/crates/socket-patch-cli/tests/ecosystem_dispatch_e2e.rs @@ -428,9 +428,22 @@ struct RollbackFixture { verify_file: PathBuf, /// Extra env vars the crawler needs (cache locations, experimental gates). envs: Vec<(String, String)>, -} - -fn run_rollback(cwd: &Path, ecosystem: &str, envs: &[(String, String)]) -> (i32, Value) { + /// Run the rollback in `--global` mode. Required for ecosystems whose + /// project-local backend is a *redirect* (golang): in local mode the + /// patched bytes live in a project-local copy and the module cache is left + /// pristine, so rollback drops the redirect rather than restoring the cache + /// file in place. Byte-restore — the contract `assert_rollback_restored` + /// verifies — only happens on the global/in-place path (the analog of + /// cargo's `vendor/` in-place layout). Defaults to local mode. 
+ global: bool, +} + +fn run_rollback( + cwd: &Path, + ecosystem: &str, + global: bool, + envs: &[(String, String)], +) -> (i32, Value) { let mut cmd = Command::new(binary()); cmd.args([ "rollback", @@ -439,9 +452,11 @@ fn run_rollback(cwd: &Path, ecosystem: &str, envs: &[(String, String)]) -> (i32, "--ecosystems", ecosystem, "--silent", - ]) - .current_dir(cwd) - .env_remove("SOCKET_API_TOKEN"); + ]); + if global { + cmd.arg("--global"); + } + cmd.current_dir(cwd).env_remove("SOCKET_API_TOKEN"); for (k, v) in envs { cmd.env(k, v); } @@ -455,7 +470,7 @@ fn run_rollback(cwd: &Path, ecosystem: &str, envs: &[(String, String)]) -> (i32, /// Drive a genuine rollback for `fixture` and assert it discovered the /// package, restored the file, and reported success for the exact PURL. fn assert_rollback_restored(cwd: &Path, ecosystem: &str, fixture: &RollbackFixture) { - let (code, env) = run_rollback(cwd, ecosystem, &fixture.envs); + let (code, env) = run_rollback(cwd, ecosystem, fixture.global, &fixture.envs); assert_eq!( code, 0, "rollback --ecosystems={ecosystem}: expected exit 0; env={env}" @@ -524,7 +539,7 @@ fn assert_rollback_restored(cwd: &Path, ecosystem: &str, fixture: &RollbackFixtu /// file is left untouched (still PATCHED). Mirrors `assert_apply_not_dispatched` /// for the separate `find_packages_for_rollback` code path. 
fn assert_rollback_not_dispatched(cwd: &Path, ecosystem: &str, fixture: &RollbackFixture) { - let (code, env) = run_rollback(cwd, ecosystem, &fixture.envs); + let (code, env) = run_rollback(cwd, ecosystem, fixture.global, &fixture.envs); assert_eq!( code, 0, "rollback --ecosystems={ecosystem}: out-of-scope rollback should be a clean no-op (exit 0); env={env}" @@ -575,6 +590,7 @@ fn fixture_npm(root: &Path) -> RollbackFixture { purl: purl.to_string(), verify_file, envs: vec![], + global: false, } } @@ -605,6 +621,7 @@ fn fixture_pypi(root: &Path) -> RollbackFixture { purl: purl.to_string(), verify_file, envs: vec![], + global: false, } } @@ -626,6 +643,7 @@ fn fixture_gem(root: &Path) -> RollbackFixture { purl: purl.to_string(), verify_file, envs: vec![], + global: false, } } @@ -694,6 +712,7 @@ fn rollback_dispatch_branch_cargo() { purl: purl.to_string(), verify_file, envs: vec![], + global: false, }; assert_rollback_restored(root, "cargo", &fixture); } @@ -716,6 +735,11 @@ fn rollback_dispatch_branch_golang() { purl: purl.to_string(), verify_file, envs: vec![("GOMODCACHE".to_string(), cache.display().to_string())], + // Local-go rolls back by dropping the project-local `replace` redirect + // and leaves the module cache pristine, so it never restores cache + // bytes. Drive the global/in-place path to exercise byte-restore — the + // go analog of the cargo test's `vendor/` in-place layout. 
+ global: true, }; assert_rollback_restored(root, "golang", &fixture); } @@ -750,6 +774,7 @@ fn rollback_dispatch_branch_maven() { ("MAVEN_REPO_LOCAL".to_string(), repo.display().to_string()), ("SOCKET_EXPERIMENTAL_MAVEN".to_string(), "1".to_string()), ], + global: false, }; assert_rollback_restored(root, "maven", &fixture); } @@ -778,6 +803,7 @@ fn rollback_dispatch_branch_composer() { purl: purl.to_string(), verify_file, envs: vec![], + global: false, }; assert_rollback_restored(root, "composer", &fixture); } @@ -805,6 +831,7 @@ fn rollback_dispatch_branch_nuget() { purl: purl.to_string(), verify_file, envs: vec![("SOCKET_EXPERIMENTAL_NUGET".to_string(), "1".to_string())], + global: false, }; assert_rollback_restored(root, "nuget", &fixture); } diff --git a/crates/socket-patch-cli/tests/output_helpers_e2e.rs b/crates/socket-patch-cli/tests/output_helpers_e2e.rs index 7e07c12..a68677f 100644 --- a/crates/socket-patch-cli/tests/output_helpers_e2e.rs +++ b/crates/socket-patch-cli/tests/output_helpers_e2e.rs @@ -18,14 +18,16 @@ fn format_severity_no_color_returns_input_verbatim() { } #[test] -fn format_severity_critical_wraps_in_red() { - // Exact envelope: red open + verbatim text + reset, nothing else. - assert_eq!(format_severity("critical", true), "\x1b[31mcritical\x1b[0m"); +fn format_severity_critical_wraps_in_bright_red() { + // Exact envelope: bright-red open + verbatim text + reset, nothing else. + // Critical is the most prominent colour (bright red, 91) — strictly more + // prominent than high (plain red, 31). 
+ assert_eq!(format_severity("critical", true), "\x1b[91mcritical\x1b[0m"); } #[test] -fn format_severity_high_wraps_in_bright_red() { - assert_eq!(format_severity("high", true), "\x1b[91mhigh\x1b[0m"); +fn format_severity_high_wraps_in_red() { + assert_eq!(format_severity("high", true), "\x1b[31mhigh\x1b[0m"); } #[test] @@ -52,8 +54,8 @@ fn format_severity_case_insensitive() { // text must be the caller's verbatim, original-case string (production // wraps `{s}`, not the lowercased key). Exact-equality catches both a // miscoloured branch and any impl that lowercases the rendered text. - assert_eq!(format_severity("CRITICAL", true), "\x1b[31mCRITICAL\x1b[0m"); - assert_eq!(format_severity("High", true), "\x1b[91mHigh\x1b[0m"); + assert_eq!(format_severity("CRITICAL", true), "\x1b[91mCRITICAL\x1b[0m"); + assert_eq!(format_severity("High", true), "\x1b[31mHigh\x1b[0m"); assert_eq!(format_severity("MEDIUM", true), "\x1b[33mMEDIUM\x1b[0m"); assert_eq!(format_severity("Low", true), "\x1b[36mLow\x1b[0m"); } diff --git a/crates/socket-patch-core/src/manifest/schema.rs b/crates/socket-patch-core/src/manifest/schema.rs index afb57cd..bfcc128 100644 --- a/crates/socket-patch-core/src/manifest/schema.rs +++ b/crates/socket-patch-core/src/manifest/schema.rs @@ -83,7 +83,7 @@ impl SetupConfig { /// on-disk `.socket/manifest.json` byte-stable regardless of which in-memory /// representation produced it. fn setup_is_absent(setup: &Option) -> bool { - setup.as_ref().map_or(true, SetupConfig::is_empty) + setup.as_ref().is_none_or(SetupConfig::is_empty) } /// The top-level patch manifest structure. 
From d7deed9044255ef42e3ce28a216c424aef2c4cf2 Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Mon, 8 Jun 2026 13:23:29 -0400 Subject: [PATCH 3/4] fix(test): make manifest-unreadable list test cross-platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `manifest_path_through_regular_file_reports_unreadable_via_binary` nested the manifest under a regular file (`<file>/manifest.json`), assuming the OS rejects the read with a non-absence error. That holds on Unix (ENOTDIR) but NOT on Windows, where traversing through a file is `ERROR_PATH_NOT_FOUND` (NotFound) — legitimately classified as `manifest_not_found`, failing the assertion on windows-latest. Point the manifest path at a directory instead: reading it fails with a non-NotFound error on every platform (Unix `IsADirectory`, Windows `PermissionDenied`), so the "present-but-unreadable → manifest_unreadable" contract is exercised portably. Renamed accordingly. This was masked on the first push: cargo test is fail-fast and the Windows run aborted at this binary (sorts before the now-fixed golang/output tests). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../socket-patch-cli/tests/cli_parse_list.rs | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/crates/socket-patch-cli/tests/cli_parse_list.rs b/crates/socket-patch-cli/tests/cli_parse_list.rs index 5f2efc2..f04ed0e 100644 --- a/crates/socket-patch-cli/tests/cli_parse_list.rs +++ b/crates/socket-patch-cli/tests/cli_parse_list.rs @@ -392,21 +392,23 @@ fn missing_manifest_under_valid_cwd_reports_manifest_not_found_via_binary() { } #[test] -fn manifest_path_through_regular_file_reports_unreadable_via_binary() { - // A genuine I/O error reaching the manifest must be `manifest_unreadable`, - // never `manifest_not_found`. Here the manifest path is nested *under a - // regular file* (`/manifest.json`), so the OS rejects the read with - // ENOTDIR — an I/O error, not file-absence.
+fn manifest_path_is_existing_directory_reports_unreadable_via_binary() { + // A genuine I/O error reaching an *existing* path must be + // `manifest_unreadable`, never `manifest_not_found`. Here the manifest path + // points at a directory, so the read fails with a non-absence I/O error + // (Unix `IsADirectory` / Windows `PermissionDenied`) — present, but + // unreadable. (We use a directory rather than a `<regular-file>/manifest` + // path because the latter is `ENOTDIR` on Unix but a NotFound-class error + // on Windows, where traversing through a file is legitimately "path not + // found"; a directory yields a non-NotFound error on every platform.) // // Regression: `run()` used to stat the path with `tokio::fs::metadata` - // first and treat ANY stat failure as `manifest_not_found`, so this case - // (and an unreadable parent dir, etc.) was misreported as a missing file. - // Removing that pre-check lets `read_manifest`'s I/O error classify it - // correctly. + // first and treat ANY stat failure as `manifest_not_found`, masking real + // I/O errors. Removing that pre-check lets `read_manifest`'s I/O error + // classify it correctly. let tmp = tempfile::tempdir().unwrap(); - let blocker = tmp.path().join("not-a-dir"); - std::fs::write(&blocker, b"i am a regular file").unwrap(); - let manifest_path = blocker.join("manifest.json"); + let manifest_path = tmp.path().join("manifest-is-a-dir"); + std::fs::create_dir(&manifest_path).unwrap(); let out = run_list_binary( tmp.path(), From f968628b91475147f8de4fc40915d4c929f7280b Mon Sep 17 00:00:00 2001 From: Mikola Lysenko Date: Mon, 8 Jun 2026 13:42:34 -0400 Subject: [PATCH 4/4] fix(test): windows path separator in nested-workspace find test `test_find_recurses_into_nested_workspace` matched walked filesystem paths with `str::ends_with("packages/inner/package.json")`, which fails on Windows where `WalkDir` yields `\`-separated paths. 
Match on the `PathBuf` via `Path::ends_with`, which compares whole components and accepts `/` in the pattern on every platform (Windows treats both `/` and `\` as separators). This is the only walked-real-path assertion that used a forward-slash string literal; the CLI/manifest path assertions elsewhere operate on forward-slash-normalized manifest keys (echoed verbatim) and are unaffected. Surfaced by fail-fast: the Windows run aborted here (socket-patch-core --lib) only after the previously-fixed cli_parse_list binary passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/socket-patch-core/src/package_json/find.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/crates/socket-patch-core/src/package_json/find.rs b/crates/socket-patch-core/src/package_json/find.rs index fc90f14..3e0254c 100644 --- a/crates/socket-patch-core/src/package_json/find.rs +++ b/crates/socket-patch-core/src/package_json/find.rs @@ -770,12 +770,23 @@ mod tests { .iter() .map(|f| f.path.to_string_lossy().into_owned()) .collect(); + // `Path::ends_with` matches whole path components and treats `/` in the + // pattern as a separator on every platform (Windows accepts both `/` + // and `\`), so this is correct regardless of the OS path separator — + // unlike a byte-wise `str::ends_with` on a forward-slash literal, which + // fails on Windows' `\`-separated paths. assert!( - paths.iter().any(|p| p.ends_with("packages/inner/package.json")), + result + .files + .iter() + .any(|f| f.path.ends_with("packages/inner/package.json")), "first-level member must be found: {paths:?}" ); assert!( - paths.iter().any(|p| p.ends_with("packages/inner/sub/leaf/package.json")), + result + .files + .iter() + .any(|f| f.path.ends_with("packages/inner/sub/leaf/package.json")), "nested-workspace leaf must be found via recursion: {paths:?}" ); // root + inner + leaf, no duplicates.