diff --git a/.agents/skills/debug-openshell-cluster/SKILL.md b/.agents/skills/debug-openshell-cluster/SKILL.md index ecc5aff7f..cb9bd060c 100644 --- a/.agents/skills/debug-openshell-cluster/SKILL.md +++ b/.agents/skills/debug-openshell-cluster/SKILL.md @@ -173,6 +173,24 @@ If the gateway exits with `failed to read sandbox JWT signing key from `sandbox-jwt` secret at `/etc/openshell-jwt`. The sandbox JWT mount is required even when local Helm values disable TLS. +If `server.providerTokenGrants.spiffe.enabled=true`, the gateway should still +render `[openshell.gateway.gateway_jwt]` and mount the `sandbox-jwt` Secret. +SPIRE is used only by sandbox pods for dynamic provider token grants. Verify +that SPIRE is installed, the CSI driver is available, and the Kubernetes driver +config includes `provider_spiffe_workload_api_socket_path`: + +```bash +helm -n openshell get values openshell | grep -E 'providerTokenGrants|workloadApiSocketPath' +kubectl get pods -A | grep -E 'spire|spiffe' +kubectl -n openshell get configmap openshell-config -o yaml | grep provider_spiffe_workload_api_socket_path +``` + +Sandbox pods using provider token grants should have an +`openshell.io/sandbox-id` annotation, an `openshell.ai/managed-by=openshell` +label, supervisor env vars `OPENSHELL_K8S_SA_TOKEN_FILE` and +`OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET`, plus both the projected +`openshell-sa-token` volume and the `spiffe-workload-api` CSI volume. + Check the image references currently used by the gateway deployment: ```bash diff --git a/.agents/skills/helm-dev-environment/SKILL.md b/.agents/skills/helm-dev-environment/SKILL.md index 878d6e4da..10813792d 100644 --- a/.agents/skills/helm-dev-environment/SKILL.md +++ b/.agents/skills/helm-dev-environment/SKILL.md @@ -178,6 +178,23 @@ To remove Keycloak: mise run keycloak:k8s:teardown ``` +### SPIRE / SPIFFE Provider Token Grants + +Skaffold can install SPIRE with the SPIFFE hardened Helm charts. 
To activate +SPIFFE JWT-SVIDs for dynamic provider token grants: + +1. Uncomment the `spire-crds` and `spire` releases in `deploy/helm/openshell/skaffold.yaml` +2. Uncomment `#- ci/values-spire.yaml` in the OpenShell release values files +3. Redeploy: `mise run helm:skaffold:run` + +`ci/values-spire-stack.yaml` configures the local SPIRE trust domain as +`openshell.local` and adds a `ClusterSPIFFEID` that maps sandbox pod +annotations to `spiffe://openshell.local/openshell/sandbox/`. +OpenShell mounts the SPIFFE CSI Workload API socket at +`/spiffe-workload-api/spire-agent.sock` into sandbox pods for provider token +grants. Supervisor-to-gateway authentication remains on the Kubernetes +ServiceAccount bootstrap and gateway-minted sandbox JWT path. + --- ## Cluster Lifecycle (suspend/resume) @@ -206,6 +223,8 @@ mise run helm:k3s:status | `deploy/helm/openshell/ci/values-gateway.yaml` | Envoy Gateway GRPCRoute + Gateway overlay | | `deploy/helm/openshell/ci/values-high-availability.yaml` | HA test overlay (`replicaCount: 2` with external PostgreSQL Secret) | | `deploy/helm/openshell/ci/values-keycloak.yaml` | Keycloak OIDC overlay | +| `deploy/helm/openshell/ci/values-spire.yaml` | SPIFFE/SPIRE provider token grant overlay | +| `deploy/helm/openshell/ci/values-spire-stack.yaml` | SPIRE hardened chart values for local dev | | `deploy/helm/openshell/ci/values-tls-disabled.yaml` | Lint-only: TLS + auth disabled (reverse-proxy edge termination) | | `deploy/kube/manifests/envoy-gateway-openshell.yaml` | GatewayClass for Envoy Gateway (`mise run helm:gateway:apply`) | | `tasks/scripts/helm-k3s-local.sh` | k3d cluster create/delete/start/stop/status | diff --git a/Cargo.lock b/Cargo.lock index d0cd77f85..148c10c15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -156,6 +156,15 @@ dependencies = [ "thiserror 2.0.18", ] +[[package]] +name = "arc-swap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" +dependencies = [ + "rustversion", +] + [[package]] name = "argon2" version = "0.5.3" @@ -217,28 +226,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "async-stream" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" -dependencies = [ - "async-stream-impl", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-stream-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "async-trait" version = "0.1.89" @@ -303,40 +290,13 @@ dependencies = [ "fs_extra", ] -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core 0.4.5", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.3", - "tower-layer", - "tower-service", -] - [[package]] name = "axum" version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" dependencies = [ - "axum-core 0.5.6", + "axum-core", "base64 0.22.1", "bytes", "form_urlencoded", @@ -347,7 +307,7 @@ dependencies = [ "hyper", "hyper-util", "itoa", - "matchit 0.8.4", + "matchit", "memchr", "mime", "percent-encoding", @@ -366,26 +326,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - [[package]] name = "axum-core" version = "0.5.6" @@ -1855,19 +1795,13 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.14.0", + "indexmap", "slab", "tokio", "tokio-util", "tracing", ] -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" - [[package]] name = "hashbrown" version = "0.14.5" @@ -2149,7 +2083,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.3", + "socket2", "tokio", "tower-service", "tracing", @@ -2328,16 +2262,6 @@ dependencies = [ "quote", ] -[[package]] -name = "indexmap" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" -dependencies = [ - "autocfg", - "hashbrown 0.12.3", -] - [[package]] name = "indexmap" version = "2.14.0" @@ -2957,12 +2881,6 @@ dependencies = [ "regex-automata", ] -[[package]] -name = "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - [[package]] name = "matchit" version = "0.8.4" @@ -3011,7 +2929,7 @@ dependencies = [ "http-body-util", "hyper", "hyper-util", - "indexmap 2.14.0", + "indexmap", "ipnet", "metrics", "metrics-util", @@ -3469,7 +3387,8 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tonic", - "tonic-build", + "tonic-prost", + "tonic-prost-build", "url", ] @@ -3614,11 +3533,13 @@ dependencies = [ name = "openshell-providers" version = "0.0.0" dependencies = [ + "glob", "openshell-core", "serde", 
"serde_json", "serde_yml", "thiserror 2.0.18", + "url", ] [[package]] @@ -3665,6 +3586,7 @@ dependencies = [ "rand_core 0.6.4", "rcgen", "regorus", + "reqwest 0.12.28", "russh", "rustix 1.1.4", "rustls", @@ -3675,6 +3597,7 @@ dependencies = [ "serde_yml", "sha1 0.10.6", "sha2 0.10.9", + "spiffe", "temp-env", "tempfile", "thiserror 2.0.18", @@ -3696,11 +3619,12 @@ version = "0.0.0" dependencies = [ "anyhow", "async-trait", - "axum 0.8.9", + "axum", "bytes", "clap", "futures", "futures-util", + "glob", "hex", "hmac", "http", @@ -4034,12 +3958,13 @@ dependencies = [ [[package]] name = "petgraph" -version = "0.7.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", - "indexmap 2.14.0", + "hashbrown 0.15.5", + "indexmap", ] [[package]] @@ -4277,9 +4202,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -4287,19 +4212,20 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools 0.14.0", "log", "multimap", - "once_cell", "petgraph", "prettyplease", "prost", "prost-types", + "pulldown-cmark", + "pulldown-cmark-to-cmark", "regex", "syn 2.0.117", "tempfile", @@ -4307,9 +4233,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = 
"0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools 0.14.0", @@ -4320,9 +4246,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.5" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] @@ -4336,6 +4262,26 @@ dependencies = [ "autotools", ] +[[package]] +name = "pulldown-cmark" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad" +dependencies = [ + "bitflags", + "memchr", + "unicase", +] + +[[package]] +name = "pulldown-cmark-to-cmark" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50793def1b900256624a709439404384204a5dc3a6ec580281bfaac35e882e90" +dependencies = [ + "pulldown-cmark", +] + [[package]] name = "quanta" version = "0.12.6" @@ -4364,7 +4310,7 @@ dependencies = [ "quinn-udp", "rustc-hash 2.1.2", "rustls", - "socket2 0.6.3", + "socket2", "thiserror 2.0.18", "tokio", "tracing", @@ -4402,7 +4348,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.3", + "socket2", "tracing", "windows-sys 0.60.2", ] @@ -5258,7 +5204,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.14.0", + "indexmap", "itoa", "ryu", "serde", @@ -5271,7 +5217,7 @@ version = "0.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"59e2dd588bf1597a252c3b920e0143eb99b0f76e4e082f4c92ce34fbc9e71ddd" dependencies = [ - "indexmap 2.14.0", + "indexmap", "itoa", "libyml", "memchr", @@ -5454,22 +5400,40 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.10" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] -name = "socket2" -version = "0.6.3" +name = "spiffe" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "6d3f9e45e9e53f03cb452fe0f050101a9280ff4f4214e326037bc8275284d906" dependencies = [ - "libc", - "windows-sys 0.61.2", + "arc-swap", + "base64ct", + "fastrand", + "futures", + "hyper-util", + "log", + "prost", + "prost-types", + "serde", + "serde_json", + "thiserror 2.0.18", + "time", + "tokio", + "tokio-util", + "tonic", + "tonic-prost", + "tower 0.5.3", + "tracing", + "url", + "zeroize", ] [[package]] @@ -5532,7 +5496,7 @@ dependencies = [ "futures-util", "hashbrown 0.15.5", "hashlink", - "indexmap 2.14.0", + "indexmap", "log", "memchr", "once_cell", @@ -6098,7 +6062,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.3", + "socket2", "tokio-macros", "windows-sys 0.61.2", ] @@ -6204,7 +6168,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.14.0", + "indexmap", "serde", "serde_spanned", "toml_datetime", @@ -6220,13 +6184,12 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tonic" -version = "0.12.3" +version = "0.14.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +checksum = "ac2a5518c70fa84342385732db33fb3f44bc4cc748936eb5833d2df34d6445ef" dependencies = [ - "async-stream", "async-trait", - "axum 0.7.9", + "axum", "base64 0.22.1", "bytes", "h2", @@ -6238,14 +6201,13 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", "rustls-native-certs", - "rustls-pemfile", - "socket2 0.5.10", + "socket2", + "sync_wrapper", "tokio", "tokio-rustls", "tokio-stream", - "tower 0.4.13", + "tower 0.5.3", "tower-layer", "tower-service", "tracing", @@ -6253,9 +6215,32 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.12.3" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +checksum = "c68f61875ac5293cf72e6c8cf0158086428c82c37229e98c840878f1706b0322" +dependencies = [ + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "tonic-prost" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50849f68853be452acf590cde0b146665b8d507b3b8af17261df47e02c209ea0" +dependencies = [ + "bytes", + "prost", + "tonic", +] + +[[package]] +name = "tonic-prost-build" +version = "0.14.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "654e5643eff75d7f8c99197ce1440ed19a3474eada74c12bbac488b2cafdae27" dependencies = [ "prettyplease", "proc-macro2", @@ -6263,6 +6248,8 @@ dependencies = [ "prost-types", "quote", "syn 2.0.117", + "tempfile", + "tonic-build", ] [[package]] @@ -6273,11 +6260,8 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ "futures-core", "futures-util", - "indexmap 1.9.3", "pin-project", "pin-project-lite", - "rand 0.8.6", - "slab", "tokio", "tokio-util", "tower-layer", @@ -6293,9 +6277,12 @@ checksum = 
"ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", + "indexmap", "pin-project-lite", + "slab", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", "tracing", @@ -6782,7 +6769,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.14.0", + "indexmap", "wasm-encoder", "wasmparser", ] @@ -6808,7 +6795,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.5", - "indexmap 2.14.0", + "indexmap", "semver", ] @@ -7373,7 +7360,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.14.0", + "indexmap", "prettyplease", "syn 2.0.117", "wasm-metadata", @@ -7404,7 +7391,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", - "indexmap 2.14.0", + "indexmap", "log", "serde", "serde_derive", @@ -7423,7 +7410,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", "id-arena", - "indexmap 2.14.0", + "indexmap", "log", "semver", "serde", @@ -7637,7 +7624,7 @@ dependencies = [ "flate2", "getrandom 0.4.2", "hmac", - "indexmap 2.14.0", + "indexmap", "lzma-rust2", "memchr", "pbkdf2", diff --git a/Cargo.toml b/Cargo.toml index 079e1e172..86025646a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,10 +17,11 @@ repository = "https://github.com/NVIDIA/OpenShell" tokio = { version = "1.43", features = ["full"] } # gRPC/Protobuf -tonic = "0.12" -tonic-build = "0.12" -prost = "0.13" -prost-types = "0.13" +tonic = "0.14" +tonic-prost = "0.14" +tonic-prost-build = "0.14" +prost = "0.14" +prost-types = "0.14" # HTTP server axum = { version = "0.8", features = ["ws"] } @@ -87,6 +88,7 @@ sha2 = "0.10" rand = "0.9" 
jsonwebtoken = "9" getrandom = "0.3" +spiffe = { version = "0.15", default-features = false, features = ["workload-api-jwt", "tracing"] } # Filesystem embedding include_dir = "0.7" diff --git a/architecture/gateway.md b/architecture/gateway.md index 35e2d6659..c8bb695ea 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -47,9 +47,13 @@ Supported auth modes: | Cloudflare JWT | Edge-authenticated deployments where Cloudflare Access supplies identity. | | OIDC | Bearer-token auth for users, with browser PKCE or client credentials login. | -Sandbox supervisor RPCs authenticate with gateway-minted sandbox JWTs when that -authenticator is configured; mTLS does not grant sandbox identity. User-facing -mutations are authorized by role policy when OIDC or edge identity is enabled. +Sandbox supervisor RPCs authenticate with explicit sandbox credentials; mTLS +does not grant sandbox identity. Kubernetes deployments use the +gateway-minted JWT bootstrap path: the supervisor starts with a projected +ServiceAccount token, exchanges it for a gateway-minted sandbox JWT, and uses +that JWT on subsequent gateway RPCs. +User-facing mutations are authorized by role policy when OIDC or edge identity +is enabled. Sandbox secrets are gateway-signed JWTs bound to a single sandbox ID. Docker, Podman, and VM drivers deliver the initial token through supervisor-only diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 4bc6803eb..e60b727a5 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -70,10 +70,24 @@ agent process and SSH child processes. Driver-controlled environment variables override template values so sandbox images cannot spoof identity, callback, or relay settings. +Supervisor bootstrap identity is not inherited by agent child processes. When +provider token grants mount a SPIFFE Workload API socket, the socket path must +live under a dedicated directory. 
Children also enter a private mount namespace +where that socket directory is hidden before privilege drop. + Credential placeholders in proxied HTTP requests can be resolved by the proxy when policy allows the target endpoint. Secrets must not be logged in OCSF or plain tracing output. +Provider profiles can also declare dynamic token grants. For matching HTTP +endpoints, the supervisor obtains a SPIFFE JWT-SVID from the local Workload API, +exchanges it for an OAuth2 access token, caches the token, and injects it as an +`Authorization: Bearer` header before forwarding the request. Token grant +endpoints are HTTPS-only except for loopback and Kubernetes service DNS hosts, +and returned access tokens must be bearer-compatible before they are cached or +injected. Token response lifetimes are capped and cached with an expiry margin +unless a profile supplies an explicit cache TTL override. + ## Connect and Logs The supervisor runs an SSH server on a Unix socket inside the sandbox. The diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index 4d7241de3..577fb73b9 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -30,7 +30,7 @@ prost-types = { workspace = true } tokio = { workspace = true } # gRPC client -tonic = { workspace = true, features = ["tls", "tls-native-roots"] } +tonic = { workspace = true, features = ["tls-native-roots"] } # CLI chrono = "0.4" diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 19fe2df83..109b67596 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -716,7 +716,7 @@ impl From for openshell_cli::ssh::Editor { #[derive(Subcommand, Debug)] enum ProviderCommands { /// Create a provider config. 
- #[command(group = clap::ArgGroup::new("cred_source").required(true).args(["from_existing", "credentials", "from_gcloud_adc"]), help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] + #[command(group = clap::ArgGroup::new("cred_source").required(true).args(["from_existing", "credentials", "from_gcloud_adc", "runtime_credentials"]), help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] Create { /// Provider name. #[arg(long)] @@ -727,23 +727,27 @@ enum ProviderCommands { provider_type: String, /// Load provider credentials/config from existing local state. - #[arg(long, conflicts_with_all = ["credentials", "from_gcloud_adc"])] + #[arg(long, conflicts_with_all = ["credentials", "from_gcloud_adc", "runtime_credentials"])] from_existing: bool, /// Provider credential pair (`KEY=VALUE`) or env lookup key (`KEY`). #[arg( long = "credential", value_name = "KEY[=VALUE]", - conflicts_with_all = ["from_existing", "from_gcloud_adc"] + conflicts_with_all = ["from_existing", "from_gcloud_adc", "runtime_credentials"] )] credentials: Vec, /// Configure credentials from gcloud Application Default Credentials /// (`~/.config/gcloud/application_default_credentials.json`). /// Only valid for google-vertex-ai providers. - #[arg(long, group = "cred_source", conflicts_with_all = ["from_existing", "credentials"])] + #[arg(long, group = "cred_source", conflicts_with_all = ["from_existing", "credentials", "runtime_credentials"])] from_gcloud_adc: bool, + /// Create a provider whose required credentials are resolved at runtime by the gateway/sandbox. + #[arg(long, conflicts_with_all = ["from_existing", "credentials", "from_gcloud_adc"])] + runtime_credentials: bool, + /// Provider config key/value pair. 
#[arg(long = "config", value_name = "KEY=VALUE")] config: Vec, @@ -2834,15 +2838,17 @@ async fn main() -> Result<()> { from_existing, credentials, from_gcloud_adc, + runtime_credentials, config, } => { - run::provider_create( + run::provider_create_with_options( endpoint, &name, provider_type.as_str(), from_existing, &credentials, from_gcloud_adc, + runtime_credentials, &config, &tls, ) @@ -3912,6 +3918,60 @@ mod tests { } } + #[test] + fn provider_create_requires_credential_source() { + let err = Cli::try_parse_from([ + "openshell", + "provider", + "create", + "--name", + "spiffe-token-demo", + "--type", + "spiffe-token-demo", + ]) + .expect_err("provider create should require a credential source"); + + assert!(err.to_string().contains("--runtime-credentials")); + } + + #[test] + fn provider_create_accepts_runtime_credentials() { + let cli = Cli::try_parse_from([ + "openshell", + "provider", + "create", + "--name", + "spiffe-token-demo", + "--type", + "spiffe-token-demo", + "--runtime-credentials", + ]) + .expect("provider create should parse runtime credentials"); + + match cli.command { + Some(Commands::Provider { + command: + Some(ProviderCommands::Create { + name, + provider_type, + from_existing, + credentials, + from_gcloud_adc, + runtime_credentials, + .. 
+ }), + }) => { + assert_eq!(name, "spiffe-token-demo"); + assert_eq!(provider_type, "spiffe-token-demo"); + assert!(!from_existing); + assert!(credentials.is_empty()); + assert!(!from_gcloud_adc); + assert!(runtime_credentials); + } + other => panic!("expected provider create command, got: {other:?}"), + } + } + #[test] fn provider_create_rejects_from_gcloud_adc_with_from_existing() { let err = Cli::try_parse_from([ diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 7290ac05e..c303dd601 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -4468,7 +4468,8 @@ fn missing_credentials_error(provider_type: &str) -> miette::Report { miette::miette!( "no credentials resolved for provider type '{provider_type}'. \ - Use --credential KEY[=VALUE] or --from-existing with the appropriate env vars set." + Use --credential KEY[=VALUE], --runtime-credentials for runtime-resolved profile credentials, \ + or --from-existing with the appropriate env vars set." 
) } @@ -4483,14 +4484,45 @@ pub async fn provider_create( config: &[String], tls: &TlsOptions, ) -> Result<()> { - if from_gcloud_adc && (from_existing || !credentials.is_empty()) { + provider_create_with_options( + server, + name, + provider_type, + from_existing, + credentials, + from_gcloud_adc, + false, + config, + tls, + ) + .await +} + +#[allow(clippy::too_many_arguments)] +pub async fn provider_create_with_options( + server: &str, + name: &str, + provider_type: &str, + from_existing: bool, + credentials: &[String], + from_gcloud_adc: bool, + runtime_credentials: bool, + config: &[String], + tls: &TlsOptions, +) -> Result<()> { + if from_gcloud_adc && (from_existing || !credentials.is_empty() || runtime_credentials) { return Err(miette::miette!( - "--from-gcloud-adc cannot be combined with --from-existing or --credential" + "--from-gcloud-adc cannot be combined with --from-existing or --credential; it also cannot be combined with --runtime-credentials" )); } - if from_existing && !credentials.is_empty() { + if from_existing && (!credentials.is_empty() || runtime_credentials) { return Err(miette::miette!( - "--from-existing cannot be combined with --credential" + "--from-existing cannot be combined with --credential or --runtime-credentials" + )); + } + if runtime_credentials && !credentials.is_empty() { + return Err(miette::miette!( + "--runtime-credentials cannot be combined with --credential" )); } @@ -4553,11 +4585,25 @@ pub async fn provider_create( if from_existing { return Err(missing_credentials_error(&provider_type)); } - let allows_refresh_bootstrap = fetch_provider_profile(&mut client, &provider_type) - .await - .ok() - .is_some_and(|profile| provider_profile_allows_refresh_bootstrap(&profile)); - if !allows_refresh_bootstrap { + if !from_gcloud_adc && !runtime_credentials { + return Err(missing_credentials_error(&provider_type)); + } + let allows_empty_credentials = if runtime_credentials { + provider_profile_allows_empty_credentials( + 
&fetch_provider_profile(&mut client, &provider_type).await?, + ) + } else { + fetch_provider_profile(&mut client, &provider_type) + .await + .ok() + .is_some_and(|profile| provider_profile_allows_empty_credentials(&profile)) + }; + if !allows_empty_credentials { + if runtime_credentials { + return Err(miette::miette!( + "--runtime-credentials is only valid for provider profiles whose required credentials are resolved at runtime" + )); + } return Err(missing_credentials_error(&provider_type)); } } @@ -4652,8 +4698,8 @@ pub async fn provider_create( Ok(()) } -fn provider_profile_allows_refresh_bootstrap(profile: &ProviderProfile) -> bool { - ProviderTypeProfile::from_proto(profile).allows_gateway_refresh_bootstrap() +fn provider_profile_allows_empty_credentials(profile: &ProviderProfile) -> bool { + ProviderTypeProfile::from_proto(profile).allows_empty_provider_credentials() } pub async fn provider_get(server: &str, name: &str, tls: &TlsOptions) -> Result<()> { @@ -7529,7 +7575,7 @@ mod tests { inferred_provider_type, mtls_certs_exist_for_gateway, package_managed_tls_dirs, parse_cli_setting_value, parse_credential_expiry_cli_value, parse_credential_expiry_pairs, parse_credential_pairs, parse_driver_config_json, plaintext_gateway_is_remote, - progress_step_from_metadata, provider_profile_allows_refresh_bootstrap, + progress_step_from_metadata, provider_profile_allows_empty_credentials, provisioning_timeout_message, ready_false_condition_message, refresh_status_header, refresh_status_row, resolve_from, sandbox_should_persist, sandbox_upload_plan, service_expose_status_error, service_url_for_gateway, @@ -7552,8 +7598,8 @@ mod tests { }; use openshell_core::proto::{ Provider, ProviderCredentialRefresh, ProviderCredentialRefreshStatus, - ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileCredential, - SandboxCondition, SandboxStatus, datamodel::v1::ObjectMeta, + ProviderCredentialRefreshStrategy, ProviderCredentialTokenGrant, ProviderProfile, + 
ProviderProfileCredential, SandboxCondition, SandboxStatus, datamodel::v1::ObjectMeta, }; struct EnvVarGuard { @@ -7780,7 +7826,7 @@ mod tests { } #[test] - fn refresh_bootstrap_requires_all_required_credentials_to_be_gateway_mintable() { + fn empty_provider_credentials_require_all_required_credentials_to_be_runtime_resolvable() { let refresh_token_profile = ProviderProfile { credentials: vec![ProviderProfileCredential { name: "MS_GRAPH_ACCESS_TOKEN".to_string(), @@ -7793,10 +7839,26 @@ mod tests { }], ..Default::default() }; - assert!(provider_profile_allows_refresh_bootstrap( + assert!(provider_profile_allows_empty_credentials( &refresh_token_profile )); + let token_grant_profile = ProviderProfile { + credentials: vec![ProviderProfileCredential { + name: "ACCESS_TOKEN".to_string(), + required: true, + token_grant: Some(ProviderCredentialTokenGrant { + token_endpoint: "https://auth.example.com/token".to_string(), + ..Default::default() + }), + ..Default::default() + }], + ..Default::default() + }; + assert!(provider_profile_allows_empty_credentials( + &token_grant_profile + )); + let mixed_static_profile = ProviderProfile { credentials: vec![ ProviderProfileCredential { @@ -7817,7 +7879,7 @@ mod tests { ], ..Default::default() }; - assert!(!provider_profile_allows_refresh_bootstrap( + assert!(!provider_profile_allows_empty_credentials( &mixed_static_profile )); @@ -7833,7 +7895,7 @@ mod tests { }], ..Default::default() }; - assert!(provider_profile_allows_refresh_bootstrap( + assert!(provider_profile_allows_empty_credentials( &optional_refresh_profile )); } @@ -8883,7 +8945,7 @@ mod tests { } #[test] - fn refresh_bootstrap_allows_oauth2_refresh_token() { + fn empty_provider_credentials_allow_oauth2_refresh_token() { use openshell_core::proto::{ ProviderCredentialRefresh, ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileCredential, @@ -8902,7 +8964,7 @@ mod tests { ..Default::default() }; assert!( - 
provider_profile_allows_refresh_bootstrap(&profile), + provider_profile_allows_empty_credentials(&profile), "Oauth2RefreshToken should be allowed for refresh bootstrap" ); } diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index ed78c6659..b287b4ea0 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -343,7 +343,7 @@ impl OpenShell for TestOpenShell { if provider.credentials.is_empty() { let bootstrap_allowed = if let Some(profile) = openshell_providers::get_default_profile(&provider.r#type) { - profile.allows_gateway_refresh_bootstrap() + profile.allows_empty_provider_credentials() } else { self.state .profiles @@ -353,7 +353,7 @@ impl OpenShell for TestOpenShell { .cloned() .is_some_and(|profile| { openshell_providers::ProviderTypeProfile::from_proto(&profile) - .allows_gateway_refresh_bootstrap() + .allows_empty_provider_credentials() }) }; if !bootstrap_allowed { @@ -1119,13 +1119,14 @@ async fn provider_create_allows_empty_credentials_for_gateway_refresh_profiles() }, ); - run::provider_create( + run::provider_create_with_options( &ts.endpoint, "custom-refresh-provider", "custom-refresh", false, &[], false, + true, &[], &ts.tls, ) @@ -1138,6 +1139,50 @@ async fn provider_create_allows_empty_credentials_for_gateway_refresh_profiles() assert!(provider.credentials.is_empty()); } +#[tokio::test] +async fn provider_create_requires_runtime_credentials_for_empty_gateway_refresh_profiles() { + let ts = run_server().await; + ts.state.profiles.lock().await.insert( + "custom-refresh".to_string(), + ProviderProfile { + id: "custom-refresh".to_string(), + display_name: "Custom Refresh".to_string(), + credentials: vec![ProviderProfileCredential { + name: "ACCESS_TOKEN".to_string(), + required: true, + refresh: Some(ProviderCredentialRefresh { + strategy: 
ProviderCredentialRefreshStrategy::Oauth2RefreshToken as i32, + ..Default::default() + }), + ..Default::default() + }], + ..Default::default() + }, + ); + + let err = run::provider_create( + &ts.endpoint, + "custom-refresh-provider", + "custom-refresh", + false, + &[], + false, + &[], + &ts.tls, + ) + .await + .expect_err("empty runtime-resolved providers should require an explicit source"); + + assert!(err.to_string().contains("--runtime-credentials")); + assert!( + !ts.state + .providers + .lock() + .await + .contains_key("custom-refresh-provider") + ); +} + #[tokio::test] async fn sandbox_provider_cli_run_functions_wire_requests_and_idempotent_results() { let ts = run_server().await; diff --git a/crates/openshell-core/Cargo.toml b/crates/openshell-core/Cargo.toml index 469a0f4d9..3f5c013db 100644 --- a/crates/openshell-core/Cargo.toml +++ b/crates/openshell-core/Cargo.toml @@ -14,6 +14,7 @@ repository.workspace = true prost = { workspace = true } prost-types = { workspace = true } tonic = { workspace = true } +tonic-prost = { workspace = true } thiserror = { workspace = true } miette = { workspace = true } serde = { workspace = true } @@ -35,7 +36,7 @@ telemetry = ["dep:reqwest", "dep:chrono"] dev-settings = [] [build-dependencies] -tonic-build = { workspace = true } +tonic-prost-build = { workspace = true } protobuf-src = { workspace = true } [dev-dependencies] diff --git a/crates/openshell-core/build.rs b/crates/openshell-core/build.rs index 12e79a1dc..7955772a6 100644 --- a/crates/openshell-core/build.rs +++ b/crates/openshell-core/build.rs @@ -43,14 +43,14 @@ fn main() -> Result<(), Box> { let out_dir = PathBuf::from(env::var("OUT_DIR")?); let descriptor_path = out_dir.join("openshell_descriptor.bin"); - // Configure tonic-build - tonic_build::configure() + // Configure tonic/prost protobuf code generation. 
+ tonic_prost_build::configure() .build_server(true) .build_client(true) // Emit a binary FileDescriptorSet so the server can enumerate every // RPC at runtime (used by the per-handler auth exhaustiveness test). .file_descriptor_set_path(&descriptor_path) - .compile_protos(&proto_files, &[proto_root.as_path()])?; + .compile_protos(&proto_files, &[proto_root])?; println!( "cargo:rustc-env=OPENSHELL_DESCRIPTOR_PATH={}", diff --git a/crates/openshell-core/src/sandbox_env.rs b/crates/openshell-core/src/sandbox_env.rs index 1059c0d08..b457a4a8e 100644 --- a/crates/openshell-core/src/sandbox_env.rs +++ b/crates/openshell-core/src/sandbox_env.rs @@ -63,3 +63,11 @@ pub const USER_ENVIRONMENT: &str = "OPENSHELL_USER_ENVIRONMENT"; /// writes and rotates this file; the supervisor exchanges its contents /// for a gateway JWT at startup and on refresh. pub const K8S_SA_TOKEN_FILE: &str = "OPENSHELL_K8S_SA_TOKEN_FILE"; + +/// Filesystem path to the SPIFFE Workload API UNIX socket used for provider +/// token grants. +/// +/// When set, the supervisor can fetch JWT-SVIDs for upstream provider token +/// exchanges without using SPIFFE for gateway authentication. +pub const PROVIDER_SPIFFE_WORKLOAD_API_SOCKET: &str = + "OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET"; diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 48004fa4b..4c1153b08 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -3,6 +3,7 @@ use openshell_core::config::DEFAULT_SUPERVISOR_IMAGE; use serde::{Deserialize, Deserializer, Serialize}; +use std::path::Path; use std::str::FromStr; /// Default Kubernetes namespace for sandbox resources. 
@@ -143,6 +144,18 @@ where } } +fn deserialize_provider_spiffe_workload_api_socket_path<'de, D>( + deserializer: D, +) -> Result +where + D: Deserializer<'de>, +{ + let value = String::deserialize(deserializer)?; + validate_provider_spiffe_workload_api_socket_path_value(&value) + .map_err(serde::de::Error::custom)?; + Ok(value) +} + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default, deny_unknown_fields)] pub struct KubernetesComputeConfig { @@ -190,6 +203,14 @@ pub struct KubernetesComputeConfig { /// this token within a few seconds of pod start, so any value at /// the floor is sufficient. Default 3600. pub sa_token_ttl_secs: i64, + /// SPIFFE Workload API socket path mounted into sandbox pods for dynamic + /// provider token grants. Empty disables provider token-grant SPIFFE + /// material. + #[serde( + default, + deserialize_with = "deserialize_provider_spiffe_workload_api_socket_path" + )] + pub provider_spiffe_workload_api_socket_path: String, } /// Lower bound enforced by kubelet for projected SA tokens. 
@@ -224,6 +245,7 @@ impl Default for KubernetesComputeConfig { workspace_default_storage_size: DEFAULT_WORKSPACE_STORAGE_SIZE.to_string(), default_runtime_class_name: String::new(), sa_token_ttl_secs: 3600, + provider_spiffe_workload_api_socket_path: String::new(), } } } @@ -241,6 +263,52 @@ impl KubernetesComputeConfig { .clamp(MIN_SA_TOKEN_TTL_SECS, MAX_SA_TOKEN_TTL_SECS) } } + + #[must_use] + pub fn provider_spiffe_enabled(&self) -> bool { + !self + .provider_spiffe_workload_api_socket_path + .trim() + .is_empty() + } + + pub fn validate_provider_spiffe_workload_api_socket_path(&self) -> Result<(), String> { + validate_provider_spiffe_workload_api_socket_path_value( + &self.provider_spiffe_workload_api_socket_path, + ) + } +} + +fn validate_provider_spiffe_workload_api_socket_path_value( + socket_path: &str, +) -> Result<(), String> { + let trimmed = socket_path.trim(); + if trimmed.is_empty() { + return Ok(()); + } + if trimmed != socket_path { + return Err( + "provider_spiffe_workload_api_socket_path must not contain leading or trailing whitespace" + .to_string(), + ); + } + let path = Path::new(socket_path); + if !path.is_absolute() { + return Err( + "provider_spiffe_workload_api_socket_path must be an absolute UNIX socket path" + .to_string(), + ); + } + let parent = path.parent().ok_or_else(|| { + "provider_spiffe_workload_api_socket_path must include a parent directory".to_string() + })?; + if parent == Path::new("/") { + return Err( + "provider_spiffe_workload_api_socket_path must live below a dedicated directory" + .to_string(), + ); + } + Ok(()) } #[cfg(test)] @@ -345,6 +413,35 @@ mod tests { assert_eq!(cfg.app_armor_profile, None); } + #[test] + fn serde_accepts_absolute_provider_spiffe_socket_path() { + let json = serde_json::json!({ + "provider_spiffe_workload_api_socket_path": "/spiffe-workload-api/spire-agent.sock" + }); + let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); + 
cfg.validate_provider_spiffe_workload_api_socket_path() + .unwrap(); + } + + #[test] + fn serde_rejects_invalid_provider_spiffe_socket_path() { + for socket_path in [ + "spiffe-workload-api/spire-agent.sock", + "/spire-agent.sock", + " /spiffe-workload-api/spire-agent.sock", + ] { + let json = serde_json::json!({ + "provider_spiffe_workload_api_socket_path": socket_path + }); + let err = serde_json::from_value::(json).unwrap_err(); + assert!( + err.to_string() + .contains("provider_spiffe_workload_api_socket_path"), + "unexpected error for {socket_path}: {err}" + ); + } + } + #[test] fn serde_rejects_invalid_app_armor_profile() { let json = serde_json::json!({ diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index ba20b0725..efee91afb 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -79,6 +79,7 @@ pub const SANDBOX_KIND: &str = "Sandbox"; const GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; const GPU_RESOURCE_QUANTITY: &str = "1"; +const SPIFFE_WORKLOAD_API_VOLUME_NAME: &str = "spiffe-workload-api"; // This POC treats the selected Struct as a driver-local typed schema. 
Once the // Kubernetes shape stabilizes, these serde structs may move to driver-local @@ -174,25 +175,30 @@ impl std::fmt::Debug for KubernetesComputeDriver { } impl KubernetesComputeDriver { - pub async fn new(config: KubernetesComputeConfig) -> Result { + pub async fn new(config: KubernetesComputeConfig) -> Result { + config + .validate_provider_spiffe_workload_api_socket_path() + .map_err(KubernetesDriverError::Precondition)?; let base_config = match kube::Config::incluster() { Ok(c) => c, Err(_) => kube::Config::infer() .await - .map_err(kube::Error::InferConfig)?, + .map_err(kube::Error::InferConfig) + .map_err(KubernetesDriverError::from_kube)?, }; let mut kube_config = base_config.clone(); kube_config.connect_timeout = Some(Duration::from_secs(10)); kube_config.read_timeout = Some(Duration::from_secs(30)); kube_config.write_timeout = Some(Duration::from_secs(30)); - let client = Client::try_from(kube_config)?; + let client = Client::try_from(kube_config).map_err(KubernetesDriverError::from_kube)?; let mut watch_kube_config = base_config; watch_kube_config.connect_timeout = Some(Duration::from_secs(10)); watch_kube_config.read_timeout = None; watch_kube_config.write_timeout = Some(Duration::from_secs(30)); - let watch_client = Client::try_from(watch_kube_config)?; + let watch_client = + Client::try_from(watch_kube_config).map_err(KubernetesDriverError::from_kube)?; Ok(Self { client, @@ -374,6 +380,10 @@ impl KubernetesComputeDriver { workspace_default_storage_size: &self.config.workspace_default_storage_size, default_runtime_class_name: &self.config.default_runtime_class_name, sa_token_ttl_secs: self.config.effective_sa_token_ttl_secs(), + provider_spiffe_enabled: self.config.provider_spiffe_enabled(), + provider_spiffe_workload_api_socket_path: &self + .config + .provider_spiffe_workload_api_socket_path, }; obj.data = sandbox_to_k8s_spec(sandbox.spec.as_ref(), ¶ms); let api = self.api(); @@ -1090,6 +1100,8 @@ struct SandboxPodParams<'a> { /// Lifetime 
(seconds) of the projected `ServiceAccount` token used /// for the bootstrap `IssueSandboxToken` exchange. sa_token_ttl_secs: i64, + provider_spiffe_enabled: bool, + provider_spiffe_workload_api_socket_path: &'a str, } impl Default for SandboxPodParams<'_> { @@ -1113,6 +1125,8 @@ impl Default for SandboxPodParams<'_> { workspace_default_storage_size: DEFAULT_WORKSPACE_STORAGE_SIZE, default_runtime_class_name: "", sa_token_ttl_secs: 3600, + provider_spiffe_enabled: false, + provider_spiffe_workload_api_socket_path: "", } } } @@ -1220,8 +1234,25 @@ fn sandbox_template_to_k8s( let driver_config = kubernetes_driver_config(template); let mut metadata = serde_json::Map::new(); - if !template.labels.is_empty() { - metadata.insert("labels".to_string(), serde_json::json!(template.labels)); + let mut pod_labels = template + .labels + .iter() + .map(|(key, value)| (key.clone(), serde_json::Value::String(value.clone()))) + .collect::>(); + if params.provider_spiffe_enabled { + pod_labels.insert( + LABEL_MANAGED_BY.to_string(), + serde_json::Value::String(LABEL_MANAGED_BY_VALUE.to_string()), + ); + if !params.sandbox_id.is_empty() { + pod_labels.insert( + LABEL_SANDBOX_ID.to_string(), + serde_json::Value::String(params.sandbox_id.to_string()), + ); + } + } + if !pod_labels.is_empty() { + metadata.insert("labels".to_string(), serde_json::Value::Object(pod_labels)); } // Carry the sandbox UUID as a pod annotation so the gateway can resolve // a projected SA token claim (pod name + uid) back to a sandbox identity @@ -1336,6 +1367,7 @@ fn sandbox_template_to_k8s( params.grpc_endpoint, params.ssh_socket_path, !params.client_tls_secret_name.is_empty(), + provider_spiffe_socket_path(params), ); container.insert("env".to_string(), serde_json::Value::Array(env)); @@ -1358,9 +1390,9 @@ fn sandbox_template_to_k8s( } container.insert("securityContext".to_string(), security_context); - // Mount client TLS secret for mTLS to the server, plus the projected - // ServiceAccount token used to 
bootstrap the sandbox's gateway JWT - // via `IssueSandboxToken`. + // Mount client TLS secret for mTLS to the server. Gateway identity uses + // the projected ServiceAccount bootstrap token. Provider token grants may + // additionally mount the SPIFFE Workload API socket. let mut volume_mounts: Vec = Vec::new(); if !params.client_tls_secret_name.is_empty() { volume_mounts.push(serde_json::json!({ @@ -1369,6 +1401,13 @@ fn sandbox_template_to_k8s( "readOnly": true })); } + if params.provider_spiffe_enabled { + volume_mounts.push(serde_json::json!({ + "name": SPIFFE_WORKLOAD_API_VOLUME_NAME, + "mountPath": spiffe_socket_mount_path(params.provider_spiffe_workload_api_socket_path), + "readOnly": true, + })); + } volume_mounts.push(serde_json::json!({ "name": "openshell-sa-token", "mountPath": "/var/run/secrets/openshell", @@ -1397,6 +1436,15 @@ fn sandbox_template_to_k8s( "secret": { "secretName": params.client_tls_secret_name, "defaultMode": 256 } })); } + if params.provider_spiffe_enabled { + volumes.push(serde_json::json!({ + "name": SPIFFE_WORKLOAD_API_VOLUME_NAME, + "csi": { + "driver": "csi.spiffe.io", + "readOnly": true + } + })); + } // Projected ServiceAccountToken volume — kubelet writes a short-lived // audience-bound JWT into /var/run/secrets/openshell/token and rotates // it automatically. 
The supervisor exchanges this for a gateway-minted @@ -1621,6 +1669,7 @@ fn build_env_list( grpc_endpoint: &str, ssh_socket_path: &str, tls_enabled: bool, + provider_spiffe_socket_path: Option<&str>, ) -> Vec { let mut env = existing_env.cloned().unwrap_or_default(); apply_env_map(&mut env, template_environment); @@ -1643,6 +1692,7 @@ fn build_env_list( grpc_endpoint, ssh_socket_path, tls_enabled, + provider_spiffe_socket_path, ); env } @@ -1665,6 +1715,7 @@ fn apply_required_env( grpc_endpoint: &str, ssh_socket_path: &str, tls_enabled: bool, + provider_spiffe_socket_path: Option<&str>, ) { upsert_env(env, openshell_core::sandbox_env::SANDBOX_ID, sandbox_id); upsert_env(env, openshell_core::sandbox_env::SANDBOX, sandbox_name); @@ -1713,6 +1764,28 @@ fn apply_required_env( openshell_core::sandbox_env::K8S_SA_TOKEN_FILE, "/var/run/secrets/openshell/token", ); + if let Some(socket_path) = provider_spiffe_socket_path { + upsert_env( + env, + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET, + socket_path, + ); + } +} + +fn provider_spiffe_socket_path<'a>(params: &'a SandboxPodParams<'a>) -> Option<&'a str> { + params + .provider_spiffe_enabled + .then_some(params.provider_spiffe_workload_api_socket_path) +} + +fn spiffe_socket_mount_path(socket_path: &str) -> String { + std::path::Path::new(socket_path) + .parent() + .and_then(std::path::Path::to_str) + .filter(|path| !path.is_empty() && *path != "/") + .expect("provider SPIFFE socket path should be validated before pod rendering") + .to_string() } fn upsert_env(env: &mut Vec, name: &str, value: &str) { @@ -2196,6 +2269,7 @@ mod tests { "https://endpoint:8080", "0.0.0.0:2222", true, // tls_enabled + None, ); // Extract the TLS-related env vars @@ -3081,6 +3155,51 @@ mod tests { ); } + #[test] + fn provider_spiffe_mounts_csi_socket_and_keeps_sa_token_bootstrap() { + let params = SandboxPodParams { + sandbox_id: "sandbox-123", + sandbox_name: "sandbox", + provider_spiffe_enabled: true, + 
provider_spiffe_workload_api_socket_path: "/spiffe-workload-api/spire-agent.sock", + ..SandboxPodParams::default() + }; + let pod_template = sandbox_template_to_k8s( + &SandboxTemplate::default(), + false, + &std::collections::HashMap::new(), + true, + ¶ms, + ); + + let env = pod_template["spec"]["containers"][0]["env"] + .as_array() + .expect("env"); + assert!(env.iter().any(|e| { + e["name"] == openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + && e["value"] == "/spiffe-workload-api/spire-agent.sock" + })); + assert!(env.iter().any(|e| { + e["name"] == openshell_core::sandbox_env::K8S_SA_TOKEN_FILE + && e["value"] == "/var/run/secrets/openshell/token" + })); + + let volumes = pod_template["spec"]["volumes"].as_array().expect("volumes"); + assert!(volumes.iter().any(|volume| { + volume["name"] == SPIFFE_WORKLOAD_API_VOLUME_NAME + && volume["csi"]["driver"] == "csi.spiffe.io" + })); + assert!(volumes.iter().any(|volume| { + volume["name"] == "openshell-sa-token" + && volume["projected"]["sources"][0]["serviceAccountToken"]["path"] == "token" + })); + + assert_eq!( + pod_template["metadata"]["labels"][LABEL_MANAGED_BY], + serde_json::json!(LABEL_MANAGED_BY_VALUE) + ); + } + #[test] fn platform_config_bool_extracts_value() { let template = SandboxTemplate { diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index a2b0e2790..f7eeeba42 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -92,6 +92,9 @@ struct Args { /// gateway clamps values outside `[600, 86400]`. Default 3600. 
#[arg(long, env = "OPENSHELL_K8S_SA_TOKEN_TTL_SECS", default_value_t = 3600)] sa_token_ttl_secs: i64, + + #[arg(long, env = "OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET")] + provider_spiffe_workload_api_socket_path: Option, } #[tokio::main] @@ -129,6 +132,9 @@ async fn main() -> Result<()> { default_runtime_class_name: std::env::var("OPENSHELL_K8S_DEFAULT_RUNTIME_CLASS_NAME") .unwrap_or_default(), sa_token_ttl_secs: args.sa_token_ttl_secs, + provider_spiffe_workload_api_socket_path: args + .provider_spiffe_workload_api_socket_path + .unwrap_or_default(), }) .await .into_diagnostic()?; diff --git a/crates/openshell-providers/Cargo.toml b/crates/openshell-providers/Cargo.toml index 2c9c48b63..9b294d7b7 100644 --- a/crates/openshell-providers/Cargo.toml +++ b/crates/openshell-providers/Cargo.toml @@ -11,11 +11,13 @@ license.workspace = true repository.workspace = true [dependencies] +glob = { workspace = true } openshell-core = { path = "../openshell-core", default-features = false } serde = { workspace = true } serde_json = { workspace = true } serde_yml = { workspace = true } thiserror = { workspace = true } +url = { workspace = true } [lints] workspace = true diff --git a/crates/openshell-providers/src/discovery.rs b/crates/openshell-providers/src/discovery.rs index 96ed76466..97a6a911b 100644 --- a/crates/openshell-providers/src/discovery.rs +++ b/crates/openshell-providers/src/discovery.rs @@ -97,6 +97,7 @@ mod tests { query_param: String::new(), refresh: None, path_template: String::new(), + token_grant: None, }, CredentialProfile { name: "secondary".to_string(), @@ -108,6 +109,7 @@ mod tests { query_param: String::new(), refresh: None, path_template: String::new(), + token_grant: None, }, ], endpoints: Vec::new(), diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 624ee0711..d2a35ca80 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -14,6 +14,7 
@@ use openshell_core::proto::{ use serde::ser::SerializeStruct; use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; use std::collections::{HashMap, HashSet}; +use std::net::IpAddr; use std::sync::OnceLock; const PATH_TEMPLATE_CREDENTIAL_PLACEHOLDER: &str = "{credential}"; @@ -90,6 +91,38 @@ pub struct CredentialProfile { pub refresh: Option, #[serde(default, skip_serializing_if = "String::is_empty")] pub path_template: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub token_grant: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct TokenGrantProfile { + pub token_endpoint: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub audience: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub jwt_svid_audience: String, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub client_assertion_type: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub scopes: Vec, + #[serde(default, skip_serializing_if = "is_zero_i64")] + pub cache_ttl_seconds: i64, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub audience_overrides: Vec, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] +pub struct TokenGrantAudienceOverrideProfile { + #[serde(default, skip_serializing_if = "String::is_empty")] + pub host: String, + #[serde(default, skip_serializing_if = "is_zero")] + pub port: u32, + #[serde(default, skip_serializing_if = "String::is_empty")] + pub path: String, + pub audience: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub scopes: Vec, } #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq)] @@ -290,6 +323,7 @@ impl ProviderTypeProfile { .as_ref() .map(credential_refresh_from_proto), path_template: credential.path_template.clone(), + token_grant: credential.token_grant.as_ref().map(token_grant_from_proto), }) .collect(), endpoints: 
profile.endpoints.iter().map(endpoint_from_proto).collect(), @@ -316,23 +350,23 @@ impl ProviderTypeProfile { vars } - /// Whether this profile can be created without an initial access token because - /// the gateway can mint at least one credential immediately from refresh - /// material, and no required credential falls outside that gateway-mintable set. + /// Whether this profile can be created without initial static credentials. + /// + /// Empty provider creation is allowed when at least one credential can be + /// resolved at runtime, and every required credential can be resolved at + /// runtime. Runtime-resolvable credentials are either gateway-mintable + /// refresh credentials or sandbox-side dynamic token grants. #[must_use] - pub fn allows_gateway_refresh_bootstrap(&self) -> bool { - let mut has_gateway_mintable_credential = false; + pub fn allows_empty_provider_credentials(&self) -> bool { + let mut has_runtime_resolvable_credential = false; for credential in &self.credentials { - let is_gateway_mintable = credential - .refresh - .as_ref() - .is_some_and(CredentialRefreshProfile::is_gateway_mintable); - if credential.required && !is_gateway_mintable { + let is_runtime_resolvable = credential.is_runtime_resolvable(); + if credential.required && !is_runtime_resolvable { return false; } - has_gateway_mintable_credential |= is_gateway_mintable; + has_runtime_resolvable_credential |= is_runtime_resolvable; } - has_gateway_mintable_credential + has_runtime_resolvable_credential } #[must_use] @@ -355,6 +389,7 @@ impl ProviderTypeProfile { query_param: credential.query_param.clone(), refresh: credential.refresh.as_ref().map(credential_refresh_to_proto), path_template: credential.path_template.clone(), + token_grant: credential.token_grant.as_ref().map(token_grant_to_proto), }) .collect(), endpoints: self.endpoints.iter().map(endpoint_to_proto).collect(), @@ -375,6 +410,17 @@ impl ProviderTypeProfile { } } +impl CredentialProfile { + #[must_use] + pub fn 
is_runtime_resolvable(&self) -> bool { + self.token_grant.is_some() + || self + .refresh + .as_ref() + .is_some_and(CredentialRefreshProfile::is_gateway_mintable) + } +} + impl CredentialRefreshProfile { #[must_use] pub fn is_gateway_mintable(&self) -> bool { @@ -602,6 +648,66 @@ fn credential_refresh_to_proto(refresh: &CredentialRefreshProfile) -> ProviderCr } } +fn token_grant_from_proto( + token_grant: &openshell_core::proto::ProviderCredentialTokenGrant, +) -> TokenGrantProfile { + TokenGrantProfile { + token_endpoint: token_grant.token_endpoint.clone(), + audience: token_grant.audience.clone(), + jwt_svid_audience: token_grant.jwt_svid_audience.clone(), + client_assertion_type: token_grant.client_assertion_type.clone(), + scopes: token_grant.scopes.clone(), + cache_ttl_seconds: token_grant.cache_ttl_seconds, + audience_overrides: token_grant + .audience_overrides + .iter() + .map(token_grant_audience_override_from_proto) + .collect(), + } +} + +fn token_grant_to_proto( + token_grant: &TokenGrantProfile, +) -> openshell_core::proto::ProviderCredentialTokenGrant { + openshell_core::proto::ProviderCredentialTokenGrant { + token_endpoint: token_grant.token_endpoint.clone(), + audience: token_grant.audience.clone(), + jwt_svid_audience: token_grant.jwt_svid_audience.clone(), + client_assertion_type: token_grant.client_assertion_type.clone(), + scopes: token_grant.scopes.clone(), + cache_ttl_seconds: token_grant.cache_ttl_seconds, + audience_overrides: token_grant + .audience_overrides + .iter() + .map(token_grant_audience_override_to_proto) + .collect(), + } +} + +fn token_grant_audience_override_from_proto( + override_config: &openshell_core::proto::ProviderCredentialTokenGrantAudienceOverride, +) -> TokenGrantAudienceOverrideProfile { + TokenGrantAudienceOverrideProfile { + host: override_config.host.clone(), + port: override_config.port, + path: override_config.path.clone(), + audience: override_config.audience.clone(), + scopes: override_config.scopes.clone(), 
+ } +} + +fn token_grant_audience_override_to_proto( + override_config: &TokenGrantAudienceOverrideProfile, +) -> openshell_core::proto::ProviderCredentialTokenGrantAudienceOverride { + openshell_core::proto::ProviderCredentialTokenGrantAudienceOverride { + host: override_config.host.clone(), + port: override_config.port, + path: override_config.path.clone(), + audience: override_config.audience.clone(), + scopes: override_config.scopes.clone(), + } +} + fn discovery_from_proto(discovery: &ProviderProfileDiscovery) -> DiscoveryProfile { DiscoveryProfile { credentials: discovery.credentials.clone(), @@ -1097,6 +1203,43 @@ pub fn validate_profile_set( } } } + + if let Some(token_grant) = credential.token_grant.as_ref() + && let Err(message) = validate_token_grant_endpoint(&token_grant.token_endpoint) + { + diagnostics.push(ProfileValidationDiagnostic::error( + source, + profile_id, + "credentials.token_grant.token_endpoint", + message, + )); + } + diagnostics.extend(validate_token_grant_audience_overrides( + source, + profile_id, + credential, + &profile.endpoints, + )); + if credential.token_grant.is_some() + && let Err(message) = validate_token_grant_auth_style(credential) + { + diagnostics.push(ProfileValidationDiagnostic::error( + source, + profile_id, + "credentials.token_grant.auth_style", + message, + )); + } + if credential.token_grant.is_some() + && let Err(message) = validate_token_grant_header_name(credential) + { + diagnostics.push(ProfileValidationDiagnostic::error( + source, + profile_id, + "credentials.header_name", + message, + )); + } } for (index, endpoint) in profile.endpoints.iter().enumerate() { @@ -1137,6 +1280,337 @@ fn endpoint_is_valid(endpoint: &EndpointProfile) -> bool { (1..=65_535).contains(&endpoint.port) } +#[derive(Debug, Clone)] +struct TokenGrantOverrideBinding { + override_index: usize, + host: String, + port: u32, + path: String, + score: u32, +} + +fn validate_token_grant_audience_overrides( + source: &str, + profile_id: &str, + 
credential: &CredentialProfile, + endpoints: &[EndpointProfile], +) -> Vec { + let Some(token_grant) = credential.token_grant.as_ref() else { + return Vec::new(); + }; + + let mut diagnostics = Vec::new(); + let mut bindings: Vec = Vec::new(); + for (override_index, override_config) in token_grant.audience_overrides.iter().enumerate() { + for endpoint in endpoints { + for port in endpoint_ports(endpoint.port, &endpoint.ports) { + if !token_grant_override_matches_endpoint(override_config, &endpoint.host, port) { + continue; + } + + let host = if override_config.host.trim().is_empty() { + endpoint.host.trim() + } else { + override_config.host.trim() + }; + let path = if override_config.path.trim().is_empty() { + endpoint.path.trim() + } else { + override_config.path.trim() + }; + let candidate = TokenGrantOverrideBinding { + override_index, + host: host.to_ascii_lowercase(), + port, + path: path.to_string(), + score: dynamic_token_grant_match_score(host, path), + }; + for existing in &bindings { + if existing.override_index == candidate.override_index { + continue; + } + if existing.port == candidate.port + && existing.score == candidate.score + && host_patterns_can_overlap(&existing.host, &candidate.host) + && path_patterns_can_overlap(&existing.path, &candidate.path) + { + diagnostics.push(ProfileValidationDiagnostic::error( + source, + profile_id, + "credentials.token_grant.audience_overrides", + format!( + "credential '{}' has ambiguous token_grant audience_overrides at indexes {} and {} for {}:{} path selectors '{}' and '{}'", + credential.name, + existing.override_index, + candidate.override_index, + candidate.host, + candidate.port, + existing.path, + candidate.path + ), + )); + } + } + bindings.push(candidate); + } + } + } + diagnostics +} + +fn endpoint_ports(port: u32, ports: &[u32]) -> Vec { + if ports.is_empty() { + if port == 0 { Vec::new() } else { vec![port] } + } else { + ports.iter().copied().filter(|port| *port != 0).collect() + } +} + +fn 
token_grant_override_matches_endpoint( + override_config: &TokenGrantAudienceOverrideProfile, + endpoint_host: &str, + endpoint_port: u32, +) -> bool { + let override_host = override_config.host.trim(); + let host_matches = override_host.is_empty() + || host_pattern_matches(override_host, endpoint_host) + || host_pattern_matches(endpoint_host, override_host); + let port_matches = override_config.port == 0 || override_config.port == endpoint_port; + host_matches && port_matches +} + +fn host_pattern_matches(pattern: &str, host: &str) -> bool { + let pattern = pattern.to_ascii_lowercase(); + let host = host.to_ascii_lowercase(); + if pattern == host { + return true; + } + if !pattern.contains('*') { + return false; + } + + let pattern_labels: Vec<&str> = pattern.split('.').collect(); + let host_labels: Vec<&str> = host.split('.').collect(); + host_pattern_labels_match(&pattern_labels, &host_labels) +} + +fn host_pattern_labels_match(pattern: &[&str], host: &[&str]) -> bool { + match pattern.split_first() { + None => host.is_empty(), + Some((label, rest)) if *label == "**" => { + host_pattern_labels_match(rest, host) + || (!host.is_empty() && host_pattern_labels_match(pattern, &host[1..])) + } + Some((label, rest)) if *label == "*" => { + !host.is_empty() && host_pattern_labels_match(rest, &host[1..]) + } + Some((literal, rest)) => { + host.first().is_some_and(|label| label == literal) + && host_pattern_labels_match(rest, &host[1..]) + } + } +} + +fn dynamic_token_grant_match_score(host: &str, path: &str) -> u32 { + host_pattern_specificity(host) + endpoint_path_specificity(path) +} + +fn host_pattern_specificity(pattern: &str) -> u32 { + let wildcard_penalty = count_as_u32(pattern.matches('*').count()); + let label_count = count_as_u32(pattern.split('.').filter(|label| !label.is_empty()).count()); + let literal_chars = count_as_u32(pattern.chars().filter(|ch| *ch != '*').count()); + 100_000u32 + .saturating_sub(wildcard_penalty.saturating_mul(10_000)) + 
.saturating_add(label_count.saturating_mul(100)) + .saturating_add(literal_chars) +} + +fn endpoint_path_specificity(path: &str) -> u32 { + if path.is_empty() || path == "**" { + return 0; + } + 1_000_000u32.saturating_add(count_as_u32(path.chars().filter(|ch| *ch != '*').count())) +} + +fn count_as_u32(count: usize) -> u32 { + u32::try_from(count).unwrap_or(u32::MAX) +} + +fn host_patterns_can_overlap(first: &str, second: &str) -> bool { + let first = first.to_ascii_lowercase(); + let second = second.to_ascii_lowercase(); + if !first.contains('*') { + return host_pattern_matches(&second, &first); + } + if !second.contains('*') { + return host_pattern_matches(&first, &second); + } + let first_labels: Vec<&str> = first.split('.').collect(); + let second_labels: Vec<&str> = second.split('.').collect(); + host_pattern_labels_can_overlap(&first_labels, &second_labels) +} + +fn host_pattern_labels_can_overlap(first: &[&str], second: &[&str]) -> bool { + match (first.split_first(), second.split_first()) { + (None, None) => true, + (None, Some((label, rest))) if *label == "**" => { + host_pattern_labels_can_overlap(first, rest) + } + (Some((label, rest)), None) if *label == "**" => { + host_pattern_labels_can_overlap(rest, second) + } + (None, _) | (_, None) => false, + (Some((label, rest)), _) if *label == "**" => { + host_pattern_labels_can_overlap(rest, second) + || host_pattern_labels_can_overlap(first, &second[1..]) + } + (_, Some((label, rest))) if *label == "**" => { + host_pattern_labels_can_overlap(first, rest) + || host_pattern_labels_can_overlap(&first[1..], second) + } + (Some((first_label, first_rest)), Some((second_label, second_rest))) => { + (*first_label == "*" || *second_label == "*" || first_label == second_label) + && host_pattern_labels_can_overlap(first_rest, second_rest) + } + } +} + +fn path_patterns_can_overlap(first: &str, second: &str) -> bool { + if path_matches_all(first) || path_matches_all(second) { + return true; + } + if 
!first.contains('*') { + return endpoint_path_matches(second, first); + } + if !second.contains('*') { + return endpoint_path_matches(first, second); + } + match (path_prefix_pattern(first), path_prefix_pattern(second)) { + (Some(first_prefix), Some(second_prefix)) => { + first_prefix == second_prefix + || first_prefix.starts_with(&format!("{second_prefix}/")) + || second_prefix.starts_with(&format!("{first_prefix}/")) + } + _ => true, + } +} + +fn path_matches_all(path: &str) -> bool { + path.is_empty() || path == "**" || path == "/**" +} + +fn path_prefix_pattern(path: &str) -> Option<&str> { + path.strip_suffix("/**") +} + +fn endpoint_path_matches(pattern: &str, path: &str) -> bool { + if path_matches_all(pattern) { + return true; + } + if pattern == path { + return true; + } + if let Some(prefix) = path_prefix_pattern(pattern) { + return path == prefix || path.starts_with(&format!("{prefix}/")); + } + glob::Pattern::new(pattern).is_ok_and(|glob| glob.matches(path)) +} + +fn validate_token_grant_endpoint(token_endpoint: &str) -> Result<(), String> { + let url = url::Url::parse(token_endpoint) + .map_err(|_| "token_endpoint must be an absolute URL".to_string())?; + if token_endpoint_transport_allowed(&url) { + return Ok(()); + } + + Err( + "token_endpoint must use https, except http for loopback or in-cluster service hosts" + .to_string(), + ) +} + +fn validate_token_grant_auth_style(credential: &CredentialProfile) -> Result<(), String> { + match credential.auth_style.trim().to_ascii_lowercase().as_str() { + "" | "bearer" | "header" => Ok(()), + _ => Err("token_grant credentials support auth_style bearer or header".to_string()), + } +} + +fn validate_token_grant_header_name(credential: &CredentialProfile) -> Result<(), String> { + let header_name = match credential.auth_style.trim().to_ascii_lowercase().as_str() { + "" | "bearer" if credential.header_name.trim().is_empty() => "Authorization", + "" | "bearer" | "header" => credential.header_name.trim(), + _ => 
return Ok(()), + }; + if header_name.is_empty() { + return Ok(()); + } + let valid = header_name.bytes().all(|byte| { + byte.is_ascii_alphanumeric() + || matches!( + byte, + b'!' | b'#' + | b'$' + | b'%' + | b'&' + | b'\'' + | b'*' + | b'+' + | b'-' + | b'.' + | b'^' + | b'_' + | b'`' + | b'|' + | b'~' + ) + }); + if !valid { + return Err("token_grant header_name is not a valid HTTP header name".to_string()); + } + match header_name.to_ascii_lowercase().as_str() { + "host" | "content-length" | "transfer-encoding" | "connection" => Err( + "token_grant header_name may not override HTTP framing or connection headers" + .to_string(), + ), + _ => Ok(()), + } +} + +fn token_endpoint_transport_allowed(url: &url::Url) -> bool { + match url.scheme() { + "https" => true, + "http" => url + .host_str() + .is_some_and(|host| is_loopback_host(host) || is_kubernetes_service_host(host)), + _ => false, + } +} + +fn is_loopback_host(host: &str) -> bool { + let host = host.trim_matches(['[', ']']); + if host.eq_ignore_ascii_case("localhost") { + return true; + } + + match host.parse::() { + Ok(IpAddr::V4(v4)) => v4.is_loopback(), + Ok(IpAddr::V6(v6)) => { + v6.is_loopback() || v6.to_ipv4_mapped().is_some_and(|v4| v4.is_loopback()) + } + Err(_) => false, + } +} + +fn is_kubernetes_service_host(host: &str) -> bool { + let host = host.trim_end_matches('.').to_ascii_lowercase(); + let labels = host.split('.').collect::>(); + let is_service_name = labels.len() == 3 && labels[2] == "svc"; + let is_cluster_local_service = + labels.len() == 5 && labels[2] == "svc" && labels[3] == "cluster" && labels[4] == "local"; + (is_service_name || is_cluster_local_service) && labels.iter().all(|label| !label.is_empty()) +} + static DEFAULT_PROFILES: OnceLock> = OnceLock::new(); #[must_use] @@ -1249,13 +1723,14 @@ mod tests { vec!["service_account_token", "gcloud_adc_token"] ); assert!( - profile.allows_gateway_refresh_bootstrap(), + profile.allows_empty_provider_credentials(), "Vertex profile should 
allow empty-create bootstrap via gateway-mintable credentials" ); } #[test] - fn refresh_bootstrap_requires_a_gateway_mintable_path_and_no_required_static_credentials() { + fn empty_provider_credentials_require_a_runtime_resolvable_path_and_no_required_static_credentials() + { let optional_refresh_profile = parse_profile_yaml( r" id: optional-refresh @@ -1268,7 +1743,21 @@ credentials: ", ) .expect("profile"); - assert!(optional_refresh_profile.allows_gateway_refresh_bootstrap()); + assert!(optional_refresh_profile.allows_empty_provider_credentials()); + + let token_grant_profile = parse_profile_yaml( + r" +id: token-grant +display_name: Token Grant +credentials: + - name: access_token + required: true + token_grant: + token_endpoint: https://auth.example.com/token +", + ) + .expect("profile"); + assert!(token_grant_profile.allows_empty_provider_credentials()); let mixed_required_profile = parse_profile_yaml( r" @@ -1284,7 +1773,7 @@ credentials: ", ) .expect("profile"); - assert!(!mixed_required_profile.allows_gateway_refresh_bootstrap()); + assert!(!mixed_required_profile.allows_empty_provider_credentials()); let static_only_profile = parse_profile_yaml( r" @@ -1296,7 +1785,7 @@ credentials: ", ) .expect("profile"); - assert!(!static_only_profile.allows_gateway_refresh_bootstrap()); + assert!(!static_only_profile.allows_empty_provider_credentials()); } #[test] @@ -1439,6 +1928,317 @@ credentials: ); } + #[test] + fn token_grant_audience_overrides_round_trip_through_proto() { + let profile = parse_profile_yaml( + r" +id: keycloak-example +display_name: Keycloak Example +credentials: + - name: access_token + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: http://keycloak.default.svc.cluster.local/realms/openshell/protocol/openid-connect/token + jwt_svid_audience: http://keycloak.default.svc.cluster.local/realms/openshell + client_assertion_type: urn:ietf:params:oauth:client-assertion-type:jwt-spiffe + audience: api://default + 
scopes: [openid] + cache_ttl_seconds: 300 + audience_overrides: + - host: alpha.default.svc.cluster.local + port: 80 + audience: api://alpha + - host: beta.default.svc.cluster.local + port: 80 + path: /v1/** + audience: api://beta + scopes: [beta.read] +", + ) + .expect("profile should parse"); + + let token_grant = profile.credentials[0] + .token_grant + .as_ref() + .expect("token grant should parse"); + assert_eq!( + token_grant.jwt_svid_audience, + "http://keycloak.default.svc.cluster.local/realms/openshell" + ); + assert_eq!( + token_grant.client_assertion_type, + "urn:ietf:params:oauth:client-assertion-type:jwt-spiffe" + ); + assert_eq!(token_grant.audience_overrides.len(), 2); + assert_eq!(token_grant.audience_overrides[1].path, "/v1/**"); + assert_eq!(token_grant.audience_overrides[1].scopes, vec!["beta.read"]); + + let reparsed = ProviderTypeProfile::from_proto(&profile.to_proto()); + let reparsed_token_grant = reparsed.credentials[0] + .token_grant + .as_ref() + .expect("token grant should round trip"); + assert_eq!( + reparsed_token_grant.jwt_svid_audience, + token_grant.jwt_svid_audience + ); + assert_eq!( + reparsed_token_grant.audience_overrides, + token_grant.audience_overrides + ); + } + + #[test] + fn validate_profile_set_rejects_plain_http_token_endpoint() { + for token_endpoint in [ + "http://auth.example.com/token", + "http://token-issuer.default.svc.evil.com/token", + ] { + let profile = parse_profile_yaml(&format!( + r" +id: insecure-token-grant +display_name: Insecure Token Grant +credentials: + - name: access_token + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: {token_endpoint} + audience: api://default +" + )) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("insecure.yaml".to_string(), profile)]); + let diagnostic = diagnostics + .iter() + .find(|diagnostic| diagnostic.field == "credentials.token_grant.token_endpoint") + .expect("token endpoint diagnostic should be 
reported"); + + assert_eq!( + diagnostic.message, + "token_endpoint must use https, except http for loopback or in-cluster service hosts" + ); + } + } + + #[test] + fn validate_profile_set_allows_https_loopback_and_in_cluster_token_endpoints() { + for token_endpoint in [ + "https://auth.example.com/token", + "http://127.0.0.1:8180/token", + "http://token-issuer.default.svc.cluster.local/token", + ] { + let profile = parse_profile_yaml(&format!( + r" +id: secure-token-grant +display_name: Secure Token Grant +credentials: + - name: access_token + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: {token_endpoint} + audience: api://default +" + )) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("secure.yaml".to_string(), profile)]); + assert!( + diagnostics.is_empty(), + "unexpected diagnostics for {token_endpoint}: {diagnostics:?}" + ); + } + } + + #[test] + fn validate_profile_set_rejects_relative_token_endpoint() { + let profile = parse_profile_yaml( + r" +id: relative-token-grant +display_name: Relative Token Grant +credentials: + - name: access_token + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: /token + audience: api://default +", + ) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("relative.yaml".to_string(), profile)]); + let diagnostic = diagnostics + .iter() + .find(|diagnostic| diagnostic.field == "credentials.token_grant.token_endpoint") + .expect("token endpoint diagnostic should be reported"); + + assert_eq!(diagnostic.message, "token_endpoint must be an absolute URL"); + } + + #[test] + fn validate_profile_set_rejects_token_grant_query_or_path_auth_style() { + for auth_style in ["query", "path"] { + let profile = parse_profile_yaml(&format!( + r" +id: unsupported-token-grant-style +display_name: Unsupported Token Grant Style +credentials: + - name: access_token + auth_style: {auth_style} + token_grant: + token_endpoint: 
https://auth.example.com/token +" + )) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("unsupported.yaml".to_string(), profile)]); + let diagnostic = diagnostics + .iter() + .find(|diagnostic| diagnostic.field == "credentials.token_grant.auth_style") + .expect("auth style diagnostic should be reported"); + + assert_eq!( + diagnostic.message, + "token_grant credentials support auth_style bearer or header" + ); + } + } + + #[test] + fn validate_profile_set_requires_header_name_for_token_grant_header_auth_style() { + let profile = parse_profile_yaml( + r" +id: missing-header-token-grant +display_name: Missing Header Token Grant +credentials: + - name: access_token + auth_style: header + token_grant: + token_endpoint: https://auth.example.com/token +", + ) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("missing-header.yaml".to_string(), profile)]); + let diagnostic = diagnostics + .iter() + .find(|diagnostic| diagnostic.field == "credentials.header_name") + .expect("header_name diagnostic should be reported"); + + assert_eq!( + diagnostic.message, + "header_name is required for header auth" + ); + } + + #[test] + fn validate_profile_set_rejects_token_grant_framing_header_name() { + let profile = parse_profile_yaml( + r" +id: framing-header-token-grant +display_name: Framing Header Token Grant +credentials: + - name: access_token + auth_style: header + header_name: Content-Length + token_grant: + token_endpoint: https://auth.example.com/token +", + ) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("framing.yaml".to_string(), profile)]); + let diagnostic = diagnostics + .iter() + .find(|diagnostic| { + diagnostic.field == "credentials.header_name" + && diagnostic.message.contains("HTTP framing") + }) + .expect("framing header diagnostic should be reported"); + + assert_eq!( + diagnostic.message, + "token_grant header_name may not override HTTP framing or connection 
headers" + ); + } + + #[test] + fn validate_profile_set_rejects_ambiguous_same_credential_audience_overrides() { + let profile = parse_profile_yaml( + r" +id: ambiguous-token-grant +display_name: Ambiguous Token Grant +credentials: + - name: access_token + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: https://auth.example.com/token + audience: api://default + audience_overrides: + - audience: api://alpha + - host: alpha.default.svc.cluster.local + audience: api://beta +endpoints: + - host: alpha.default.svc.cluster.local + port: 80 + path: /v1/** +", + ) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("ambiguous.yaml".to_string(), profile)]); + let diagnostic = diagnostics + .iter() + .find(|diagnostic| diagnostic.field == "credentials.token_grant.audience_overrides") + .expect("audience override diagnostic should be reported"); + + assert!( + diagnostic + .message + .contains("ambiguous token_grant audience_overrides") + ); + assert!(diagnostic.message.contains("indexes 0 and 1")); + } + + #[test] + fn validate_profile_set_allows_more_specific_audience_override_path() { + let profile = parse_profile_yaml( + r" +id: specific-token-grant +display_name: Specific Token Grant +credentials: + - name: access_token + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: https://auth.example.com/token + audience: api://default + audience_overrides: + - path: /v1/** + audience: api://alpha + - path: /v1/admin/** + audience: api://admin +endpoints: + - host: alpha.default.svc.cluster.local + port: 80 + path: /v1/** +", + ) + .expect("profile should parse"); + + let diagnostics = validate_profile_set(&[("specific.yaml".to_string(), profile)]); + + assert!( + diagnostics.is_empty(), + "unexpected diagnostics: {diagnostics:?}" + ); + } + #[test] fn profile_json_round_trip_preserves_compact_dto_shape() { let profile = get_default_profile("github").expect("github profile"); diff --git 
a/crates/openshell-sandbox/Cargo.toml b/crates/openshell-sandbox/Cargo.toml index cf98193a2..730337266 100644 --- a/crates/openshell-sandbox/Cargo.toml +++ b/crates/openshell-sandbox/Cargo.toml @@ -24,8 +24,9 @@ openshell-router = { path = "../openshell-router" } tokio = { workspace = true } # gRPC -tonic = { workspace = true, features = ["channel", "tls"] } +tonic = { workspace = true, features = ["channel", "tls-native-roots"] } tokio-stream = { workspace = true } +spiffe = { workspace = true } # CLI clap = { workspace = true } @@ -52,6 +53,7 @@ webpki-roots = { workspace = true } # HTTP bytes = { workspace = true } +reqwest = { workspace = true } # UUID uuid = { workspace = true } diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index ffa22f951..e35d2701d 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -4,8 +4,8 @@ //! gRPC client for fetching sandbox policy, provider environment, and inference //! route bundles from `OpenShell` server. //! -//! Every request carries a gateway-minted JWT in the `Authorization` header. -//! The token is resolved at startup from one of three sources: +//! Every request carries a sandbox bearer credential in the `Authorization` +//! header. The token is resolved at startup from one of three sources: //! //! 1. `OPENSHELL_SANDBOX_TOKEN` — raw JWT in the env (test harness path). //! 2. `OPENSHELL_SANDBOX_TOKEN_FILE` — file containing the JWT (Docker / @@ -15,7 +15,7 @@ //! supervisor exchanges it for a gateway JWT via `IssueSandboxToken` //! once at startup. //! -//! The resolved gateway JWT is held in process memory thereafter and +//! The resolved bearer credential is held in process memory thereafter and //! injected on every outbound call by [`AuthInterceptor`]. 
use std::collections::HashMap; @@ -54,18 +54,12 @@ enum TokenSource { K8sServiceAccount, } -#[derive(Debug)] -struct AcquiredToken { - token: String, - source: TokenSource, -} - /// Process-wide token slot. Initialized by the first [`connect_channel`] /// call and shared with every subsequent client and the renewal loop. static TOKEN_SLOT: OnceLock = OnceLock::new(); -/// Source used to acquire the process-wide token slot. -static TOKEN_SOURCE: OnceLock = OnceLock::new(); +/// Refresh strategy used by the process-wide token slot. +static TOKEN_REFRESH_MODE: OnceLock = OnceLock::new(); /// Serializes the first token acquisition. Several supervisor subsystems /// connect during startup; without this guard they can all observe an empty @@ -75,6 +69,17 @@ static TOKEN_INIT_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(( /// One-shot guard so the renewal loop spawns at most once per process. static REFRESH_SPAWNED: OnceLock<()> = OnceLock::new(); +#[derive(Clone, Debug)] +enum RefreshMode { + GatewayJwt(TokenSource), +} + +#[derive(Debug)] +struct AcquiredToken { + token: String, + refresh_mode: RefreshMode, +} + fn install_token_slot(token: &str) -> Result { let bearer = AsciiMetadataValue::try_from(format!("Bearer {token}")) .into_diagnostic() @@ -189,10 +194,11 @@ async fn build_plain_channel(endpoint: &str) -> Result { /// spawned once per process via [`REFRESH_SPAWNED`]. 
async fn connect_channel(endpoint: &str) -> Result { let channel = build_plain_channel(endpoint).await?; - let (slot, source) = token_slot(endpoint, &channel).await?; + let (slot, refresh_mode) = token_slot(endpoint, &channel).await?; let plain_channel = channel.clone(); let intercepted = InterceptedService::new(channel, AuthInterceptor::new(slot.clone())); if REFRESH_SPAWNED.set(()).is_ok() { + let RefreshMode::GatewayJwt(source) = refresh_mode; let refresh_channel = intercepted.clone(); let endpoint = endpoint.to_string(); tokio::spawn(async move { @@ -202,23 +208,29 @@ async fn connect_channel(endpoint: &str) -> Result { Ok(intercepted) } -async fn token_slot(endpoint: &str, plain_channel: &Channel) -> Result<(TokenSlot, TokenSource)> { +async fn token_slot(endpoint: &str, plain_channel: &Channel) -> Result<(TokenSlot, RefreshMode)> { if let Some(existing) = TOKEN_SLOT.get() { - let source = TOKEN_SOURCE.get().copied().unwrap_or(TokenSource::Env); - return Ok((existing.clone(), source)); + let refresh_mode = TOKEN_REFRESH_MODE + .get() + .cloned() + .unwrap_or(RefreshMode::GatewayJwt(TokenSource::Env)); + return Ok((existing.clone(), refresh_mode)); } let _guard = TOKEN_INIT_LOCK.lock().await; if let Some(existing) = TOKEN_SLOT.get() { - let source = TOKEN_SOURCE.get().copied().unwrap_or(TokenSource::Env); - return Ok((existing.clone(), source)); + let refresh_mode = TOKEN_REFRESH_MODE + .get() + .cloned() + .unwrap_or(RefreshMode::GatewayJwt(TokenSource::Env)); + return Ok((existing.clone(), refresh_mode)); } let acquired = acquire_sandbox_token(endpoint, plain_channel).await?; let slot = install_token_slot(&acquired.token)?; - let _ = TOKEN_SOURCE.set(acquired.source); - Ok((slot, acquired.source)) + let _ = TOKEN_REFRESH_MODE.set(acquired.refresh_mode.clone()); + Ok((slot, acquired.refresh_mode)) } /// Resolve the sandbox JWT used to authenticate every outbound RPC. 
@@ -234,7 +246,7 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul debug!(source = "env", "loaded sandbox token"); return Ok(AcquiredToken { token: t, - source: TokenSource::Env, + refresh_mode: RefreshMode::GatewayJwt(TokenSource::Env), }); } @@ -247,7 +259,7 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul debug!(source = "file", path = %path, "loaded sandbox token"); return Ok(AcquiredToken { token: contents.trim().to_string(), - source: TokenSource::File, + refresh_mode: RefreshMode::GatewayJwt(TokenSource::File), }); } @@ -256,7 +268,7 @@ async fn acquire_sandbox_token(endpoint: &str, plain_channel: &Channel) -> Resul { return Ok(AcquiredToken { token: acquire_k8s_sandbox_token(endpoint, plain_channel, &sa_path).await?, - source: TokenSource::K8sServiceAccount, + refresh_mode: RefreshMode::GatewayJwt(TokenSource::K8sServiceAccount), }); } @@ -674,6 +686,7 @@ pub async fn fetch_provider_environment( environment: inner.environment, provider_env_revision: inner.provider_env_revision, credential_expires_at_ms: inner.credential_expires_at_ms, + dynamic_credentials: inner.dynamic_credentials, }) } @@ -704,6 +717,7 @@ pub struct ProviderEnvironmentResult { pub environment: HashMap, pub provider_env_revision: u64, pub credential_expires_at_ms: HashMap, + pub dynamic_credentials: HashMap, } impl CachedOpenShellClient { diff --git a/crates/openshell-sandbox/src/l7/graphql.rs b/crates/openshell-sandbox/src/l7/graphql.rs index 2ff502d1c..82c35720e 100644 --- a/crates/openshell-sandbox/src/l7/graphql.rs +++ b/crates/openshell-sandbox/src/l7/graphql.rs @@ -802,6 +802,8 @@ network_policies: cmdline_paths: Vec::new(), secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let request_info = crate::l7::L7RequestInfo { action: req.action, diff --git a/crates/openshell-sandbox/src/l7/mod.rs b/crates/openshell-sandbox/src/l7/mod.rs index 703aafae4..365bb379a 
100644 --- a/crates/openshell-sandbox/src/l7/mod.rs +++ b/crates/openshell-sandbox/src/l7/mod.rs @@ -15,6 +15,7 @@ pub mod provider; pub mod relay; pub mod rest; pub mod tls; +pub(crate) mod token_grant_injection; pub(crate) mod websocket; /// Application-layer protocol for L7 inspection. diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index 9efa7ca9f..40b002535 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -40,6 +40,17 @@ pub struct L7EvalContext { pub(crate) secret_resolver: Option>, /// Anonymous activity counter channel. pub(crate) activity_tx: Option, + /// Dynamic credentials (token grants) keyed by endpoint-bound provider metadata. + pub(crate) dynamic_credentials: Option< + Arc< + std::sync::RwLock< + std::collections::HashMap, + >, + >, + >, + /// Dynamic token grant resolver for endpoint-bound credentials. + pub(crate) token_grant_resolver: + Option>, } #[derive(Default)] @@ -769,9 +780,24 @@ where let _ = &eval_target; if allowed || config.enforcement == EnforcementMode::Audit { + let req_with_auth = + match crate::l7::token_grant_injection::inject_if_needed(req, ctx).await { + Ok(req) => req, + Err(e) => { + warn!( + host = %ctx.host, + port = ctx.port, + error = %e, + "Token grant failed in L7 relay" + ); + write_bad_gateway_response(client).await?; + return Ok(()); + } + }; + // Forward request to upstream and relay response let outcome = crate::l7::rest::relay_http_request_with_options_guarded( - &req, + &req_with_auth, client, upstream, crate::l7::rest::RelayRequestOptions { @@ -802,7 +828,7 @@ where ctx, websocket_request, &redacted_target, - &req.query_params, + &req_with_auth.query_params, Some(engine), ); options.websocket.permessage_deflate = websocket_permessage_deflate; @@ -1258,11 +1284,26 @@ where ocsf_emit!(event); } + let req_with_auth = match crate::l7::token_grant_injection::inject_if_needed(req, ctx).await + { + Ok(req) => req, + 
Err(e) => { + warn!( + host = %ctx.host, + port = ctx.port, + error = %e, + "Token grant failed in passthrough relay" + ); + write_bad_gateway_response(client).await?; + return Ok(()); + } + }; + // Forward request with credential rewriting and relay the response. // relay_http_request_with_resolver handles both directions: it sends // the request upstream and reads the response back to the client. let outcome = crate::l7::rest::relay_http_request_with_options_guarded( - &req, + &req_with_auth, client, upstream, crate::l7::rest::RelayRequestOptions { @@ -1300,6 +1341,16 @@ where Ok(()) } +async fn write_bad_gateway_response(client: &mut W) -> Result<()> +where + W: AsyncWrite + Unpin, +{ + let response = b"HTTP/1.1 502 Bad Gateway\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"; + client.write_all(response).await.into_diagnostic()?; + client.flush().await.into_diagnostic()?; + Ok(()) +} + #[cfg(test)] mod tests { use super::*; @@ -1309,6 +1360,128 @@ mod tests { const TEST_POLICY: &str = include_str!("../../data/sandbox-policy.rego"); + fn rest_token_grant_relay_context( + resolver_response: std::result::Result<&str, &str>, + ) -> ( + L7EndpointConfig, + TunnelPolicyEngine, + L7EvalContext, + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture, + ) { + let data = r#" +network_policies: + rest_api: + name: rest_api + endpoints: + - host: api.example.test + port: 8080 + protocol: rest + enforcement: enforce + rules: + - allow: + method: GET + path: "/v1/**" + binaries: + - { path: /usr/bin/curl } +"#; + let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap(); + let input = NetworkInput { + host: "api.example.test".into(), + port: 8080, + binary_path: PathBuf::from("/usr/bin/curl"), + binary_sha256: "unused".into(), + ancestors: vec![], + cmdline_paths: vec![], + }; + let (endpoint_config, generation) = engine + .query_endpoint_config_with_generation(&input) + .unwrap(); + let config = 
crate::l7::parse_l7_config(&endpoint_config.unwrap()).unwrap(); + let tunnel_engine = engine.clone_engine_for_tunnel(generation).unwrap(); + let provider_key = "api.example.test\t8080\t/v1/**\tprovider:access_token"; + let fixture = match resolver_response { + Ok(token) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::success( + provider_key, + token, + ) + } + Err(error) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::failure( + provider_key, + error, + ) + } + }; + let ctx = L7EvalContext { + host: "api.example.test".into(), + port: 8080, + policy_name: "rest_api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + + (config, tunnel_engine, ctx, fixture) + } + + fn passthrough_token_grant_relay_context( + resolver_response: std::result::Result<&str, &str>, + ) -> ( + PolicyGenerationGuard, + L7EvalContext, + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture, + ) { + let policy_data = "network_policies: {}\n"; + let engine = OpaEngine::from_strings(TEST_POLICY, policy_data).unwrap(); + let generation_guard = engine + .generation_guard(engine.current_generation()) + .unwrap(); + let provider_key = "api.example.test\t8080\t/v1/**\tprovider:access_token"; + let fixture = match resolver_response { + Ok(token) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::success( + provider_key, + token, + ) + } + Err(error) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::failure( + provider_key, + error, + ) + } + }; + let ctx = L7EvalContext { + host: "api.example.test".into(), + port: 8080, + policy_name: "rest_api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + 
activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + + (generation_guard, ctx, fixture) + } + + fn authorization_header_count(headers: &str) -> usize { + headers + .lines() + .filter(|line| { + line.split_once(':') + .is_some_and(|(name, _)| name.eq_ignore_ascii_case("authorization")) + }) + .count() + } + #[test] fn parse_rejection_detail_adds_l7_hint_for_encoded_slash() { let detail = parse_rejection_detail( @@ -1342,6 +1515,234 @@ mod tests { ); } + #[tokio::test] + async fn l7_rest_relay_injects_token_grant_authorization_header() { + let (config, tunnel_engine, ctx, fixture) = + rest_token_grant_relay_context(Ok("grant-token")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_with_inspection( + &config, + tunnel_engine, + &mut relay_client, + &mut relay_upstream, + &ctx, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nAuthorization: Bearer stale-token\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + let mut upstream_request = [0u8; 1024]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("request should reach upstream") + .unwrap(); + let upstream_request = String::from_utf8_lossy(&upstream_request[..n]); + + assert!(upstream_request.starts_with("GET /v1/projects HTTP/1.1\r\n")); + assert!(upstream_request.contains("Authorization: Bearer grant-token\r\n")); + assert!(!upstream_request.contains("stale-token")); + assert_eq!(authorization_header_count(&upstream_request), 1); + + upstream + .write_all(b"HTTP/1.1 204 No Content\r\nContent-Length: 0\r\nConnection: close\r\n\r\n") + .await + .unwrap(); + + let mut client_response = [0u8; 512]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + 
app.read(&mut client_response), + ) + .await + .expect("response should reach client") + .unwrap(); + assert!(String::from_utf8_lossy(&client_response[..n]).contains("204 No Content")); + drop(app); + + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should finish") + .unwrap() + .unwrap(); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn l7_rest_relay_token_grant_failure_does_not_forward_request() { + let (config, tunnel_engine, ctx, fixture) = + rest_token_grant_relay_context(Err("oauth unavailable")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_with_inspection( + &config, + tunnel_engine, + &mut relay_client, + &mut relay_upstream, + &ctx, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should finish") + .unwrap() + .unwrap(); + + let mut client_response = [0u8; 512]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + app.read(&mut client_response), + ) + .await + .expect("bad gateway response should reach client") + .unwrap(); + assert!(String::from_utf8_lossy(&client_response[..n]).contains("502 Bad Gateway")); + + let mut upstream_request = [0u8; 128]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("upstream should close without forwarded data") + .unwrap(); + assert_eq!(n, 0, "unauthenticated request must not reach upstream"); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn passthrough_relay_injects_token_grant_authorization_header() { + let 
(generation_guard, ctx, fixture) = + passthrough_token_grant_relay_context(Ok("grant-token")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_passthrough_with_credentials( + &mut relay_client, + &mut relay_upstream, + &ctx, + &generation_guard, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nAuthorization: Bearer stale-token\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + let mut upstream_request = [0u8; 1024]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("request should reach upstream") + .unwrap(); + let upstream_request = String::from_utf8_lossy(&upstream_request[..n]); + + assert!(upstream_request.starts_with("GET /v1/projects HTTP/1.1\r\n")); + assert!(upstream_request.contains("Authorization: Bearer grant-token\r\n")); + assert!(!upstream_request.contains("stale-token")); + assert_eq!(authorization_header_count(&upstream_request), 1); + + upstream + .write_all(b"HTTP/1.1 204 No Content\r\nContent-Length: 0\r\nConnection: close\r\n\r\n") + .await + .unwrap(); + + let mut client_response = [0u8; 512]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + app.read(&mut client_response), + ) + .await + .expect("response should reach client") + .unwrap(); + assert!(String::from_utf8_lossy(&client_response[..n]).contains("204 No Content")); + drop(app); + + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should finish") + .unwrap() + .unwrap(); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn passthrough_relay_token_grant_failure_returns_bad_gateway_without_forwarding() { + let (generation_guard, ctx, fixture) = + passthrough_token_grant_relay_context(Err("oauth 
unavailable")); + let (mut app, mut relay_client) = tokio::io::duplex(8192); + let (mut relay_upstream, mut upstream) = tokio::io::duplex(8192); + let relay = tokio::spawn(async move { + relay_passthrough_with_credentials( + &mut relay_client, + &mut relay_upstream, + &ctx, + &generation_guard, + ) + .await + }); + + app.write_all( + b"GET /v1/projects HTTP/1.1\r\nHost: api.example.test\r\nConnection: close\r\n\r\n", + ) + .await + .unwrap(); + + tokio::time::timeout(std::time::Duration::from_secs(1), relay) + .await + .expect("relay should finish") + .unwrap() + .unwrap(); + + let mut client_response = [0u8; 512]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + app.read(&mut client_response), + ) + .await + .expect("bad gateway response should reach client") + .unwrap(); + assert!(String::from_utf8_lossy(&client_response[..n]).contains("502 Bad Gateway")); + + let mut upstream_request = [0u8; 128]; + let n = tokio::time::timeout( + std::time::Duration::from_secs(1), + upstream.read(&mut upstream_request), + ) + .await + .expect("upstream should close without forwarded data") + .unwrap(); + assert_eq!(n, 0, "unauthenticated request must not reach upstream"); + + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + #[test] fn websocket_text_policy_requires_explicit_message_rule() { let data = r#" @@ -1383,6 +1784,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let request = L7RequestInfo { action: "WEBSOCKET_TEXT".into(), @@ -1439,6 +1842,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1544,6 +1949,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: resolver.map(Arc::new), activity_tx: None, + dynamic_credentials: None, + 
token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1662,6 +2069,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: resolver.map(Arc::new), activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1833,6 +2242,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); @@ -1921,6 +2332,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut app, mut relay_client) = tokio::io::duplex(8192); diff --git a/crates/openshell-sandbox/src/l7/token_grant_injection.rs b/crates/openshell-sandbox/src/l7/token_grant_injection.rs new file mode 100644 index 000000000..fd803ad2f --- /dev/null +++ b/crates/openshell-sandbox/src/l7/token_grant_injection.rs @@ -0,0 +1,794 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Endpoint-bound dynamic token grant injection for HTTP relay paths. 
+ +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; + +use miette::{Result, miette}; +use openshell_core::proto::{ProviderCredentialTokenGrant, ProviderProfileCredential}; +use openshell_ocsf::{ + ActionId, ActivityId, DispositionId, Endpoint, HttpActivityBuilder, HttpRequest, SeverityId, + StatusId, Url as OcsfUrl, ocsf_emit, +}; +use tracing::warn; + +use crate::l7::provider::L7Request; +use crate::l7::relay::L7EvalContext; + +pub struct TokenGrantRequest<'a> { + pub provider_key: &'a str, + pub token_endpoint: &'a str, + pub jwt_svid_audience: &'a str, + pub client_assertion_type: &'a str, + pub audience: &'a str, + pub scopes: &'a [String], + pub cache_ttl_seconds: i64, +} + +pub trait TokenGrantResolver: Send + Sync { + fn obtain<'a>( + &'a self, + request: TokenGrantRequest<'a>, + ) -> Pin> + Send + 'a>>; +} + +#[derive(Default)] +pub struct SpiffeTokenGrantResolver; + +impl TokenGrantResolver for SpiffeTokenGrantResolver { + fn obtain<'a>( + &'a self, + request: TokenGrantRequest<'a>, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + crate::token_grant::obtain_provider_token( + request.provider_key, + request.token_endpoint, + request.jwt_svid_audience, + request.client_assertion_type, + request.audience, + request.scopes, + request.cache_ttl_seconds, + ) + .await + }) + } +} + +pub fn default_resolver() -> Arc { + Arc::new(SpiffeTokenGrantResolver) +} + +/// Checks for endpoint-bound token grant credentials and injects an +/// Authorization header before forwarding the request upstream. 
+pub async fn inject_if_needed(req: L7Request, ctx: &L7EvalContext) -> Result { + let request_path = req.target.split('?').next().unwrap_or(req.target.as_str()); + let token_grant_credential = ctx.dynamic_credentials.as_ref().and_then(|dyn_creds| { + dyn_creds.read().map_or(None, |creds_guard| { + creds_guard + .iter() + .filter_map(|(key, cred)| { + let score = + dynamic_credential_key_match_score(key, &ctx.host, ctx.port, request_path)?; + cred.token_grant + .is_some() + .then(|| (score, key.clone(), cred.clone())) + }) + .max_by_key(|(score, key, _)| (*score, key.clone())) + .map(|(_, key, cred)| (key, cred)) + }) + }); + + if let Some((provider_key, cred)) = token_grant_credential + && let Some(ref token_grant) = cred.token_grant + { + let resolver = ctx + .token_grant_resolver + .as_ref() + .ok_or_else(|| miette!("token grant resolver unavailable"))?; + let request = token_grant_request(&provider_key, token_grant); + + match resolver.obtain(request).await { + Ok(access_token) => { + let modified_raw_header = + inject_token_grant_header(&req.raw_header, &cred, &access_token)?; + let provider_key = ocsf_message_field(&provider_key); + ocsf_emit!( + HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Other) + .action(ActionId::Allowed) + .disposition(DispositionId::Allowed) + .severity(SeverityId::Informational) + .http_request(HttpRequest::new( + &req.action, + OcsfUrl::new("http", &ctx.host, request_path, ctx.port), + )) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .message(format!( + "Token grant successful for {} to {}:{}", + provider_key, ctx.host, ctx.port + )) + .build() + ); + return Ok(L7Request { + action: req.action, + target: req.target, + query_params: req.query_params, + raw_header: modified_raw_header, + body_length: req.body_length, + }); + } + Err(e) => { + warn!( + host = %ctx.host, + port = ctx.port, + provider = %provider_key, + error = %e, + "Token grant failed" + ); + let provider_key = 
ocsf_message_field(&provider_key); + ocsf_emit!( + HttpActivityBuilder::new(crate::ocsf_ctx()) + .activity(ActivityId::Fail) + .action(ActionId::Denied) + .disposition(DispositionId::Blocked) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .http_request(HttpRequest::new( + &req.action, + OcsfUrl::new("http", &ctx.host, request_path, ctx.port), + )) + .dst_endpoint(Endpoint::from_domain(&ctx.host, ctx.port)) + .message(format!( + "Token grant failed for {} to {}:{}: {}", + provider_key, ctx.host, ctx.port, e + )) + .build() + ); + return Err(miette!("Token grant failed: {}", e)); + } + } + } + + Ok(req) +} + +fn ocsf_message_field(value: &str) -> String { + value + .chars() + .map(|ch| if ch.is_control() { '_' } else { ch }) + .collect() +} + +fn token_grant_request<'a>( + provider_key: &'a str, + token_grant: &'a ProviderCredentialTokenGrant, +) -> TokenGrantRequest<'a> { + TokenGrantRequest { + provider_key, + token_endpoint: &token_grant.token_endpoint, + jwt_svid_audience: &token_grant.jwt_svid_audience, + client_assertion_type: &token_grant.client_assertion_type, + audience: &token_grant.audience, + scopes: &token_grant.scopes, + cache_ttl_seconds: token_grant.cache_ttl_seconds, + } +} + +#[cfg(test)] +fn dynamic_credential_key_matches(key: &str, host: &str, port: u16, request_path: &str) -> bool { + dynamic_credential_key_match_score(key, host, port, request_path).is_some() +} + +fn dynamic_credential_key_match_score( + key: &str, + host: &str, + port: u16, + request_path: &str, +) -> Option { + let mut parts = key.splitn(4, '\t'); + let endpoint_host = parts.next()?; + let endpoint_port = parts.next()?; + let endpoint_path = parts.next()?; + let _provider_key = parts.next()?; + + if endpoint_port.parse::().ok() != Some(port) { + return None; + } + + let host_lc = host.to_ascii_lowercase(); + let endpoint_host_lc = endpoint_host.to_ascii_lowercase(); + if !host_pattern_matches(&endpoint_host_lc, &host_lc) + || 
!crate::l7::endpoint_path_matches(endpoint_path, request_path) + { + return None; + } + + Some(host_pattern_specificity(&endpoint_host_lc) + endpoint_path_specificity(endpoint_path)) +} + +fn host_pattern_matches(pattern: &str, host: &str) -> bool { + if pattern == host { + return true; + } + if !pattern.contains('*') { + return false; + } + + let pattern_labels: Vec<&str> = pattern.split('.').collect(); + let host_labels: Vec<&str> = host.split('.').collect(); + host_pattern_labels_match(&pattern_labels, &host_labels) +} + +fn host_pattern_labels_match(pattern: &[&str], host: &[&str]) -> bool { + match pattern.split_first() { + None => host.is_empty(), + Some((label, rest)) if *label == "**" => { + host_pattern_labels_match(rest, host) + || (!host.is_empty() && host_pattern_labels_match(pattern, &host[1..])) + } + Some((label, rest)) if *label == "*" => { + !host.is_empty() && host_pattern_labels_match(rest, &host[1..]) + } + Some((literal, rest)) => { + host.first().is_some_and(|label| label == literal) + && host_pattern_labels_match(rest, &host[1..]) + } + } +} + +fn host_pattern_specificity(pattern: &str) -> u32 { + let wildcard_penalty = count_as_u32(pattern.matches('*').count()); + let label_count = count_as_u32(pattern.split('.').filter(|label| !label.is_empty()).count()); + let literal_chars = count_as_u32(pattern.chars().filter(|ch| *ch != '*').count()); + 100_000u32 + .saturating_sub(wildcard_penalty.saturating_mul(10_000)) + .saturating_add(label_count.saturating_mul(100)) + .saturating_add(literal_chars) +} + +fn endpoint_path_specificity(path: &str) -> u32 { + if path.is_empty() || path == "**" { + return 0; + } + 1_000_000u32.saturating_add(count_as_u32(path.chars().filter(|ch| *ch != '*').count())) +} + +fn count_as_u32(count: usize) -> u32 { + u32::try_from(count).unwrap_or(u32::MAX) +} + +fn inject_token_grant_header( + raw_header: &[u8], + credential: &ProviderProfileCredential, + access_token: &str, +) -> Result> { + 
crate::token_grant::validate_access_token(access_token)?; + let (header_name, header_value) = token_grant_header(credential, access_token)?; + inject_header(raw_header, &header_name, &header_value) +} + +fn token_grant_header( + credential: &ProviderProfileCredential, + access_token: &str, +) -> Result<(String, String)> { + match credential.auth_style.trim().to_ascii_lowercase().as_str() { + "" | "bearer" => { + let header_name = if credential.header_name.trim().is_empty() { + "Authorization" + } else { + credential.header_name.trim() + }; + validate_header_name(header_name)?; + Ok((header_name.to_string(), format!("Bearer {access_token}"))) + } + "header" => { + let header_name = credential.header_name.trim(); + if header_name.is_empty() { + return Err(miette!( + "token grant auth_style header requires header_name" + )); + } + validate_header_name(header_name)?; + Ok((header_name.to_string(), access_token.to_string())) + } + other => Err(miette!( + "token grant auth_style '{other}' is not supported; use bearer or header" + )), + } +} + +fn validate_header_name(header_name: &str) -> Result<()> { + let valid = !header_name.is_empty() + && header_name.bytes().all(|byte| { + byte.is_ascii_alphanumeric() + || matches!( + byte, + b'!' | b'#' + | b'$' + | b'%' + | b'&' + | b'\'' + | b'*' + | b'+' + | b'-' + | b'.' 
+ | b'^' + | b'_' + | b'`' + | b'|' + | b'~' + ) + }); + if !valid { + return Err(miette!( + "token grant header_name is not a valid HTTP header name" + )); + } + match header_name.to_ascii_lowercase().as_str() { + "host" | "content-length" | "transfer-encoding" | "connection" => Err(miette!( + "token grant header_name may not override HTTP framing or connection headers" + )), + _ => Ok(()), + } +} + +fn inject_header(raw_header: &[u8], header_name: &str, header_value: &str) -> Result> { + let header_end = raw_header + .windows(4) + .position(|w| w == b"\r\n\r\n") + .ok_or_else(|| miette!("HTTP headers missing final CRLF CRLF"))?; + + let header_block = std::str::from_utf8(&raw_header[..header_end]) + .map_err(|_| miette!("HTTP headers contain invalid UTF-8"))?; + let mut lines = header_block.split("\r\n"); + let request_line = lines + .next() + .ok_or_else(|| miette!("HTTP headers missing request line"))?; + + let inserted_header = format!("{header_name}: {header_value}"); + let mut new_raw_header = Vec::with_capacity(raw_header.len() + inserted_header.len() + 2); + new_raw_header.extend_from_slice(request_line.as_bytes()); + new_raw_header.extend_from_slice(b"\r\n"); + + for line in lines { + if line.is_empty() { + break; + } + if line + .split_once(':') + .is_some_and(|(name, _)| name.trim().eq_ignore_ascii_case(header_name)) + { + continue; + } + new_raw_header.extend_from_slice(line.as_bytes()); + new_raw_header.extend_from_slice(b"\r\n"); + } + + new_raw_header.extend_from_slice(inserted_header.as_bytes()); + new_raw_header.extend_from_slice(&raw_header[header_end..]); + + Ok(new_raw_header) +} + +#[cfg(test)] +pub mod test_support { + use super::*; + use openshell_core::proto::{ProviderCredentialTokenGrant, ProviderProfileCredential}; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; + + struct FakeTokenGrantResolver { + requests: Arc>>, + response: std::result::Result, + } + + #[derive(Debug, Clone, PartialEq, Eq)] + struct 
OwnedTokenGrantRequest { + provider_key: String, + token_endpoint: String, + jwt_svid_audience: String, + client_assertion_type: String, + audience: String, + scopes: Vec, + cache_ttl_seconds: i64, + } + + pub struct TokenGrantTestFixture { + dynamic_credentials: Arc>>, + resolver: Arc, + requests: Arc>>, + } + + impl TokenGrantTestFixture { + pub fn success(key: &str, token: &str) -> Self { + Self::new(key, Ok(token)) + } + + pub fn failure(key: &str, error: &str) -> Self { + Self::new(key, Err(error)) + } + + fn new(key: &str, response: std::result::Result<&str, &str>) -> Self { + let requests = Arc::new(Mutex::new(Vec::new())); + let resolver = Arc::new(FakeTokenGrantResolver { + requests: requests.clone(), + response: response.map(str::to_string).map_err(str::to_string), + }); + + let mut dynamic_credentials = HashMap::new(); + dynamic_credentials.insert( + key.to_string(), + ProviderProfileCredential { + name: "access_token".to_string(), + auth_style: "bearer".to_string(), + header_name: "Authorization".to_string(), + token_grant: Some(token_grant()), + ..Default::default() + }, + ); + + Self { + dynamic_credentials: Arc::new(std::sync::RwLock::new(dynamic_credentials)), + resolver, + requests, + } + } + + pub fn dynamic_credentials( + &self, + ) -> Arc>> { + self.dynamic_credentials.clone() + } + + pub fn resolver(&self) -> Arc { + self.resolver.clone() + } + + pub fn assert_one_request(&self, expected_provider_key: &str) { + let requests = self + .requests + .lock() + .expect("fake token grant requests lock poisoned"); + assert_eq!(requests.len(), 1); + + let request = &requests[0]; + assert_eq!(request.provider_key, expected_provider_key); + assert_eq!(request.token_endpoint, "https://auth.example.com/token"); + assert_eq!(request.jwt_svid_audience, "https://auth.example.com"); + assert_eq!( + request.client_assertion_type, + "urn:ietf:params:oauth:client-assertion-type:jwt-bearer" + ); + assert_eq!(request.audience, "api://example"); + 
assert_eq!(request.scopes, ["read"]); + assert_eq!(request.cache_ttl_seconds, 300); + } + } + + fn token_grant() -> ProviderCredentialTokenGrant { + ProviderCredentialTokenGrant { + token_endpoint: "https://auth.example.com/token".to_string(), + audience: "api://example".to_string(), + jwt_svid_audience: "https://auth.example.com".to_string(), + client_assertion_type: "urn:ietf:params:oauth:client-assertion-type:jwt-bearer" + .to_string(), + scopes: vec!["read".to_string()], + cache_ttl_seconds: 300, + audience_overrides: Vec::new(), + } + } + + impl TokenGrantResolver for FakeTokenGrantResolver { + fn obtain<'a>( + &'a self, + request: TokenGrantRequest<'a>, + ) -> Pin> + Send + 'a>> { + let owned = OwnedTokenGrantRequest { + provider_key: request.provider_key.to_string(), + token_endpoint: request.token_endpoint.to_string(), + jwt_svid_audience: request.jwt_svid_audience.to_string(), + client_assertion_type: request.client_assertion_type.to_string(), + audience: request.audience.to_string(), + scopes: request.scopes.to_vec(), + cache_ttl_seconds: request.cache_ttl_seconds, + }; + Box::pin(async move { + self.requests + .lock() + .expect("fake token grant requests lock poisoned") + .push(owned); + self.response.clone().map_err(|err| miette!("{err}")) + }) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::l7::provider::{BodyLength, L7Request}; + use crate::l7::token_grant_injection::test_support::TokenGrantTestFixture; + + fn credential(auth_style: &str, header_name: &str) -> ProviderProfileCredential { + ProviderProfileCredential { + auth_style: auth_style.to_string(), + header_name: header_name.to_string(), + ..Default::default() + } + } + + #[test] + fn dynamic_credential_key_matches_endpoint_host_port_and_path() { + let key = "api.example.com\t443\t/repos/**\tgithub:access_token"; + + assert!(dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/repos/owner/repo" + )); + assert!(!dynamic_credential_key_matches( + key, + 
"uploads.example.com", + 443, + "/repos/owner/repo" + )); + assert!(!dynamic_credential_key_matches( + key, + "api.example.com", + 80, + "/repos/owner/repo" + )); + assert!(!dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/orgs/owner" + )); + } + + #[test] + fn dynamic_credential_key_matches_wildcard_hosts_and_empty_path() { + let key = "*.example.com\t443\t\tprovider:access_token"; + + assert!(dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/anything" + )); + assert!(!dynamic_credential_key_matches( + key, + "api.other.com", + 443, + "/anything" + )); + assert!(!dynamic_credential_key_matches( + key, + "nested.api.example.com", + 443, + "/anything" + )); + } + + #[test] + fn dynamic_credential_key_matches_double_wildcard_hosts() { + let key = "**.example.com\t443\t\tprovider:access_token"; + + assert!(dynamic_credential_key_matches( + key, + "api.example.com", + 443, + "/anything" + )); + assert!(dynamic_credential_key_matches( + key, + "nested.api.example.com", + 443, + "/anything" + )); + } + + #[test] + fn dynamic_credential_match_score_prefers_path_specific_key() { + let default_key = "alpha.default.svc.cluster.local\t80\t\tprovider:access_token"; + let path_key = "alpha.default.svc.cluster.local\t80\t/admin/**\tprovider:access_token"; + let request_path = "/admin/users"; + + let default_score = dynamic_credential_key_match_score( + default_key, + "alpha.default.svc.cluster.local", + 80, + request_path, + ) + .expect("default key should match"); + let path_score = dynamic_credential_key_match_score( + path_key, + "alpha.default.svc.cluster.local", + 80, + request_path, + ) + .expect("path key should match"); + + assert!(path_score > default_score); + } + + #[test] + fn inject_token_grant_header_replaces_existing_authorization() { + let raw = b"GET /v1 HTTP/1.1\r\nHost: api.example.com\r\nauthorization: Bearer stale-token\r\nAccept: application/json\r\n\r\n"; + + let rewritten = + inject_token_grant_header(raw, 
&credential("bearer", "Authorization"), "grant-token") + .expect("header should rewrite"); + let rewritten = String::from_utf8(rewritten).expect("rewritten header should be UTF-8"); + + assert!(rewritten.contains("Authorization: Bearer grant-token\r\n")); + assert!(!rewritten.contains("stale-token")); + assert_eq!( + rewritten + .lines() + .filter(|line| line + .split_once(':') + .is_some_and(|(name, _)| name.eq_ignore_ascii_case("authorization"))) + .count(), + 1 + ); + } + + #[test] + fn inject_token_grant_header_replaces_existing_authorization_with_ows_before_colon() { + let raw = b"GET /v1 HTTP/1.1\r\nHost: api.example.com\r\nAuthorization : Bearer stale-token\r\nAccept: application/json\r\n\r\n"; + + let rewritten = + inject_token_grant_header(raw, &credential("bearer", "Authorization"), "grant-token") + .expect("header should rewrite"); + let rewritten = String::from_utf8(rewritten).expect("rewritten header should be UTF-8"); + + assert!(rewritten.contains("Authorization: Bearer grant-token\r\n")); + assert!(!rewritten.contains("stale-token")); + assert_eq!( + rewritten + .lines() + .filter(|line| line + .split_once(':') + .is_some_and(|(name, _)| name.trim().eq_ignore_ascii_case("authorization"))) + .count(), + 1 + ); + } + + #[test] + fn token_grant_header_rejects_framing_and_connection_headers() { + for header_name in ["Host", "Content-Length", "Transfer-Encoding", "Connection"] { + let err = token_grant_header(&credential("header", header_name), "grant-token") + .expect_err("framing header override should be rejected"); + assert_eq!( + err.to_string(), + "token grant header_name may not override HTTP framing or connection headers" + ); + } + } + + #[test] + fn inject_token_grant_header_preserves_header_terminator_before_body() { + let raw = b"POST /v1 HTTP/1.1\r\nHost: api.example.com\r\nContent-Length: 2\r\n\r\nOK"; + + let rewritten = inject_token_grant_header(raw, &credential("bearer", ""), "grant-token") + .expect("header should rewrite"); + + 
assert_eq!( + rewritten, + b"POST /v1 HTTP/1.1\r\nHost: api.example.com\r\nContent-Length: 2\r\nAuthorization: Bearer grant-token\r\n\r\nOK" + ); + } + + #[test] + fn inject_token_grant_header_uses_custom_header_style() { + let raw = b"GET /v1 HTTP/1.1\r\nHost: api.example.com\r\nX-Api-Token: stale-token\r\n\r\n"; + + let rewritten = + inject_token_grant_header(raw, &credential("header", "X-Api-Token"), "grant-token") + .expect("header should rewrite"); + let rewritten = String::from_utf8(rewritten).expect("rewritten header should be UTF-8"); + + assert!(rewritten.contains("X-Api-Token: grant-token\r\n")); + assert!(!rewritten.contains("stale-token")); + assert!(!rewritten.contains("Bearer grant-token")); + } + + #[test] + fn inject_token_grant_header_rejects_malformed_access_token() { + let raw = b"GET /v1 HTTP/1.1\r\nHost: api.example.com\r\n\r\n"; + + let err = inject_token_grant_header( + raw, + &credential("bearer", "Authorization"), + "grant-token\r\nX-Injected: yes", + ) + .expect_err("malformed token must not be injected"); + + assert_eq!( + err.to_string(), + "token grant returned a malformed access token" + ); + } + + #[tokio::test] + async fn inject_if_needed_uses_configured_resolver() { + let fixture = TokenGrantTestFixture::success( + "api.example.com\t443\t/v1/**\tprovider:access_token", + "grant-token", + ); + + let ctx = L7EvalContext { + host: "api.example.com".into(), + port: 443, + policy_name: "api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + let req = L7Request { + action: "GET".to_string(), + target: "/v1/projects".to_string(), + query_params: std::collections::HashMap::new(), + raw_header: b"GET /v1/projects HTTP/1.1\r\nHost: api.example.com\r\n\r\n".to_vec(), + body_length: BodyLength::None, + }; + + let rewritten = 
inject_if_needed(req, &ctx) + .await + .expect("fake token grant should inject"); + let rewritten = + String::from_utf8(rewritten.raw_header).expect("rewritten request should be UTF-8"); + + assert!(rewritten.contains("Authorization: Bearer grant-token\r\n")); + fixture.assert_one_request("api.example.com\t443\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn inject_if_needed_rejects_malformed_resolver_token() { + let fixture = TokenGrantTestFixture::success( + "api.example.com\t443\t/v1/**\tprovider:access_token", + "grant-token\r\nX-Injected: yes", + ); + + let ctx = L7EvalContext { + host: "api.example.com".into(), + port: 443, + policy_name: "api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + let req = L7Request { + action: "GET".to_string(), + target: "/v1/projects".to_string(), + query_params: std::collections::HashMap::new(), + raw_header: b"GET /v1/projects HTTP/1.1\r\nHost: api.example.com\r\n\r\n".to_vec(), + body_length: BodyLength::None, + }; + + let err = inject_if_needed(req, &ctx) + .await + .expect_err("malformed resolver token should fail closed"); + + assert_eq!( + err.to_string(), + "token grant returned a malformed access token" + ); + fixture.assert_one_request("api.example.com\t443\t/v1/**\tprovider:access_token"); + } +} diff --git a/crates/openshell-sandbox/src/l7/websocket.rs b/crates/openshell-sandbox/src/l7/websocket.rs index 89a6e6c51..79c820e26 100644 --- a/crates/openshell-sandbox/src/l7/websocket.rs +++ b/crates/openshell-sandbox/src/l7/websocket.rs @@ -1271,6 +1271,8 @@ network_policies: cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let (mut client_write, mut relay_read) = tokio::io::duplex(MAX_TEXT_MESSAGE_BYTES + 1024); let (mut 
relay_write, mut upstream_read) = tokio::io::duplex(MAX_TEXT_MESSAGE_BYTES + 1024); diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index fa4654243..16f6138a4 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -25,8 +25,10 @@ pub mod proxy; mod sandbox; mod secrets; mod skills; +mod spiffe_endpoint; mod ssh; mod supervisor_session; +mod token_grant; use miette::{IntoDiagnostic, Result}; #[cfg(target_os = "linux")] @@ -370,57 +372,65 @@ pub async fn run_sandbox( // Fetch provider environment variables from the server. // This is done after loading the policy so the sandbox can still start // even if provider env fetch fails (graceful degradation). - let (provider_env_revision, provider_env, provider_credential_expires_at_ms) = - if let (Some(id), Some(endpoint)) = (&sandbox_id, &openshell_endpoint) { - match grpc_client::fetch_provider_environment(endpoint, id).await { - Ok(result) => { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Informational) - .status(StatusId::Success) - .state(StateId::Enabled, "loaded") - .message(format!( - "Fetched provider environment [env_count:{}]", - result.environment.len() - )) - .build() - ); - ( - result.provider_env_revision, - result.environment, - result.credential_expires_at_ms, - ) - } - Err(e) => { - ocsf_emit!( - ConfigStateChangeBuilder::new(ocsf_ctx()) - .severity(SeverityId::Medium) - .status(StatusId::Failure) - .state(StateId::Other, "degraded") - .message(format!( - "Failed to fetch provider environment, continuing without: {e}" - )) - .build() - ); - ( - 0, - std::collections::HashMap::new(), - std::collections::HashMap::new(), - ) - } + let ( + provider_env_revision, + provider_env, + provider_credential_expires_at_ms, + dynamic_credentials, + ) = if let (Some(id), Some(endpoint)) = (&sandbox_id, &openshell_endpoint) { + match grpc_client::fetch_provider_environment(endpoint, id).await { + Ok(result) 
=> { + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Informational) + .status(StatusId::Success) + .state(StateId::Enabled, "loaded") + .message(format!( + "Fetched provider environment [env_count:{}]", + result.environment.len() + )) + .build() + ); + ( + result.provider_env_revision, + result.environment, + result.credential_expires_at_ms, + result.dynamic_credentials, + ) } - } else { - ( - 0, - std::collections::HashMap::new(), - std::collections::HashMap::new(), - ) - }; + Err(e) => { + ocsf_emit!( + ConfigStateChangeBuilder::new(ocsf_ctx()) + .severity(SeverityId::Medium) + .status(StatusId::Failure) + .state(StateId::Other, "degraded") + .message(format!( + "Failed to fetch provider environment, continuing without: {e}" + )) + .build() + ); + ( + 0, + std::collections::HashMap::new(), + std::collections::HashMap::new(), + std::collections::HashMap::new(), + ) + } + } + } else { + ( + 0, + std::collections::HashMap::new(), + std::collections::HashMap::new(), + std::collections::HashMap::new(), + ) + }; let provider_credentials = provider_credentials::ProviderCredentialState::from_environment( provider_env_revision, provider_env, provider_credential_expires_at_ms, + dynamic_credentials, ); let provider_env = provider_credentials.snapshot().child_env.clone(); @@ -597,6 +607,13 @@ pub async fn run_sandbox( #[allow(clippy::no_effect_underscore_binding)] let _netns: Option<()> = None; + // Prepare the child-only mount namespace before the supervisor seccomp + // prelude blocks mount operations. Children enter this namespace with + // `setns` in pre_exec so supervisor identity sockets stay hidden from + // untrusted code while remaining available to the supervisor for refresh. 
+ #[cfg(target_os = "linux")] + process::prepare_supervisor_identity_mount_namespace_from_env()?; + // Install the supervisor seccomp prelude after privileged startup helpers // (network namespace setup, nftables probes) complete, but before the SSH // listener and workload process are exposed. @@ -2631,6 +2648,7 @@ async fn run_policy_poll_loop(ctx: PolicyPollLoopContext) -> Result<()> { env_result.provider_env_revision, env_result.environment, env_result.credential_expires_at_ms, + env_result.dynamic_credentials, ); current_provider_env_revision = env_result.provider_env_revision; ocsf_emit!( diff --git a/crates/openshell-sandbox/src/process.rs b/crates/openshell-sandbox/src/process.rs index 76786a84d..9f9c05097 100644 --- a/crates/openshell-sandbox/src/process.rs +++ b/crates/openshell-sandbox/src/process.rs @@ -16,14 +16,40 @@ use nix::unistd::{Group, Pid, User}; use std::collections::HashMap; use std::ffi::CString; #[cfg(target_os = "linux")] -use std::os::unix::io::RawFd; +use std::os::fd::{AsRawFd, OwnedFd, RawFd}; +#[cfg(target_os = "linux")] +use std::os::unix::ffi::OsStrExt; +#[cfg(any(test, target_os = "linux"))] +use std::path::Path; use std::path::PathBuf; use std::process::Stdio; +#[cfg(target_os = "linux")] +use std::sync::OnceLock; use tokio::process::{Child, Command}; use tracing::debug; +const SUPERVISOR_ONLY_ENV_VARS: &[&str] = &[ + openshell_core::sandbox_env::SANDBOX_TOKEN, + openshell_core::sandbox_env::SANDBOX_TOKEN_FILE, + openshell_core::sandbox_env::K8S_SA_TOKEN_FILE, + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET, +]; + +pub fn is_supervisor_only_env_var(key: &str) -> bool { + SUPERVISOR_ONLY_ENV_VARS.contains(&key) +} + +fn strip_supervisor_only_env(cmd: &mut Command) { + for key in SUPERVISOR_ONLY_ENV_VARS { + cmd.env_remove(key); + } +} + fn inject_provider_env(cmd: &mut Command, provider_env: &HashMap) { for (key, value) in provider_env { + if is_supervisor_only_env_var(key) { + continue; + } cmd.env(key, value); } 
} @@ -129,6 +155,234 @@ fn parse_pids_max(contents: &str) -> RuntimePidLimitStatus { } } +// Pins the pre-seccomp child mount namespace where supervisor identity sockets +// are shadowed. Children enter it with setns before dropping privileges. +#[cfg(target_os = "linux")] +static SUPERVISOR_IDENTITY_MOUNT_NS: OnceLock> = + OnceLock::new(); + +#[cfg(target_os = "linux")] +pub struct SupervisorIdentityMountNamespace { + fd: OwnedFd, +} + +#[cfg(target_os = "linux")] +type SupervisorIdentityNsRef = &'static SupervisorIdentityMountNamespace; + +#[cfg(target_os = "linux")] +impl SupervisorIdentityMountNamespace { + fn from_socket_path(socket_path: &str) -> Result> { + let Some(target) = supervisor_identity_mount_target(socket_path)? else { + return Ok(None); + }; + Ok(Some(Self { + fd: create_supervisor_identity_mount_namespace(&target)?, + })) + } + + pub fn enter_for_child(&self) -> std::io::Result<()> { + set_mount_namespace(self.fd.as_raw_fd()) + } +} + +#[cfg(target_os = "linux")] +pub fn prepare_supervisor_identity_mount_namespace_from_env() -> Result<()> { + if SUPERVISOR_IDENTITY_MOUNT_NS.get().is_some() { + return Ok(()); + } + + let Some((_env_name, socket_path)) = supervisor_identity_socket_path_from_env() else { + let _ = SUPERVISOR_IDENTITY_MOUNT_NS.set(None); + return Ok(()); + }; + let namespace = SupervisorIdentityMountNamespace::from_socket_path(&socket_path)?; + let _ = SUPERVISOR_IDENTITY_MOUNT_NS.set(namespace); + Ok(()) +} + +#[cfg(target_os = "linux")] +pub fn supervisor_identity_mount_from_env() -> Result> { + let Some(namespace) = SUPERVISOR_IDENTITY_MOUNT_NS.get() else { + if supervisor_identity_socket_path_from_env().is_some() { + return Err(miette::miette!( + "supervisor identity mount namespace was not prepared before startup hardening" + )); + } + return Ok(None); + }; + Ok(namespace.as_ref()) +} + +#[cfg(target_os = "linux")] +fn supervisor_identity_socket_path_from_env() -> Option<(&'static str, String)> { + 
std::env::var(openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET) + .ok() + .filter(|socket_path| !socket_path.trim().is_empty()) + .map(|socket_path| { + ( + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET, + socket_path, + ) + }) +} + +#[cfg(any(test, target_os = "linux"))] +fn supervisor_identity_mount_target(socket_path: &str) -> Result> { + let trimmed = socket_path.trim(); + if trimmed.is_empty() { + return Ok(None); + } + if trimmed.starts_with("tcp:") { + return Err(miette::miette!( + "{} must be a UNIX socket path so sandbox child processes can hide it", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + } + let path = trimmed.strip_prefix("unix:").unwrap_or(trimmed); + let path = Path::new(path); + if !path.is_absolute() { + return Err(miette::miette!( + "{} must be an absolute UNIX socket path", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + } + let Some(parent) = path.parent() else { + return Err(miette::miette!( + "{} has no parent directory", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + }; + if parent == Path::new("/") { + return Err(miette::miette!( + "{} must live below a dedicated directory, not directly under /", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + )); + } + if is_shared_root_mount_shadow(parent) { + return Err(miette::miette!( + "{} must live below a dedicated subdirectory; refusing to hide shared directory {}", + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET, + parent.display() + )); + } + Ok(Some(parent.to_path_buf())) +} + +#[cfg(any(test, target_os = "linux"))] +fn is_shared_root_mount_shadow(parent: &Path) -> bool { + matches!(parent.to_str(), Some("/run" | "/var" | "/tmp" | "/etc")) +} + +#[cfg(target_os = "linux")] +fn cstring_path(path: &Path) -> Result { + CString::new(path.as_os_str().as_bytes()) + .map_err(|_| miette::miette!("path contains an interior NUL byte: {}", 
path.display())) +} + +#[cfg(target_os = "linux")] +fn create_supervisor_identity_mount_namespace(target: &Path) -> Result { + let original_ns = open_current_mount_namespace() + .map_err(|err| miette::miette!("failed to open original mount namespace: {err}"))?; + + private_mount_namespace() + .map_err(|err| miette::miette!("failed to create supervisor identity namespace: {err}"))?; + + let target = cstring_path(target)?; + let result = (|| -> Result { + mount_empty_tmpfs(&target).map_err(|err| { + miette::miette!("failed to hide supervisor identity mount from child namespace: {err}") + })?; + open_current_mount_namespace() + .map_err(|err| miette::miette!("failed to open sanitized mount namespace: {err}")) + })(); + + set_mount_namespace(original_ns.as_raw_fd()).map_err(|restore_err| { + let result_msg = result.as_ref().err().map_or_else( + || "sanitized namespace was created".to_string(), + ToString::to_string, + ); + miette::miette!( + "failed to restore original mount namespace after supervisor identity isolation setup: \ + {restore_err}; setup result: {result_msg}" + ) + })?; + + result +} + +#[cfg(target_os = "linux")] +fn open_current_mount_namespace() -> std::io::Result { + let file = std::fs::File::open("/proc/thread-self/ns/mnt")?; + Ok(file.into()) +} + +#[cfg(target_os = "linux")] +fn private_mount_namespace() -> std::io::Result<()> { + #[allow(unsafe_code)] + let rc = unsafe { libc::unshare(libc::CLONE_NEWNS) }; + if rc != 0 { + return Err(std::io::Error::other(format!( + "failed to create private mount namespace: {}", + std::io::Error::last_os_error() + ))); + } + + #[allow(unsafe_code)] + let rc = unsafe { + let flags: libc::c_ulong = libc::MS_REC | libc::MS_PRIVATE; + libc::mount( + std::ptr::null(), + c"/".as_ptr(), + std::ptr::null(), + flags, + std::ptr::null(), + ) + }; + if rc != 0 { + return Err(std::io::Error::other(format!( + "failed to mark mount namespace private: {}", + std::io::Error::last_os_error() + ))); + } + Ok(()) +} + 
+#[cfg(target_os = "linux")] +fn set_mount_namespace(fd: RawFd) -> std::io::Result<()> { + #[allow(unsafe_code)] + let rc = unsafe { libc::setns(fd, libc::CLONE_NEWNS) }; + if rc != 0 { + return Err(std::io::Error::other(format!( + "failed to enter mount namespace: {}", + std::io::Error::last_os_error() + ))); + } + Ok(()) +} + +#[cfg(target_os = "linux")] +fn mount_empty_tmpfs(target: &CString) -> std::io::Result<()> { + #[allow(unsafe_code)] + let rc = unsafe { + let flags: libc::c_ulong = + libc::MS_NOSUID | libc::MS_NODEV | libc::MS_NOEXEC | libc::MS_RDONLY; + libc::mount( + c"tmpfs".as_ptr(), + target.as_ptr(), + c"tmpfs".as_ptr(), + flags, + c"mode=0555,size=4k".as_ptr().cast(), + ) + }; + if rc != 0 { + return Err(std::io::Error::other(format!( + "failed to hide supervisor identity mount from child process: {}", + std::io::Error::last_os_error() + ))); + } + Ok(()) +} + /// Handle to a running process. pub struct ProcessHandle { child: Child, @@ -211,14 +465,11 @@ impl ProcessHandle { .kill_on_drop(true) .env(openshell_core::sandbox_env::SANDBOX, "1"); - // Strip supervisor-only credentials from the entrypoint's inherited - // environment. The entrypoint drops to the sandbox user before - // `exec`; without this strip, anything running as the sandbox user - // (e.g. an SSH-spawned shell) could read /proc//environ - // and recover the gateway-minted JWT. Issue #1354. - cmd.env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN) - .env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN_FILE) - .env_remove(openshell_core::sandbox_env::K8S_SA_TOKEN_FILE); + // Strip supervisor-only identity material from the entrypoint's + // inherited environment. The entrypoint drops to the sandbox user + // before `exec`; without this strip, sandbox code could recover + // supervisor credentials from its inherited environment. 
+ strip_supervisor_only_env(&mut cmd); inject_provider_env(&mut cmd, provider_env); @@ -269,6 +520,10 @@ impl ProcessHandle { #[cfg(target_os = "linux")] let prepared_sandbox = sandbox::linux::prepare(policy, workdir) .map_err(|err| miette::miette!("Failed to prepare sandbox: {err}"))?; + #[cfg(target_os = "linux")] + let supervisor_identity_mount = supervisor_identity_mount_from_env().map_err(|err| { + miette::miette!("Failed to prepare supervisor identity isolation: {err}") + })?; // Set up process group for signal handling (non-interactive mode only). // In interactive mode, we inherit the parent's process group to maintain @@ -297,6 +552,11 @@ impl ProcessHandle { } } + #[cfg(target_os = "linux")] + if let Some(mount) = supervisor_identity_mount { + mount.enter_for_child()?; + } + // Drop privileges. initgroups/setgid/setuid need access to // /etc/group and /etc/passwd which would be blocked if // Landlock were already enforced. @@ -346,14 +606,9 @@ impl ProcessHandle { .kill_on_drop(true) .env(openshell_core::sandbox_env::SANDBOX, "1"); - // Strip supervisor-only credentials from the entrypoint's inherited - // environment. The entrypoint drops to the sandbox user before - // `exec`; without this strip, anything running as the sandbox user - // (e.g. an SSH-spawned shell) could read /proc//environ - // and recover the gateway-minted JWT. Issue #1354. - cmd.env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN) - .env_remove(openshell_core::sandbox_env::SANDBOX_TOKEN_FILE) - .env_remove(openshell_core::sandbox_env::K8S_SA_TOKEN_FILE); + // Strip supervisor-only identity material from the entrypoint's + // inherited environment. 
+ strip_supervisor_only_env(&mut cmd); inject_provider_env(&mut cmd, provider_env); @@ -935,4 +1190,115 @@ mod tests { let stdout = String::from_utf8(output.stdout).expect("utf8"); assert!(stdout.contains("ANTHROPIC_API_KEY=openshell:resolve:env:ANTHROPIC_API_KEY")); } + + #[tokio::test] + async fn inject_provider_env_skips_supervisor_identity_material() { + let mut cmd = Command::new("/usr/bin/env"); + cmd.env_clear() + .stdin(StdStdio::null()) + .stdout(StdStdio::piped()) + .stderr(StdStdio::null()); + + let provider_env = HashMap::from([ + ( + "ANTHROPIC_API_KEY".to_string(), + "openshell:resolve:env:ANTHROPIC_API_KEY".to_string(), + ), + ( + openshell_core::sandbox_env::SANDBOX_TOKEN.to_string(), + "provider-token".to_string(), + ), + ( + openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET.to_string(), + "/spiffe-workload-api/spire-agent.sock".to_string(), + ), + ]); + + inject_provider_env(&mut cmd, &provider_env); + + let output = cmd.output().await.expect("spawn env"); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).expect("utf8"); + assert!(stdout.contains("ANTHROPIC_API_KEY=openshell:resolve:env:ANTHROPIC_API_KEY")); + assert!(!stdout.contains(openshell_core::sandbox_env::SANDBOX_TOKEN)); + assert!(!stdout.contains(openshell_core::sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET)); + } + + #[tokio::test] + async fn strip_supervisor_only_env_removes_identity_material() { + let mut cmd = Command::new("/usr/bin/env"); + cmd.stdin(StdStdio::null()) + .stdout(StdStdio::piped()) + .stderr(StdStdio::null()) + .env("OPENSHELL_ENDPOINT", "https://gateway.example.test"); + + for key in SUPERVISOR_ONLY_ENV_VARS { + cmd.env(key, format!("{key}-secret")); + } + + strip_supervisor_only_env(&mut cmd); + + let output = cmd.output().await.expect("spawn env"); + assert!(output.status.success()); + let stdout = String::from_utf8(output.stdout).expect("utf8"); + + for key in SUPERVISOR_ONLY_ENV_VARS { + assert!( + !stdout + 
.lines() + .any(|line| line.starts_with(&format!("{key}="))), + "{key} must not be inherited by sandbox child processes" + ); + } + assert!(stdout.contains("OPENSHELL_ENDPOINT=https://gateway.example.test")); + } + + #[test] + fn supervisor_identity_mount_target_uses_socket_parent() { + assert_eq!( + supervisor_identity_mount_target("/spiffe-workload-api/spire-agent.sock") + .expect("plain path should parse"), + Some(PathBuf::from("/spiffe-workload-api")) + ); + assert_eq!( + supervisor_identity_mount_target("unix:/spiffe-workload-api/spire-agent.sock") + .expect("unix path should parse"), + Some(PathBuf::from("/spiffe-workload-api")) + ); + } + + #[test] + fn supervisor_identity_mount_target_ignores_empty_socket_path() { + assert_eq!( + supervisor_identity_mount_target(" ").expect("empty path should be ignored"), + None + ); + } + + #[test] + fn supervisor_identity_mount_target_rejects_unhideable_endpoints() { + assert!(supervisor_identity_mount_target("tcp:127.0.0.1:8081").is_err()); + assert!(supervisor_identity_mount_target("spiffe-workload-api/spire-agent.sock").is_err()); + assert!(supervisor_identity_mount_target("/spire-agent.sock").is_err()); + } + + #[test] + fn supervisor_identity_mount_target_rejects_shared_root_shadowing() { + for socket_path in [ + "/run/spire-agent.sock", + "/var/spire-agent.sock", + "/tmp/spire-agent.sock", + "/etc/spire-agent.sock", + ] { + let err = supervisor_identity_mount_target(socket_path) + .expect_err("shared root shadowing should be rejected"); + assert!(err.to_string().contains("dedicated subdirectory")); + } + + assert_eq!( + supervisor_identity_mount_target("/run/spire/spire-agent.sock") + .expect("dedicated subdirectory should be accepted"), + Some(PathBuf::from("/run/spire")) + ); + } } diff --git a/crates/openshell-sandbox/src/provider_credentials.rs b/crates/openshell-sandbox/src/provider_credentials.rs index ae91e8d6e..acd116a83 100644 --- a/crates/openshell-sandbox/src/provider_credentials.rs +++ 
b/crates/openshell-sandbox/src/provider_credentials.rs @@ -13,6 +13,7 @@ const MAX_RETAINED_CREDENTIAL_GENERATIONS: usize = 8; pub struct ProviderCredentialSnapshot { pub revision: u64, pub child_env: HashMap, + pub dynamic_credentials: HashMap, } #[derive(Debug)] @@ -33,6 +34,7 @@ impl ProviderCredentialState { revision: u64, env: HashMap, credential_expires_at_ms: HashMap, + dynamic_credentials: HashMap, ) -> Self { let (child_env, generation_resolver, current_resolver) = SecretResolver::from_provider_env_for_current_revision( @@ -43,6 +45,7 @@ impl ProviderCredentialState { let snapshot = Arc::new(ProviderCredentialSnapshot { revision, child_env, + dynamic_credentials, }); let generations: VecDeque<_> = generation_resolver.map(Arc::new).into_iter().collect(); let current_resolver = current_resolver.map(Arc::new); @@ -79,6 +82,7 @@ impl ProviderCredentialState { revision: u64, env: HashMap, credential_expires_at_ms: HashMap, + dynamic_credentials: HashMap, ) -> usize { let (child_env, generation_resolver, current_resolver) = SecretResolver::from_provider_env_for_current_revision( @@ -94,6 +98,7 @@ impl ProviderCredentialState { inner.current = Arc::new(ProviderCredentialSnapshot { revision, child_env, + dynamic_credentials, }); inner.current_resolver = current_resolver.map(Arc::new); @@ -132,6 +137,7 @@ mod tests { 10, HashMap::from([("GITHUB_TOKEN".to_string(), "old".to_string())]), HashMap::new(), + HashMap::new(), ); let first = state.snapshot(); assert_eq!( @@ -143,6 +149,7 @@ mod tests { 11, HashMap::from([("GITHUB_TOKEN".to_string(), "new".to_string())]), HashMap::new(), + HashMap::new(), ); let second = state.snapshot(); assert_eq!( @@ -175,9 +182,10 @@ mod tests { 10, HashMap::from([("GITHUB_TOKEN".to_string(), "old".to_string())]), HashMap::new(), + HashMap::new(), ); - state.install_environment(11, HashMap::new(), HashMap::new()); + state.install_environment(11, HashMap::new(), HashMap::new(), HashMap::new()); 
assert!(state.snapshot().child_env.is_empty()); let resolver = state.resolver().expect("old resolver retained"); @@ -208,12 +216,14 @@ mod tests { 10, HashMap::from([("GITHUB_TOKEN".to_string(), "old".to_string())]), HashMap::from([("GITHUB_TOKEN".to_string(), now_ms - 1_000)]), + HashMap::new(), ); state.install_environment( 11, HashMap::from([("GITHUB_TOKEN".to_string(), "new".to_string())]), HashMap::from([("GITHUB_TOKEN".to_string(), now_ms + 60_000)]), + HashMap::new(), ); let resolver = state.resolver().expect("resolver"); diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index aa8338433..39620657d 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -242,6 +242,11 @@ impl ProxyHandle { let resolver = provider_credentials .as_ref() .and_then(ProviderCredentialState::resolver); + let dynamic_credentials = provider_credentials.as_ref().map(|state| { + Arc::new(std::sync::RwLock::new( + state.snapshot().dynamic_credentials.clone(), + )) + }); let dtx = denial_tx.clone(); let atx = activity_tx.clone(); tokio::spawn(async move { @@ -255,6 +260,7 @@ impl ProxyHandle { policy_local, gw, resolver, + dynamic_credentials, dtx, atx, ) @@ -411,6 +417,13 @@ async fn handle_tcp_connection( policy_local_ctx: Option>, trusted_host_gateway: Arc>, secret_resolver: Option>, + dynamic_credentials: Option< + Arc< + std::sync::RwLock< + std::collections::HashMap, + >, + >, + >, denial_tx: Option>, activity_tx: Option, ) -> Result<()> { @@ -458,6 +471,7 @@ async fn handle_tcp_connection( policy_local_ctx, trusted_host_gateway, secret_resolver, + dynamic_credentials, denial_tx.as_ref(), activity_tx.as_ref(), ) @@ -953,6 +967,10 @@ async fn handle_tcp_connection( .collect(), secret_resolver: secret_resolver.clone(), activity_tx: activity_tx.clone(), + dynamic_credentials: dynamic_credentials.clone(), + token_grant_resolver: dynamic_credentials + .as_ref() + .map(|_| 
crate::l7::token_grant_injection::default_resolver()), }; if effective_tls_skip { @@ -2908,6 +2926,33 @@ where .await } +async fn inject_token_grant_for_forward_request( + method: &str, + upstream_target: &str, + forward_request_bytes: Vec, + l7_ctx: &crate::l7::relay::L7EvalContext, +) -> Result> { + let header_end = forward_request_bytes + .windows(4) + .position(|w| w == b"\r\n\r\n") + .map_or(forward_request_bytes.len(), |p| p + 4); + let header_str = std::str::from_utf8(&forward_request_bytes[..header_end]) + .into_diagnostic() + .map_err(|_| miette::miette!("Forward HTTP headers contain invalid UTF-8"))?; + let body_length = crate::l7::rest::parse_body_length(header_str)?; + let forward_request_for_token_grant = crate::l7::provider::L7Request { + action: method.to_string(), + target: upstream_target.to_string(), + query_params: std::collections::HashMap::new(), + raw_header: forward_request_bytes, + body_length, + }; + + crate::l7::token_grant_injection::inject_if_needed(forward_request_for_token_grant, l7_ctx) + .await + .map(|req| req.raw_header) +} + /// Handle a plain HTTP forward proxy request (non-CONNECT). /// /// Public IPs are allowed through when the endpoint passes OPA evaluation. 
@@ -2929,6 +2974,13 @@ async fn handle_forward_proxy( policy_local_ctx: Option>, trusted_host_gateway: Arc>, secret_resolver: Option>, + dynamic_credentials: Option< + Arc< + std::sync::RwLock< + std::collections::HashMap, + >, + >, + >, denial_tx: Option<&mpsc::UnboundedSender>, activity_tx: Option<&ActivitySender>, ) -> Result<()> { @@ -3158,6 +3210,10 @@ async fn handle_forward_proxy( .collect(), secret_resolver: secret_resolver.clone(), activity_tx: activity_tx.cloned(), + dynamic_credentials: dynamic_credentials.clone(), + token_grant_resolver: dynamic_credentials + .as_ref() + .map(|_| crate::l7::token_grant_injection::default_resolver()), }; let mut l7_activity_pending = false; @@ -3828,6 +3884,36 @@ async fn handle_forward_proxy( } emit_forward_success_activity(activity_tx, l7_activity_pending); + forward_request_bytes = match inject_token_grant_for_forward_request( + method, + &upstream_target, + forward_request_bytes, + &l7_ctx, + ) + .await + { + Ok(bytes) => bytes, + Err(e) => { + warn!( + dst_host = %host_lc, + dst_port = port, + error = %e, + "token grant failed in forward proxy" + ); + respond( + client, + &build_json_error_response( + 502, + "Bad Gateway", + "token_grant_failed", + "dynamic token grant failed", + ), + ) + .await?; + return Ok(()); + } + }; + // 9. 
Rewrite request and forward to upstream let rewritten = match rewrite_forward_request( &forward_request_bytes, @@ -4174,6 +4260,53 @@ mod tests { .map_err(|e| miette::miette!("upstream task failed: {e}")) } + fn forward_token_grant_context( + resolver_response: std::result::Result<&str, &str>, + ) -> ( + crate::l7::relay::L7EvalContext, + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture, + ) { + let provider_key = "api.example.test\t8080\t/v1/**\tprovider:access_token"; + let fixture = match resolver_response { + Ok(token) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::success( + provider_key, + token, + ) + } + Err(error) => { + crate::l7::token_grant_injection::test_support::TokenGrantTestFixture::failure( + provider_key, + error, + ) + } + }; + let ctx = crate::l7::relay::L7EvalContext { + host: "api.example.test".into(), + port: 8080, + policy_name: "rest_api".into(), + binary_path: "/usr/bin/curl".into(), + ancestors: vec![], + cmdline_paths: vec![], + secret_resolver: None, + activity_tx: None, + dynamic_credentials: Some(fixture.dynamic_credentials()), + token_grant_resolver: Some(fixture.resolver()), + }; + + (ctx, fixture) + } + + fn authorization_header_count(headers: &str) -> usize { + headers + .lines() + .filter(|line| { + line.split_once(':') + .is_some_and(|(name, _)| name.eq_ignore_ascii_case("authorization")) + }) + .count() + } + fn forward_websocket_policy_parts( data: &str, host: &str, @@ -4215,6 +4348,8 @@ mod tests { cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; (config, tunnel_engine, ctx) } @@ -4381,6 +4516,8 @@ mod tests { cmdline_paths: vec![], secret_resolver: resolver, activity_tx: None, + dynamic_credentials: None, + token_grant_resolver: None, }; let query_params = std::collections::HashMap::new(); @@ -4422,6 +4559,8 @@ mod tests { cmdline_paths: vec![], secret_resolver: None, activity_tx: None, + 
dynamic_credentials: None, + token_grant_resolver: None, }; let query_params = std::collections::HashMap::new(); let config = websocket_l7_config(crate::l7::L7Protocol::Rest, false); @@ -6491,6 +6630,40 @@ network_policies: // --- rewrite_forward_request tests --- + #[tokio::test] + async fn forward_proxy_injects_token_grant_before_rewriting_request() { + let (ctx, fixture) = forward_token_grant_context(Ok("grant-token")); + let raw = b"GET http://api.example.test:8080/v1/projects HTTP/1.1\r\nHost: api.example.test:8080\r\nAuthorization: Bearer stale-token\r\nConnection: close\r\n\r\n".to_vec(); + + let with_token = inject_token_grant_for_forward_request("GET", "/v1/projects", raw, &ctx) + .await + .expect("forward token grant should inject"); + let rewritten = + rewrite_forward_request(&with_token, with_token.len(), "/v1/projects", None, false) + .expect("forward request should rewrite"); + let rewritten = String::from_utf8_lossy(&rewritten); + + assert!(rewritten.starts_with("GET /v1/projects HTTP/1.1\r\n")); + assert!(rewritten.contains("Authorization: Bearer grant-token\r\n")); + assert!(!rewritten.contains("stale-token")); + assert_eq!(authorization_header_count(&rewritten), 1); + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + + #[tokio::test] + async fn forward_proxy_token_grant_failure_returns_error_before_rewrite() { + let (ctx, fixture) = forward_token_grant_context(Err("oauth unavailable")); + let raw = b"GET http://api.example.test:8080/v1/projects HTTP/1.1\r\nHost: api.example.test:8080\r\nConnection: close\r\n\r\n".to_vec(); + + let err = inject_token_grant_for_forward_request("GET", "/v1/projects", raw, &ctx) + .await + .expect_err("forward token grant failure should stop request rewriting"); + + assert!(err.to_string().contains("Token grant failed")); + assert!(err.to_string().contains("oauth unavailable")); + fixture.assert_one_request("api.example.test\t8080\t/v1/**\tprovider:access_token"); + } + #[test] 
fn test_rewrite_get_request() { let raw = diff --git a/crates/openshell-sandbox/src/spiffe_endpoint.rs b/crates/openshell-sandbox/src/spiffe_endpoint.rs new file mode 100644 index 000000000..449462627 --- /dev/null +++ b/crates/openshell-sandbox/src/spiffe_endpoint.rs @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::path::Path; + +/// Convert a path to a SPIFFE Workload API endpoint URL. +/// +/// If the path already has a scheme (`unix:` or `tcp:`), use it as-is. +/// Otherwise, assume it is a Unix socket path and prepend `unix:`. +pub fn workload_api_endpoint(path: &Path) -> String { + let path = path.to_string_lossy(); + if path.starts_with("unix:") || path.starts_with("tcp:") { + path.into_owned() + } else { + format!("unix:{path}") + } +} diff --git a/crates/openshell-sandbox/src/ssh.rs b/crates/openshell-sandbox/src/ssh.rs index 9db2bf97d..db8c1fee5 100644 --- a/crates/openshell-sandbox/src/ssh.rs +++ b/crates/openshell-sandbox/src/ssh.rs @@ -5,7 +5,7 @@ use crate::child_env; use crate::policy::SandboxPolicy; -use crate::process::drop_privileges; +use crate::process::{drop_privileges, is_supervisor_only_env_var}; use crate::provider_credentials::ProviderCredentialState; use crate::sandbox; #[cfg(target_os = "linux")] @@ -720,6 +720,9 @@ fn apply_child_env( } for (key, value) in provider_env { + if is_supervisor_only_env_var(key) { + continue; + } cmd.env(key, value); } } @@ -812,7 +815,7 @@ fn spawn_pty_shell( netns_fd, #[cfg(target_os = "linux")] prepared_sandbox, - ); + )?; } let mut child = cmd.spawn()?; @@ -960,7 +963,7 @@ fn spawn_pipe_exec( netns_fd, #[cfg(target_os = "linux")] prepared_sandbox, - ); + )?; } let mut child = cmd.spawn()?; @@ -1083,6 +1086,13 @@ mod unsafe_pty { } #[allow(unsafe_code)] + #[cfg_attr( + not(target_os = "linux"), + allow( + clippy::unnecessary_wraps, + reason = "Linux pre_exec setup can fail while 
non-Linux setup cannot." + ) + )] pub fn install_pre_exec( cmd: &mut Command, policy: SandboxPolicy, @@ -1090,11 +1100,16 @@ mod unsafe_pty { slave_fd: RawFd, netns_fd: Option, #[cfg(target_os = "linux")] prepared: crate::sandbox::linux::PreparedSandbox, - ) { + ) -> anyhow::Result<()> { // Wrap in Option so we can .take() it out of the FnMut closure. // pre_exec is only called once (after fork, before exec). #[cfg(target_os = "linux")] let mut prepared = Some(prepared); + #[cfg(target_os = "linux")] + let supervisor_identity_mount = crate::process::supervisor_identity_mount_from_env() + .map_err(|err| { + anyhow::anyhow!("failed to prepare supervisor identity isolation: {err}") + })?; unsafe { cmd.pre_exec(move || { setsid().map_err(|err| std::io::Error::other(err.to_string()))?; @@ -1104,40 +1119,61 @@ mod unsafe_pty { netns_fd, &policy, #[cfg(target_os = "linux")] + supervisor_identity_mount, + #[cfg(target_os = "linux")] prepared.take(), ) }); } + Ok(()) } /// Pre-exec hook for pipe-based (non-PTY) exec. /// /// Skips `setsid` and `TIOCSCTTY` since there is no controlling terminal. #[allow(unsafe_code)] + #[cfg_attr( + not(target_os = "linux"), + allow( + clippy::unnecessary_wraps, + reason = "Linux pre_exec setup can fail while non-Linux setup cannot." 
+ ) + )] pub fn install_pre_exec_no_pty( cmd: &mut Command, policy: SandboxPolicy, _workdir: Option, netns_fd: Option, #[cfg(target_os = "linux")] prepared: crate::sandbox::linux::PreparedSandbox, - ) { + ) -> anyhow::Result<()> { #[cfg(target_os = "linux")] let mut prepared = Some(prepared); + #[cfg(target_os = "linux")] + let supervisor_identity_mount = crate::process::supervisor_identity_mount_from_env() + .map_err(|err| { + anyhow::anyhow!("failed to prepare supervisor identity isolation: {err}") + })?; unsafe { cmd.pre_exec(move || { enter_netns_and_sandbox( netns_fd, &policy, #[cfg(target_os = "linux")] + supervisor_identity_mount, + #[cfg(target_os = "linux")] prepared.take(), ) }); } + Ok(()) } fn enter_netns_and_sandbox( netns_fd: Option, policy: &SandboxPolicy, + #[cfg(target_os = "linux")] supervisor_identity_mount: Option< + &crate::process::SupervisorIdentityMountNamespace, + >, #[cfg(target_os = "linux")] prepared: Option, ) -> std::io::Result<()> { // Enter network namespace before dropping privileges. @@ -1156,6 +1192,11 @@ mod unsafe_pty { #[cfg(not(target_os = "linux"))] let _ = netns_fd; + #[cfg(target_os = "linux")] + if let Some(mount) = supervisor_identity_mount { + mount.enter_for_child()?; + } + // Drop privileges. initgroups/setgid/setuid need /etc/group and // /etc/passwd which would be blocked if Landlock were already enforced. drop_privileges(policy).map_err(|err| std::io::Error::other(err.to_string()))?; @@ -1541,7 +1582,8 @@ mod tests { None, ) .expect("prepare should succeed in test environment"), - ); + ) + .expect("install pre_exec should succeed"); let output = cmd .spawn() diff --git a/crates/openshell-sandbox/src/token_grant.rs b/crates/openshell-sandbox/src/token_grant.rs new file mode 100644 index 000000000..03e9bfb39 --- /dev/null +++ b/crates/openshell-sandbox/src/token_grant.rs @@ -0,0 +1,1424 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +//! `OAuth2` JWT client assertion token grant using SPIFFE JWT-SVID. +//! +//! When a provider profile includes a `token_grant` configuration, the +//! supervisor obtains `OAuth2` access tokens on-demand by authenticating to the +//! token service using the sandbox's SPIFFE JWT-SVID as the client assertion. +//! +//! ## Flow +//! +//! 1. HTTP proxy intercepts outbound request to provider endpoint +//! 2. Check token cache for unexpired access token +//! 3. On cache miss or expiry: +//! a. Fetch JWT-SVID from SPIRE agent (via Workload API) +//! b. POST to token service with JWT client assertion grant +//! c. Cache the returned access token with TTL +//! 4. Inject `Authorization: Bearer ` header +//! +//! ## Configuration +//! +//! Token grant parameters come from the provider profile `token_grant` field: +//! - `token_endpoint` — `OAuth2` token service URL +//! - `jwt_svid_audience` — SPIRE JWT-SVID audience override (optional) +//! - `client_assertion_type` — `OAuth2` client assertion type (optional) +//! - `audience` — Resource audience to request from the token service +//! - `scopes` — `OAuth2` scopes to request (optional) +//! - `cache_ttl_seconds` — Cache override (0 = use `expires_in` from response) +//! +//! ## Environment +//! +//! Requires `OPENSHELL_PROVIDER_SPIFFE_WORKLOAD_API_SOCKET` to be set (path to +//! the SPIFFE Workload API socket). + +use std::collections::HashMap; +use std::future::Future; +use std::net::IpAddr; +use std::sync::{Arc, LazyLock, RwLock}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use miette::{IntoDiagnostic, Result, WrapErr}; +use openshell_core::sandbox_env; +use serde::Deserialize; +use spiffe::WorkloadApiClient; + +/// Token cache shared across all provider token grants. 
+static TOKEN_CACHE: LazyLock = LazyLock::new(TokenCache::new); +static TOKEN_GRANT_HTTP_CLIENT: LazyLock = LazyLock::new(|| { + reqwest::Client::builder() + .timeout(Duration::from_secs(30)) + .connect_timeout(Duration::from_secs(30)) + .no_proxy() + .redirect(reqwest::redirect::Policy::none()) + .build() + .expect("token grant HTTP client configuration should be valid") +}); +const MAX_OAUTH_ERROR_FIELD_LEN: usize = 256; +const DEFAULT_TOKEN_CACHE_TTL_SECONDS: i64 = 300; +const TOKEN_CACHE_EXPIRY_SKEW_SECONDS: i64 = 30; +const MAX_TOKEN_EXPIRES_IN_SECONDS: i64 = 3600; +const DEFAULT_CLIENT_ASSERTION_TYPE: &str = + "urn:ietf:params:oauth:client-assertion-type:jwt-bearer"; + +/// `OAuth2` token response from the authorization server. +#[derive(Debug, Clone, Deserialize)] +struct TokenResponse { + access_token: String, + #[serde(default)] + #[allow(dead_code)] + token_type: String, + #[serde(default)] + expires_in: i64, + #[serde(default)] + #[allow(dead_code)] + scope: String, +} + +#[derive(Debug, Deserialize)] +struct OAuthErrorResponse { + error: Option, + error_description: Option, +} + +/// Cached access token with expiration metadata. +#[derive(Debug, Clone)] +struct CachedToken { + access_token: String, + expires_at_ms: i64, +} + +/// Thread-safe token cache keyed by provider name. +struct TokenCache { + tokens: Arc>>, +} + +impl TokenCache { + fn new() -> Self { + Self { + tokens: Arc::new(RwLock::new(HashMap::new())), + } + } + + /// Get a cached token if it exists and is not expired. + fn get(&self, provider_name: &str) -> Option { + let now_ms = current_time_ms(); + let tokens = self.tokens.read().ok()?; + let cached = tokens.get(provider_name)?; + if cached.expires_at_ms > now_ms { + Some(cached.access_token.clone()) + } else { + None + } + } + + /// Store a token with expiration time. 
+ fn set(&self, provider_name: String, access_token: String, expires_at_ms: i64) { + if let Ok(mut tokens) = self.tokens.write() { + tokens.insert( + provider_name, + CachedToken { + access_token, + expires_at_ms, + }, + ); + } + } +} + +/// Obtain an `OAuth2` access token for a provider using JWT client assertion grant. +/// +/// This function fetches the sandbox's SPIFFE JWT-SVID from the local SPIRE +/// agent, then exchanges it for an access token with a POST request to the provider's +/// token endpoint with the JWT client assertion grant flow (RFC 7523). +/// +/// Tokens are cached per provider name with TTL. Subsequent calls return the +/// cached token if it has not expired. +/// +/// # Arguments +/// +/// * `provider_name` — Unique provider identifier (used as cache key) +/// * `token_endpoint` — `OAuth2` token service URL +/// * `jwt_svid_audience` — Optional audience to request when fetching the JWT-SVID +/// * `client_assertion_type` — Optional `OAuth2` client assertion type +/// * `audience` — Resource audience to request in the token request +/// * `scopes` — `OAuth2` scopes to request (may be empty) +/// * `cache_ttl_override` — Cache TTL in seconds (0 = use `expires_in` from response) +/// +/// # Errors +/// +/// Returns an error if: +/// - SPIFFE Workload API socket is not configured +/// - SPIRE agent is unreachable +/// - JWT-SVID fetch fails +/// - Token service request fails +/// - Token response is invalid +pub async fn obtain_provider_token( + provider_name: &str, + token_endpoint: &str, + jwt_svid_audience: &str, + client_assertion_type: &str, + audience: &str, + scopes: &[String], + cache_ttl_override: i64, +) -> Result { + obtain_provider_token_with_grant( + ObtainProviderTokenInput { + cache: &TOKEN_CACHE, + provider_name, + token_endpoint, + jwt_svid_audience, + client_assertion_type, + audience, + scopes, + cache_ttl_override, + }, + |jwt_audience| async move { + // Fetch JWT-SVID with authorization server as audience + // For RFC 7523, 
the JWT assertion's aud claim identifies the issuer/realm + let jwt_svid = fetch_jwt_svid_for_token_grant(&jwt_audience).await?; + + // Perform OAuth2 JWT client assertion grant + // The audience parameter in the token request specifies the resource server + perform_token_grant( + token_endpoint, + &jwt_svid, + client_assertion_type, + audience, + scopes, + ) + .await + }, + ) + .await +} + +struct ObtainProviderTokenInput<'a> { + cache: &'a TokenCache, + provider_name: &'a str, + token_endpoint: &'a str, + jwt_svid_audience: &'a str, + client_assertion_type: &'a str, + audience: &'a str, + scopes: &'a [String], + cache_ttl_override: i64, +} + +async fn obtain_provider_token_with_grant( + input: ObtainProviderTokenInput<'_>, + grant: F, +) -> Result +where + F: FnOnce(String) -> Fut, + Fut: Future>, +{ + // Derive authorization server audience from token endpoint + // For Keycloak: https://auth.example.com/realms/openshell/protocol/openid-connect/token + // -> https://auth.example.com/realms/openshell + let jwt_audience = effective_jwt_svid_audience(input.token_endpoint, input.jwt_svid_audience); + let cache_key = token_cache_key( + input.provider_name, + input.token_endpoint, + &jwt_audience, + effective_client_assertion_type(input.client_assertion_type), + input.audience, + input.scopes, + ); + + // Check cache first + if let Some(cached) = input.cache.get(&cache_key) { + return Ok(cached); + } + + let token_response = grant(jwt_audience).await?; + validate_access_token(&token_response.access_token)?; + + let cache_ttl_seconds = + token_cache_ttl_seconds(input.cache_ttl_override, token_response.expires_in); + let expires_at_ms = current_time_ms().saturating_add(cache_ttl_seconds.saturating_mul(1000)); + + // Cache the token + input.cache.set( + cache_key, + token_response.access_token.clone(), + expires_at_ms, + ); + + Ok(token_response.access_token) +} + +/// Fetch JWT-SVID from SPIRE agent for token grant authentication. 
+/// +/// This function connects to the local SPIRE agent via the Workload API and +/// requests a JWT-SVID with the specified audience. The JWT-SVID is used as +/// the client assertion in the `OAuth2` grant request. +async fn fetch_jwt_svid_for_token_grant(audience: &str) -> Result { + let socket_path = provider_spiffe_workload_api_socket_from_env()?; + + let endpoint = + crate::spiffe_endpoint::workload_api_endpoint(std::path::Path::new(&socket_path)); + + // Connect to SPIRE agent + let client = WorkloadApiClient::connect_to(&endpoint) + .await + .into_diagnostic() + .wrap_err_with(|| { + format!("failed to connect to SPIFFE Workload API endpoint {endpoint}") + })?; + + // Fetch JWT-SVID with token service audience + // None = use the sandbox's default SPIFFE ID + client + .fetch_jwt_token([audience], None) + .await + .into_diagnostic() + .wrap_err("failed to fetch JWT-SVID for token grant") +} + +fn provider_spiffe_workload_api_socket_from_env() -> Result { + std::env::var(sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET) + .ok() + .filter(|value| !value.trim().is_empty()) + .ok_or_else(|| { + miette::miette!( + "{} not set — SPIFFE authentication unavailable for token grant", + sandbox_env::PROVIDER_SPIFFE_WORKLOAD_API_SOCKET + ) + }) +} + +/// Perform `OAuth2` JWT client assertion grant. +/// +/// POSTs to the token endpoint with: +/// - `grant_type=client_credentials` +/// - `client_assertion_type=` +/// - `client_assertion=` (client identity is in the JWT's `sub` claim) +/// - `audience=` (if provided) +/// - `scope=` (if provided) +/// +/// Note: `client_id` is NOT included - the client is identified by the `sub` claim +/// in the JWT-SVID itself. 
+async fn perform_token_grant( + token_endpoint: &str, + jwt_svid: &str, + client_assertion_type: &str, + audience: &str, + scopes: &[String], +) -> Result { + let token_endpoint_url = parse_token_endpoint_url(token_endpoint)?; + let client_assertion_type = effective_client_assertion_type(client_assertion_type); + let mut form_params = vec![ + ("grant_type", "client_credentials"), + ("client_assertion_type", client_assertion_type), + ("client_assertion", jwt_svid), + ]; + + // Add audience if provided + let audience_param; + if !audience.is_empty() { + audience_param = audience.to_string(); + form_params.push(("audience", &audience_param)); + } + + // Add scopes if provided + let scope_param; + if !scopes.is_empty() { + scope_param = scopes.join(" "); + form_params.push(("scope", &scope_param)); + } + + // POST to token endpoint + let response = TOKEN_GRANT_HTTP_CLIENT + .post(token_endpoint_url) + .form(&form_params) + .send() + .await + .into_diagnostic() + .wrap_err_with(|| format!("failed to POST to token endpoint {token_endpoint}"))?; + + // Check response status + if !response.status().is_success() { + let status = response.status(); + let body = response + .text() + .await + .unwrap_or_else(|_| "".to_string()); + return Err(miette::miette!( + "{}", + token_grant_failure_message(status, &body) + )); + } + + // Parse token response + let token_response = response + .json::() + .await + .into_diagnostic() + .wrap_err("failed to parse token response as JSON")?; + validate_access_token(&token_response.access_token)?; + Ok(token_response) +} + +fn parse_token_endpoint_url(token_endpoint: &str) -> Result { + let url = reqwest::Url::parse(token_endpoint) + .into_diagnostic() + .wrap_err("token_endpoint must be an absolute URL")?; + if token_endpoint_transport_allowed(&url) { + return Ok(url); + } + + Err(miette::miette!( + "token_endpoint must use https, except http for loopback or in-cluster service hosts" + )) +} + +fn token_endpoint_transport_allowed(url: 
&reqwest::Url) -> bool { + match url.scheme() { + "https" => true, + "http" => url + .host_str() + .is_some_and(|host| is_loopback_host(host) || is_kubernetes_service_host(host)), + _ => false, + } +} + +fn is_loopback_host(host: &str) -> bool { + let host = host.trim_matches(['[', ']']); + if host.eq_ignore_ascii_case("localhost") { + return true; + } + + match host.parse::() { + Ok(IpAddr::V4(v4)) => v4.is_loopback(), + Ok(IpAddr::V6(v6)) => { + v6.is_loopback() || v6.to_ipv4_mapped().is_some_and(|v4| v4.is_loopback()) + } + Err(_) => false, + } +} + +fn is_kubernetes_service_host(host: &str) -> bool { + let host = host.trim_end_matches('.').to_ascii_lowercase(); + let labels = host.split('.').collect::>(); + let is_service_name = labels.len() == 3 && labels[2] == "svc"; + let is_cluster_local_service = + labels.len() == 5 && labels[2] == "svc" && labels[3] == "cluster" && labels[4] == "local"; + (is_service_name || is_cluster_local_service) && labels.iter().all(|label| !label.is_empty()) +} + +pub fn validate_access_token(token: &str) -> Result<()> { + if token.is_empty() || !is_token68(token) { + return Err(miette::miette!( + "token grant returned a malformed access token" + )); + } + Ok(()) +} + +fn is_token68(token: &str) -> bool { + let mut padding_started = false; + let mut saw_value = false; + for byte in token.bytes() { + if byte == b'=' { + padding_started = true; + continue; + } + if padding_started || !is_token68_value_byte(byte) { + return false; + } + saw_value = true; + } + saw_value +} + +fn is_token68_value_byte(byte: u8) -> bool { + byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'.' 
| b'_' | b'~' | b'+' | b'/') +} + +fn token_cache_ttl_seconds(cache_ttl_override: i64, expires_in: i64) -> i64 { + if cache_ttl_override > 0 { + return cache_ttl_override; + } + + let ttl = if expires_in > 0 { + expires_in.min(MAX_TOKEN_EXPIRES_IN_SECONDS) + } else { + DEFAULT_TOKEN_CACHE_TTL_SECONDS + }; + + ttl.saturating_sub(TOKEN_CACHE_EXPIRY_SKEW_SECONDS).max(1) +} + +/// Derive the issuer/realm URL from a token endpoint URL. +/// +/// For Keycloak token endpoints like: +/// `https://auth.example.com/realms/openshell/protocol/openid-connect/token` +/// Returns: +/// `https://auth.example.com/realms/openshell` +/// +/// This is used as the JWT-SVID audience claim when authenticating to the +/// authorization server via JWT client assertion (RFC 7523). +fn derive_issuer_from_token_endpoint(token_endpoint: &str) -> String { + // For Keycloak, strip everything after /realms/{realm-name} + if let Some(realms_idx) = token_endpoint.find("/realms/") { + // Find the next path segment after the realm name + let after_realms = &token_endpoint[realms_idx + "/realms/".len()..]; + if let Some(slash_idx) = after_realms.find('/') { + // Return everything up to (but not including) the next slash + let realm_end = realms_idx + "/realms/".len() + slash_idx; + return token_endpoint[..realm_end].to_string(); + } + } + + // Fallback: if we can't parse it, use the full token endpoint + // This works for some OAuth2 servers that accept the token endpoint as aud + token_endpoint.to_string() +} + +fn effective_jwt_svid_audience(token_endpoint: &str, jwt_svid_audience: &str) -> String { + if jwt_svid_audience.is_empty() { + derive_issuer_from_token_endpoint(token_endpoint) + } else { + jwt_svid_audience.to_string() + } +} + +fn effective_client_assertion_type(client_assertion_type: &str) -> &str { + if client_assertion_type.trim().is_empty() { + DEFAULT_CLIENT_ASSERTION_TYPE + } else { + client_assertion_type + } +} + +fn token_cache_key( + provider_name: &str, + token_endpoint: &str, + 
jwt_svid_audience: &str, + client_assertion_type: &str, + audience: &str, + scopes: &[String], +) -> String { + format!( + "{}\t{}\t{}\t{}\t{}\t{}", + provider_name, + token_endpoint, + jwt_svid_audience, + client_assertion_type, + audience, + scopes.join(" ") + ) +} + +fn token_grant_failure_message(status: reqwest::StatusCode, body: &str) -> String { + let Ok(error_response) = serde_json::from_str::(body) else { + return format!("token grant failed with status {status}"); + }; + + let error = error_response + .error + .as_deref() + .map(sanitize_oauth_error_field) + .filter(|value| !value.is_empty()); + let description = error_response + .error_description + .as_deref() + .map(sanitize_oauth_error_field) + .filter(|value| !value.is_empty()); + + match (error, description) { + (Some(error), Some(description)) => { + format!( + "token grant failed with status {status}: error={error}; error_description={description}" + ) + } + (Some(error), None) => { + format!("token grant failed with status {status}: error={error}") + } + (None, Some(description)) => { + format!("token grant failed with status {status}: error_description={description}") + } + (None, None) => format!("token grant failed with status {status}"), + } +} + +fn sanitize_oauth_error_field(value: &str) -> String { + value + .chars() + .map(|ch| if ch.is_control() { ' ' } else { ch }) + .take(MAX_OAUTH_ERROR_FIELD_LEN) + .collect::() + .trim() + .to_string() +} + +/// Get current Unix timestamp in milliseconds. 
+fn current_time_ms() -> i64 { + let millis = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or(Duration::from_secs(0)) + .as_millis(); + i64::try_from(millis).unwrap_or(i64::MAX) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + use std::sync::{ + Arc, + atomic::{AtomicUsize, Ordering}, + }; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + use tokio::net::TcpListener; + + #[derive(Debug)] + struct CapturedTokenRequest { + request_line: String, + headers: HashMap, + form: HashMap, + } + + async fn token_endpoint_once( + status: &str, + body: &str, + ) -> (String, tokio::task::JoinHandle) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("bind token endpoint"); + let addr = listener.local_addr().expect("token endpoint local addr"); + let status = status.to_string(); + let body = body.to_string(); + let handle = tokio::spawn(async move { + let (mut stream, _) = listener.accept().await.expect("accept token request"); + let mut buf = Vec::new(); + let mut chunk = [0u8; 512]; + let mut expected_len = None; + + loop { + let n = stream.read(&mut chunk).await.expect("read token request"); + assert!(n > 0, "token request stream closed before headers"); + buf.extend_from_slice(&chunk[..n]); + + if expected_len.is_none() + && let Some(header_end) = header_end(&buf) + { + let headers = String::from_utf8_lossy(&buf[..header_end]); + let content_length = headers + .lines() + .find_map(|line| { + let (name, value) = line.split_once(':')?; + name.eq_ignore_ascii_case("content-length") + .then(|| value.trim().parse::().ok()) + .flatten() + }) + .unwrap_or(0); + expected_len = Some(header_end + content_length); + } + + if expected_len.is_some_and(|len| buf.len() >= len) { + break; + } + } + + let captured = parse_token_request(&buf); + let response = format!( + "HTTP/1.1 {status}\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{body}", + body.len(), + ); + stream + 
.write_all(response.as_bytes()) + .await + .expect("write token response"); + captured + }); + + (format!("http://{addr}/token"), handle) + } + + async fn token_endpoint_redirect_once( + location: &str, + ) -> (String, tokio::task::JoinHandle) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("bind token endpoint"); + let addr = listener.local_addr().expect("token endpoint local addr"); + let location = location.to_string(); + let handle = tokio::spawn(async move { + let (mut stream, _) = listener.accept().await.expect("accept token request"); + let mut buf = Vec::new(); + let mut chunk = [0u8; 512]; + let mut expected_len = None; + + loop { + let n = stream.read(&mut chunk).await.expect("read token request"); + assert!(n > 0, "token request stream closed before headers"); + buf.extend_from_slice(&chunk[..n]); + + if expected_len.is_none() + && let Some(header_end) = header_end(&buf) + { + let headers = String::from_utf8_lossy(&buf[..header_end]); + let content_length = headers + .lines() + .find_map(|line| { + let (name, value) = line.split_once(':')?; + name.eq_ignore_ascii_case("content-length") + .then(|| value.trim().parse::().ok()) + .flatten() + }) + .unwrap_or(0); + expected_len = Some(header_end + content_length); + } + + if expected_len.is_some_and(|len| buf.len() >= len) { + break; + } + } + + let captured = parse_token_request(&buf); + let response = format!( + "HTTP/1.1 302 Found\r\nLocation: {location}\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", + ); + stream + .write_all(response.as_bytes()) + .await + .expect("write token response"); + captured + }); + + (format!("http://{addr}/token"), handle) + } + + fn header_end(buf: &[u8]) -> Option { + buf.windows(4) + .position(|w| w == b"\r\n\r\n") + .map(|idx| idx + 4) + } + + fn parse_token_request(buf: &[u8]) -> CapturedTokenRequest { + let header_end = header_end(buf).expect("request should contain header terminator"); + let headers = 
String::from_utf8_lossy(&buf[..header_end]); + let mut lines = headers.lines(); + let request_line = lines.next().expect("request line").to_string(); + let headers = lines + .filter_map(|line| { + let (name, value) = line.split_once(':')?; + Some((name.to_ascii_lowercase(), value.trim().to_string())) + }) + .collect(); + let body = String::from_utf8_lossy(&buf[header_end..]).to_string(); + + CapturedTokenRequest { + request_line, + headers, + form: parse_form_body(&body), + } + } + + fn parse_form_body(body: &str) -> HashMap { + body.split('&') + .filter(|part| !part.is_empty()) + .filter_map(|part| { + let (name, value) = part.split_once('=')?; + Some((decode_form_component(name), decode_form_component(value))) + }) + .collect() + } + + fn decode_form_component(value: &str) -> String { + let bytes = value.as_bytes(); + let mut decoded = Vec::with_capacity(bytes.len()); + let mut idx = 0; + while idx < bytes.len() { + match bytes[idx] { + b'+' => { + decoded.push(b' '); + idx += 1; + } + b'%' if idx + 2 < bytes.len() => { + let hex = &value[idx + 1..idx + 3]; + if let Ok(byte) = u8::from_str_radix(hex, 16) { + decoded.push(byte); + idx += 3; + } else { + decoded.push(bytes[idx]); + idx += 1; + } + } + byte => { + decoded.push(byte); + idx += 1; + } + } + } + String::from_utf8(decoded).expect("form body should be UTF-8") + } + + struct CountedTokenGrantInput<'a> { + cache: &'a TokenCache, + provider_name: &'a str, + token_endpoint: &'a str, + jwt_svid_audience: &'a str, + audience: &'a str, + scopes: &'a [String], + cache_ttl_override: i64, + expires_in: i64, + grant_calls: Arc, + } + + async fn obtain_counted_test_token(input: CountedTokenGrantInput<'_>) -> Result { + obtain_provider_token_with_grant( + ObtainProviderTokenInput { + cache: input.cache, + provider_name: input.provider_name, + token_endpoint: input.token_endpoint, + jwt_svid_audience: input.jwt_svid_audience, + client_assertion_type: DEFAULT_CLIENT_ASSERTION_TYPE, + audience: input.audience, + scopes: 
input.scopes, + cache_ttl_override: input.cache_ttl_override, + }, + move |_| { + let grant_calls = input.grant_calls.clone(); + async move { + let call = grant_calls.fetch_add(1, Ordering::SeqCst) + 1; + Ok(TokenResponse { + access_token: format!("token-{call}"), + token_type: "Bearer".to_string(), + expires_in: input.expires_in, + scope: input.scopes.join(" "), + }) + } + }, + ) + .await + } + + async fn obtain_token_without_grant_call( + cache: &TokenCache, + provider_name: &str, + token_endpoint: &str, + jwt_svid_audience: &str, + audience: &str, + scopes: &[String], + cache_ttl_override: i64, + ) -> Result { + obtain_provider_token_with_grant( + ObtainProviderTokenInput { + cache, + provider_name, + token_endpoint, + jwt_svid_audience, + client_assertion_type: DEFAULT_CLIENT_ASSERTION_TYPE, + audience, + scopes, + cache_ttl_override, + }, + |_| async { Err(miette::miette!("grant should not be called on cache hit")) }, + ) + .await + } + + #[test] + fn test_derive_issuer_from_keycloak_token_endpoint() { + let token_endpoint = + "https://auth.example.com/realms/openshell/protocol/openid-connect/token"; + let issuer = derive_issuer_from_token_endpoint(token_endpoint); + assert_eq!(issuer, "https://auth.example.com/realms/openshell"); + } + + #[test] + fn test_derive_issuer_from_https_keycloak_endpoint() { + let token_endpoint = + "https://auth.example.com/realms/production/protocol/openid-connect/token"; + let issuer = derive_issuer_from_token_endpoint(token_endpoint); + assert_eq!(issuer, "https://auth.example.com/realms/production"); + } + + #[test] + fn test_derive_issuer_fallback_for_non_keycloak() { + let token_endpoint = "https://oauth.example.com/token"; + let issuer = derive_issuer_from_token_endpoint(token_endpoint); + // Fallback: returns the full token endpoint + assert_eq!(issuer, "https://oauth.example.com/token"); + } + + #[test] + fn effective_jwt_svid_audience_prefers_explicit_override() { + let audience = effective_jwt_svid_audience( + 
"https://auth.example.com/realms/openshell/protocol/openid-connect/token", + "spiffe://custom-audience", + ); + + assert_eq!(audience, "spiffe://custom-audience"); + } + + #[test] + fn validate_access_token_accepts_token68_values() { + for token in [ + "abcXYZ123-._~+/", + "eyJhbGciOiJSUzI1NiJ9.payload.sig", + "token==", + ] { + validate_access_token(token).expect("token68 bearer token should be accepted"); + } + } + + #[test] + fn validate_access_token_rejects_header_injection_and_non_token68_values() { + for token in [ + "", + "token with spaces", + "token\r\nX-Injected: yes", + "token\u{7f}", + "tokené", + "token=continued", + "==", + ] { + let err = validate_access_token(token) + .expect_err("malformed bearer token should be rejected"); + assert_eq!( + err.to_string(), + "token grant returned a malformed access token" + ); + } + } + + #[test] + fn token_endpoint_url_allows_https_loopback_and_in_cluster_http() { + for endpoint in [ + "https://auth.example.com/token", + "http://127.0.0.1:8080/token", + "http://[::1]:8080/token", + "http://token-issuer.default.svc.cluster.local/token", + "http://token-issuer.default.svc/token", + ] { + parse_token_endpoint_url(endpoint).expect("token endpoint should be allowed"); + } + } + + #[test] + fn token_endpoint_url_rejects_plain_http_non_cluster_hosts() { + for endpoint in [ + "http://auth.example.com/token", + "http://keycloak/realms/openshell/protocol/openid-connect/token", + "http://token-issuer.default.svc.evil.com/token", + "ftp://auth.example.com/token", + "/relative/token", + ] { + assert!( + parse_token_endpoint_url(endpoint).is_err(), + "token endpoint should be rejected: {endpoint}" + ); + } + } + + #[test] + fn token_cache_key_varies_by_resource_audience_and_scopes() { + let base = token_cache_key( + "alpha.default.svc.cluster.local\t80\t\tprovider:access_token", + "https://auth.example.com/realms/openshell/protocol/openid-connect/token", + "https://auth.example.com/realms/openshell", + 
DEFAULT_CLIENT_ASSERTION_TYPE, + "alpha", + &["alpha".to_string()], + ); + let different_audience = token_cache_key( + "alpha.default.svc.cluster.local\t80\t\tprovider:access_token", + "https://auth.example.com/realms/openshell/protocol/openid-connect/token", + "https://auth.example.com/realms/openshell", + DEFAULT_CLIENT_ASSERTION_TYPE, + "delta", + &["alpha".to_string()], + ); + let different_scopes = token_cache_key( + "alpha.default.svc.cluster.local\t80\t\tprovider:access_token", + "https://auth.example.com/realms/openshell/protocol/openid-connect/token", + "https://auth.example.com/realms/openshell", + DEFAULT_CLIENT_ASSERTION_TYPE, + "alpha", + &["delta".to_string()], + ); + let different_assertion_type = token_cache_key( + "alpha.default.svc.cluster.local\t80\t\tprovider:access_token", + "https://auth.example.com/realms/openshell/protocol/openid-connect/token", + "https://auth.example.com/realms/openshell", + "urn:ietf:params:oauth:client-assertion-type:jwt-spiffe", + "alpha", + &["alpha".to_string()], + ); + + assert_ne!(base, different_audience); + assert_ne!(base, different_scopes); + assert_ne!(base, different_assertion_type); + } + + #[test] + fn token_cache_ttl_uses_override_without_endpoint_skew() { + assert_eq!(token_cache_ttl_seconds(120, 10), 120); + assert_eq!(token_cache_ttl_seconds(120, i64::MAX), 120); + } + + #[test] + fn token_cache_ttl_skews_default_and_response_expires_in() { + assert_eq!( + token_cache_ttl_seconds(0, 0), + DEFAULT_TOKEN_CACHE_TTL_SECONDS - TOKEN_CACHE_EXPIRY_SKEW_SECONDS + ); + assert_eq!(token_cache_ttl_seconds(0, 60), 30); + assert_eq!(token_cache_ttl_seconds(0, 10), 1); + } + + #[test] + fn token_cache_ttl_clamps_large_response_expires_in() { + assert_eq!( + token_cache_ttl_seconds(0, i64::MAX), + MAX_TOKEN_EXPIRES_IN_SECONDS - TOKEN_CACHE_EXPIRY_SKEW_SECONDS + ); + } + + #[tokio::test] + async fn obtain_provider_token_uses_cache_for_same_key() { + let cache = TokenCache::new(); + let grant_calls = 
Arc::new(AtomicUsize::new(0)); + let scopes = vec!["read".to_string()]; + + let first = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource", + scopes: &scopes, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("first call should grant token"); + let second = obtain_token_without_grant_call( + &cache, + "api.example.test\t443\t/v1/**\tprovider:access_token", + "https://auth.example.com/token", + "https://auth.example.com", + "api://resource", + &scopes, + 0, + ) + .await + .expect("second call should use cache"); + + assert_eq!(first, "token-1"); + assert_eq!(second, "token-1"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn obtain_provider_token_separates_cache_by_audience_and_scopes() { + let cache = TokenCache::new(); + let grant_calls = Arc::new(AtomicUsize::new(0)); + let read_scope = vec!["read".to_string()]; + let write_scope = vec!["write".to_string()]; + + let first = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource-one", + scopes: &read_scope, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("first audience should grant token"); + let different_audience = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource-two", + scopes: &read_scope, + cache_ttl_override: 0, + 
expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("different audience should grant token"); + let different_scope = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource-one", + scopes: &write_scope, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("different scope should grant token"); + + assert_eq!(first, "token-1"); + assert_eq!(different_audience, "token-2"); + assert_eq!(different_scope, "token-3"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 3); + } + + #[tokio::test] + async fn obtain_provider_token_regrants_after_expired_cache_entry() { + let cache = TokenCache::new(); + let grant_calls = Arc::new(AtomicUsize::new(0)); + let scopes = vec!["read".to_string()]; + let provider_name = "api.example.test\t443\t/v1/**\tprovider:access_token"; + let token_endpoint = "https://auth.example.com/token"; + let jwt_svid_audience = "https://auth.example.com"; + let audience = "api://resource"; + + let cache_key = token_cache_key( + provider_name, + token_endpoint, + jwt_svid_audience, + DEFAULT_CLIENT_ASSERTION_TYPE, + audience, + &scopes, + ); + cache.set( + cache_key, + "expired-token".to_string(), + current_time_ms() - 1, + ); + + let token = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name, + token_endpoint, + jwt_svid_audience, + audience, + scopes: &scopes, + cache_ttl_override: 0, + expires_in: 60, + grant_calls: grant_calls.clone(), + }) + .await + .expect("expired cache entry should grant token again"); + + assert_eq!(token, "token-1"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn obtain_provider_token_rejects_malformed_token_before_cache() { + let cache = TokenCache::new(); + let scopes = 
vec!["read".to_string()]; + let provider_name = "api.example.test\t443\t/v1/**\tprovider:access_token"; + let token_endpoint = "https://auth.example.com/token"; + let jwt_svid_audience = "https://auth.example.com"; + let audience = "api://resource"; + + let err = obtain_provider_token_with_grant( + ObtainProviderTokenInput { + cache: &cache, + provider_name, + token_endpoint, + jwt_svid_audience, + client_assertion_type: DEFAULT_CLIENT_ASSERTION_TYPE, + audience, + scopes: &scopes, + cache_ttl_override: 0, + }, + |_| async { + Ok(TokenResponse { + access_token: "access-123\r\nX-Injected: yes".to_string(), + token_type: "Bearer".to_string(), + expires_in: 60, + scope: "read".to_string(), + }) + }, + ) + .await + .expect_err("malformed access token should fail before caching"); + + let cache_key = token_cache_key( + provider_name, + token_endpoint, + jwt_svid_audience, + DEFAULT_CLIENT_ASSERTION_TYPE, + audience, + &scopes, + ); + + assert_eq!( + err.to_string(), + "token grant returned a malformed access token" + ); + assert!( + cache.get(&cache_key).is_none(), + "malformed access token must not be cached" + ); + } + + #[tokio::test] + async fn obtain_provider_token_cache_ttl_override_extends_zero_expires_in() { + let cache = TokenCache::new(); + let grant_calls = Arc::new(AtomicUsize::new(0)); + let scopes = vec!["read".to_string()]; + + let first = obtain_counted_test_token(CountedTokenGrantInput { + cache: &cache, + provider_name: "api.example.test\t443\t/v1/**\tprovider:access_token", + token_endpoint: "https://auth.example.com/token", + jwt_svid_audience: "https://auth.example.com", + audience: "api://resource", + scopes: &scopes, + cache_ttl_override: 60, + expires_in: 0, + grant_calls: grant_calls.clone(), + }) + .await + .expect("first override call should grant token"); + let second = obtain_token_without_grant_call( + &cache, + "api.example.test\t443\t/v1/**\tprovider:access_token", + "https://auth.example.com/token", + "https://auth.example.com", + 
"api://resource", + &scopes, + 60, + ) + .await + .expect("override should keep token cached"); + + assert_eq!(first, "token-1"); + assert_eq!(second, "token-1"); + assert_eq!(grant_calls.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn perform_token_grant_posts_jwt_assertion_and_parses_success_response() { + let (endpoint, request) = token_endpoint_once( + "200 OK", + r#"{"access_token":"access-123","token_type":"Bearer","expires_in":42,"scope":"read write"}"#, + ) + .await; + let scopes = vec!["read".to_string(), "write".to_string()]; + + let response = + perform_token_grant(&endpoint, "jwt-svid-token", "", "api://resource", &scopes) + .await + .expect("token grant should succeed"); + let request = request.await.expect("token endpoint task should finish"); + + assert_eq!(response.access_token, "access-123"); + assert_eq!(response.expires_in, 42); + assert_eq!(request.request_line, "POST /token HTTP/1.1"); + assert_eq!( + request.headers.get("content-type").map(String::as_str), + Some("application/x-www-form-urlencoded") + ); + assert_eq!( + request.form.get("grant_type").map(String::as_str), + Some("client_credentials") + ); + assert_eq!( + request + .form + .get("client_assertion_type") + .map(String::as_str), + Some(DEFAULT_CLIENT_ASSERTION_TYPE) + ); + assert_eq!( + request.form.get("client_assertion").map(String::as_str), + Some("jwt-svid-token") + ); + assert_eq!( + request.form.get("audience").map(String::as_str), + Some("api://resource") + ); + assert_eq!( + request.form.get("scope").map(String::as_str), + Some("read write") + ); + assert!( + !request.form.contains_key("client_id"), + "JWT-SVID subject should identify the client" + ); + } + + #[tokio::test] + async fn perform_token_grant_uses_configured_client_assertion_type() { + let (endpoint, request) = + token_endpoint_once("200 OK", r#"{"access_token":"access-123"}"#).await; + + let response = perform_token_grant( + &endpoint, + "jwt-svid-token", + 
"urn:ietf:params:oauth:client-assertion-type:jwt-spiffe", + "", + &[], + ) + .await + .expect("token grant should succeed"); + let request = request.await.expect("token endpoint task should finish"); + + assert_eq!(response.access_token, "access-123"); + assert_eq!( + request + .form + .get("client_assertion_type") + .map(String::as_str), + Some("urn:ietf:params:oauth:client-assertion-type:jwt-spiffe") + ); + } + + #[tokio::test] + async fn perform_token_grant_rejects_malformed_access_token() { + let (endpoint, request) = token_endpoint_once( + "200 OK", + r#"{"access_token":"access-123\r\nX-Injected: yes"}"#, + ) + .await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "", "", &[]) + .await + .expect_err("malformed access token should fail closed"); + let _request = request.await.expect("token endpoint task should finish"); + + assert_eq!( + err.to_string(), + "token grant returned a malformed access token" + ); + } + + #[tokio::test] + async fn perform_token_grant_does_not_follow_redirects() { + let (endpoint, request) = token_endpoint_redirect_once("http://127.0.0.1:1/stolen").await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "", "", &[]) + .await + .expect_err("redirect response should fail closed"); + let _request = request.await.expect("token endpoint task should finish"); + + assert_eq!(err.to_string(), "token grant failed with status 302 Found"); + } + + #[tokio::test] + async fn perform_token_grant_omits_empty_audience_and_scope() { + let (endpoint, request) = + token_endpoint_once("200 OK", r#"{"access_token":"access-123"}"#).await; + + let response = perform_token_grant(&endpoint, "jwt-svid-token", "", "", &[]) + .await + .expect("token grant should succeed without audience or scopes"); + let request = request.await.expect("token endpoint task should finish"); + + assert_eq!(response.access_token, "access-123"); + assert_eq!( + request.form.get("client_assertion").map(String::as_str), + Some("jwt-svid-token") + ); + 
assert!(!request.form.contains_key("audience")); + assert!(!request.form.contains_key("scope")); + } + + #[tokio::test] + async fn perform_token_grant_reports_sanitized_oauth_error_response() { + let (endpoint, request) = token_endpoint_once( + "401 Unauthorized", + r#"{"error":"invalid_client","error_description":"bad jwt assertion"}"#, + ) + .await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "", "api://resource", &[]) + .await + .expect_err("token grant should fail on OAuth error"); + let request = request.await.expect("token endpoint task should finish"); + + assert_eq!( + request.form.get("audience").map(String::as_str), + Some("api://resource") + ); + assert_eq!( + err.to_string(), + "token grant failed with status 401 Unauthorized: error=invalid_client; error_description=bad jwt assertion" + ); + } + + #[tokio::test] + async fn perform_token_grant_does_not_echo_unstructured_error_body() { + let (endpoint, request) = token_endpoint_once( + "500 Internal Server Error", + "internal stack trace with implementation details", + ) + .await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "", "", &[]) + .await + .expect_err("token grant should fail on server error"); + let _request = request.await.expect("token endpoint task should finish"); + let message = err.to_string(); + + assert_eq!( + message, + "token grant failed with status 500 Internal Server Error" + ); + assert!(!message.contains("stack trace")); + assert!(!message.contains("implementation details")); + } + + #[tokio::test] + async fn perform_token_grant_reports_malformed_success_json() { + let (endpoint, request) = token_endpoint_once("200 OK", r#"{"access_token":42"#).await; + + let err = perform_token_grant(&endpoint, "jwt-svid-token", "", "", &[]) + .await + .expect_err("malformed token response should fail"); + let _request = request.await.expect("token endpoint task should finish"); + + assert!( + err.to_string() + .contains("failed to parse token response as 
JSON") + ); + } + + #[test] + fn token_grant_failure_message_reports_oauth_error_fields() { + let message = token_grant_failure_message( + reqwest::StatusCode::UNAUTHORIZED, + r#"{"error":"invalid_client","error_description":"Invalid client credentials"}"#, + ); + + assert_eq!( + message, + "token grant failed with status 401 Unauthorized: error=invalid_client; error_description=Invalid client credentials" + ); + } + + #[test] + fn token_grant_failure_message_omits_unstructured_response_body() { + let message = token_grant_failure_message( + reqwest::StatusCode::INTERNAL_SERVER_ERROR, + "internal error containing implementation details", + ); + + assert_eq!( + message, + "token grant failed with status 500 Internal Server Error" + ); + } + + #[test] + fn token_grant_failure_message_sanitizes_oauth_error_fields() { + let long_description = "a".repeat(MAX_OAUTH_ERROR_FIELD_LEN + 20); + let body = + format!(r#"{{"error":"invalid_client\n","error_description":"{long_description}"}}"#); + let message = token_grant_failure_message(reqwest::StatusCode::UNAUTHORIZED, &body); + + assert!(!message.contains('\n')); + assert!(message.contains("error=invalid_client")); + assert!(message.contains(&"a".repeat(MAX_OAUTH_ERROR_FIELD_LEN))); + assert!(!message.contains(&"a".repeat(MAX_OAUTH_ERROR_FIELD_LEN + 1))); + } +} diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index 3fc746c6d..cb19cb5f6 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -35,7 +35,7 @@ k8s-openapi = { workspace = true } tokio = { workspace = true } # gRPC -tonic = { workspace = true, features = ["channel", "tls"] } +tonic = { workspace = true, features = ["channel", "tls-native-roots"] } prost = { workspace = true } prost-types = { workspace = true } @@ -86,6 +86,7 @@ sha2 = { workspace = true } jsonwebtoken = { workspace = true } async-trait = "0.1" url = { workspace = true } +glob = { workspace = true } hex = "0.4" russh = "0.57" 
rand = { workspace = true } diff --git a/crates/openshell-server/src/auth/principal.rs b/crates/openshell-server/src/auth/principal.rs index a95eb831b..1d4cb7276 100644 --- a/crates/openshell-server/src/auth/principal.rs +++ b/crates/openshell-server/src/auth/principal.rs @@ -6,7 +6,7 @@ //! A `Principal` is the result of running the [`super::authenticator::Authenticator`] //! chain on an inbound request. It generalizes over the kinds of callers the //! gateway recognizes — human users (OIDC), sandbox supervisors (gateway-minted -//! JWT, future SPIFFE), and anonymous callers (truly unauthenticated methods +//! JWT), and anonymous callers (truly unauthenticated methods //! like health probes). //! //! Handlers read the principal from the gRPC `Request` extensions and gate @@ -52,8 +52,8 @@ pub struct SandboxPrincipal { /// How this principal was verified — used for audit logs and method-specific /// authorization checks. pub source: SandboxIdentitySource, - /// SPIFFE trust domain. Populated when the credential is SPIFFE-shaped; - /// reserved for future per-sandbox cert / SPIRE authenticators. + /// Optional namespace component parsed from sandbox identity credentials. + /// Gateway-minted sandbox JWTs currently use an identity-shaped subject. pub trust_domain: Option, } @@ -70,8 +70,6 @@ pub enum SandboxIdentitySource { /// Per-sandbox client certificate. Reserved for channel-bound sandbox /// identity. BootstrapCert { fingerprint: String }, - /// SPIRE-issued SVID. Reserved for SPIFFE/SPIRE sandbox identity. - SpiffeSvid { spiffe_id: String }, /// K8s `ServiceAccount` token used to bootstrap a gateway-minted JWT /// via `IssueSandboxToken`. Populated only on that one RPC path. 
K8sServiceAccount { pod_name: String, pod_uid: String }, diff --git a/crates/openshell-server/src/grpc/policy.rs b/crates/openshell-server/src/grpc/policy.rs index 380671f10..844859877 100644 --- a/crates/openshell-server/src/grpc/policy.rs +++ b/crates/openshell-server/src/grpc/policy.rs @@ -1256,6 +1256,7 @@ pub(super) async fn compute_provider_env_revision( Status::internal(format!("decode provider '{provider_name}' failed: {e}")) })?; hasher.update(provider.r#type.as_bytes()); + hash_provider_profile_revision(store, &provider.r#type, &mut hasher).await?; let mut credential_keys: Vec<_> = provider.credentials.keys().collect(); credential_keys.sort(); @@ -1281,6 +1282,41 @@ pub(super) async fn compute_provider_env_revision( )?)) } +async fn hash_provider_profile_revision( + store: &Store, + provider_type: &str, + hasher: &mut Sha256, +) -> Result<(), Status> { + if let Some(profile) = get_default_profile(provider_type) { + hasher.update(b"builtin-profile"); + hasher.update(profile.to_proto().encode_to_vec()); + return Ok(()); + } + + hasher.update(b"custom-profile"); + match store + .get_by_name( + openshell_core::proto::StoredProviderProfile::object_type(), + provider_type, + ) + .await + .map_err(|e| { + Status::internal(format!( + "fetch provider profile '{provider_type}' failed: {e}" + )) + })? 
{ + Some(record) => { + hasher.update(record.id.as_bytes()); + hasher.update(record.updated_at_ms.to_le_bytes()); + hasher.update(record.payload.as_slice()); + } + None => { + hasher.update(b"missing"); + } + } + Ok(()) +} + async fn profile_provider_policy_layers( store: &Store, provider_names: &[String], @@ -1389,6 +1425,7 @@ pub(super) async fn handle_get_sandbox_provider_environment( environment: provider_environment.environment, provider_env_revision, credential_expires_at_ms: provider_environment.credential_expires_at_ms, + dynamic_credentials: provider_environment.dynamic_credentials, })) } @@ -4883,6 +4920,88 @@ mod tests { ); } + #[tokio::test] + async fn provider_env_revision_changes_when_custom_profile_token_grant_changes() { + use openshell_core::proto::{ + ProviderCredentialTokenGrant, ProviderProfile, ProviderProfileCategory, + ProviderProfileCredential, StoredProviderProfile, + }; + use std::time::Duration; + + fn token_grant_profile(token_endpoint: &str) -> StoredProviderProfile { + StoredProviderProfile { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "profile-custom-token".to_string(), + name: "custom-token".to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + resource_version: 0, + }), + profile: Some(ProviderProfile { + id: "custom-token".to_string(), + display_name: "Custom Token".to_string(), + description: String::new(), + category: ProviderProfileCategory::Other as i32, + credentials: vec![ProviderProfileCredential { + name: "access_token".to_string(), + auth_style: "bearer".to_string(), + header_name: "authorization".to_string(), + token_grant: Some(ProviderCredentialTokenGrant { + token_endpoint: token_endpoint.to_string(), + audience: "api://default".to_string(), + ..Default::default() + }), + ..Default::default() + }], + endpoints: vec![NetworkEndpoint { + host: "api.custom.example".to_string(), + port: 443, + ..Default::default() + }], + binaries: Vec::new(), + inference_capable: false, + 
discovery: None, + }), + } + } + + let state = test_server_state().await; + state + .store + .put_message(&test_provider("work-custom-token", "custom-token")) + .await + .unwrap(); + state + .store + .put_message(&token_grant_profile("https://auth.example.com/token")) + .await + .unwrap(); + + let first = + compute_provider_env_revision(state.store.as_ref(), &["work-custom-token".to_string()]) + .await + .unwrap(); + + tokio::time::sleep(Duration::from_millis(2)).await; + state + .store + .put_message(&token_grant_profile( + "https://auth.example.com/rotated-token", + )) + .await + .unwrap(); + + let second = + compute_provider_env_revision(state.store.as_ref(), &["work-custom-token".to_string()]) + .await + .unwrap(); + + assert_ne!( + first, second, + "custom provider profile updates must trigger sandbox dynamic credential refresh" + ); + } + #[tokio::test] async fn sandbox_config_and_provider_env_follow_attached_provider_lifecycle() { use crate::grpc::sandbox::{ diff --git a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs index 4552fceae..641118206 100644 --- a/crates/openshell-server/src/grpc/provider.rs +++ b/crates/openshell-server/src/grpc/provider.rs @@ -8,7 +8,10 @@ use crate::persistence::{ ObjectId, ObjectLabels, ObjectName, ObjectType, Store, WriteCondition, generate_name, }; -use openshell_core::proto::{Provider, Sandbox}; +use openshell_core::proto::{ + Provider, ProviderCredentialTokenGrantAudienceOverride, ProviderProfile, + ProviderProfileCredential, Sandbox, +}; use openshell_core::telemetry::{ LifecycleOperation, ProviderProfile as TelemetryProviderProfile, TelemetryOutcome, }; @@ -36,10 +39,11 @@ fn redact_provider_credentials(mut provider: Provider) -> Provider { provider } -#[derive(Debug, Clone, Default, PartialEq, Eq)] +#[derive(Debug, Clone, Default, PartialEq)] pub(super) struct ProviderEnvironment { pub environment: std::collections::HashMap, pub credential_expires_at_ms: 
std::collections::HashMap, + pub dynamic_credentials: std::collections::HashMap, } impl ProviderEnvironment { @@ -94,7 +98,7 @@ pub(super) async fn create_provider_record( return Err(Status::invalid_argument("provider.type is required")); } if provider.credentials.is_empty() - && !provider_type_allows_empty_credentials_for_refresh(store, &provider.r#type).await? + && !provider_type_allows_empty_credentials(store, &provider.r#type).await? { return Err(Status::invalid_argument( "provider.credentials must not be empty", @@ -535,9 +539,325 @@ pub(super) async fn resolve_provider_environment( Ok(ProviderEnvironment { environment: env, credential_expires_at_ms: expires, + dynamic_credentials: resolve_dynamic_credentials(store, provider_names).await?, }) } +/// Resolve dynamic credentials (token grants) from provider profiles. +/// +/// Returns a map of endpoint-bound keys to credential metadata for credentials +/// that have `token_grant` configuration. Keys are internal supervisor metadata: +/// host, port, endpoint path, and provider credential identity. +pub(super) async fn resolve_dynamic_credentials( + store: &Store, + provider_names: &[String], +) -> Result, Status> { + if provider_names.is_empty() { + return Ok(std::collections::HashMap::new()); + } + + let mut dynamic_creds = std::collections::HashMap::new(); + + for provider_name in provider_names { + let provider = store + .get_message_by_name::(provider_name) + .await + .map_err(|e| { + Status::internal(format!("failed to fetch provider '{provider_name}': {e}")) + })? + .ok_or_else(|| { + Status::failed_precondition(format!("provider '{provider_name}' not found")) + })?; + + let profile_id = + normalize_provider_type(&provider.r#type).unwrap_or(provider.r#type.as_str()); + let Some(profile) = get_provider_type_profile(store, profile_id).await? 
else { + continue; + }; + + insert_dynamic_credentials_for_profile( + &mut dynamic_creds, + &profile.to_proto(), + provider_name, + ); + } + + Ok(dynamic_creds) +} + +fn insert_dynamic_credentials_for_profile( + dynamic_creds: &mut std::collections::HashMap, + profile: &ProviderProfile, + provider_name: &str, +) { + for credential in &profile.credentials { + if credential.token_grant.is_none() { + continue; + } + for endpoint in &profile.endpoints { + for port in endpoint_ports(endpoint.port, &endpoint.ports) { + insert_dynamic_credentials_for_endpoint( + dynamic_creds, + &endpoint.host, + port, + &endpoint.path, + provider_name, + &credential.name, + credential, + ); + } + } + } +} + +fn endpoint_ports(port: u32, ports: &[u32]) -> Vec { + if ports.is_empty() { + if port == 0 { Vec::new() } else { vec![port] } + } else { + ports.iter().copied().filter(|port| *port != 0).collect() + } +} + +fn dynamic_credential_key( + host: &str, + port: u32, + path: &str, + provider_name: &str, + credential_name: &str, +) -> String { + format!( + "{}\t{port}\t{}\t{}:{}", + host.to_ascii_lowercase(), + path, + provider_name, + credential_name + ) +} + +fn insert_dynamic_credentials_for_endpoint( + dynamic_creds: &mut std::collections::HashMap, + endpoint_host: &str, + endpoint_port: u32, + endpoint_path: &str, + provider_name: &str, + credential_name: &str, + credential: &ProviderProfileCredential, +) { + let default_key = dynamic_credential_key( + endpoint_host, + endpoint_port, + endpoint_path, + provider_name, + credential_name, + ); + dynamic_creds.insert(default_key, resolved_dynamic_credential(credential, None)); + + let Some(token_grant) = credential.token_grant.as_ref() else { + return; + }; + + for override_config in &token_grant.audience_overrides { + if !token_grant_override_matches_endpoint(override_config, endpoint_host, endpoint_port) { + continue; + } + + let override_host = if override_config.host.is_empty() { + endpoint_host + } else { + 
override_config.host.as_str() + }; + let override_port = if override_config.port == 0 { + endpoint_port + } else { + override_config.port + }; + let override_path = if override_config.path.is_empty() { + endpoint_path + } else { + override_config.path.as_str() + }; + let override_key = dynamic_credential_key( + override_host, + override_port, + override_path, + provider_name, + credential_name, + ); + dynamic_creds.insert( + override_key, + resolved_dynamic_credential(credential, Some(override_config)), + ); + } +} + +fn resolved_dynamic_credential( + credential: &ProviderProfileCredential, + override_config: Option<&ProviderCredentialTokenGrantAudienceOverride>, +) -> ProviderProfileCredential { + let mut credential = credential.clone(); + if let Some(token_grant) = credential.token_grant.as_mut() { + if let Some(override_config) = override_config { + if !override_config.audience.is_empty() { + token_grant.audience.clone_from(&override_config.audience); + } + if !override_config.scopes.is_empty() { + token_grant.scopes.clone_from(&override_config.scopes); + } + } + token_grant.audience_overrides.clear(); + } + credential +} + +fn token_grant_override_matches_endpoint( + override_config: &ProviderCredentialTokenGrantAudienceOverride, + endpoint_host: &str, + endpoint_port: u32, +) -> bool { + let host_matches = override_config.host.is_empty() + || host_pattern_matches(&override_config.host, endpoint_host) + || host_pattern_matches(endpoint_host, &override_config.host); + let port_matches = override_config.port == 0 || override_config.port == endpoint_port; + host_matches && port_matches +} + +fn host_pattern_matches(pattern: &str, host: &str) -> bool { + let pattern = pattern.to_ascii_lowercase(); + let host = host.to_ascii_lowercase(); + if pattern == host { + return true; + } + if !pattern.contains('*') { + return false; + } + + let pattern_labels: Vec<&str> = pattern.split('.').collect(); + let host_labels: Vec<&str> = host.split('.').collect(); + 
host_pattern_labels_match(&pattern_labels, &host_labels) +} + +fn host_pattern_labels_match(pattern: &[&str], host: &[&str]) -> bool { + match pattern.split_first() { + None => host.is_empty(), + Some((label, rest)) if *label == "**" => { + host_pattern_labels_match(rest, host) + || (!host.is_empty() && host_pattern_labels_match(pattern, &host[1..])) + } + Some((label, rest)) if *label == "*" => { + !host.is_empty() && host_pattern_labels_match(rest, &host[1..]) + } + Some((literal, rest)) => { + host.first().is_some_and(|label| label == literal) + && host_pattern_labels_match(rest, &host[1..]) + } + } +} + +fn dynamic_token_grant_match_score(host: &str, path: &str) -> u32 { + host_pattern_specificity(host) + endpoint_path_specificity(path) +} + +fn host_pattern_specificity(pattern: &str) -> u32 { + let wildcard_penalty = count_as_u32(pattern.matches('*').count()); + let label_count = count_as_u32(pattern.split('.').filter(|label| !label.is_empty()).count()); + let literal_chars = count_as_u32(pattern.chars().filter(|ch| *ch != '*').count()); + 100_000u32 + .saturating_sub(wildcard_penalty.saturating_mul(10_000)) + .saturating_add(label_count.saturating_mul(100)) + .saturating_add(literal_chars) +} + +fn endpoint_path_specificity(path: &str) -> u32 { + if path.is_empty() || path == "**" { + return 0; + } + 1_000_000u32.saturating_add(count_as_u32(path.chars().filter(|ch| *ch != '*').count())) +} + +fn count_as_u32(count: usize) -> u32 { + u32::try_from(count).unwrap_or(u32::MAX) +} + +fn host_patterns_can_overlap(first: &str, second: &str) -> bool { + let first = first.to_ascii_lowercase(); + let second = second.to_ascii_lowercase(); + if !first.contains('*') { + return host_pattern_matches(&second, &first); + } + if !second.contains('*') { + return host_pattern_matches(&first, &second); + } + let first_labels: Vec<&str> = first.split('.').collect(); + let second_labels: Vec<&str> = second.split('.').collect(); + host_pattern_labels_can_overlap(&first_labels, 
&second_labels) +} + +fn host_pattern_labels_can_overlap(first: &[&str], second: &[&str]) -> bool { + match (first.split_first(), second.split_first()) { + (None, None) => true, + (None, Some((label, rest))) if *label == "**" => { + host_pattern_labels_can_overlap(first, rest) + } + (Some((label, rest)), None) if *label == "**" => { + host_pattern_labels_can_overlap(rest, second) + } + (None, _) | (_, None) => false, + (Some((label, rest)), _) if *label == "**" => { + host_pattern_labels_can_overlap(rest, second) + || host_pattern_labels_can_overlap(first, &second[1..]) + } + (_, Some((label, rest))) if *label == "**" => { + host_pattern_labels_can_overlap(first, rest) + || host_pattern_labels_can_overlap(&first[1..], second) + } + (Some((first_label, first_rest)), Some((second_label, second_rest))) => { + (*first_label == "*" || *second_label == "*" || first_label == second_label) + && host_pattern_labels_can_overlap(first_rest, second_rest) + } + } +} + +fn path_patterns_can_overlap(first: &str, second: &str) -> bool { + if path_matches_all(first) || path_matches_all(second) { + return true; + } + if !first.contains('*') { + return endpoint_path_matches(second, first); + } + if !second.contains('*') { + return endpoint_path_matches(first, second); + } + match (path_prefix_pattern(first), path_prefix_pattern(second)) { + (Some(first_prefix), Some(second_prefix)) => { + first_prefix == second_prefix + || first_prefix.starts_with(&format!("{second_prefix}/")) + || second_prefix.starts_with(&format!("{first_prefix}/")) + } + _ => true, + } +} + +fn path_matches_all(path: &str) -> bool { + path.is_empty() || path == "**" || path == "/**" +} + +fn path_prefix_pattern(path: &str) -> Option<&str> { + path.strip_suffix("/**") +} + +fn endpoint_path_matches(pattern: &str, path: &str) -> bool { + if path_matches_all(pattern) { + return true; + } + if pattern == path { + return true; + } + if let Some(prefix) = path_prefix_pattern(pattern) { + return path == prefix || 
path.starts_with(&format!("{prefix}/")); + } + glob::Pattern::new(pattern).is_ok_and(|glob| glob.matches(path)) +} + pub async fn validate_provider_environment_keys_unique( store: &Store, provider_names: &[String], @@ -599,6 +919,7 @@ async fn validate_provider_environment_keys_unique_at( now_ms: i64, ) -> Result<(), Status> { let mut seen = std::collections::HashMap::::new(); + let mut dynamic_bindings = Vec::new(); for name in provider_names { let provider = match candidate_provider { Some(candidate) if candidate.object_name() == name.as_str() => candidate.clone(), @@ -622,6 +943,162 @@ async fn validate_provider_environment_keys_unique_at( seen.insert(key, provider_name.clone()); } } + dynamic_bindings.extend(dynamic_token_grant_bindings_for_provider(store, &provider).await?); + } + validate_dynamic_token_grant_bindings_unambiguous(&dynamic_bindings)?; + Ok(()) +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct DynamicTokenGrantBinding { + provider_name: String, + credential_name: String, + host: String, + port: u32, + path: String, + score: u32, +} + +async fn dynamic_token_grant_bindings_for_provider( + store: &Store, + provider: &Provider, +) -> Result, Status> { + let provider_name = provider.object_name().to_string(); + let profile_id = normalize_provider_type(&provider.r#type).unwrap_or(provider.r#type.as_str()); + let Some(profile) = get_provider_type_profile(store, profile_id).await? 
else { + return Ok(Vec::new()); + }; + Ok(dynamic_token_grant_bindings_for_profile( + &provider_name, + &profile.to_proto(), + )) +} + +fn dynamic_token_grant_bindings_for_profile( + provider_name: &str, + profile: &ProviderProfile, +) -> Vec { + let mut bindings = Vec::new(); + for credential in &profile.credentials { + if credential.token_grant.is_none() { + continue; + } + for endpoint in &profile.endpoints { + for port in endpoint_ports(endpoint.port, &endpoint.ports) { + push_dynamic_token_grant_bindings_for_endpoint( + &mut bindings, + provider_name, + credential, + &endpoint.host, + port, + &endpoint.path, + ); + } + } + } + bindings +} + +fn push_dynamic_token_grant_bindings_for_endpoint( + bindings: &mut Vec, + provider_name: &str, + credential: &ProviderProfileCredential, + endpoint_host: &str, + endpoint_port: u32, + endpoint_path: &str, +) { + push_dynamic_token_grant_binding( + bindings, + provider_name, + &credential.name, + endpoint_host, + endpoint_port, + endpoint_path, + ); + + let Some(token_grant) = credential.token_grant.as_ref() else { + return; + }; + + for override_config in &token_grant.audience_overrides { + if !token_grant_override_matches_endpoint(override_config, endpoint_host, endpoint_port) { + continue; + } + let override_host = if override_config.host.is_empty() { + endpoint_host + } else { + override_config.host.as_str() + }; + let override_port = if override_config.port == 0 { + endpoint_port + } else { + override_config.port + }; + let override_path = if override_config.path.is_empty() { + endpoint_path + } else { + override_config.path.as_str() + }; + push_dynamic_token_grant_binding( + bindings, + provider_name, + &credential.name, + override_host, + override_port, + override_path, + ); + } +} + +fn push_dynamic_token_grant_binding( + bindings: &mut Vec, + provider_name: &str, + credential_name: &str, + host: &str, + port: u32, + path: &str, +) { + let candidate = DynamicTokenGrantBinding { + provider_name: 
provider_name.to_string(), + credential_name: credential_name.to_string(), + host: host.to_ascii_lowercase(), + port, + path: path.to_string(), + score: dynamic_token_grant_match_score(host, path), + }; + if !bindings.iter().any(|binding| binding == &candidate) { + bindings.push(candidate); + } +} + +fn validate_dynamic_token_grant_bindings_unambiguous( + bindings: &[DynamicTokenGrantBinding], +) -> Result<(), Status> { + for (index, first) in bindings.iter().enumerate() { + for second in bindings.iter().skip(index + 1) { + if first.provider_name == second.provider_name + && first.credential_name == second.credential_name + { + continue; + } + if first.port == second.port + && first.score == second.score + && host_patterns_can_overlap(&first.host, &second.host) + && path_patterns_can_overlap(&first.path, &second.path) + { + return Err(Status::failed_precondition(format!( + "dynamic token grants for '{}:{}' and '{}:{}' are ambiguous for {}:{} path selectors '{}' and '{}'; make one host/path selector more specific or attach only one matching provider", + first.provider_name, + first.credential_name, + second.provider_name, + second.credential_name, + first.host, + first.port, + first.path, + second.path + ))); + } + } } Ok(()) } @@ -702,10 +1179,9 @@ use openshell_core::proto::{ GetProviderRequest, ImportProviderProfilesRequest, ImportProviderProfilesResponse, LintProviderProfilesRequest, LintProviderProfilesResponse, ListProviderProfilesRequest, ListProviderProfilesResponse, ListProvidersRequest, ListProvidersResponse, - ProviderCredentialRefreshStrategy, ProviderProfile, ProviderProfileDiagnostic, - ProviderProfileImportItem, ProviderProfileResponse, ProviderResponse, - RotateProviderCredentialRequest, RotateProviderCredentialResponse, StoredProviderProfile, - UpdateProviderRequest, + ProviderCredentialRefreshStrategy, ProviderProfileDiagnostic, ProviderProfileImportItem, + ProviderProfileResponse, ProviderResponse, RotateProviderCredentialRequest, + 
RotateProviderCredentialResponse, StoredProviderProfile, UpdateProviderRequest, }; use openshell_providers::{ CredentialRefreshProfile, ProfileValidationDiagnostic, ProviderTypeProfile, default_profiles, @@ -824,6 +1300,11 @@ pub(super) async fn handle_import_provider_profiles( add_empty_profile_set_diagnostic(&profiles, &mut diagnostics); diagnostics.extend(profile_conflict_diagnostics(state.store.as_ref(), &profiles).await?); diagnostics.extend(validate_profile_set(&profiles)); + if !has_errors(&diagnostics) { + diagnostics.extend( + profile_import_attached_sandbox_diagnostics(state.store.as_ref(), &profiles).await?, + ); + } if has_errors(&diagnostics) { return Ok(Response::new(ImportProviderProfilesResponse { @@ -978,14 +1459,14 @@ fn validate_refresh_material( Ok(()) } -async fn provider_type_allows_empty_credentials_for_refresh( +async fn provider_type_allows_empty_credentials( store: &Store, provider_type: &str, ) -> Result { let Some(profile) = get_provider_type_profile(store, provider_type).await? 
else { return Ok(false); }; - Ok(profile.allows_gateway_refresh_bootstrap()) + Ok(profile.allows_empty_provider_credentials()) } async fn merged_provider_profiles(store: &Store) -> Result, Status> { @@ -1094,6 +1575,83 @@ async fn profile_conflict_diagnostics( Ok(diagnostics) } +async fn profile_import_attached_sandbox_diagnostics( + store: &Store, + profiles: &[(String, ProviderTypeProfile)], +) -> Result, Status> { + let mut candidate_profiles = + std::collections::HashMap::::new(); + for (source, profile) in profiles { + let Some(id) = normalize_profile_id(&profile.id) else { + continue; + }; + candidate_profiles.insert(id, (source.clone(), profile.to_proto())); + } + if candidate_profiles.is_empty() { + return Ok(Vec::new()); + } + + let sandboxes = scan_sandboxes(store, |sandbox| { + sandbox + .spec + .as_ref() + .is_some_and(|spec| !spec.providers.is_empty()) + .then_some(sandbox) + }) + .await?; + let mut diagnostics = Vec::new(); + for sandbox in sandboxes { + let sandbox_name = sandbox.object_name().to_string(); + let spec = sandbox.spec.as_ref().expect("filtered by scan_sandboxes"); + let mut bindings = Vec::new(); + let mut imported_profiles_used = Vec::<(String, String)>::new(); + + for provider_name in &spec.providers { + let Some(provider) = store + .get_message_by_name::(provider_name) + .await + .map_err(|e| Status::internal(format!("fetch provider failed: {e}")))? 
+ else { + continue; + }; + let profile_id = + normalize_provider_type(&provider.r#type).unwrap_or(provider.r#type.as_str()); + if let Some((source, profile)) = candidate_profiles.get(profile_id) { + bindings.extend(dynamic_token_grant_bindings_for_profile( + provider.object_name(), + profile, + )); + let used = (source.clone(), profile_id.to_string()); + if !imported_profiles_used.contains(&used) { + imported_profiles_used.push(used); + } + } else { + bindings.extend(dynamic_token_grant_bindings_for_provider(store, &provider).await?); + } + } + + if imported_profiles_used.is_empty() { + continue; + } + if let Err(err) = validate_dynamic_token_grant_bindings_unambiguous(&bindings) { + for (source, profile_id) in &imported_profiles_used { + diagnostics.push(ProfileValidationDiagnostic { + source: source.clone(), + profile_id: profile_id.clone(), + field: "credentials.token_grant.audience_overrides".to_string(), + message: format!( + "import would create ambiguous dynamic token grants on sandbox '{sandbox_name}': {}", + err.message() + ), + severity: "error".to_string(), + }); + } + } + } + + Ok(diagnostics) +} + fn stored_provider_profile(profile: ProviderProfile) -> StoredProviderProfile { use crate::persistence::current_time_ms; let now_ms = current_time_ms(); @@ -1614,6 +2172,7 @@ mod tests { DeleteProviderProfileRequest, GetProviderProfileRequest, ImportProviderProfilesRequest, L7Allow, L7Rule, LintProviderProfilesRequest, ListProviderProfilesRequest, NetworkBinary, NetworkEndpoint, ProviderCredentialRefresh, ProviderCredentialRefreshMaterial, + ProviderCredentialTokenGrant, ProviderCredentialTokenGrantAudienceOverride, ProviderProfile, ProviderProfileCategory, ProviderProfileCredential, ProviderProfileImportItem, Sandbox, SandboxSpec, }; @@ -1678,6 +2237,242 @@ mod tests { ); } + #[test] + fn dynamic_credentials_expand_endpoint_audience_overrides() { + let service_audiences = [ + ("alpha.default.svc.cluster.local", "alpha"), + ("beta.default.svc.cluster.local", 
"beta"), + ("gamma.default.svc.cluster.local", "gamma"), + ("delta.default.svc.cluster.local", "delta"), + ]; + let credential = ProviderProfileCredential { + name: "access_token".to_string(), + description: String::new(), + env_vars: Vec::new(), + required: false, + auth_style: "bearer".to_string(), + header_name: "Authorization".to_string(), + query_param: String::new(), + refresh: None, + path_template: String::new(), + token_grant: Some(ProviderCredentialTokenGrant { + token_endpoint: "http://keycloak.default.svc.cluster.local/realms/openshell/protocol/openid-connect/token".to_string(), + audience: "api://default".to_string(), + jwt_svid_audience: "http://keycloak.default.svc.cluster.local/realms/openshell" + .to_string(), + client_assertion_type: + "urn:ietf:params:oauth:client-assertion-type:jwt-bearer".to_string(), + scopes: vec!["openid".to_string()], + cache_ttl_seconds: 300, + audience_overrides: service_audiences + .iter() + .map( + |(host, audience)| ProviderCredentialTokenGrantAudienceOverride { + host: (*host).to_string(), + port: 80, + path: String::new(), + audience: (*audience).to_string(), + scopes: vec![(*audience).to_string()], + }, + ) + .collect(), + }), + }; + let profile = ProviderProfile { + id: "keycloak-sso".to_string(), + display_name: "Keycloak SSO".to_string(), + description: String::new(), + category: ProviderProfileCategory::Other as i32, + credentials: vec![credential], + endpoints: service_audiences + .iter() + .map(|(host, _)| NetworkEndpoint { + host: (*host).to_string(), + port: 80, + ..Default::default() + }) + .collect(), + binaries: Vec::new(), + inference_capable: false, + discovery: None, + }; + + let mut dynamic_creds = HashMap::new(); + insert_dynamic_credentials_for_profile(&mut dynamic_creds, &profile, "keycloak"); + + assert_eq!(dynamic_creds.len(), 4); + for (host, audience) in service_audiences { + let key = dynamic_credential_key(host, 80, "", "keycloak", "access_token"); + let grant = 
dynamic_creds[&key].token_grant.as_ref().unwrap(); + assert_eq!(grant.audience, audience); + assert_eq!(grant.scopes, vec![audience.to_string()]); + assert!(grant.audience_overrides.is_empty()); + } + } + + async fn import_token_grant_profile( + state: &Arc, + id: &str, + host: &str, + port: u32, + path: &str, + ) { + let mut profile = custom_profile(id); + profile.credentials = vec![token_grant_credential("access_token")]; + profile.endpoints = vec![NetworkEndpoint { + host: host.to_string(), + port, + path: path.to_string(), + protocol: "rest".to_string(), + ..Default::default() + }]; + handle_import_provider_profiles( + state, + Request::new(ImportProviderProfilesRequest { + profiles: vec![ProviderProfileImportItem { + profile: Some(profile), + source: format!("{id}.yaml"), + }], + }), + ) + .await + .unwrap(); + } + + async fn create_empty_token_grant_provider( + store: &Store, + name: &str, + provider_type: &str, + ) -> Provider { + create_provider_record( + store, + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: name.to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: provider_type.to_string(), + credentials: HashMap::new(), + config: HashMap::new(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap() + } + + #[tokio::test] + async fn dynamic_token_grants_reject_equal_specificity_overlap() { + let state = test_server_state().await; + let store = state.store.as_ref(); + import_token_grant_profile(&state, "grant-a", "api.example.com", 443, "/v1/**").await; + import_token_grant_profile(&state, "grant-b", "api.example.com", 443, "/v1/**").await; + create_empty_token_grant_provider(store, "provider-a", "grant-a").await; + create_empty_token_grant_provider(store, "provider-b", "grant-b").await; + + let err = validate_provider_environment_keys_unique( + store, + &["provider-a".to_string(), "provider-b".to_string()], + ) + .await + 
.expect_err("equal-specificity dynamic grants should be ambiguous"); + + assert_eq!(err.code(), Code::FailedPrecondition); + assert!(err.message().contains("dynamic token grants")); + assert!(err.message().contains("ambiguous")); + } + + #[tokio::test] + async fn dynamic_token_grants_allow_more_specific_path_overlap() { + let state = test_server_state().await; + let store = state.store.as_ref(); + import_token_grant_profile(&state, "grant-default", "api.example.com", 443, "/v1/**").await; + import_token_grant_profile( + &state, + "grant-admin", + "api.example.com", + 443, + "/v1/admin/**", + ) + .await; + create_empty_token_grant_provider(store, "provider-default", "grant-default").await; + create_empty_token_grant_provider(store, "provider-admin", "grant-admin").await; + + validate_provider_environment_keys_unique( + store, + &["provider-default".to_string(), "provider-admin".to_string()], + ) + .await + .expect("more-specific path should make dynamic grants deterministic"); + } + + #[tokio::test] + async fn import_provider_profile_rejects_attached_dynamic_binding_ambiguity() { + let state = test_server_state().await; + let store = state.store.as_ref(); + import_token_grant_profile(&state, "grant-existing", "api.example.com", 443, "/v1/**") + .await; + create_empty_token_grant_provider(store, "provider-existing", "grant-existing").await; + create_provider_record( + store, + provider_with_values("provider-candidate", "grant-new"), + ) + .await + .unwrap(); + store + .put_message(&Sandbox { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "sandbox-import-ambiguity-id".to_string(), + name: "sandbox-import-ambiguity".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + spec: Some(SandboxSpec { + providers: vec![ + "provider-existing".to_string(), + "provider-candidate".to_string(), + ], + ..Default::default() + }), + ..Default::default() + }) + .await + .unwrap(); + + let mut profile = 
custom_profile("grant-new"); + profile.credentials = vec![token_grant_credential("access_token")]; + profile.endpoints = vec![NetworkEndpoint { + host: "api.example.com".to_string(), + port: 443, + path: "/v1/**".to_string(), + protocol: "rest".to_string(), + ..Default::default() + }]; + let response = handle_import_provider_profiles( + &state, + Request::new(ImportProviderProfilesRequest { + profiles: vec![ProviderProfileImportItem { + profile: Some(profile), + source: "grant-new.yaml".to_string(), + }], + }), + ) + .await + .unwrap() + .into_inner(); + + assert!(!response.imported); + assert!(response.diagnostics.iter().any(|diagnostic| { + diagnostic + .message + .contains("import would create ambiguous dynamic token grants") + })); + } + fn provider_with_values(name: &str, provider_type: &str) -> Provider { Provider { metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { @@ -1759,6 +2554,7 @@ mod tests { }, ], }), + token_grant: None, } } @@ -1796,6 +2592,31 @@ mod tests { query_param: String::new(), refresh: None, path_template: String::new(), + token_grant: None, + } + } + + fn token_grant_credential(name: &str) -> ProviderProfileCredential { + ProviderProfileCredential { + name: name.to_string(), + description: String::new(), + env_vars: Vec::new(), + required: true, + auth_style: "bearer".to_string(), + header_name: "authorization".to_string(), + query_param: String::new(), + refresh: None, + path_template: String::new(), + token_grant: Some(ProviderCredentialTokenGrant { + token_endpoint: "https://auth.example.com/token".to_string(), + audience: "api://default".to_string(), + jwt_svid_audience: "https://auth.example.com".to_string(), + client_assertion_type: "urn:ietf:params:oauth:client-assertion-type:jwt-bearer" + .to_string(), + scopes: vec!["read".to_string()], + cache_ttl_seconds: 300, + audience_overrides: Vec::new(), + }), } } @@ -3227,6 +4048,7 @@ mod tests { }, ], }), + token_grant: None, }], endpoints: vec![], binaries: vec![], @@ 
-3653,6 +4475,24 @@ mod tests { assert!(!result.contains_key("endpoint")); } + #[tokio::test] + async fn resolve_provider_env_allows_static_provider_without_profile() { + let store = test_store().await; + create_provider_record( + &store, + provider_with_values("static-provider", "unprofiled-static-api"), + ) + .await + .unwrap(); + + let result = resolve_provider_environment(&store, &["static-provider".to_string()]) + .await + .unwrap(); + + assert_eq!(result.get("API_TOKEN"), Some(&"token-123".to_string())); + assert!(result.dynamic_credentials.is_empty()); + } + #[tokio::test] async fn resolve_provider_env_skips_expired_credentials_and_returns_expiry_metadata() { let store = test_store().await; diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs index e94326f98..2abde7bc4 100644 --- a/crates/openshell-server/src/multiplex.rs +++ b/crates/openshell-server/src/multiplex.rs @@ -283,9 +283,9 @@ where /// for local single-user gateways, or to an unsafe local developer user when /// `auth.allow_unauthenticated_users` is explicitly enabled. /// -/// When neither OIDC nor gateway-minted JWTs are configured (a barebones +/// When neither OIDC nor sandbox credentials are configured (a barebones /// dev gateway), the chain is left as `None` so the router short-circuits -/// to pass-through. +/// to pass-through unless mTLS or local unauthenticated users are enabled. 
fn build_authenticator_chain(state: &ServerState) -> Option { let mut authenticators: Vec> = Vec::new(); if let Some(k8s) = state.k8s_sa_authenticator.clone() { @@ -368,19 +368,13 @@ fn unauthenticated_dev_user_principal() -> Principal { }) } -fn status_response(status: tonic::Status) -> Response { - let response = status.into_http(); - let (parts, body) = response.into_parts(); - let body = tonic::body::BoxBody::new(body); - Response::from_parts(parts, body) +fn status_response(status: tonic::Status) -> Response { + status.into_http() } impl tower::Service> for AuthGrpcRouter where - S: tower::Service, Response = Response> - + Clone - + Send - + 'static, + S: tower::Service, Response = Response> + Clone + Send + 'static, S::Future: Send, S::Error: Send + Into>, B: Send + 'static, @@ -951,7 +945,7 @@ mod tests { } impl Service> for PrincipalRecorder { - type Response = Response; + type Response = Response; type Error = std::convert::Infallible; type Future = Pin> + Send>>; @@ -962,14 +956,7 @@ mod tests { fn call(&mut self, req: Request) -> Self::Future { let principal = req.extensions().get::().cloned(); *self.recorded.lock().unwrap() = principal; - Box::pin(async move { - let body = tonic::body::BoxBody::new( - Full::new(Bytes::new()) - .map_err(|never| match never {}) - .boxed_unsync(), - ); - Ok(Response::new(body)) - }) + Box::pin(async move { Ok(Response::new(tonic::body::Body::empty())) }) } } diff --git a/crates/openshell-tui/Cargo.toml b/crates/openshell-tui/Cargo.toml index 71e3935f4..238166136 100644 --- a/crates/openshell-tui/Cargo.toml +++ b/crates/openshell-tui/Cargo.toml @@ -21,7 +21,7 @@ ratatui = { workspace = true } crossterm = { workspace = true } terminal-colorsaurus = { workspace = true } tokio = { workspace = true } -tonic = { workspace = true, features = ["tls"] } +tonic = { workspace = true, features = ["tls-native-roots"] } miette = { workspace = true } owo-colors = { workspace = true } serde = { workspace = true } diff --git 
a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index 343985f6f..62d7826f3 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -57,6 +57,8 @@ See [`values.yaml`](values.yaml) for source defaults. Selected overlays: - [`ci/values-cert-manager.yaml`](ci/values-cert-manager.yaml) - cert-manager integration - [`ci/values-keycloak.yaml`](ci/values-keycloak.yaml) - Keycloak OIDC integration - [`ci/values-high-availability.yaml`](ci/values-high-availability.yaml) - CI overlay for multi-replica external PostgreSQL testing +- [`ci/values-spire.yaml`](ci/values-spire.yaml) - SPIFFE/SPIRE provider token grants +- [`ci/values-spire-stack.yaml`](ci/values-spire-stack.yaml) - SPIRE hardened chart values for local development ### Database backend @@ -114,6 +116,17 @@ sandbox JWT signing Secret. This precedence applies even if external non-cert-manager TLS source manages TLS and you pre-create the sandbox JWT signing Secret. +## SPIFFE/SPIRE provider token grants + +Set `server.providerTokenGrants.spiffe.enabled=true` to let sandbox supervisors +use SPIFFE JWT-SVIDs for dynamic provider token grants. The chart keeps +supervisor-to-gateway authentication on gateway-minted sandbox JWTs and passes +the SPIFFE Workload API socket path to the Kubernetes driver so sandbox pods can +mount the SPIFFE CSI socket. + +For local development, uncomment the SPIRE Helm releases in `skaffold.yaml` and +add `ci/values-spire.yaml` to the OpenShell release values files. + ## Values | Key | Type | Default | Description | @@ -189,6 +202,8 @@ JWT signing Secret. | server.oidc.rolesClaim | string | `""` | Dot-separated path to the roles array in the JWT claims. Keycloak: "realm_access.roles", Entra ID: "roles", Okta: "groups". | | server.oidc.scopesClaim | string | `""` | Dot-separated path to the scopes array in the JWT claims. | | server.oidc.userRole | string | `""` | Role name for standard user access. 
| +| server.providerTokenGrants.spiffe.enabled | bool | `false` | Mount the SPIFFE Workload API socket into sandbox pods for dynamic provider token grants. | +| server.providerTokenGrants.spiffe.workloadApiSocketPath | string | `"/spiffe-workload-api/spire-agent.sock"` | Path to the SPIFFE Workload API socket mounted into sandbox pods. | | server.sandboxImage | string | `"ghcr.io/nvidia/openshell-community/sandboxes/base:latest"` | Default sandbox image used when requests do not specify one. | | server.sandboxImagePullPolicy | string | `""` | Kubernetes imagePullPolicy for sandbox pods. Empty = Kubernetes default (Always for :latest, IfNotPresent otherwise). Set to "Always" for dev clusters so new images are picked up without manual eviction. | | server.sandboxImagePullSecrets | list | `[]` | Image pull secrets attached to sandbox pods. Referenced Secrets must exist in the sandbox namespace. | diff --git a/deploy/helm/openshell/README.md.gotmpl b/deploy/helm/openshell/README.md.gotmpl index 7b70d4b80..17b5f7821 100644 --- a/deploy/helm/openshell/README.md.gotmpl +++ b/deploy/helm/openshell/README.md.gotmpl @@ -57,6 +57,8 @@ See [`values.yaml`](values.yaml) for source defaults. Selected overlays: - [`ci/values-cert-manager.yaml`](ci/values-cert-manager.yaml) - cert-manager integration - [`ci/values-keycloak.yaml`](ci/values-keycloak.yaml) - Keycloak OIDC integration - [`ci/values-high-availability.yaml`](ci/values-high-availability.yaml) - CI overlay for multi-replica external PostgreSQL testing +- [`ci/values-spire.yaml`](ci/values-spire.yaml) - SPIFFE/SPIRE provider token grants +- [`ci/values-spire-stack.yaml`](ci/values-spire-stack.yaml) - SPIRE hardened chart values for local development ### Database backend @@ -114,5 +116,16 @@ sandbox JWT signing Secret. This precedence applies even if external non-cert-manager TLS source manages TLS and you pre-create the sandbox JWT signing Secret. 
+## SPIFFE/SPIRE provider token grants + +Set `server.providerTokenGrants.spiffe.enabled=true` to let sandbox supervisors +use SPIFFE JWT-SVIDs for dynamic provider token grants. The chart keeps +supervisor-to-gateway authentication on gateway-minted sandbox JWTs and passes +the SPIFFE Workload API socket path to the Kubernetes driver so sandbox pods can +mount the SPIFFE CSI socket. + +For local development, uncomment the SPIRE Helm releases in `skaffold.yaml` and +add `ci/values-spire.yaml` to the OpenShell release values files. + {{ template "chart.valuesSection" . }} {{ template "helm-docs.versionFooter" . }} diff --git a/deploy/helm/openshell/ci/values-spire-stack.yaml b/deploy/helm/openshell/ci/values-spire-stack.yaml new file mode 100644 index 000000000..b55f7cfc5 --- /dev/null +++ b/deploy/helm/openshell/ci/values-spire-stack.yaml @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# SPIRE hardened chart values for the local Helm dev environment. +global: + spire: + clusterName: openshell-dev + jwtIssuer: https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local + trustDomain: openshell.local + +spire-server: + defaultJwtSvidTTL: 5m + controllerManager: + identities: + clusterSPIFFEIDs: + openshell-sandboxes: + enabled: true + spiffeIDTemplate: 'spiffe://{{ .TrustDomain }}/openshell/sandbox/{{ index .PodMeta.Annotations "openshell.io/sandbox-id" }}' + namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshell + podSelector: + matchLabels: + openshell.ai/managed-by: openshell diff --git a/deploy/helm/openshell/ci/values-spire.yaml b/deploy/helm/openshell/ci/values-spire.yaml new file mode 100644 index 000000000..201520e81 --- /dev/null +++ b/deploy/helm/openshell/ci/values-spire.yaml @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +# OpenShell overlay for local SPIRE-backed provider token grants. +server: + providerTokenGrants: + spiffe: + enabled: true + workloadApiSocketPath: /spiffe-workload-api/spire-agent.sock diff --git a/deploy/helm/openshell/skaffold.yaml b/deploy/helm/openshell/skaffold.yaml index 8a4037429..d4608da5e 100644 --- a/deploy/helm/openshell/skaffold.yaml +++ b/deploy/helm/openshell/skaffold.yaml @@ -81,6 +81,26 @@ deploy: # # wait ensures Gateway API CRDs are registered before the openshell # # release attempts to create Gateway and HTTPRoute resources. # wait: true + # SPIRE — installs SPIRE Server, Agent, Controller Manager, CSI Driver, + # and OIDC Discovery Provider using the SPIFFE hardened charts. + # Uncomment both releases and ci/values-spire.yaml below to use + # SPIFFE JWT-SVIDs for dynamic provider token grants. + #- name: spire-crds + # repo: https://spiffe.github.io/helm-charts-hardened/ + # remoteChart: spire-crds + # version: 0.5.0 + # namespace: spire + # createNamespace: true + # wait: true + #- name: spire + # repo: https://spiffe.github.io/helm-charts-hardened/ + # remoteChart: spire + # version: 0.29.0 + # namespace: spire + # createNamespace: true + # valuesFiles: + # - ci/values-spire-stack.yaml + # wait: true - name: openshell chartPath: . 
namespace: openshell @@ -97,6 +117,9 @@ deploy: #- ci/values-keycloak.yaml # To enable the Gateway API HTTPRoute (requires Envoy Gateway above): #- ci/values-gateway.yaml + # To enable SPIFFE/SPIRE provider token grants (requires the + # spire-crds and spire releases above): + #- ci/values-spire.yaml # To test multi-replica external PostgreSQL behavior: #- ci/values-high-availability.yaml setValueTemplates: diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index cd21bee2f..d51b343e5 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -103,6 +103,9 @@ data: service_account_name = {{ include "openshell.sandboxServiceAccountName" . | quote }} supervisor_sideload_method = {{ include "openshell.supervisorSideloadMethod" . | quote }} sa_token_ttl_secs = {{ .Values.server.sandboxJwt.k8sSaTokenTtlSecs | default 3600 }} + {{- if .Values.server.providerTokenGrants.spiffe.enabled }} + provider_spiffe_workload_api_socket_path = {{ .Values.server.providerTokenGrants.spiffe.workloadApiSocketPath | quote }} + {{- end }} {{- if .Values.server.sandboxImagePullPolicy }} image_pull_policy = {{ .Values.server.sandboxImagePullPolicy | quote }} {{- end }} diff --git a/deploy/helm/openshell/tests/gateway_config_test.yaml b/deploy/helm/openshell/tests/gateway_config_test.yaml index 393d59a37..e4edb5612 100644 --- a/deploy/helm/openshell/tests/gateway_config_test.yaml +++ b/deploy/helm/openshell/tests/gateway_config_test.yaml @@ -225,3 +225,26 @@ tests: secretKeyRef: name: openshell-ha-pg key: uri + - it: renders provider SPIFFE token grants while keeping gateway JWT auth + set: + server.providerTokenGrants.spiffe.enabled: true + template: templates/gateway-config.yaml + asserts: + - matchRegex: + path: data["gateway.toml"] + pattern: '\[openshell\.gateway\.gateway_jwt\]' + - matchRegex: + path: data["gateway.toml"] + pattern: 
'provider_spiffe_workload_api_socket_path\s*=\s*"/spiffe-workload-api/spire-agent\.sock"' + - notMatchRegex: + path: data["gateway.toml"] + pattern: '\[openshell\.gateway\.spiffe\]' + + - it: keeps the gateway sandbox JWT secret mounted when provider SPIFFE grants are enabled + set: + server.providerTokenGrants.spiffe.enabled: true + template: templates/statefulset.yaml + asserts: + - matchRegex: + path: spec.template.spec.volumes[1].name + pattern: '^sandbox-jwt$' diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index 91faeec28..4fd5ba4fc 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -233,6 +233,16 @@ server: # (owner-read only). Override to 0440 or 0444 if the container UID # does not match the volume file owner. secretDefaultMode: "" + # Dynamic provider token grants. When SPIFFE is enabled here, sandbox + # supervisors mount the SPIFFE Workload API socket so provider profiles can + # exchange JWT-SVIDs for upstream access tokens. Supervisor-to-gateway + # authentication still uses gateway-minted sandbox JWTs. + providerTokenGrants: + spiffe: + # -- Mount the SPIFFE Workload API socket into sandbox pods for dynamic provider token grants. + enabled: false + # -- Path to the SPIFFE Workload API socket mounted into sandbox pods. + workloadApiSocketPath: /spiffe-workload-api/spire-agent.sock # OIDC (OpenID Connect) configuration for JWT-based authentication. # When issuer is set, the server validates Bearer tokens on gRPC requests. oidc: diff --git a/docs/kubernetes/access-control.mdx b/docs/kubernetes/access-control.mdx index 5c333bb53..8824b6de1 100644 --- a/docs/kubernetes/access-control.mdx +++ b/docs/kubernetes/access-control.mdx @@ -19,6 +19,14 @@ The Helm chart always generates mTLS certificates at install time. The gateway u For how the CLI resolves gateways and stores credentials, refer to [Gateway Authentication](/reference/gateway-auth). 
+## Sandbox Supervisor Identity + +Kubernetes sandbox supervisors authenticate back to the gateway as sandbox workloads. By default, the gateway mints its own sandbox JWTs and Kubernetes sandboxes bootstrap them with a projected ServiceAccount token. + +Dynamic provider token grants can use SPIFFE without changing supervisor-to-gateway authentication. Set `server.providerTokenGrants.spiffe.enabled=true` to mount the SPIFFE CSI Workload API socket into sandbox pods while keeping the projected ServiceAccount token bootstrap and gateway-minted sandbox JWT path. + +Provider token grants require a SPIFFE implementation such as SPIRE and a `ClusterSPIFFEID` that assigns per-sandbox IDs from the pod's `openshell.io/sandbox-id` annotation. Provider profiles with `token_grant` metadata cause the sandbox supervisor to request JWT-SVIDs and exchange them for upstream OAuth2 access tokens. + ## OIDC User Authentication Set `server.oidc.issuer` to enable OIDC. The gateway validates the `Authorization: Bearer <token>` header on every request against the issuer's JWKS endpoint. diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index c70d8acbd..86ffa3f83 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -184,6 +184,11 @@ workspace_default_storage_size = "10Gi" # default_runtime_class_name = "kata-containers" # Kubelet clamps projected tokens below 600 seconds. The driver caps values at 86400. sa_token_ttl_secs = 3600 +# Optional SPIFFE Workload API socket mounted into sandbox pods for dynamic +# provider token grants. Use an absolute path under a dedicated directory; +# shared roots such as /run, /var, /tmp, and /etc are rejected. +# Supervisor-to-gateway auth still uses gateway JWTs. 
+provider_spiffe_workload_api_socket_path = "/spiffe-workload-api/spire-agent.sock" ``` ### Docker diff --git a/docs/sandboxes/providers-v2.mdx b/docs/sandboxes/providers-v2.mdx index 1eb6c008f..1456c5cfa 100644 --- a/docs/sandboxes/providers-v2.mdx +++ b/docs/sandboxes/providers-v2.mdx @@ -44,7 +44,7 @@ openshell settings delete --global --key providers_v2_enabled ``` -The feature flag controls provider-derived policy layers. It does not change the current credential injection model. OpenShell still injects placeholder environment variables into sandbox processes and resolves those placeholders in outbound HTTP traffic. +The feature flag controls provider-derived policy layers. OpenShell still supports placeholder environment variables for provider credentials, and provider profiles can also declare dynamic token grants that the sandbox proxy resolves on demand for matching HTTP endpoints. ## Available Features @@ -60,6 +60,7 @@ Providers v2 currently includes these user-facing features: - Runtime sandbox provider lifecycle commands under `openshell sandbox provider list|attach|detach`. - Credential refresh configuration with `openshell provider refresh status|configure|rotate|delete`. - Credential expiry metadata with `openshell provider update --credential-expires-at`; values accept Unix epoch milliseconds or ISO/RFC3339 timestamps. +- Dynamic token grants that use the sandbox's SPIFFE JWT-SVID as an OAuth2 client assertion and inject short-lived tokens into supported headers for matching profile endpoints. ## Roadmap @@ -67,8 +68,8 @@ The following Providers v2 design items are not part of the current behavior: | Roadmap item | Current behavior | |---|---| -| Profile-driven explicit credential injection | Profile `auth_style`, `header_name`, `query_param`, and `path_template` fields are stored and validated, but runtime injection still depends on environment placeholders generated from provider credentials. 
| -| Endpoint and binary scoped credential injection | Provider profile endpoints and binaries affect policy composition. They do not yet restrict which outbound requests can receive credential injection. | +| General profile-driven credential placement | Static `auth_style`, `header_name`, `query_param`, and `path_template` placement metadata is stored and validated, but static credential injection still depends on environment placeholders generated from provider credentials. Dynamic `token_grant` credentials support `bearer` and `header` placement for matching HTTP endpoints. | +| Endpoint and binary scoped credential injection | Provider profile endpoints and binaries affect policy composition. Dynamic token grants are endpoint-scoped. Static placeholder injection is not yet restricted by profile endpoint or binary metadata. | | Credential verification on create | `openshell provider create` does not yet probe provider verification endpoints or expose `--no-verify`. | | Automatic credential scope extraction | OpenShell does not yet inspect upstream provider responses to discover credential scopes. | | Inference mounting from attached providers | `inference_capable` is profile metadata. Attaching an inference-capable provider does not yet create `inference.local` routes. | @@ -159,8 +160,8 @@ credentials: required: true # Accepted values: basic, bearer, header, query, path. - # These fields describe the intended credential placement. - # Runtime injection still uses env placeholder resolution today. + # These fields describe static credential placement. + # Static runtime injection still uses env placeholder resolution. auth_style: bearer header_name: authorization query_param: api_key @@ -185,6 +186,24 @@ credentials: required: true secret: true + # Optional dynamic credential. 
The sandbox supervisor requests a + # SPIFFE JWT-SVID, exchanges it at token_endpoint, caches the returned + # access token, and injects it according to auth_style/header_name for + # matching endpoint traffic. + token_grant: + token_endpoint: https://login.example.com/realms/custom/protocol/openid-connect/token + audience: api://custom-api + jwt_svid_audience: https://login.example.com/realms/custom + client_assertion_type: urn:ietf:params:oauth:client-assertion-type:jwt-bearer + scopes: [api.read, api.write] + cache_ttl_seconds: 300 + audience_overrides: + - host: api.example.com + port: 443 + path: /v1/projects/** + audience: api://custom-projects + scopes: [projects.read] + discovery: credentials: [api_token] @@ -240,7 +259,7 @@ binaries: `category` groups profiles in `openshell provider list-profiles`. Use one of the values in the category enum. -`credentials` declares the credential names, environment variables, auth metadata, and optional refresh metadata for the provider type. The `auth_style` field accepts `basic`, `bearer`, `header`, `query`, or `path`. When `auth_style` is `path`, set `path_template` to a URL path containing the `{credential}` placeholder exactly once (for example, `/v1/{credential}/resources`). The current runtime still exposes configured credential keys as placeholder environment variables and resolves placeholders in outbound HTTP requests. +`credentials` declares the credential names, environment variables, auth metadata, optional refresh metadata, and optional dynamic token grant metadata for the provider type. The `auth_style` field accepts `basic`, `bearer`, `header`, `query`, or `path`. When `auth_style` is `path`, set `path_template` to a URL path containing the `{credential}` placeholder exactly once (for example, `/v1/{credential}/resources`). Static credentials are exposed as placeholder environment variables and resolved in outbound HTTP requests. 
Dynamic token grants are resolved by the sandbox proxy on demand for matching profile endpoints and support `bearer` or `header` placement. `discovery` controls what `--from-existing` scans when `providers_v2_enabled=true`. Each entry in `discovery.credentials` must name a @@ -280,6 +299,33 @@ Gateway-managed refresh strategies use these material keys: OpenShell keeps token endpoints profile-owned. Refresh material cannot override `token_url` or `token_uri` during refresh configuration. +### Dynamic Token Grants + +`token_grant` belongs to one credential declaration. When a sandbox with the provider attached sends HTTP traffic to a matching profile endpoint, the supervisor requests a SPIFFE JWT-SVID from the local Workload API, exchanges it at `token_endpoint`, caches the returned access token, and injects it before forwarding the request upstream. Use `auth_style: bearer` to inject `Authorization: Bearer <token>`, or `auth_style: header` with `header_name` to inject the raw access token into a custom header. Token grants do not support `query` or `path` placement. + +Create provider instances for token-grant-only profiles with `--runtime-credentials`. This records an empty provider instance and makes the runtime-resolved credential source explicit: + +```shell +openshell provider create \ + --name spiffe-token-demo \ + --type spiffe-token-demo \ + --runtime-credentials +``` + +Token grant fields: + +| Field | Required | Behavior | +|---|---|---| +| `token_endpoint` | Yes | OAuth2 token endpoint that accepts a SPIFFE JWT-SVID client assertion. Use `https://` unless the endpoint is loopback or a Kubernetes service DNS name such as `token-issuer.default.svc.cluster.local`. | +| `audience` | No | Resource audience requested from the token service. | +| `jwt_svid_audience` | No | Audience used when requesting the JWT-SVID. When omitted, OpenShell derives an issuer-style audience from Keycloak token endpoint paths or falls back to the full token endpoint URL. 
| +| `client_assertion_type` | No | OAuth2 `client_assertion_type` form value. Defaults to RFC 7523 `urn:ietf:params:oauth:client-assertion-type:jwt-bearer`. Set a provider-specific value, such as `urn:ietf:params:oauth:client-assertion-type:jwt-spiffe`, only when the token issuer explicitly requires it. | +| `scopes` | No | OAuth2 scopes sent as a space-separated `scope` parameter. | +| `cache_ttl_seconds` | No | Token cache TTL override. When omitted or `0`, OpenShell uses the token response `expires_in` with a 30-second safety margin and one-hour cap, or five minutes minus the margin if the response does not include an expiry. | +| `audience_overrides` | No | Endpoint-specific `audience` and `scopes` overrides selected by host, port, and path. | + +Token grants require the sandbox supervisor to have access to a SPIFFE Workload API socket. They apply to HTTP traffic that the proxy can inspect. Endpoints with `tls: skip` bypass TLS termination and cannot receive dynamic token grant injection for HTTPS traffic. The token service must return a token value that is safe for HTTP header placement; malformed values are rejected before caching or header injection. + ## Provider Instances A provider instance stores concrete credentials and config for a profile type. Built-in profile IDs and imported custom profile IDs are accepted by `--type`. @@ -321,6 +367,8 @@ openshell provider create \ --credential CUSTOM_API_TOKEN ``` +Provider profiles whose required credentials are fully runtime-resolvable through `token_grant` or gateway-managed refresh can be created without `--credential`. + Inspect the provider: ```shell @@ -616,7 +664,7 @@ Already-running processes keep the environment they started with. OpenShell does Detaching a provider removes its provider policy layer from future effective policy reads and removes its credential placeholders from future process environments. It does not remove environment variables from already-running processes. 
-OpenShell rejects provider updates and refresh configuration when they would make two providers attached to the same sandbox expose the same active credential environment key. Use provider-specific credential names when one sandbox needs multiple providers with overlapping upstream concepts. +OpenShell rejects provider updates and refresh configuration when they would make two providers attached to the same sandbox expose the same active credential environment key. It also rejects attached provider sets with ambiguous dynamic token grants at equal host/path specificity. Use provider-specific credential names and make one dynamic grant selector more specific when one sandbox needs multiple providers with overlapping upstream concepts. ## Next Steps diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 25e440f5b..0284384b1 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -122,7 +122,7 @@ This enables credential injection and L7 inspection without explicit configurati |---|---| | Default | Auto-detect and terminate. OpenShell generates the sandbox CA at startup and injects it into the process trust stores (`NODE_EXTRA_CA_CERTS`, `DENO_CERT`, `SSL_CERT_FILE`, `REQUESTS_CA_BUNDLE`, `CURL_CA_BUNDLE`, `GIT_SSL_CAINFO`). | | What you can change | Set `tls: skip` on an endpoint to disable TLS detection and termination for that endpoint. Use this for client-certificate mTLS to upstream or non-standard binary protocols. | -| Risk if relaxed | `tls: skip` disables credential injection and L7 inspection for that endpoint. The proxy relays encrypted traffic without seeing the contents. | +| Risk if relaxed | `tls: skip` disables placeholder credential rewriting, dynamic token grant injection, and L7 inspection for that endpoint. The proxy relays encrypted traffic without seeing the contents. | | Recommendation | Use auto-detect (the default) for most endpoints. 
Use `tls: skip` only when the upstream requires the client's own TLS certificate (mTLS) or uses a non-HTTP protocol. | ### SSRF Protection @@ -212,7 +212,7 @@ OpenShell applies seccomp in two phases. A narrow supervisor-startup prelude run | Aspect | Detail | |---|---| -| Startup prelude | After privileged bootstrap helpers complete, the supervisor sets `PR_SET_NO_NEW_PRIVS` and synchronizes a seccomp filter across all runtime threads that blocks `mount`, the new mount API syscalls, `pivot_root`, `umount2`, `bpf`, `perf_event_open`, `userfaultfd`, module-loading syscalls, and kexec. This closes the long-lived privileged remount and kernel-surface window while leaving required setup syscalls such as `setns` available. | +| Startup prelude | After privileged bootstrap helpers complete, including network setup and provider-token SPIFFE child mount-namespace preparation, the supervisor sets `PR_SET_NO_NEW_PRIVS` and synchronizes a seccomp filter across all runtime threads that blocks `mount`, the new mount API syscalls, `pivot_root`, `umount2`, `bpf`, `perf_event_open`, `userfaultfd`, module-loading syscalls, and kexec. This closes the long-lived privileged remount and kernel-surface window while leaving required setup syscalls such as `setns` available. | | Socket domains | The filter allows `AF_INET` and `AF_INET6` (for proxy communication) and blocks `AF_PACKET`, `AF_BLUETOOTH`, and `AF_VSOCK` with `EPERM`. `AF_NETLINK` is partially allowed: only `NETLINK_ROUTE` (protocol 0) is permitted so that `getifaddrs(3)` works; all other netlink protocols are blocked. Write operations via `NETLINK_ROUTE` still require `CAP_NET_ADMIN`, which the sandbox does not grant. 
| | Runtime unconditional syscall blocks | `memfd_create`, `ptrace`, `bpf`, `process_vm_readv`, `process_vm_writev`, `pidfd_open`, `pidfd_getfd`, `pidfd_send_signal`, `io_uring_setup`, `mount`, `fsopen`, `fsconfig`, `fsmount`, `fspick`, `move_mount`, `open_tree`, `setns`, `umount2`, `pivot_root`, `userfaultfd`, `perf_event_open`. | | Conditional syscall blocks | `execveat` with `AT_EMPTY_PATH`, `unshare` and `clone` with `CLONE_NEWUSER`, and `seccomp(SECCOMP_SET_MODE_FILTER)` are denied with `EPERM`. | @@ -225,9 +225,9 @@ OpenShell applies seccomp in two phases. A narrow supervisor-startup prelude run The sandbox supervisor applies enforcement in a specific order during process startup. This ordering is intentional: named network-namespace setup still relies on privileged helpers, and privilege dropping still needs `/etc/group` and `/etc/passwd`, which Landlock subsequently restricts. -1. Privileged supervisor bootstrap helpers, including network-namespace setup and optional `nft` probes. +1. Privileged supervisor bootstrap helpers, including network-namespace setup, provider-token SPIFFE child mount-namespace setup, and optional `nft` probes. 2. Supervisor startup prelude seccomp (`PR_SET_NO_NEW_PRIVS` plus the early syscall denylist) synchronized across runtime threads. -3. Network namespace entry (`setns`) in child `pre_exec`. +3. Network and child-only mount namespace entry (`setns`) in child `pre_exec`. 4. Privilege drop (`initgroups` + `setgid` + `setuid`). 5. Core-dump hardening (`RLIMIT_CORE=0`, plus `PR_SET_DUMPABLE=0` on Linux). 6. Landlock filesystem restrictions. @@ -286,7 +286,7 @@ The following patterns weaken security without providing meaningful benefit. | Using `access: full` when finer rules would suffice | `access: full` with `protocol: rest` or `protocol: websocket` enables inspection but allows all methods and paths for that protocol. | Use `access: read-only` or explicit `rules` to restrict what the agent can do at the L7 level. 
| | Adding endpoints permanently when operator approval would suffice | Adding endpoints to the policy YAML makes them permanently reachable across all instances. | Use operator approval. Approved endpoints persist within the sandbox instance but reset on re-creation. | | Using broad binary globs | A glob like `/**` allows any binary to reach the endpoint, defeating binary-scoped enforcement. | Scope globs to specific directories (for example, `/sandbox/.vscode-server/**`). | -| Skipping TLS termination on HTTPS APIs | Setting `tls: skip` disables credential injection and L7 inspection. | Use the default auto-detect behavior unless the upstream requires client-certificate mTLS. | +| Skipping TLS termination on HTTPS APIs | Setting `tls: skip` disables placeholder credential rewriting, dynamic token grant injection, and L7 inspection. | Use the default auto-detect behavior unless the upstream requires client-certificate mTLS. | | Setting `enforcement: enforce` before auditing | Jumping to `enforce` without first running in `audit` mode risks breaking the agent's workflow. | Start with `audit`, review the logs, and switch to `enforce` after you validate the rules. | ## Related Topics diff --git a/examples/spiffe-token-grant-demo/README.md b/examples/spiffe-token-grant-demo/README.md new file mode 100644 index 000000000..57ad9b80f --- /dev/null +++ b/examples/spiffe-token-grant-demo/README.md @@ -0,0 +1,143 @@ +# SPIFFE Token Grant Demo + +This example validates provider dynamic token grants using SPIFFE JWT-SVIDs. +It mirrors the PR 1781 alpha/beta flow without configuring OpenShell gateway +OIDC authentication. 
+ +The demo deploys three in-cluster workloads: + +| Workload | Purpose | +|---|---| +| `token-issuer` | Accepts a SPIFFE JWT-SVID client assertion and returns a short-lived demo access token | +| `alpha` | Requires a bearer token with audience and scope `alpha` | +| `beta` | Requires a bearer token with audience and scope `beta` | + +The OpenShell provider profile in `provider-profile.yaml` configures a dynamic +credential with `token_grant`. When a sandbox curls `alpha` or `beta`, the +sandbox supervisor fetches a JWT-SVID from the SPIFFE Workload API, exchanges it +at `token-issuer`, and injects the returned access token into the outbound HTTP +request. + +## Prerequisites + +- A Kubernetes OpenShell dev cluster. +- SPIRE enabled for provider token grants. +- OpenShell configured with the Kubernetes ServiceAccount supervisor bootstrap + path. Gateway end-user OIDC is not required for this demo. +- `providers_v2_enabled=true` on the target gateway. + +For the Helm dev environment, deploy with the SPIRE releases and +`ci/values-spire.yaml` enabled in `deploy/helm/openshell/skaffold.yaml`. 
+ +## Deploy Workloads + +From the repository root: + +```bash +ACCESS_TOKEN_SECRET="$(openssl rand -hex 32)" +KUBECONFIG=kubeconfig kubectl -n default create secret generic openshell-spiffe-token-demo \ + --from-literal=access-token-secret="$ACCESS_TOKEN_SECRET" \ + --dry-run=client \ + -o yaml | KUBECONFIG=kubeconfig kubectl apply -f - +KUBECONFIG=kubeconfig kubectl apply -k examples/spiffe-token-grant-demo/k8s +KUBECONFIG=kubeconfig kubectl -n default rollout restart deployment/token-issuer deployment/alpha deployment/beta +KUBECONFIG=kubeconfig kubectl -n default rollout status deployment/token-issuer --timeout=180s +KUBECONFIG=kubeconfig kubectl -n default rollout status deployment/alpha --timeout=180s +KUBECONFIG=kubeconfig kubectl -n default rollout status deployment/beta --timeout=180s +``` + +## Register Provider And Test + +Port-forward the local gateway in one terminal: + +```bash +KUBECONFIG=kubeconfig kubectl port-forward -n openshell svc/openshell 8097:8080 +``` + +Then run: + +```bash +export XDG_CONFIG_HOME="$(mktemp -d)" +export GATEWAY=http://127.0.0.1:8097 + +openshell --gateway-endpoint "$GATEWAY" settings set \ + --global --key providers_v2_enabled --value true --yes + +openshell --gateway-endpoint "$GATEWAY" provider profile import \ + -f examples/spiffe-token-grant-demo/provider-profile.yaml + +openshell --gateway-endpoint "$GATEWAY" provider create \ + --name spiffe-token-demo \ + --type spiffe-token-demo \ + --runtime-credentials + +openshell --gateway-endpoint "$GATEWAY" sandbox create \ + --name spiffe-token-demo \ + --provider spiffe-token-demo \ + --keep \ + --no-tty \ + -- echo "sandbox ready" + +openshell --gateway-endpoint "$GATEWAY" sandbox exec \ + --name spiffe-token-demo \ + --no-tty \ + -- curl -sS http://alpha.default.svc.cluster.local/ + +openshell --gateway-endpoint "$GATEWAY" sandbox exec \ + --name spiffe-token-demo \ + --no-tty \ + -- curl -sS http://beta.default.svc.cluster.local/ +``` + +Expected output includes 
endpoint-specific token claims: + +```text +alpha called with path /: + aud: alpha, account + scope: alpha profile email + azp: spiffe://openshell.local/openshell/sandbox/<sandbox-id> + +beta called with path /: + aud: beta, account + scope: beta profile email + azp: spiffe://openshell.local/openshell/sandbox/<sandbox-id> +``` + +The protected services also write proof-of-life logs when they accept a call: + +```bash +KUBECONFIG=kubeconfig kubectl -n default logs deployment/alpha --tail=20 +KUBECONFIG=kubeconfig kubectl -n default logs deployment/beta --tail=20 +``` + +Example log lines: + +```text +alpha accepted request path=/ aud="alpha, account" scope="alpha profile email" client_id=spiffe://openshell.local/openshell/sandbox/<sandbox-id> +beta accepted request path=/ aud="beta, account" scope="beta profile email" client_id=spiffe://openshell.local/openshell/sandbox/<sandbox-id> +``` + +## Automated Demo + +`demo.sh` applies the workloads, registers the provider profile, creates a +sandbox, curls alpha and beta, prints the alpha/beta pod logs, and deletes the +sandbox with `openshell` on exit. It leaves the Kubernetes demo workloads in +place. + +```bash +KUBECONFIG=kubeconfig bash examples/spiffe-token-grant-demo/demo.sh +``` + +## Cleanup + +Delete the sandbox through OpenShell: + +```bash +openshell --gateway-endpoint "$GATEWAY" sandbox delete spiffe-token-demo +``` + +Delete the demo workloads with Kubernetes: + +```bash +KUBECONFIG=kubeconfig kubectl delete -k examples/spiffe-token-grant-demo/k8s +``` diff --git a/examples/spiffe-token-grant-demo/demo.sh b/examples/spiffe-token-grant-demo/demo.sh new file mode 100755 index 000000000..ceb44adc3 --- /dev/null +++ b/examples/spiffe-token-grant-demo/demo.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROFILE_FILE="${SCRIPT_DIR}/provider-profile.yaml" +K8S_DIR="${SCRIPT_DIR}/k8s" + +SANDBOX_NAME="${SANDBOX_NAME:-spiffe-token-demo}" +PROVIDER_NAME="${PROVIDER_NAME:-spiffe-token-demo}" +PROFILE_ID="${PROFILE_ID:-spiffe-token-demo}" +PORT_FORWARD_PORT="${PORT_FORWARD_PORT:-8097}" +GATEWAY_ENDPOINT="${GATEWAY_ENDPOINT:-http://127.0.0.1:${PORT_FORWARD_PORT}}" +KEEP_SANDBOX="${KEEP_SANDBOX:-0}" +ACCESS_TOKEN_SECRET="${ACCESS_TOKEN_SECRET:-$(openssl rand -hex 32)}" + +TEMP_CONFIG_HOME="" +if [[ -z "${XDG_CONFIG_HOME:-}" ]]; then + TEMP_CONFIG_HOME="$(mktemp -d)" + export XDG_CONFIG_HOME="$TEMP_CONFIG_HOME" +fi + +PF_PID="" + +cleanup() { + if [[ "$KEEP_SANDBOX" != "1" ]]; then + openshell --gateway-endpoint "$GATEWAY_ENDPOINT" sandbox delete "$SANDBOX_NAME" >/dev/null 2>&1 || true + fi + if [[ -n "$PF_PID" ]]; then + kill "$PF_PID" >/dev/null 2>&1 || true + fi + if [[ -n "$TEMP_CONFIG_HOME" ]]; then + rm -rf "$TEMP_CONFIG_HOME" + fi +} +trap cleanup EXIT + +run() { + printf "\n$ %s\n" "$*" + "$@" +} + +wait_for_port_forward() { + for _ in $(seq 1 60); do + if nc -z 127.0.0.1 "$PORT_FORWARD_PORT" >/dev/null 2>&1; then + return 0 + fi + sleep 0.25 + done + printf "gateway port-forward did not become ready\n" >&2 + exit 1 +} + +assert_contains() { + local haystack="$1" + local needle="$2" + if [[ "$haystack" != *"$needle"* ]]; then + printf "expected output to contain: %s\n" "$needle" >&2 + printf "actual output:\n%s\n" "$haystack" >&2 + exit 1 + fi +} + +sandbox_curl_until() { + local label="$1" + local url="$2" + local expected="$3" + local output="" + + for attempt in $(seq 1 12); do + printf "\n$ openshell sandbox exec %s curl (attempt %s)\n" "$label" "$attempt" + if output=$("${OS[@]}" sandbox exec --name "$SANDBOX_NAME" --no-tty -- curl -sS --max-time 10 "$url" 2>&1); then + printf "%s\n" "$output" + if [[ "$output" == *"$expected"* ]]; then + 
SANDBOX_CURL_OUTPUT="$output" + return 0 + fi + else + printf "%s\n" "$output" + fi + sleep 2 + done + + printf "timed out waiting for %s to return expected output\n" "$label" >&2 + printf "last output:\n%s\n" "$output" >&2 + exit 1 +} + +OS=(openshell --gateway-endpoint "$GATEWAY_ENDPOINT") + +printf "\n$ kubectl -n default create secret generic openshell-spiffe-token-demo --from-literal=access-token-secret=*** --dry-run=client -o yaml | kubectl apply -f -\n" +kubectl -n default create secret generic openshell-spiffe-token-demo \ + --from-literal=access-token-secret="$ACCESS_TOKEN_SECRET" \ + --dry-run=client \ + -o yaml | kubectl apply -f - + +run kubectl apply -k "$K8S_DIR" +run kubectl -n default rollout restart deployment/token-issuer deployment/alpha deployment/beta +run kubectl -n default rollout status deployment/token-issuer --timeout=180s +run kubectl -n default rollout status deployment/alpha --timeout=180s +run kubectl -n default rollout status deployment/beta --timeout=180s + +kubectl -n openshell port-forward svc/openshell "${PORT_FORWARD_PORT}:8080" >/tmp/openshell-spiffe-token-demo-port-forward.log 2>&1 & +PF_PID=$! 
+wait_for_port_forward + +"${OS[@]}" sandbox delete "$SANDBOX_NAME" >/dev/null 2>&1 || true +"${OS[@]}" provider delete "$PROVIDER_NAME" >/dev/null 2>&1 || true +"${OS[@]}" provider profile delete "$PROFILE_ID" >/dev/null 2>&1 || true + +run "${OS[@]}" settings set --global --key providers_v2_enabled --value true --yes +run "${OS[@]}" provider profile lint -f "$PROFILE_FILE" +run "${OS[@]}" provider profile import -f "$PROFILE_FILE" +run "${OS[@]}" provider create --name "$PROVIDER_NAME" --type "$PROFILE_ID" --runtime-credentials +run "${OS[@]}" sandbox create --name "$SANDBOX_NAME" --provider "$PROVIDER_NAME" --keep --no-tty -- echo "sandbox ready" + +sandbox_curl_until "alpha" "http://alpha.default.svc.cluster.local/" "alpha called with path /:" +ALPHA_OUTPUT="$SANDBOX_CURL_OUTPUT" +assert_contains "$ALPHA_OUTPUT" "alpha called with path /:" +assert_contains "$ALPHA_OUTPUT" "aud: alpha, account" +assert_contains "$ALPHA_OUTPUT" "scope: alpha profile email" +assert_contains "$ALPHA_OUTPUT" "azp: spiffe://openshell.local/openshell/sandbox/" + +sandbox_curl_until "beta" "http://beta.default.svc.cluster.local/" "beta called with path /:" +BETA_OUTPUT="$SANDBOX_CURL_OUTPUT" +assert_contains "$BETA_OUTPUT" "beta called with path /:" +assert_contains "$BETA_OUTPUT" "aud: beta, account" +assert_contains "$BETA_OUTPUT" "scope: beta profile email" +assert_contains "$BETA_OUTPUT" "azp: spiffe://openshell.local/openshell/sandbox/" + +sleep 1 + +printf "\n$ kubectl -n default logs -l app=alpha --tail=20 --prefix=true\n" +kubectl -n default logs -l app=alpha --tail=20 --prefix=true | sed 's/^/alpha> /' + +printf "\n$ kubectl -n default logs -l app=beta --tail=20 --prefix=true\n" +kubectl -n default logs -l app=beta --tail=20 --prefix=true | sed 's/^/beta> /' + +printf "\nSPIFFE token grant demo succeeded.\n" diff --git a/examples/spiffe-token-grant-demo/k8s/kustomization.yaml b/examples/spiffe-token-grant-demo/k8s/kustomization.yaml new file mode 100644 index 
000000000..ccd3a94a3 --- /dev/null +++ b/examples/spiffe-token-grant-demo/k8s/kustomization.yaml @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: + - name: openshell-spiffe-token-demo-scripts + files: + - token-issuer.js + - protected-service.js + +resources: + - workloads.yaml diff --git a/examples/spiffe-token-grant-demo/k8s/protected-service.js b/examples/spiffe-token-grant-demo/k8s/protected-service.js new file mode 100644 index 000000000..1df31f572 --- /dev/null +++ b/examples/spiffe-token-grant-demo/k8s/protected-service.js @@ -0,0 +1,114 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +const http = require("http"); +const crypto = require("crypto"); + +const PORT = Number(process.env.PORT || 8080); +const SERVICE_NAME = process.env.SERVICE_NAME || "alpha"; +const EXPECTED_AUDIENCE = process.env.EXPECTED_AUDIENCE || SERVICE_NAME; +const EXPECTED_SCOPE = process.env.EXPECTED_SCOPE || SERVICE_NAME; +const ACCESS_TOKEN_ISSUER = + process.env.ACCESS_TOKEN_ISSUER || "http://token-issuer.default.svc.cluster.local"; +const ACCESS_TOKEN_SECRET = process.env.ACCESS_TOKEN_SECRET; + +if (!ACCESS_TOKEN_SECRET) { + throw new Error("ACCESS_TOKEN_SECRET is required"); +} + +function b64urlDecode(value) { + const padded = `${value}${"=".repeat((4 - (value.length % 4)) % 4)}`; + return Buffer.from(padded.replace(/-/g, "+").replace(/_/g, "/"), "base64"); +} + +function b64urlEncode(value) { + return Buffer.from(value) + .toString("base64") + .replace(/=/g, "") + .replace(/\+/g, "-") + .replace(/\//g, "_"); +} + +function parseJwt(jwt) { + const parts = jwt.split("."); + if (parts.length !== 3) { + throw new Error("JWT must 
contain three segments"); + } + return { + payload: JSON.parse(b64urlDecode(parts[1]).toString("utf8")), + signingInput: `${parts[0]}.${parts[1]}`, + signature: parts[2], + }; +} + +function verifyAccessToken(jwt) { + const parsed = parseJwt(jwt); + const expected = b64urlEncode( + crypto.createHmac("sha256", ACCESS_TOKEN_SECRET).update(parsed.signingInput).digest(), + ); + if ( + parsed.signature.length !== expected.length || + !crypto.timingSafeEqual(Buffer.from(parsed.signature), Buffer.from(expected)) + ) { + throw new Error("access token signature validation failed"); + } + + const now = Math.floor(Date.now() / 1000); + if (parsed.payload.exp && parsed.payload.exp <= now) { + throw new Error("access token expired"); + } + if (parsed.payload.iss !== ACCESS_TOKEN_ISSUER) { + throw new Error(`unexpected access token issuer ${parsed.payload.iss}`); + } + const aud = Array.isArray(parsed.payload.aud) ? parsed.payload.aud : [parsed.payload.aud]; + if (!aud.includes(EXPECTED_AUDIENCE)) { + throw new Error(`access token audience did not include ${EXPECTED_AUDIENCE}`); + } + const scopes = String(parsed.payload.scope || "").split(/\s+/).filter(Boolean); + if (!scopes.includes(EXPECTED_SCOPE)) { + throw new Error(`access token scope did not include ${EXPECTED_SCOPE}`); + } + return parsed.payload; +} + +function text(res, status, body) { + res.writeHead(status, { "content-type": "text/plain" }); + res.end(body); +} + +http + .createServer((req, res) => { + try { + if (req.url === "/healthz") { + return text(res, 200, "ok\n"); + } + const auth = req.headers.authorization || ""; + const token = auth.startsWith("Bearer ") ? auth.slice("Bearer ".length) : ""; + if (!token) { + console.warn(`${SERVICE_NAME} rejected request path=${req.url} reason=missing_bearer_token`); + return text(res, 401, `${SERVICE_NAME} missing bearer token\n`); + } + const claims = verifyAccessToken(token); + const aud = Array.isArray(claims.aud) ? 
claims.aud.join(", ") : claims.aud; + console.log( + `${SERVICE_NAME} accepted request path=${req.url} aud="${aud}" scope="${claims.scope}" client_id=${claims.client_id}`, + ); + return text( + res, + 200, + `${SERVICE_NAME} called with path ${req.url}:\n` + + ` sub: ${claims.sub}\n` + + ` aud: ${aud}\n` + + ` iss: ${claims.iss}\n` + + ` scope: ${claims.scope}\n` + + ` azp: ${claims.azp}\n` + + ` client_id: ${claims.client_id}\n`, + ); + } catch (error) { + console.warn(`${SERVICE_NAME} rejected request path=${req.url} reason="${error.message}"`); + return text(res, 403, `${SERVICE_NAME} rejected token: ${error.message}\n`); + } + }) + .listen(PORT, "0.0.0.0", () => { + console.log(`${SERVICE_NAME} listening on ${PORT}`); + }); diff --git a/examples/spiffe-token-grant-demo/k8s/token-issuer.js b/examples/spiffe-token-grant-demo/k8s/token-issuer.js new file mode 100644 index 000000000..55e7ad48b --- /dev/null +++ b/examples/spiffe-token-grant-demo/k8s/token-issuer.js @@ -0,0 +1,206 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +const http = require("http"); +const crypto = require("crypto"); + +const PORT = Number(process.env.PORT || 8080); +const JWKS_URI = + process.env.SPIRE_JWKS_URI || + "https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local/keys"; +const SPIRE_ISSUER = + process.env.SPIRE_ISSUER || + "https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local"; +const JWT_SVID_AUDIENCE = + process.env.JWT_SVID_AUDIENCE || "http://token-issuer.default.svc.cluster.local"; +const TRUST_DOMAIN_PREFIX = + process.env.TRUST_DOMAIN_PREFIX || "spiffe://openshell.local/openshell/sandbox/"; +const ACCESS_TOKEN_ISSUER = + process.env.ACCESS_TOKEN_ISSUER || "http://token-issuer.default.svc.cluster.local"; +const ACCESS_TOKEN_SECRET = process.env.ACCESS_TOKEN_SECRET; + +if (!ACCESS_TOKEN_SECRET) { + throw new Error("ACCESS_TOKEN_SECRET is required"); +} + +let cachedJwks; +let cachedJwksAt = 0; + +function b64urlDecode(value) { + const padded = `${value}${"=".repeat((4 - (value.length % 4)) % 4)}`; + return Buffer.from(padded.replace(/-/g, "+").replace(/_/g, "/"), "base64"); +} + +function b64urlEncode(value) { + return Buffer.from(value) + .toString("base64") + .replace(/=/g, "") + .replace(/\+/g, "-") + .replace(/\//g, "_"); +} + +function parseJwt(jwt) { + const parts = jwt.split("."); + if (parts.length !== 3) { + throw new Error("JWT must contain three segments"); + } + return { + header: JSON.parse(b64urlDecode(parts[0]).toString("utf8")), + payload: JSON.parse(b64urlDecode(parts[1]).toString("utf8")), + signingInput: `${parts[0]}.${parts[1]}`, + signature: b64urlDecode(parts[2]), + }; +} + +async function jwks() { + const now = Date.now(); + if (cachedJwks && now - cachedJwksAt < 60000) { + return cachedJwks; + } + const response = await fetch(JWKS_URI); + if (!response.ok) { + throw new Error(`JWKS fetch failed with HTTP ${response.status}`); + } + cachedJwks = await response.json(); + cachedJwksAt = now; + return 
cachedJwks; +} + +function hasAudience(payload, expected) { + const aud = Array.isArray(payload.aud) ? payload.aud : [payload.aud]; + return aud.includes(expected); +} + +async function verifyJwtSvid(jwt) { + const parsed = parseJwt(jwt); + if (parsed.header.alg !== "RS256") { + throw new Error(`unsupported JWT-SVID alg ${parsed.header.alg}`); + } + + const keys = await jwks(); + const jwk = keys.keys.find((key) => key.kid === parsed.header.kid); + if (!jwk) { + throw new Error(`no JWKS key for kid ${parsed.header.kid}`); + } + + const verifier = crypto.createVerify("RSA-SHA256"); + verifier.update(parsed.signingInput); + verifier.end(); + const publicKey = crypto.createPublicKey({ key: jwk, format: "jwk" }); + if (!verifier.verify(publicKey, parsed.signature)) { + throw new Error("JWT-SVID signature validation failed"); + } + + const now = Math.floor(Date.now() / 1000); + if (parsed.payload.exp && parsed.payload.exp <= now) { + throw new Error("JWT-SVID expired"); + } + if (parsed.payload.nbf && parsed.payload.nbf > now + 30) { + throw new Error("JWT-SVID not active yet"); + } + if (parsed.payload.iss !== SPIRE_ISSUER) { + throw new Error(`unexpected JWT-SVID issuer ${parsed.payload.iss}`); + } + if (!hasAudience(parsed.payload, JWT_SVID_AUDIENCE)) { + throw new Error(`JWT-SVID audience did not include ${JWT_SVID_AUDIENCE}`); + } + if (!String(parsed.payload.sub || "").startsWith(TRUST_DOMAIN_PREFIX)) { + throw new Error("JWT-SVID subject was not an OpenShell sandbox SPIFFE ID"); + } + return parsed.payload; +} + +function signAccessToken(payload) { + const header = b64urlEncode(JSON.stringify({ alg: "HS256", typ: "JWT" })); + const body = b64urlEncode(JSON.stringify(payload)); + const signingInput = `${header}.${body}`; + const signature = crypto + .createHmac("sha256", ACCESS_TOKEN_SECRET) + .update(signingInput) + .digest(); + return `${signingInput}.${b64urlEncode(signature)}`; +} + +function json(res, status, body) { + res.writeHead(status, { "content-type": 
"application/json" }); + res.end(JSON.stringify(body)); +} + +async function bodyText(req) { + const chunks = []; + for await (const chunk of req) { + chunks.push(chunk); + if (Buffer.concat(chunks).length > 1024 * 1024) { + throw new Error("request body too large"); + } + } + return Buffer.concat(chunks).toString("utf8"); +} + +async function handleToken(req, res) { + const params = new URLSearchParams(await bodyText(req)); + if (params.get("grant_type") !== "client_credentials") { + return json(res, 400, { error: "unsupported_grant_type" }); + } + if ( + params.get("client_assertion_type") !== + "urn:ietf:params:oauth:client-assertion-type:jwt-spiffe" + ) { + return json(res, 400, { error: "unsupported_client_assertion_type" }); + } + + const jwtSvid = params.get("client_assertion"); + if (!jwtSvid) { + return json(res, 400, { error: "missing_client_assertion" }); + } + + const resourceAudience = params.get("audience") || ""; + const requestedScopes = (params.get("scope") || "").split(/\s+/).filter(Boolean); + if (!["alpha", "beta"].includes(resourceAudience)) { + return json(res, 400, { error: "unsupported_audience", audience: resourceAudience }); + } + if (!requestedScopes.includes(resourceAudience)) { + return json(res, 403, { error: "missing_matching_scope" }); + } + + const svid = await verifyJwtSvid(jwtSvid); + const now = Math.floor(Date.now() / 1000); + const subjectHash = crypto.createHash("sha256").update(svid.sub).digest("hex").slice(0, 32); + const accessToken = signAccessToken({ + iss: ACCESS_TOKEN_ISSUER, + sub: subjectHash, + aud: [resourceAudience, "account"], + scope: `${requestedScopes.join(" ")} profile email`, + azp: svid.sub, + client_id: svid.sub, + iat: now, + exp: now + 300, + }); + + return json(res, 200, { + access_token: accessToken, + token_type: "Bearer", + expires_in: 300, + scope: `${requestedScopes.join(" ")} profile email`, + }); +} + +http + .createServer(async (req, res) => { + try { + if (req.url === "/healthz") { + 
res.writeHead(200, { "content-type": "text/plain" }); + return res.end("ok\n"); + } + if (req.method === "POST" && req.url === "/token") { + return await handleToken(req, res); + } + return json(res, 404, { error: "not_found" }); + } catch (error) { + console.error(error); + return json(res, 500, { error: "server_error", message: error.message }); + } + }) + .listen(PORT, "0.0.0.0", () => { + console.log(`token issuer listening on ${PORT}`); + }); diff --git a/examples/spiffe-token-grant-demo/k8s/workloads.yaml b/examples/spiffe-token-grant-demo/k8s/workloads.yaml new file mode 100644 index 000000000..efba4e49f --- /dev/null +++ b/examples/spiffe-token-grant-demo/k8s/workloads.yaml @@ -0,0 +1,199 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: token-issuer + namespace: default + labels: + app: token-issuer +spec: + replicas: 1 + selector: + matchLabels: + app: token-issuer + template: + metadata: + labels: + app: token-issuer + spec: + containers: + - name: token-issuer + image: node:22-alpine + imagePullPolicy: IfNotPresent + command: ["node", "/demo/token-issuer.js"] + ports: + - name: http + containerPort: 8080 + env: + - name: ACCESS_TOKEN_SECRET + valueFrom: + secretKeyRef: + name: openshell-spiffe-token-demo + key: access-token-secret + - name: ACCESS_TOKEN_ISSUER + value: http://token-issuer.default.svc.cluster.local + - name: SPIRE_JWKS_URI + value: https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local/keys + - name: SPIRE_ISSUER + value: https://spire-spiffe-oidc-discovery-provider.spire.svc.cluster.local + - name: JWT_SVID_AUDIENCE + value: http://token-issuer.default.svc.cluster.local + - name: TRUST_DOMAIN_PREFIX + value: spiffe://openshell.local/openshell/sandbox/ + readinessProbe: + httpGet: + path: /healthz + port: http + volumeMounts: + - name: scripts + mountPath: /demo + 
readOnly: true + volumes: + - name: scripts + configMap: + name: openshell-spiffe-token-demo-scripts +--- +apiVersion: v1 +kind: Service +metadata: + name: token-issuer + namespace: default +spec: + selector: + app: token-issuer + ports: + - name: http + port: 80 + targetPort: http +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: alpha + namespace: default + labels: + app: alpha +spec: + replicas: 1 + selector: + matchLabels: + app: alpha + template: + metadata: + labels: + app: alpha + spec: + containers: + - name: alpha + image: node:22-alpine + imagePullPolicy: IfNotPresent + command: ["node", "/demo/protected-service.js"] + ports: + - name: http + containerPort: 8080 + env: + - name: SERVICE_NAME + value: alpha + - name: EXPECTED_AUDIENCE + value: alpha + - name: EXPECTED_SCOPE + value: alpha + - name: ACCESS_TOKEN_SECRET + valueFrom: + secretKeyRef: + name: openshell-spiffe-token-demo + key: access-token-secret + - name: ACCESS_TOKEN_ISSUER + value: http://token-issuer.default.svc.cluster.local + readinessProbe: + httpGet: + path: /healthz + port: http + volumeMounts: + - name: scripts + mountPath: /demo + readOnly: true + volumes: + - name: scripts + configMap: + name: openshell-spiffe-token-demo-scripts +--- +apiVersion: v1 +kind: Service +metadata: + name: alpha + namespace: default +spec: + selector: + app: alpha + ports: + - name: http + port: 80 + targetPort: http +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: beta + namespace: default + labels: + app: beta +spec: + replicas: 1 + selector: + matchLabels: + app: beta + template: + metadata: + labels: + app: beta + spec: + containers: + - name: beta + image: node:22-alpine + imagePullPolicy: IfNotPresent + command: ["node", "/demo/protected-service.js"] + ports: + - name: http + containerPort: 8080 + env: + - name: SERVICE_NAME + value: beta + - name: EXPECTED_AUDIENCE + value: beta + - name: EXPECTED_SCOPE + value: beta + - name: ACCESS_TOKEN_SECRET + valueFrom: + 
secretKeyRef: + name: openshell-spiffe-token-demo + key: access-token-secret + - name: ACCESS_TOKEN_ISSUER + value: http://token-issuer.default.svc.cluster.local + readinessProbe: + httpGet: + path: /healthz + port: http + volumeMounts: + - name: scripts + mountPath: /demo + readOnly: true + volumes: + - name: scripts + configMap: + name: openshell-spiffe-token-demo-scripts +--- +apiVersion: v1 +kind: Service +metadata: + name: beta + namespace: default +spec: + selector: + app: beta + ports: + - name: http + port: 80 + targetPort: http diff --git a/examples/spiffe-token-grant-demo/provider-profile.yaml b/examples/spiffe-token-grant-demo/provider-profile.yaml new file mode 100644 index 000000000..caadfe2b3 --- /dev/null +++ b/examples/spiffe-token-grant-demo/provider-profile.yaml @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: spiffe-token-demo +display_name: SPIFFE token grant demo +description: Dynamic token grant for alpha/beta demo services using SPIFFE JWT-SVID authentication +category: other +credentials: + - name: access_token + description: Access token obtained via SPIFFE JWT client assertion grant + required: false + auth_style: bearer + header_name: Authorization + token_grant: + token_endpoint: http://token-issuer.default.svc.cluster.local/token + audience: demo-default + jwt_svid_audience: http://token-issuer.default.svc.cluster.local + client_assertion_type: urn:ietf:params:oauth:client-assertion-type:jwt-spiffe + scopes: [demo] + cache_ttl_seconds: 60 + audience_overrides: + - host: alpha.default.svc.cluster.local + port: 80 + audience: alpha + scopes: [alpha] + - host: beta.default.svc.cluster.local + port: 80 + audience: beta + scopes: [beta] +endpoints: + - host: alpha.default.svc.cluster.local + port: 80 + protocol: rest + tls: none + access: read-write + enforcement: enforce + allowed_ips: + - 10.43.0.0/16 + - host: 
beta.default.svc.cluster.local + port: 80 + protocol: rest + tls: none + access: read-write + enforcement: enforce + allowed_ips: + - 10.43.0.0/16 +binaries: + - /usr/bin/curl + - /usr/local/bin/curl diff --git a/mise.lock b/mise.lock index 4413fb67d..9c188e0f3 100644 --- a/mise.lock +++ b/mise.lock @@ -216,6 +216,7 @@ backend = "aqua:GoogleContainerTools/skaffold" url = "https://storage.googleapis.com/skaffold/releases/v2.20.0/skaffold-linux-arm64" [tools.skaffold."platforms.linux-x64"] +checksum = "blake3:4de6b14984ff1c7e5f107dd12d15890feb4b6600032d61158162c243a81d9156" url = "https://storage.googleapis.com/skaffold/releases/v2.20.0/skaffold-linux-amd64" [tools.skaffold."platforms.macos-arm64"] diff --git a/proto/openshell.proto b/proto/openshell.proto index c2755aaf7..63dcee211 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -892,6 +892,52 @@ message ProviderProfileDiagnostic { string severity = 5; } +// Per-endpoint override of the audience (and optionally the scopes) requested by a token grant; host, port, and path select the endpoints it applies to. +message ProviderCredentialTokenGrantAudienceOverride { + // Optional: endpoint host selector. If omitted, inherits the profile endpoint host. + string host = 1; + + // Optional: endpoint port selector. If omitted, matches the expanded profile endpoint port. NOTE(review): presumably 0 is treated as omitted - confirm. + uint32 port = 2; + + // Optional: endpoint path selector. If omitted, inherits the profile endpoint path. + string path = 3; + + // Resource audience to request for matching endpoints, in place of the default audience of the token grant. + string audience = 4; + + // Optional: OAuth2 scopes to request for matching endpoints. If omitted, inherits the token grant scopes. + repeated string scopes = 5; +} + +// Provider credential token grant configuration. +// When present, the credential is obtained on demand through an OAuth2 token grant. 
+message ProviderCredentialTokenGrant { + // OAuth2 token endpoint URL that the grant request targets (e.g., https://keycloak.example.com/realms/my-realm/protocol/openid-connect/token). + string token_endpoint = 1; + + // Optional: default resource audience to request from the token service; audience_overrides may replace it for specific endpoints. + string audience = 2; + + // Optional: audience to request when fetching the JWT-SVID from SPIRE. + // If omitted, the sandbox derives this from token_endpoint. Declared out of numeric order: field number 6 is its wire identity and must not be renumbered. + string jwt_svid_audience = 6; + + // Optional: OAuth2 scopes to request; audience overrides that omit scopes inherit these. + repeated string scopes = 3; + + // Optional: override for the token cache TTL, in seconds. + // If 0 or omitted, use expires_in from the token response. NOTE(review): negative values are representable (int64) and their handling is not specified here - confirm. + int64 cache_ttl_seconds = 4; + + // Optional: endpoint-specific resource audience (and scope) overrides. + repeated ProviderCredentialTokenGrantAudienceOverride audience_overrides = 5; + + // Optional: OAuth2 client_assertion_type value. If omitted, OpenShell uses + // urn:ietf:params:oauth:client-assertion-type:jwt-bearer. + string client_assertion_type = 7; +} + // Provider credential declaration. message ProviderProfileCredential { string name = 1; @@ -903,6 +949,7 @@ message ProviderProfileCredential { string query_param = 7; ProviderCredentialRefresh refresh = 8; string path_template = 9; + ProviderCredentialTokenGrant token_grant = 10; } enum ProviderCredentialRefreshStrategy { @@ -1100,6 +1147,10 @@ message GetSandboxProviderEnvironmentResponse { uint64 provider_env_revision = 2; // Expiration timestamps for returned environment variables. map credential_expires_at_ms = 3; + // Dynamic credentials that require token grants or other runtime injection. + // Maps endpoint-bound provider metadata to credential metadata. + // The supervisor uses this to inject Authorization headers for token grant credentials. + map dynamic_credentials = 4; } // ---------------------------------------------------------------------------