Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,77 @@ jobs:

- run: bun run test

e2e:
name: E2E (${{ matrix.target }}${{ matrix['shard-name'] && format(' {0}', matrix['shard-name']) || '' }})
strategy:
fail-fast: false
matrix:
include:
# Cloud is SHARDED: each shard boots its own fresh dev stack. The
# cloud dev server degrades after a few minutes of sustained suite
# load on 2-core runners (the SSE/OTel memory growth being
# instrumented on main) — requests start failing partway through and
# everything after dies with connection errors. Short shards on
# fresh boots stay under that threshold; re-merge into fewer jobs
# once the degradation is fixed.
- { target: cloud, shard: 1/8, shard-name: 1of8 }
- { target: cloud, shard: 2/8, shard-name: 2of8 }
- { target: cloud, shard: 3/8, shard-name: 3of8 }
- { target: cloud, shard: 4/8, shard-name: 4of8 }
- { target: cloud, shard: 5/8, shard-name: 5of8 }
- { target: cloud, shard: 6/8, shard-name: 6of8 }
- { target: cloud, shard: 7/8, shard-name: 7of8 }
- { target: cloud, shard: 8/8, shard-name: 8of8 }
- target: selfhost
runs-on: ubuntu-latest
timeout-minutes: 30
steps:
- uses: actions/checkout@v4

- uses: oven-sh/setup-bun@v2
with:
bun-version: 1.3.11

# The dev stacks spawn Node sidecars (vite/workerd tooling); pin the
# same known-good runtime the unit-test job uses.
- uses: actions/setup-node@v4
with:
node-version: 22

- run: bun install --frozen-lockfile

# Install from e2e so bunx resolves ITS pinned playwright (the version
# the tests run against) rather than floating to the latest.
- name: Install Playwright Chromium
run: bunx playwright install --with-deps chromium chromium-headless-shell
working-directory: e2e

# The globalsetup boots the target's own dev server (ports are claimed
# per checkout, so this is hermetic) and tears it down after the run.
# --retry=2: browser scenarios time out sporadically on 2-core runners
# (single-test waitFor timeouts, not systemic failures); a retry on the
# same booted stack clears them.
- name: Run ${{ matrix.target }} scenarios
run: bunx vitest run --project ${{ matrix.target }} --retry=2 ${{ matrix.shard && format('--shard={0}', matrix.shard) || '' }}
working-directory: e2e

# Failed runs keep their trace.zip / session.mp4 / step screenshots in
# runs/<target>/<slug>/ — surface them instead of a bare red X.
- name: Upload run artifacts
if: failure()
uses: actions/upload-artifact@v4
with:
name: e2e-runs-${{ matrix.target }}${{ matrix['shard-name'] && format('-{0}', matrix['shard-name']) || '' }}
path: e2e/runs/
retention-days: 7

e2e-local:
name: E2E (stdio MCP)
# Skipped on pull_request: the local scenario boots a real `executor web`
# plus a browser and is currently flaky on PRs. Still runs on push to main.
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v4

Expand Down
13 changes: 11 additions & 2 deletions apps/cloud/executor.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,23 @@ interface CloudPluginDeps {
* falls back to the credential-driven default. */
readonly workosVaultClient?: WorkOSVaultClient;
readonly activeToolkitSlug?: string;
/** Mirrors `HostConfig.allowLocalNetwork` (`ALLOW_LOCAL_NETWORK`): lets
* `microsoft.addGraph` point at a loopback emulator instead of the pinned
* Microsoft Graph URLs. Off by default; production leaves it unset. */
readonly allowLocalNetwork?: boolean;
}

export default defineExecutorConfig({
plugins: ({ workosCredentials, workosVaultClient, activeToolkitSlug }: CloudPluginDeps = {}) =>
plugins: ({
workosCredentials,
workosVaultClient,
activeToolkitSlug,
allowLocalNetwork,
}: CloudPluginDeps = {}) =>
[
openApiHttpPlugin(),
googleHttpPlugin(),
microsoftHttpPlugin(),
microsoftHttpPlugin({ allowUnsafeUrlOverrides: allowLocalNetwork === true }),
mcpHttpPlugin({
dangerouslyAllowStdioMCP: false,
}),
Expand Down
1 change: 1 addition & 0 deletions apps/cloud/src/engine/execution-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export const CloudPluginsProvider: Layer.Layer<PluginsProvider> = Layer.succeed(
},
activeToolkitSlug:
context?.mcpResource?.kind === "toolkit" ? context.mcpResource.slug : undefined,
allowLocalNetwork: env.ALLOW_LOCAL_NETWORK === "true",
}),
});

Expand Down
56 changes: 50 additions & 6 deletions apps/cloud/src/mcp/agent-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,29 @@ const propsForPrincipal = (
});

export const makeCloudMcpAgentHandler = () => {
const serve = McpSessionDOSqlite.serve("/mcp", {
binding: "MCP_SESSION",
transport: "streamable-http",
});
const serveOptions = { binding: "MCP_SESSION", transport: "streamable-http" } as const;
// The agents SDK builds an exact-match `URLPattern` from the path handed to
// `serve` (see `createStreamingHttpHandler` in `agents/dist/mcp/index.js`) —
// a single `/mcp` handler never matches `/mcp/toolkits/<slug>` and falls
// through to its own internal 404. A second `serve` mounted on the
// parameterized path picks it up (`URLPattern` supports `:slug` segments);
// the auth/ownership/props logic above is unchanged and shared, only the
// final dispatch target differs.
const serve = McpSessionDOSqlite.serve("/mcp", serveOptions);
const serveToolkit = McpSessionDOSqlite.serve("/mcp/toolkits/:slug", serveOptions);

const ALLOWED_METHODS = new Set(["GET", "POST", "DELETE", "OPTIONS"]);

return async (request: Request, env: Env, ctx: ExecutionContext): Promise<Response> => {
if (request.method === "OPTIONS") return corsPreflightResponse();
// The old envelope (packages/hosts/mcp/src/envelope.ts) answered anything
// outside GET/POST/DELETE/OPTIONS with a JSON-RPC 405; the agents SDK
// handler only understands its own transport verbs and falls through to
// a bare 404. Reject before authenticating so PUT/PATCH/etc never reach
// the session engine.
if (!ALLOWED_METHODS.has(request.method)) {
return jsonRpcResponse(405, -32001, "Method not allowed");
}
const sessionId = request.headers.get("mcp-session-id");

const { auth, outcome } = await Effect.runPromise(authenticate(request));
Expand All @@ -132,7 +148,10 @@ export const makeCloudMcpAgentHandler = () => {
}

if (!sessionId && request.method === "DELETE") {
return new Response(null, { status: 204, headers: { "access-control-allow-origin": "*" } });
// Matches the old envelope's contract (@modelcontextprotocol/sdk's
// `WebStandardStreamableHTTPServerTransport.handleDeleteRequest`): 200,
// not 204 — see e2e/cloud/mcp-protocol.test.ts.
return new Response(null, { status: 200, headers: { "access-control-allow-origin": "*" } });
}

if (sessionId) {
Expand All @@ -159,7 +178,32 @@ export const makeCloudMcpAgentHandler = () => {
},
resource,
);
const response = await serve.fetch(forwarded, env, ctx);
const target = resource.kind === "toolkit" ? serveToolkit : serve;
let response: Response;
// oxlint-disable-next-line executor/no-try-catch-or-throw -- adapter boundary: the agents SDK aborts the isolate (throws) instead of returning a response for a condemned session
try {
response = await target.fetch(forwarded, env, ctx);
} catch (error) {
// `_cf_scheduleDestroy` (called above via DELETE) marks the DO
// condemned and schedules its alarm; the alarm's `destroy()` then
// `ctx.abort("destroyed")`s the isolate. A request that lands after the
// alarm has already fired — same DO, same tick budget as the DELETE in
tests — throws that abort reason out of `target.fetch` instead of the
// DO ever getting to answer. Map it to the old envelope's reconnect
// error for a dead session (e2e/cloud/mcp-protocol.test.ts expects the
// client to be told to reconnect, matching a timed-out session).
// oxlint-disable-next-line executor/no-unknown-error-message -- adapter boundary: the abort reason is a plain runtime Error whose message IS the signal
if (Predicate.isError(error) && error.message === "destroyed") {
return jsonRpcResponse(404, -32001, "Session timed out, please reconnect");
}
// oxlint-disable-next-line executor/no-try-catch-or-throw -- adapter boundary: rethrow anything that isn't the condemned-DO abort to the Workers runtime unchanged
throw error;
}
// The agents SDK answers a bare DELETE with 204; the old envelope's
// contract (see above) was 200 — rewrite for consistency.
if (request.method === "DELETE" && response.status === 204) {
return new Response(null, { status: 200, headers: response.headers });
}
return wrapMcpSseResponse(request, env, response);
};
};
Comment thread
greptile-apps[bot] marked this conversation as resolved.
4 changes: 0 additions & 4 deletions apps/cloud/src/routeTree.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -411,15 +411,11 @@ export const routeTree = rootRouteImport
._addFileTypes<FileRouteTypes>()

import type { getRouter } from './router.tsx'

import type { startInstance } from './start.ts'

declare module '@tanstack/react-start' {
interface Register {
ssr: true

router: Awaited<ReturnType<typeof getRouter>>

config: Awaited<ReturnType<typeof startInstance.getOptions>>
}
}
14 changes: 10 additions & 4 deletions apps/cloud/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,14 @@ const cloudflareHandler: ExportedHandler<Env> = {
// its own tracing for the same reason).
const browserTraces = browserTracesResponse(request, env);
if (browserTraces) return browserTraces;
if (!installTracerProvider()) {
return fetchHandler(request, env, ctx);
}
// The MCP dispatch is classified up front, independent of whether
// telemetry installs — an unset `AXIOM_TOKEN` (tracer not installed) must
// never take /mcp requests down with it. The `!tracingInstalled` early
// return further down only governs the tracing envelope for non-MCP
// paths.
const url = new URL(request.url);
const mcpRoute = classifyMcpPath(url.pathname);
const tracingInstalled = installTracerProvider();
if (mcpRoute?.kind === "mcp") {
// The Cloudflare Agents MCP bridge needs the platform ExecutionContext
// to pass authenticated session props into the hibernatable DO.
Expand All @@ -110,9 +113,12 @@ const cloudflareHandler: ExportedHandler<Env> = {
try {
return await mcpAgentHandler(prepareMcpOrgScope(request), env, ctx);
} finally {
ctx.waitUntil(flushTracerProvider());
if (tracingInstalled) ctx.waitUntil(flushTracerProvider());
}
}
if (!tracingInstalled) {
return fetchHandler(request, env, ctx);
}
// Effect-served paths bring their own http.server span (with traceparent
// join) — opening one here too would duplicate it. See the header note.
if (isAppOwnedPath(url.pathname)) {
Expand Down
1 change: 1 addition & 0 deletions apps/host-cloudflare/src/execution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export const makeCloudflarePluginsProvider = (
makeCloudflarePlugins(config.secretKey, {
activeToolkitSlug:
context?.mcpResource?.kind === "toolkit" ? context.mcpResource.slug : undefined,
allowLocalNetwork: config.allowLocalNetwork,
}),
});

Expand Down
4 changes: 2 additions & 2 deletions apps/host-cloudflare/src/plugins.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ import { toolkitsPlugin } from "@executor-js/plugin-toolkits/server";

export const makeCloudflarePlugins = (
secretKey: string,
options: { readonly activeToolkitSlug?: string } = {},
options: { readonly activeToolkitSlug?: string; readonly allowLocalNetwork?: boolean } = {},
) =>
[
openApiHttpPlugin(),
googleHttpPlugin(),
microsoftHttpPlugin(),
microsoftHttpPlugin({ allowUnsafeUrlOverrides: options.allowLocalNetwork === true }),
mcpHttpPlugin({ dangerouslyAllowStdioMCP: false }),
graphqlHttpPlugin(),
toolkitsPlugin({ activeToolkitSlug: options.activeToolkitSlug }),
Expand Down
12 changes: 10 additions & 2 deletions apps/host-selfhost/executor.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,20 @@ import { resolveSecretKey } from "./src/config";
// (slice 4) is added here as the first writable secret provider.
// ---------------------------------------------------------------------------

/** Optional inputs accepted by the self-host `plugins` factory. */
interface SelfHostPluginDeps {
  /**
   * When `true`, `microsoft.addGraph` may target a loopback emulator rather
   * than the pinned Microsoft Graph URLs. Mirrors
   * `HostConfig.allowLocalNetwork` (EXECUTOR_ALLOW_LOCAL_NETWORK); off by
   * default.
   */
  readonly allowLocalNetwork?: boolean;
  /** Slug passed through to `toolkitsPlugin` as its `activeToolkitSlug`. */
  readonly activeToolkitSlug?: string;
}

export default defineExecutorConfig({
plugins: ({ activeToolkitSlug }: { readonly activeToolkitSlug?: string } = {}) =>
plugins: ({ activeToolkitSlug, allowLocalNetwork }: SelfHostPluginDeps = {}) =>
[
openApiHttpPlugin(),
googleHttpPlugin(),
microsoftHttpPlugin(),
microsoftHttpPlugin({ allowUnsafeUrlOverrides: allowLocalNetwork === true }),
mcpHttpPlugin({ dangerouslyAllowStdioMCP: false }),
graphqlHttpPlugin(),
toolkitsPlugin({ activeToolkitSlug }),
Expand Down
4 changes: 4 additions & 0 deletions apps/host-selfhost/src/execution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ export const SelfHostPluginsProvider: Layer.Layer<PluginsProvider> = Layer.succe
executorConfig.plugins({
activeToolkitSlug:
context?.mcpResource?.kind === "toolkit" ? context.mcpResource.slug : undefined,
// Read the env directly (same computation as loadConfig().allowLocalNetwork):
// plugins() runs per request, and loadConfig does filesystem work
// (data dir, secret key) that must not ride the request path.
allowLocalNetwork: process.env.EXECUTOR_ALLOW_LOCAL_NETWORK === "true",
}),
},
);
Expand Down
38 changes: 36 additions & 2 deletions apps/host-selfhost/src/mcp/auth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
} from "@executor-js/host-mcp";

import { BetterAuth } from "../auth/better-auth";
import { MCP_ORIGINAL_PATH_HEADER, mcpResourcePathFromOriginalPath } from "./org-path";

// ---------------------------------------------------------------------------
// Self-host McpAuthProvider adapter, backed by Better Auth's mcp() plugin.
Expand All @@ -26,7 +27,14 @@ import { BetterAuth } from "../auth/better-auth";
//
// 2. `resourceMetadataUrl(request)` — the absolute `resource_metadata` URL the
// 401 challenge points at: the bare origin-root protected-resource doc
// (`<origin>/.well-known/oauth-protected-resource`).
// (`<origin>/.well-known/oauth-protected-resource`) UNLESS the request came
// in org-scoped (`/<org>/mcp…`), in which case both this and the PRM
// document's `resource` field must echo the org-scoped form back — the MCP
// SDK client enforces that the advertised `resource` is a same-origin
// path-prefix of the URL it actually dialed (RFC 9728). The strip
// middleware (../serve.ts, ../../vite.config.ts) rewrites org-scoped
// requests to the bare route before they reach here, so the org prefix is
// recovered from MCP_ORIGINAL_PATH_HEADER, not the live request path.
//
// 3. `authenticate(request)` resolving an MCP principal as a typed AuthOutcome,
// trying two credential shapes in order:
Expand Down Expand Up @@ -68,15 +76,37 @@ const userRole = (user: object): string | null => {
/**
 * Reports whether the request carries an `Authorization` header that starts
 * with the exact, case-sensitive prefix `Bearer `. A missing header counts as
 * no bearer credential.
 */
const hasBearer = (request: Request): boolean => {
  const authorization = request.headers.get("authorization");
  if (authorization === null) return false;
  return authorization.startsWith("Bearer ");
};

/**
 * Recovers the org-scoped pathname the client originally dialed, as recorded
 * by the strip middleware in the `MCP_ORIGINAL_PATH_HEADER` request header
 * (see ./org-path.ts).
 *
 * Returns `null` when the header is absent or empty (the request was never
 * org-scoped), and otherwise whatever `mcpResourcePathFromOriginalPath` makes
 * of the header value. That helper is the second, cheap validation layer: the
 * middleware already strips a spoofed header at its own boundary, but an
 * arbitrary client-supplied string must never be reflected into a
 * security-relevant URL, so the raw value is not returned directly.
 */
const originalOrgScopedPathFor = (request: Request): string | null => {
  const rawOriginalPath = request.headers.get(MCP_ORIGINAL_PATH_HEADER);
  if (!rawOriginalPath) return null;
  return mcpResourcePathFromOriginalPath(rawOriginalPath);
};

/**
 * Pathname used to derive the toolkit slug / resource path. Prefers the
 * org-scoped original recovered by `originalOrgScopedPathFor`; when that is
 * unavailable, falls back to the pathname of the request's own URL.
 */
const effectivePathnameFor = (request: Request): string => {
  const orgScopedPath = originalOrgScopedPathFor(request);
  return orgScopedPath ?? new URL(request.url).pathname;
};

/**
 * Extracts the toolkit slug from the request's effective pathname.
 *
 * The pathname comes from `effectivePathnameFor`, so an org-scoped original
 * path takes precedence over the request's own URL. The slug is the first
 * path segment that follows `TOOLKIT_MCP_SEGMENT`.
 *
 * @returns the slug, or `null` when the segment is absent or nothing follows it.
 */
const toolkitSlugFromRequest = (request: Request): string | null => {
  // Single source of truth for the pathname: a second declaration derived
  // from `request.url` would both redeclare `pathname` and ignore the
  // org-scoped original path.
  const pathname = effectivePathnameFor(request);
  const index = pathname.indexOf(TOOLKIT_MCP_SEGMENT);
  if (index < 0) return null;
  // `split("/", 1)` keeps only the text up to the next slash.
  const slug = pathname.slice(index + TOOLKIT_MCP_SEGMENT.length).split("/", 1)[0];
  return slug && slug.length > 0 ? slug : null;
};

/**
 * Resource path for the request, chosen in priority order:
 * 1. the org-scoped original path, when the client dialed org-scoped;
 * 2. `/mcp/toolkits/<slug>` when a toolkit slug is present;
 * 3. the bare `/mcp` route.
 */
const mcpResourcePathFor = (request: Request): string => {
  const orgScopedPath = originalOrgScopedPathFor(request);
  if (orgScopedPath) return orgScopedPath;

  const slug = toolkitSlugFromRequest(request);
  if (slug) return `/mcp/toolkits/${slug}`;

  return "/mcp";
};
Expand All @@ -85,9 +115,13 @@ const mcpResourcePathFor = (request: Request): string => {
* Absolute protected-resource metadata URL for the 401 challenge. Derive the
* origin from `baseURL` when set; otherwise from the live request so the URL is
* never relative (cloud-drop-in: a self-host behind any host resolves right).
* When the client dialed org-scoped, echo the org-scoped PRM path back (see
* `mcpResourcePathFor`) so the MCP SDK's same-origin resource check passes.
*/
const resourceMetadataUrlFor = (baseURL: string | undefined, request: Request): string => {
const origin = baseURL && baseURL.length > 0 ? baseURL : new URL(request.url).origin;
const orgScoped = originalOrgScopedPathFor(request);
if (orgScoped) return `${origin}${PROTECTED_RESOURCE_METADATA_PATH}${orgScoped}`;
const toolkitSlug = toolkitSlugFromRequest(request);
return toolkitSlug
? `${origin}${PROTECTED_RESOURCE_METADATA_PATH}/mcp/toolkits/${toolkitSlug}`
Expand Down
Loading
Loading