From 0eb1438aac948f573e4ae65ad09821cb6c01f81a Mon Sep 17 00:00:00 2001 From: Rhys Sullivan <39114868+RhysSullivan@users.noreply.github.com> Date: Thu, 2 Jul 2026 14:38:29 -0700 Subject: [PATCH] fix(openapi): extract success responses declared with wildcard 2XX status keys Microsoft Graph's OpenAPI spec declares every success response under the wildcard status key 2XX (13k+ occurrences, no numeric 200/201 keys at all), which the extractor's /^2\d\d$/ filter skipped entirely. No response body or file hint was ever extracted for Graph operations, so drive content downloads never took the binaryResponse ToolFile path and came back as lossy text. The extractor now also accepts the 2XX wildcard as a success response, preferring exact 2xx codes first, then the 2XX wildcard, then default. Also normalize Microsoft Graph octet-stream success responses that carry a non-binary schema (report-style endpoints declare type: object there) to a binary string during spec build, and run full-graph selections through the same path-item transform. The Cloudflare extraction baseline gains one operation whose success response is declared as 2XX.
--- .../plugins/microsoft/src/sdk/graph.test.ts | 316 ++++++++++++++++++ packages/plugins/microsoft/src/sdk/graph.ts | 97 +++++- .../sdk/__snapshots__/real-specs.test.ts.snap | 2 +- .../plugins/openapi/src/sdk/extract.test.ts | 90 +++++ packages/plugins/openapi/src/sdk/extract.ts | 4 + 5 files changed, 494 insertions(+), 15 deletions(-) create mode 100644 packages/plugins/microsoft/src/sdk/graph.test.ts create mode 100644 packages/plugins/openapi/src/sdk/extract.test.ts diff --git a/packages/plugins/microsoft/src/sdk/graph.test.ts b/packages/plugins/microsoft/src/sdk/graph.test.ts new file mode 100644 index 000000000..1a316b4ac --- /dev/null +++ b/packages/plugins/microsoft/src/sdk/graph.test.ts @@ -0,0 +1,316 @@ +import { expect, it } from "@effect/vitest"; +import { Effect, Option } from "effect"; + +import { + parseEntry, + streamOperationBindingsFromStructure, + structuralSplit, +} from "@executor-js/plugin-openapi"; + +import { microsoftGraphKeepPathItem } from "./graph"; + +// Mirrors the verbatim shape of the real Microsoft Graph v1.0 spec: every +// success response uses the OpenAPI wildcard status key `2XX` (the real spec +// has zero numeric 200/201 keys), drive content GET is already declared as a +// binary octet-stream, PUT declares a binary octet-stream requestBody, error +// responses are $refs, and path-level shared parameters carry +// `x-ms-docs-key-type`. 
+const driveContentFixture = ` +openapi: 3.0.4 +info: + title: Microsoft Graph Fixture + version: v1.0 +servers: + - url: https://graph.microsoft.com/v1.0 +paths: + /drives/{drive-id}/items/{driveItem-id}/content: + get: + tags: + - drives.driveItem + summary: Get content for the navigation property items from drives + operationId: drives.GetItemsContent + parameters: + - name: $format + in: query + description: Format of the content + style: form + explode: false + schema: + type: string + responses: + 2XX: + description: Retrieved media content + content: + application/octet-stream: + schema: + type: string + format: binary + 4XX: + $ref: '#/components/responses/error' + 5XX: + $ref: '#/components/responses/error' + put: + tags: + - drives.driveItem + summary: Update content for the navigation property items in drives + operationId: drives.UpdateItemsContent + requestBody: + description: New media content. + content: + application/octet-stream: + schema: + type: string + format: binary + required: true + responses: + 2XX: + description: Success + content: + application/json: + schema: + $ref: '#/components/schemas/microsoft.graph.driveItem' + 4XX: + $ref: '#/components/responses/error' + 5XX: + $ref: '#/components/responses/error' + delete: + tags: + - drives.driveItem + summary: Delete content for the navigation property items in drives + operationId: drives.DeleteItemsContent + responses: + '204': + description: Success + 4XX: + $ref: '#/components/responses/error' + 5XX: + $ref: '#/components/responses/error' + parameters: + - name: drive-id + in: path + description: The unique identifier of drive + required: true + schema: + type: string + x-ms-docs-key-type: drive + - name: driveItem-id + in: path + description: The unique identifier of driveItem + required: true + schema: + type: string + x-ms-docs-key-type: driveItem +components: + schemas: + microsoft.graph.driveItem: + type: object + properties: + id: + type: string + 
microsoft.graph.ODataErrors.ODataError: + type: object + properties: + error: + type: object + responses: + error: + description: error + content: + application/json: + schema: + $ref: '#/components/schemas/microsoft.graph.ODataErrors.ODataError' +`; + +// Report-style Graph endpoints declare an octet-stream success response with an +// object schema (a `value` wrapper) instead of a binary string. +const reportContentFixture = ` +openapi: 3.0.4 +info: + title: Microsoft Graph Fixture + version: v1.0 +servers: + - url: https://graph.microsoft.com/v1.0 +paths: + /reports/getEmailActivityCounts(period={period}): + get: + tags: + - reports.Functions + summary: Invoke function getEmailActivityCounts + operationId: reports.getEmailActivityCounts + parameters: + - name: period + in: path + required: true + schema: + type: string + responses: + 2XX: + description: Success + content: + application/octet-stream: + schema: + type: object + properties: + value: + type: string + format: base64url + 4XX: + $ref: '#/components/responses/error' + 5XX: + $ref: '#/components/responses/error' +components: + schemas: + microsoft.graph.ODataErrors.ODataError: + type: object + properties: + error: + type: object + responses: + error: + description: error + content: + application/json: + schema: + $ref: '#/components/schemas/microsoft.graph.ODataErrors.ODataError' +`; + +const fullGraphSelection = { + coversFullGraph: true, + presetIds: [], + customScopes: [], + exactPaths: [], + pathPrefixes: [], + tagPrefixes: [], +} as const; + +const keptPathItem = (fixture: string): Record<string, unknown> => { + const structure = structuralSplit(fixture); + expect(structure).not.toBeNull(); + const entry = parseEntry(structure!.text, structure!.pathItems[0]!, 2); + expect(entry).not.toBeNull(); + const [path, rawPathItem] = entry!; + const pathItem = microsoftGraphKeepPathItem(fullGraphSelection)( + path, + rawPathItem as Record<string, unknown>, + ); + expect(pathItem).not.toBeNull(); + return pathItem as Record<string, unknown>; +}; + +type
StreamedBinding = { + readonly binding: { + readonly method: string; + readonly pathTemplate: string; + readonly responseBody: Option.Option<{ + readonly fileHint: Option.Option<{ + readonly kind: "binaryResponse" | "byteField"; + }>; + }>; + }; +}; + +const streamBindings = (fixture: string) => + Effect.gen(function* () { + const structure = structuralSplit(fixture); + expect(structure).not.toBeNull(); + const chunks: StreamedBinding[] = []; + yield* streamOperationBindingsFromStructure( + structure!, + { chunkSize: 10, keepPathItem: microsoftGraphKeepPathItem(fullGraphSelection) }, + (chunk) => + Effect.sync(() => { + chunks.push(...chunk); + }), + ); + return chunks; + }); + +const responseFileHintKind = ( + chunks: readonly StreamedBinding[], + method: string, + pathTemplate: string, +): string | undefined => { + const match = chunks.find( + (chunk) => chunk.binding.method === method && chunk.binding.pathTemplate === pathTemplate, + ); + expect(match).toBeDefined(); + const hint = Option.flatMap(match!.binding.responseBody, (body) => body.fileHint); + return Option.getOrUndefined(hint)?.kind; +}; + +it("keeps already-binary drive content responses untouched", () => { + const pathItem = keptPathItem(driveContentFixture); + + const get = pathItem.get as Record<string, unknown>; + const getResponses = get.responses as Record<string, unknown>; + expect(getResponses["2XX"]).toEqual({ + description: "Retrieved media content", + content: { + "application/octet-stream": { + schema: { type: "string", format: "binary" }, + }, + }, + }); + expect(getResponses["4XX"]).toEqual({ $ref: "#/components/responses/error" }); + expect(getResponses["5XX"]).toEqual({ $ref: "#/components/responses/error" }); + + // The real spec already declares the PUT requestBody as binary; the + // normalization must not touch request bodies.
+ const put = pathItem.put as Record<string, unknown>; + expect(put.requestBody).toEqual({ + description: "New media content.", + content: { + "application/octet-stream": { + schema: { type: "string", format: "binary" }, + }, + }, + required: true, + }); + const putResponses = put.responses as Record<string, unknown>; + expect(putResponses["2XX"]).toMatchObject({ + content: { + "application/json": { + schema: { $ref: "#/components/schemas/microsoft.graph.driveItem" }, + }, + }, + }); + + // Path-level shared parameters survive the filter. + expect(pathItem.parameters).toMatchObject([ + { name: "drive-id", "x-ms-docs-key-type": "drive" }, + { name: "driveItem-id", "x-ms-docs-key-type": "driveItem" }, + ]); +}); + +it("normalizes report-style octet-stream object schemas to binary strings", () => { + const pathItem = keptPathItem(reportContentFixture); + + const get = pathItem.get as Record<string, unknown>; + const responses = get.responses as Record<string, unknown>; + const success = responses["2XX"] as Record<string, unknown>; + expect(success.description).toBe("Success"); + expect(success.content).toEqual({ + "application/octet-stream": { + schema: { type: "string", format: "binary" }, + }, + }); + expect(responses["4XX"]).toEqual({ $ref: "#/components/responses/error" }); +}); + +it.effect("streams drive content download bindings with a binaryResponse file hint", () => + Effect.gen(function* () { + const chunks = yield* streamBindings(driveContentFixture); + expect( + responseFileHintKind(chunks, "get", "/drives/{drive-id}/items/{driveItem-id}/content"), + ).toBe("binaryResponse"); + }), +); + +it.effect("streams report-style download bindings with a binaryResponse file hint", () => + Effect.gen(function* () { + const chunks = yield* streamBindings(reportContentFixture); + expect( + responseFileHintKind(chunks, "get", "/reports/getEmailActivityCounts(period={period})"), + ).toBe("binaryResponse"); + }), +); diff --git a/packages/plugins/microsoft/src/sdk/graph.ts b/packages/plugins/microsoft/src/sdk/graph.ts index fa957eae9..dc1949b35 100644 ---
a/packages/plugins/microsoft/src/sdk/graph.ts +++ b/packages/plugins/microsoft/src/sdk/graph.ts @@ -537,6 +537,70 @@ const filterPathItem = ( return kept; }; +const normalizedMediaType = (mediaType: string): string => + mediaType.split(";")[0]?.trim().toLowerCase() ?? ""; + +const isBinaryStringSchema = (schema: unknown): boolean => + isRecord(schema) && + (schema.type === "string" || (Array.isArray(schema.type) && schema.type.includes("string"))) && + (schema.format === "binary" || schema.format === "byte"); + +// Graph declares success responses with the OpenAPI wildcard status key "2XX", +// never numeric codes like "200" (only "204" appears numerically in the spec). +const isSuccessStatusKey = (status: string): boolean => + /^2\d\d$/.test(status) || /^2xx$/i.test(status); + +// Rewrite any success response whose `application/octet-stream` media carries a +// non-binary schema (report-style Graph endpoints declare `type: object` with a +// `value` property there) to a plain binary string, so the OpenAPI extractor +// emits a `binaryResponse` file hint. Already-binary media and all other media +// types and response fields are left untouched. +const normalizeMicrosoftGraphContentPathItem = ( + pathItem: Record<string, unknown>, +): Record<string, unknown> => { + let changed = false; + const next: Record<string, unknown> = { ...pathItem }; + + for (const [key, operation] of Object.entries(pathItem)) { + if (!HTTP_METHODS.has(key.toLowerCase()) || !isRecord(operation)) continue; + const responses = isRecord(operation.responses) ? operation.responses : undefined; + if (!responses) continue; + + let responsesChanged = false; + const nextResponses: Record<string, unknown> = { ...responses }; + for (const [status, response] of Object.entries(responses)) { + if (!isSuccessStatusKey(status) || !isRecord(response)) continue; + const content = isRecord(response.content) ?
response.content : undefined; + if (!content) continue; + + let contentChanged = false; + const nextContent: Record<string, unknown> = { ...content }; + for (const [mediaType, media] of Object.entries(content)) { + if (normalizedMediaType(mediaType) !== "application/octet-stream") continue; + const schema = isRecord(media) ? media.schema : undefined; + if (isBinaryStringSchema(schema)) continue; + nextContent[mediaType] = { + ...(isRecord(media) ? media : {}), + schema: { type: "string", format: "binary" }, + }; + contentChanged = true; + } + + if (contentChanged) { + nextResponses[status] = { ...response, content: nextContent }; + responsesChanged = true; + } + } + + if (responsesChanged) { + next[key] = { ...operation, responses: nextResponses }; + changed = true; + } + } + + return changed ? next : pathItem; +}; + export const fetchMicrosoftGraphOpenApiSpec = Effect.fn("Microsoft.fetchGraphOpenApiSpec")( function* (specUrl: string) { const client = yield* HttpClient.HttpClient; @@ -605,11 +669,13 @@ export const fetchMicrosoftGraphPermissionsReference = Effect.fn( /** * Build the per-path-item filter that the streaming compile applies to each - * path-item as it parses the 37MB source. Returns `undefined` for a full-graph - * selection (keep everything). The selection predicate is identical to the old - * two-pass filter: the selected scopes are derived from the PRESET scopes - * (`microsoftGraphScopesForPresetIds`), not the expanded OAuth scopes, so the - * kept operation set matches regardless of caller. + * path-item as it parses the 37MB source. Full-graph selections keep every + * path-item, but still pass through this transform so octet-stream success + * responses are normalized to binary before the OpenAPI extractor runs.
The + * selection predicate is identical to the old two-pass filter: the selected + * scopes are derived from the PRESET scopes (`microsoftGraphScopesForPresetIds`), + * not the expanded OAuth scopes, so the kept operation set matches regardless + * of caller. */ export const microsoftGraphKeepPathItem = (selection: { readonly coversFullGraph: boolean; @@ -618,19 +684,22 @@ export const microsoftGraphKeepPathItem = (selection: { readonly exactPaths: readonly string[]; readonly pathPrefixes: readonly string[]; readonly tagPrefixes: readonly string[]; -}): KeepPathItem | undefined => { - if (selection.coversFullGraph) return undefined; +}): KeepPathItem => { const exactPaths = new Set(selection.exactPaths); const selectedScopes = new Set( microsoftGraphScopesForPresetIds(selection.presetIds, selection.customScopes), ); - return (path, pathItem) => - filterPathItem(path, pathItem, { - exactPaths, - pathPrefixes: selection.pathPrefixes, - tagPrefixes: selection.tagPrefixes, - selectedScopes, - }); + return (path, pathItem) => { + const kept = selection.coversFullGraph + ? pathItem + : filterPathItem(path, pathItem, { + exactPaths, + pathPrefixes: selection.pathPrefixes, + tagPrefixes: selection.tagPrefixes, + selectedScopes, + }); + return kept ? 
normalizeMicrosoftGraphContentPathItem(kept) : null; + }; }; /** diff --git a/packages/plugins/openapi/src/sdk/__snapshots__/real-specs.test.ts.snap b/packages/plugins/openapi/src/sdk/__snapshots__/real-specs.test.ts.snap index 56ae81379..3ddd103e2 100644 --- a/packages/plugins/openapi/src/sdk/__snapshots__/real-specs.test.ts.snap +++ b/packages/plugins/openapi/src/sdk/__snapshots__/real-specs.test.ts.snap @@ -4,7 +4,7 @@ exports[`Real specs: Cloudflare API > preserves extraction baseline for represen { "inputSchemaOperationCount": 2715, "operationCount": 2726, - "outputSchemaOperationCount": 2669, + "outputSchemaOperationCount": 2670, "selectedOperations": [ { "hasInputSchema": true, diff --git a/packages/plugins/openapi/src/sdk/extract.test.ts b/packages/plugins/openapi/src/sdk/extract.test.ts new file mode 100644 index 000000000..a10cd83bc --- /dev/null +++ b/packages/plugins/openapi/src/sdk/extract.test.ts @@ -0,0 +1,90 @@ +import { describe, expect, it } from "@effect/vitest"; +import { Effect, Option } from "effect"; + +import { extract } from "./extract"; +import { parse } from "./parse"; + +describe("OpenAPI extract response bodies", () => { + it.effect("extracts success responses declared with the wildcard 2XX status key", () => + Effect.gen(function* () { + // OpenAPI allows wildcard status keys like `2XX`; Microsoft Graph + // declares every success response this way (no numeric 200/201 keys at + // all), so the extractor must treat them as success responses. 
+ const doc = yield* parse( + JSON.stringify({ + openapi: "3.0.3", + info: { title: "Wildcard", version: "1.0.0" }, + servers: [{ url: "https://api.example.com" }], + paths: { + "/files/{id}": { + get: { + operationId: "downloadFile", + parameters: [ + { name: "id", in: "path", required: true, schema: { type: "string" } }, + ], + responses: { + "2XX": { + description: "File contents", + content: { + "application/octet-stream": { + schema: { type: "string", format: "binary" }, + }, + }, + }, + "4XX": { description: "error" }, + }, + }, + }, + }, + }), + ); + + const result = yield* extract(doc); + const operation = result.operations.find((op) => op.operationId === "downloadFile"); + expect(operation).toBeDefined(); + + const responseBody = Option.getOrUndefined(operation!.responseBody); + expect(responseBody).toBeDefined(); + expect(responseBody!.contentType).toBe("application/octet-stream"); + expect(Option.getOrUndefined(responseBody!.fileHint)?.kind).toBe("binaryResponse"); + }), + ); + + it.effect("prefers exact 2xx status codes over the 2XX wildcard", () => + Effect.gen(function* () { + const doc = yield* parse( + JSON.stringify({ + openapi: "3.0.3", + info: { title: "Wildcard", version: "1.0.0" }, + servers: [{ url: "https://api.example.com" }], + paths: { + "/things": { + get: { + operationId: "listThings", + responses: { + "2XX": { + description: "Generic success", + content: { + "text/plain": { schema: { type: "string" } }, + }, + }, + "200": { + description: "Listed", + content: { + "application/json": { schema: { type: "object" } }, + }, + }, + }, + }, + }, + }, + }), + ); + + const result = yield* extract(doc); + const operation = result.operations.find((op) => op.operationId === "listThings"); + const responseBody = Option.getOrUndefined(operation!.responseBody); + expect(responseBody?.contentType).toBe("application/json"); + }), + ); +}); diff --git a/packages/plugins/openapi/src/sdk/extract.ts b/packages/plugins/openapi/src/sdk/extract.ts index 
0c4f214ac..ea3533342 100644 --- a/packages/plugins/openapi/src/sdk/extract.ts +++ b/packages/plugins/openapi/src/sdk/extract.ts @@ -233,9 +233,13 @@ const extractResponseBody = ( ): OperationResponseBody | undefined => { if (!operation.responses) return undefined; + // Success responses may use exact codes ("200"), the OpenAPI wildcard status + // key ("2XX" — Microsoft Graph declares every success response this way), or + // fall through to "default". Prefer exact codes, then the wildcard, then default. const entries = Object.entries(operation.responses); const preferred = [ ...entries.filter(([s]) => /^2\d\d$/.test(s)).sort(([a], [b]) => a.localeCompare(b)), + ...entries.filter(([s]) => /^2xx$/i.test(s)), ...entries.filter(([s]) => s === "default"), ];