diff --git a/packages/worker-utils/package.json b/packages/worker-utils/package.json index 14e4c1845e..1313df23d1 100644 --- a/packages/worker-utils/package.json +++ b/packages/worker-utils/package.json @@ -18,6 +18,7 @@ "./cloud-agent-next-client": "./src/cloud-agent-next-client.ts", "./kilo-model-id": "./src/kilo-model-id.ts", "./cloud-agent-queue-report": "./src/cloud-agent-queue-report.ts", + "./cloud-agent-failure": "./src/cloud-agent-failure.ts", "./security-auto-analysis-policy": "./src/security-auto-analysis-policy.ts", "./dependabot-dismissal-target": "./src/dependabot-dismissal-target.ts" }, diff --git a/packages/worker-utils/src/cloud-agent-failure.test.ts b/packages/worker-utils/src/cloud-agent-failure.test.ts new file mode 100644 index 0000000000..4d066f709e --- /dev/null +++ b/packages/worker-utils/src/cloud-agent-failure.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, it } from 'vitest'; +import { + CLOUD_AGENT_FAILURE_CODES, + CLOUD_AGENT_FAILURE_STAGES, + CloudAgentCallbackFailureSchema, + CloudAgentSafeFailureSchema, + isWorkspaceFailureSubtype, + WORKSPACE_FAILURE_SUBTYPES, +} from './cloud-agent-failure.js'; +/* Lenient callback schema: a failure the strict schema accepts parses to an equal object, anything unsupported or malformed parses to undefined. */ +describe('CloudAgentCallbackFailureSchema', () => { + it('retains failures accepted by the strict producer contract', () => { + const failure = { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 2, + message: 'Repository clone timed out', + }; + + expect(CloudAgentCallbackFailureSchema.parse(failure)).toEqual(failure); + }); + + it.each([ + { code: 'future_failure_code', message: 'Future failure' }, + { code: 'workspace_setup_failed', subtype: 'future_workspace_failure' }, + { code: 'assistant_error', futureField: true }, + { attempts: -1 }, + { message: 'x'.repeat(4_097) }, + ])('discards unsupported or malformed structured failures: %o', failure => { + expect(CloudAgentCallbackFailureSchema.parse(failure)).toBeUndefined(); + }); +}); +/* Strict schema: every allowlisted value passes, a subtype requires code workspace_setup_failed, and field bounds plus unknown keys are rejected. */ +describe('CloudAgentSafeFailureSchema', () 
=> { + it('accepts every shared contract value', () => { + for (const stage of CLOUD_AGENT_FAILURE_STAGES) { + expect(CloudAgentSafeFailureSchema.safeParse({ stage }).success).toBe(true); + } + for (const code of CLOUD_AGENT_FAILURE_CODES) { + expect(CloudAgentSafeFailureSchema.safeParse({ code }).success).toBe(true); + } + for (const subtype of WORKSPACE_FAILURE_SUBTYPES) { + expect( + CloudAgentSafeFailureSchema.safeParse({ code: 'workspace_setup_failed', subtype }).success + ).toBe(true); + expect(isWorkspaceFailureSubtype(subtype)).toBe(true); + } + }); + + it('requires workspace_setup_failed when subtype is present', () => { + expect(CloudAgentSafeFailureSchema.safeParse({ subtype: 'git_clone_timeout' }).success).toBe( + false + ); + expect( + CloudAgentSafeFailureSchema.safeParse({ + code: 'assistant_error', + subtype: 'git_clone_timeout', + }).success + ).toBe(false); + }); + + it('enforces strict optional field bounds', () => { + expect(CloudAgentSafeFailureSchema.safeParse({}).success).toBe(true); + expect(CloudAgentSafeFailureSchema.safeParse({ attempts: 0, message: 'x' }).success).toBe(true); + expect(CloudAgentSafeFailureSchema.safeParse({ attempts: -1 }).success).toBe(false); + expect(CloudAgentSafeFailureSchema.safeParse({ attempts: 1.5 }).success).toBe(false); + expect(CloudAgentSafeFailureSchema.safeParse({ message: '' }).success).toBe(false); + expect(CloudAgentSafeFailureSchema.safeParse({ message: 'x'.repeat(4_097) }).success).toBe( + false + ); + expect(CloudAgentSafeFailureSchema.safeParse({ extra: true }).success).toBe(false); + expect(isWorkspaceFailureSubtype('not_allowlisted')).toBe(false); + }); +}); diff --git a/packages/worker-utils/src/cloud-agent-failure.ts b/packages/worker-utils/src/cloud-agent-failure.ts new file mode 100644 index 0000000000..f3d05c3659 --- /dev/null +++ b/packages/worker-utils/src/cloud-agent-failure.ts @@ -0,0 +1,83 @@ +import { z } from 'zod'; +/** Allowlist of failure stage strings; CloudAgentFailureStageSchema is the zod enum built from it. */ +export const CLOUD_AGENT_FAILURE_STAGES = [ + 'pre_dispatch', + 
'post_dispatch_no_activity', + 'agent_activity', + 'interruption', + 'unknown', +] as const; +/** Zod enum that accepts exactly the strings in CLOUD_AGENT_FAILURE_STAGES. */ +export const CloudAgentFailureStageSchema = z.enum(CLOUD_AGENT_FAILURE_STAGES); +export type CloudAgentFailureStage = z.infer<typeof CloudAgentFailureStageSchema>; +/** Allowlist of failure code strings; CloudAgentFailureCodeSchema is the zod enum built from it. */ +export const CLOUD_AGENT_FAILURE_CODES = [ + 'sandbox_connect_failed', + 'workspace_setup_failed', + 'kilo_server_failed', + 'wrapper_start_failed', + 'invalid_delivery_request', + 'session_metadata_missing', + 'model_missing', + 'delivery_failure_unknown', + 'wrapper_disconnected', + 'wrapper_no_output', + 'wrapper_ping_timeout', + 'wrapper_error_before_activity', + 'assistant_error', + 'wrapper_error_after_activity', + 'missing_assistant_reply', + 'user_interrupt', + 'container_shutdown', + 'system_interrupt', + 'unclassified', +] as const; +/** Zod enum that accepts exactly the strings in CLOUD_AGENT_FAILURE_CODES. */ +export const CloudAgentFailureCodeSchema = z.enum(CLOUD_AGENT_FAILURE_CODES); +export type CloudAgentFailureCode = z.infer<typeof CloudAgentFailureCodeSchema>; +/** Allowlist of workspace failure subtype strings; CloudAgentSafeFailureSchema accepts a subtype only together with code workspace_setup_failed. */ +export const WORKSPACE_FAILURE_SUBTYPES = [ + 'git_clone_timeout', + 'git_checkout_timeout', + 'git_authentication_failed', + 'git_network_failed', + 'git_pack_corrupt', + 'git_checkout_conflict', + 'git_branch_missing', + 'sandbox_storage_full', + 'kilo_import_timeout', + 'kilo_import_failed', + 'setup_command_timeout', + 'setup_command_failed', + 'workspace_setup_unknown', +] as const; +/** Zod enum that accepts exactly the strings in WORKSPACE_FAILURE_SUBTYPES. */ +export const WorkspaceFailureSubtypeSchema = z.enum(WORKSPACE_FAILURE_SUBTYPES); +export type WorkspaceFailureSubtype = z.infer<typeof WorkspaceFailureSubtypeSchema>; +/** Maximum length accepted for the message field of CloudAgentSafeFailureSchema. */ +export const CLOUD_AGENT_SAFE_FAILURE_MESSAGE_MAX_LENGTH = 4_096; +/** Strict object schema for a failure: every field is optional, unknown keys are rejected, attempts must be a nonnegative integer, message must be 1 to CLOUD_AGENT_SAFE_FAILURE_MESSAGE_MAX_LENGTH long, and subtype requires code workspace_setup_failed. */ +export const CloudAgentSafeFailureSchema = z + .object({ + stage: CloudAgentFailureStageSchema.optional(), + code: CloudAgentFailureCodeSchema.optional(), + subtype: WorkspaceFailureSubtypeSchema.optional(), + attempts: z.number().int().nonnegative().optional(), + message: z.string().min(1).max(CLOUD_AGENT_SAFE_FAILURE_MESSAGE_MAX_LENGTH).optional(), + }) + .strict() + .refine(failure => failure.subtype === undefined || failure.code === 'workspace_setup_failed', { + message: 'Workspace failure 
subtype requires workspace_setup_failed failure code', + path: ['subtype'], + }); +/** Parsed output type of CloudAgentSafeFailureSchema. */ +export type CloudAgentSafeFailure = z.infer<typeof CloudAgentSafeFailureSchema>; +/** Lenient variant for callback payloads: the preprocess step replaces any input that fails CloudAgentSafeFailureSchema with undefined, so an invalid failure parses to undefined instead of raising. */ +export const CloudAgentCallbackFailureSchema = z.preprocess(failure => { + const parsed = CloudAgentSafeFailureSchema.safeParse(failure); + return parsed.success ? parsed.data : undefined; +}, CloudAgentSafeFailureSchema.optional()); +/** Type guard that reports whether value is one of WORKSPACE_FAILURE_SUBTYPES. */ +export function isWorkspaceFailureSubtype(value: unknown): value is WorkspaceFailureSubtype { + return WorkspaceFailureSubtypeSchema.safeParse(value).success; +} diff --git a/packages/worker-utils/src/cloud-agent-queue-report.ts b/packages/worker-utils/src/cloud-agent-queue-report.ts index 83310f4592..3ea708c10d 100644 --- a/packages/worker-utils/src/cloud-agent-queue-report.ts +++ b/packages/worker-utils/src/cloud-agent-queue-report.ts @@ -1,4 +1,8 @@ import { z } from 'zod'; +import { + CloudAgentFailureCodeSchema, + CloudAgentFailureStageSchema, +} from './cloud-agent-failure.js'; export const CloudAgentRunStatuses = [ 'queued', @@ -37,35 +41,6 @@ export const DIAGNOSTIC_RETENTION_MS = 30 * 24 * 60 * 60 * 1000; const IsoTimestampSchema = z.string().datetime({ offset: true }); const OperationalIdentifierSchema = z.string().min(1).max(MAX_OPERATIONAL_IDENTIFIER_LENGTH); const WrapperRunIdentifierSchema = OperationalIdentifierSchema.regex(/^wr_[A-Za-z0-9_-]+$/); -const CloudAgentFailureStageSchema = z.enum([ - 'pre_dispatch', - 'post_dispatch_no_activity', - 'agent_activity', - 'interruption', - 'unknown', -]); -const CloudAgentFailureCodeSchema = z.enum([ - 'sandbox_connect_failed', - 'workspace_setup_failed', - 'kilo_server_failed', - 'wrapper_start_failed', - 'invalid_delivery_request', - 'session_metadata_missing', - 'model_missing', - 'delivery_failure_unknown', - 'wrapper_disconnected', - 'wrapper_no_output', - 'wrapper_ping_timeout', - 'wrapper_error_before_activity', - 'assistant_error', - 'wrapper_error_after_activity', - 'missing_assistant_reply', - 'user_interrupt', - 'container_shutdown', - 
'system_interrupt', - 'unclassified', -]); - const validFailureClassifications = new Set( CloudAgentRunFailureClassifications.map( classification => `${classification.failureStage}:${classification.failureCode}` diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index fa87d6b7aa..3ef0403bf1 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1459,6 +1459,9 @@ importers: typescript: specifier: 'catalog:' version: 5.9.3 + vitest: + specifier: 'catalog:' + version: 4.1.6(@opentelemetry/api@1.9.1)(@types/node@25.5.2)(@vitest/coverage-v8@4.1.6)(@vitest/ui@4.1.6)(esbuild@0.27.4)(jiti@2.7.0)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.4) wrangler: specifier: 'catalog:' version: 4.98.0(@cloudflare/workers-types@4.20260605.1)(bufferutil@4.1.0)(utf-8-validate@6.0.6) diff --git a/services/auto-triage-infra/package.json b/services/auto-triage-infra/package.json index 1635858f4a..2e7257fdd7 100644 --- a/services/auto-triage-infra/package.json +++ b/services/auto-triage-infra/package.json @@ -14,6 +14,7 @@ "@cloudflare/workers-types": "catalog:", "@typescript/native-preview": "catalog:", "typescript": "catalog:", + "vitest": "catalog:", "wrangler": "catalog:" }, "dependencies": { diff --git a/services/auto-triage-infra/src/triage-orchestrator.test.ts b/services/auto-triage-infra/src/triage-orchestrator.test.ts new file mode 100644 index 0000000000..c23becf2f3 --- /dev/null +++ b/services/auto-triage-infra/src/triage-orchestrator.test.ts @@ -0,0 +1,143 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('cloudflare:workers', () => ({ + DurableObject: class DurableObject { + protected ctx: unknown; + protected env: unknown; + + constructor(ctx: unknown, env: unknown) { + this.ctx = ctx; + this.env = env; + } + }, +})); + +import { TriageOrchestrator } from './triage-orchestrator'; +import { classificationCallbackPayloadSchema, type TriageTicket } from './types'; + +const callbackSecret = 'callback-secret'; +const cloudAgentSessionId = 'agent_triage'; + +function 
createTicket(): TriageTicket { + /* Ticket fixture in status analyzing that carries the shared cloudAgentSessionId and callbackSecret constants. */ return { + ticketId: 'ticket-1', + authToken: 'auth-token', + sessionInput: { + repoFullName: 'kilocode/example', + issueNumber: 42, + issueTitle: 'Failure callback', + issueBody: null, + duplicateThreshold: 0.8, + autoFixThreshold: 0.9, + modelSlug: 'test-model', + }, + owner: { type: 'user', id: 'user-1', userId: 'user-1' }, + status: 'analyzing', + cloudAgentSessionId, + callbackSecret, + updatedAt: '2026-06-10T00:00:00.000Z', + }; +} +/** Builds a TriageOrchestrator over stubbed storage: get returns a structuredClone of the held ticket, put replaces the held ticket with a structuredClone of the written value, and getStoredState exposes the held ticket to assertions. */ +function createHarness() { + let storedState = createTicket(); + const put = vi.fn(async (_key: string, value: TriageTicket) => { + storedState = structuredClone(value); + }); + const deleteAlarm = vi.fn(async () => {}); + const context = { + storage: { + get: async () => structuredClone(storedState), + put, + deleteAlarm, + }, + } as unknown as DurableObjectState; + const environment = { + API_URL: 'https://api.example.com', + INTERNAL_API_SECRET: 'internal-secret', + }; + const orchestrator = new TriageOrchestrator(context, environment as never); + + return { orchestrator, getStoredState: () => storedState, put, deleteAlarm }; +} +/* Failure callback handling: checks which error message is persisted on the ticket and posted to the triage status endpoint; fetch is stubbed to answer 200 before each test. */ +describe('TriageOrchestrator classification failure callbacks', () => { + beforeEach(() => { + vi.stubGlobal( + 'fetch', + vi.fn(async () => new Response(null, { status: 200 })) + ); + }); + + it('persists the structured failure message instead of the legacy error message', async () => { + const harness = createHarness(); + + await harness.orchestrator.completeClassification(callbackSecret, { + cloudAgentSessionId, + status: 'failed', + errorMessage: 'legacy wrapper error', + failure: { + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + message: 'Repository clone timed out', + }, + }); + + expect(harness.getStoredState()).toMatchObject({ + status: 'failed', + errorMessage: 'Repository clone timed out', + }); + expect(fetch).toHaveBeenCalledWith( + 'https://api.example.com/api/internal/triage-status/ticket-1', + expect.objectContaining({ + body: 
JSON.stringify({ + status: 'failed', + errorMessage: 'Repository clone timed out', + }), + }) + ); + }); + + it.each([ + { failure: { code: 'future_failure_code' } }, + { failure: { subtype: 'future_workspace_failure' } }, + { failure: { extra: true } }, + { failure: { attempts: -1 } }, + { failure: { message: 'x'.repeat(4_097) } }, + ])('discards incompatible failure and retains the legacy payload: %o', extension => { + expect( + classificationCallbackPayloadSchema.parse({ + cloudAgentSessionId, + status: 'failed', + errorMessage: 'legacy wrapper error', + ...extension, + }) + ).toEqual({ + cloudAgentSessionId, + status: 'failed', + errorMessage: 'legacy wrapper error', + failure: undefined, + }); + }); + + it('persists the legacy error message when structured failure is absent', async () => { + const harness = createHarness(); + + await harness.orchestrator.completeClassification(callbackSecret, { + cloudAgentSessionId, + status: 'failed', + errorMessage: 'legacy wrapper error', + }); + + expect(harness.getStoredState()).toMatchObject({ + status: 'failed', + errorMessage: 'legacy wrapper error', + }); + expect(fetch).toHaveBeenCalledWith( + 'https://api.example.com/api/internal/triage-status/ticket-1', + expect.objectContaining({ + body: JSON.stringify({ status: 'failed', errorMessage: 'legacy wrapper error' }), + }) + ); + }); +}); diff --git a/services/auto-triage-infra/src/triage-orchestrator.ts b/services/auto-triage-infra/src/triage-orchestrator.ts index 5c6aae9d0c..f10f995195 100644 --- a/services/auto-triage-infra/src/triage-orchestrator.ts +++ b/services/auto-triage-infra/src/triage-orchestrator.ts @@ -157,6 +157,7 @@ export class TriageOrchestrator extends DurableObject { if (payload.status !== 'completed') { const errorMessage = + payload.failure?.message ?? payload.errorMessage ?? 
`Classification session ended with status '${payload.status}' without an error message.`; await this.updateStatus('failed', { errorMessage }); diff --git a/services/auto-triage-infra/src/types.ts b/services/auto-triage-infra/src/types.ts index d9b2f8372b..bebd56b30a 100644 --- a/services/auto-triage-infra/src/types.ts +++ b/services/auto-triage-infra/src/types.ts @@ -5,6 +5,7 @@ import { z } from 'zod'; import type { TriageOrchestrator } from './triage-orchestrator'; import type { Owner, MCPServerConfig } from '@kilocode/worker-utils'; +import { CloudAgentCallbackFailureSchema } from '@kilocode/worker-utils/cloud-agent-failure'; export type { Owner, MCPServerConfig }; @@ -149,6 +150,7 @@ export const classificationCallbackPayloadSchema = z.object({ cloudAgentSessionId: z.string(), status: z.enum(['completed', 'failed', 'interrupted']), errorMessage: z.string().optional(), + failure: CloudAgentCallbackFailureSchema, lastAssistantMessageText: z.string().optional(), }); diff --git a/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts b/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts index 80ade47f2b..f0e7ce6b43 100644 --- a/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts +++ b/services/cloud-agent-next/src/agent-sandbox/cloudflare/cloudflare-agent-sandbox.ts @@ -43,6 +43,7 @@ import { ExecutionError } from '../../execution/errors.js'; import { isSandboxFilesystemUnusableError, SandboxCapacityInspectionError, + WorkspaceCapacityAdmissionRejectedError, } from '../../workspace-errors.js'; const PREPARE_WORKSPACE_TIMEOUT_MS = 10 * 60 * 1000; @@ -182,23 +183,47 @@ export class CloudflareAgentSandbox implements AgentSandbox { const sandbox = await this.getSandbox({ sleepAfter: SANDBOX_SLEEP_AFTER_SECONDS }); if (this.requiresPreparedDevcontainerRuntime(request)) { - const preparedWorkspace = await withWorkspacePreparationTimeout( - this.sessionService.prepareWorkspace({ - 
sandbox, - sandboxId, - orgId, - userId, - sessionId: sessionId as ServiceSessionId, - kilocodeModel: plan.agent.model, - env: this.env, - metadata: plan.workspace.metadata, - onProgress: request.onProgress, - }), - 'devcontainer workspace preparation' - ); + let preparedWorkspace; + try { + preparedWorkspace = await withWorkspacePreparationTimeout( + this.sessionService.prepareWorkspace({ + sandbox, + sandboxId, + orgId, + userId, + sessionId: sessionId as ServiceSessionId, + kilocodeModel: plan.agent.model, + env: this.env, + metadata: plan.workspace.metadata, + onProgress: request.onProgress, + }), + 'devcontainer workspace preparation' + ); + } catch (error) { + if (error instanceof WorkspaceCapacityAdmissionRejectedError) throw error; + const storageFull = + error instanceof SandboxCapacityInspectionError || + isSandboxFilesystemUnusableError(error); + throw ExecutionError.workspaceSetupFailed( + storageFull ? 'Sandbox storage is full' : 'Devcontainer workspace preparation failed', + error, + { + subtype: storageFull ? 'sandbox_storage_full' : 'workspace_setup_unknown', + safeFailureMessage: storageFull + ? 
'Sandbox storage is full' + : 'Devcontainer workspace preparation failed', + } + ); + } if (!preparedWorkspace.devcontainer || !preparedWorkspace.ready.devcontainer) { throw ExecutionError.workspaceSetupFailed( - 'Devcontainer workspace preparation did not resolve runtime metadata' + 'Devcontainer workspace preparation did not resolve runtime metadata', + undefined, + { + subtype: 'workspace_setup_unknown', + safeFailureMessage: + 'Devcontainer workspace preparation did not resolve runtime metadata', + } ); } let wrapper: Awaited>; diff --git a/services/cloud-agent-next/src/callbacks/delivery.test.ts b/services/cloud-agent-next/src/callbacks/delivery.test.ts index fa4e5b38bc..fb65c217d1 100644 --- a/services/cloud-agent-next/src/callbacks/delivery.test.ts +++ b/services/cloud-agent-next/src/callbacks/delivery.test.ts @@ -164,18 +164,30 @@ describe('deliverCallbackJob', () => { expect(result.type).toBe('success'); }); - it('should send correct payload to fetch', async () => { + it('forwards optional structured failure unchanged', async () => { const mockFetch = vi.fn().mockResolvedValue(new Response('', { status: 200 })); globalThis.fetch = mockFetch; const target: CallbackTarget = { url: 'https://example.com/callback' }; + const payload: ExecutionCallbackPayload = { + ...mockPayload, + status: 'failed', + errorMessage: 'legacy error', + failure: { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 2, + message: 'Repository clone timed out', + }, + }; - await deliverCallbackJob(target, mockPayload, 1); + await deliverCallbackJob(target, payload, 1); expect(mockFetch).toHaveBeenCalledWith( 'https://example.com/callback', expect.objectContaining({ method: 'POST', - body: JSON.stringify(mockPayload), + body: JSON.stringify(payload), }) ); }); diff --git a/services/cloud-agent-next/src/callbacks/queue-payload.test.ts b/services/cloud-agent-next/src/callbacks/queue-payload.test.ts index 37aba1d149..fec53187d1 100644 --- 
a/services/cloud-agent-next/src/callbacks/queue-payload.test.ts +++ b/services/cloud-agent-next/src/callbacks/queue-payload.test.ts @@ -113,12 +113,19 @@ describe('fitCallbackJobToQueueLimit', () => { expect(result.serializedByteLength).toBeLessThanOrEqual(CALLBACK_QUEUE_MAX_SERIALIZED_BYTES); }); - it('truncates oversized failure errors so the terminal callback is still delivered', () => { + it('truncates oversized failure errors while preserving structured failure fields', () => { const errorMessage = 'provider failure: '.repeat(CALLBACK_QUEUE_MAX_SERIALIZED_BYTES); const job = callbackJob(''); job.payload.status = 'failed'; delete job.payload.lastAssistantMessageText; job.payload.errorMessage = errorMessage; + job.payload.failure = { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 3, + message: 'Repository clone timed out', + }; const result = fitCallbackJobToQueueLimit(job); @@ -130,6 +137,7 @@ describe('fitCallbackJobToQueueLimit', () => { originalUtf8ByteLength: new TextEncoder().encode(errorMessage).byteLength, retainedUtf8ByteLength: new TextEncoder().encode(result.job.payload.errorMessage).byteLength, }); + expect(result.job.payload.failure).toEqual(job.payload.failure); expect(actualSerializedCallbackJobByteLength(result.job)).toBe(result.serializedByteLength); expect(result.serializedByteLength).toBeLessThanOrEqual(CALLBACK_QUEUE_MAX_SERIALIZED_BYTES); }); diff --git a/services/cloud-agent-next/src/callbacks/types.ts b/services/cloud-agent-next/src/callbacks/types.ts index d64253245c..24e32d3e50 100644 --- a/services/cloud-agent-next/src/callbacks/types.ts +++ b/services/cloud-agent-next/src/callbacks/types.ts @@ -1,3 +1,5 @@ +import type { SafeFailureProjection } from '../session/safe-failure-projection.js'; + export type CallbackTarget = { url: string; headers?: Record; @@ -17,6 +19,7 @@ export type ExecutionCallbackPayload = { messageId?: string; status: 'completed' | 'failed' | 'interrupted'; 
errorMessage?: string; + failure?: SafeFailureProjection; /** Present when errorMessage was shortened to fit the callback queue. */ errorMessageTruncation?: CallbackTextTruncation; lastSeenBranch?: string; diff --git a/services/cloud-agent-next/src/execution/errors.ts b/services/cloud-agent-next/src/execution/errors.ts index f930db5b81..2ba3d2aae4 100644 --- a/services/cloud-agent-next/src/execution/errors.ts +++ b/services/cloud-agent-next/src/execution/errors.ts @@ -6,6 +6,8 @@ * - 4xx/5xx: Non-retryable errors */ +import type { WorkspaceFailureSubtype } from '../shared/wrapper-bootstrap.js'; + /** * Error codes for transient/retryable failures (503). * Client should retry with backoff. @@ -38,6 +40,8 @@ export type ExecutionErrorOptions = { retryable: boolean; /** Original error that caused this (for logging/debugging) */ cause?: unknown; + workspaceFailureSubtype?: WorkspaceFailureSubtype; + safeFailureMessage?: string; }; /** @@ -47,12 +51,16 @@ export type ExecutionErrorOptions = { export class ExecutionError extends Error { readonly code: ExecutionErrorCode; readonly retryable: boolean; + readonly workspaceFailureSubtype?: WorkspaceFailureSubtype; + readonly safeFailureMessage?: string; constructor(code: ExecutionErrorCode, message: string, options: ExecutionErrorOptions) { super(message, { cause: options.cause }); this.name = 'ExecutionError'; this.code = code; this.retryable = options.retryable; + this.workspaceFailureSubtype = options.workspaceFailureSubtype; + this.safeFailureMessage = options.safeFailureMessage; } /** @@ -65,8 +73,21 @@ export class ExecutionError extends Error { /** * Create a retryable error for workspace setup failures. 
*/ - static workspaceSetupFailed(message: string, cause?: unknown): ExecutionError { - return new ExecutionError('WORKSPACE_SETUP_FAILED', message, { retryable: true, cause }); + static workspaceSetupFailed( + message: string, + cause?: unknown, + options?: { + /** Stored on the error as workspaceFailureSubtype. */ subtype?: WorkspaceFailureSubtype; + /** Stored on the error as safeFailureMessage. */ safeFailureMessage?: string; + /** Defaults to true when omitted; pass false to create a non-retryable workspace setup error. */ retryable?: boolean; + } + ): ExecutionError { + return new ExecutionError('WORKSPACE_SETUP_FAILED', message, { + retryable: options?.retryable ?? true, + cause, + workspaceFailureSubtype: options?.subtype, + safeFailureMessage: options?.safeFailureMessage, + }); } /** diff --git a/services/cloud-agent-next/src/execution/orchestrator.test.ts b/services/cloud-agent-next/src/execution/orchestrator.test.ts index 30b7105f2c..1ee3537b6d 100644 --- a/services/cloud-agent-next/src/execution/orchestrator.test.ts +++ b/services/cloud-agent-next/src/execution/orchestrator.test.ts @@ -227,6 +227,24 @@ describe('ExecutionOrchestrator AgentSandbox delivery', () => { expect(prompt).not.toHaveBeenCalled(); }); + it('preserves non-retryable workspace setup failures from the wrapper', async () => { + const { orchestrator, prompt } = createOrchestrator(); + prompt.mockRejectedValueOnce( + new WrapperError('Requested repository branch was not found', 'WORKSPACE_SETUP_FAILED', 503, { + workspaceFailureSubtype: 'git_branch_missing', + safeDetail: 'Requested repository branch was not found', + retryable: false, + }) + ); + + await expect(orchestrator.execute(basePlan)).rejects.toMatchObject({ + code: 'WORKSPACE_SETUP_FAILED', + retryable: false, + workspaceFailureSubtype: 'git_branch_missing', + safeFailureMessage: 'Requested repository branch was not found', + } satisfies Partial<ExecutionError>); + }); + it('keeps ordinary wrapper bootstrap failure retryable', async () => { const { orchestrator, ensureWrapper } = createOrchestrator(); ensureWrapper.mockRejectedValueOnce(new Error('wrapper unavailable')); diff --git a/services/cloud-agent-next/src/execution/orchestrator.ts 
b/services/cloud-agent-next/src/execution/orchestrator.ts index 51eda4ffc7..369d8e4e65 100644 --- a/services/cloud-agent-next/src/execution/orchestrator.ts +++ b/services/cloud-agent-next/src/execution/orchestrator.ts @@ -206,7 +206,11 @@ export class ExecutionOrchestrator { .warn('ExecutionOrchestrator wrapper dispatch failed'); if (error instanceof WrapperError) { if (error.code === 'WORKSPACE_SETUP_FAILED') { - throw ExecutionError.workspaceSetupFailed(error.message, error); + throw ExecutionError.workspaceSetupFailed(error.message, error, { + subtype: error.workspaceFailureSubtype, + safeFailureMessage: error.safeDetail, + retryable: error.retryable, + }); } if (error.code === 'KILO_SERVER_FAILED') { throw ExecutionError.kiloServerFailed(error.message, error); diff --git a/services/cloud-agent-next/src/kilo/wrapper-client.test.ts b/services/cloud-agent-next/src/kilo/wrapper-client.test.ts index d809dd46a9..d044998c01 100644 --- a/services/cloud-agent-next/src/kilo/wrapper-client.test.ts +++ b/services/cloud-agent-next/src/kilo/wrapper-client.test.ts @@ -25,6 +25,8 @@ import { import type { ExecutionSession, SandboxInstance } from '../types.js'; import type { WrapperInstanceLease } from '../agent-sandbox/protocol.js'; import { WRAPPER_VERSION } from '../shared/wrapper-version.js'; +import type { WrapperSessionReadyRequest } from '../shared/wrapper-bootstrap.js'; +import { logger } from '../logger.js'; vi.mock('./ports.js', () => ({ randomPort: vi.fn(() => 10000 + Math.floor(Math.random() * 50000)), @@ -288,6 +290,88 @@ describe('WrapperClient', () => { expect('executeSession' in client).toBe(false); expect(session.exec).not.toHaveBeenCalled(); }); + + it('propagates validated workspace failure diagnostics', async () => { + const transport: WrapperTransport = { + request: vi.fn().mockResolvedValue( + Response.json( + { + error: 'WORKSPACE_SETUP_FAILED', + message: 'Workspace setup failed', + subtype: 'git_clone_timeout', + detail: 'Repository clone timed out 
after 120 seconds', + retryable: false, + }, + { status: 503 } + ) + ), + }; + const client = new WrapperClient({ + session: createMockSession(createSuccessResponse({})), + port: defaultPort, + transport, + }); + + await expect( + client.ensureSessionReady({} as WrapperSessionReadyRequest) + ).rejects.toMatchObject({ + code: 'WORKSPACE_SETUP_FAILED', + workspaceFailureSubtype: 'git_clone_timeout', + safeDetail: 'Repository clone timed out after 120 seconds', + retryable: false, + }); + }); + + it('keeps old workspace failure responses compatible', async () => { + const transport: WrapperTransport = { + request: vi + .fn() + .mockResolvedValue( + Response.json( + { error: 'WORKSPACE_SETUP_FAILED', message: 'Workspace setup failed' }, + { status: 503 } + ) + ), + }; + const client = new WrapperClient({ + session: createMockSession(createSuccessResponse({})), + port: defaultPort, + transport, + }); + + await expect( + client.ensureSessionReady({} as WrapperSessionReadyRequest) + ).rejects.toMatchObject({ + code: 'WORKSPACE_SETUP_FAILED', + workspaceFailureSubtype: undefined, + safeDetail: undefined, + }); + }); + + it('rejects invalid workspace failure diagnostics at the shared boundary', async () => { + const transport: WrapperTransport = { + request: vi.fn().mockResolvedValue( + Response.json( + { + error: 'WORKSPACE_SETUP_FAILED', + message: 'Workspace setup failed', + subtype: 'credential_leak', + detail: 'x'.repeat(8_193), + }, + { status: 503 } + ) + ), + }; + const client = new WrapperClient({ + session: createMockSession(createSuccessResponse({})), + port: defaultPort, + transport, + }); + + await expect( + client.ensureSessionReady({} as WrapperSessionReadyRequest) + ).rejects.toMatchObject({ code: 'PARSE_ERROR' }); + }); }); // ------------------------------------------------------------------------- @@ -901,8 +985,7 @@ describe('WrapperClient', () => { // ensureRunning makes ONE attempt (port retry lives in ensureWrapper) 
expect(session.startProcess).toHaveBeenCalledTimes(1); - // getLogs should be called on the single failed attempt - expect(getLogsMock).toHaveBeenCalledTimes(1); + expect(getLogsMock).not.toHaveBeenCalled(); // pkill should be called to clean up the failed process const execCalls = (session.exec as ReturnType).mock.calls; const pkillCalls = execCalls.filter(call => String(call[0]).includes('pkill')); @@ -934,22 +1017,30 @@ describe('WrapperClient', () => { expect(session.startProcess).toHaveBeenCalledTimes(1); }); - it('preserves sandbox waitForPort failures as the not-ready cause', async () => { - const session = createMockSession(createCurlError(7, 'Connection refused')); - const sandboxStartupError = new Error('Process exited before ready'); - Object.assign(sandboxStartupError, { - name: 'ProcessExitedBeforeReadyError', - httpStatus: 500, + it('does not expose startup diagnostics in errors or structured logs', async () => { + const startupSecret = 'startup-token-secret'; + const stdoutSecret = 'stdout-token-secret'; + const stderrSecret = 'stderr-token-secret'; + const wrapperLogSecret = 'wrapper-log-token-secret'; + const session = createMockSession((command: string) => { + if (command.includes('cat ')) { + return { exitCode: 0, stdout: wrapperLogSecret }; + } + return createCurlError(7, 'Connection refused'); + }); + const getLogsMock = vi.fn().mockResolvedValue({ + stdout: stdoutSecret, + stderr: stderrSecret, }); - (session.startProcess as ReturnType).mockResolvedValue({ - id: 'mock-process-1', - waitForPort: vi.fn().mockRejectedValue(sandboxStartupError), - getLogs: vi.fn().mockResolvedValue({ stdout: '', stderr: '' }), + id: 'mock-process-id', + waitForPort: vi.fn().mockRejectedValue(new Error(startupSecret)), + getLogs: getLogsMock, }); - + const loggerError = vi.spyOn(logger, 'error').mockImplementation(() => logger); const client = new WrapperClient({ session, port: defaultPort }); + let thrown: unknown; try { await client.ensureRunning({ 
agentSessionId, @@ -957,40 +1048,29 @@ describe('WrapperClient', () => { maxWaitMs: 100, workspacePath: '/workspace/test', }); - expect.fail('Expected ensureRunning to throw'); } catch (error) { - expect(error).toBeInstanceOf(WrapperNotReadyError); - expect(error).toHaveProperty('cause', sandboxStartupError); + thrown = error; } - }); - - it('calls getLogs on process when startup fails', async () => { - const session = createMockSession(createCurlError(7, 'Connection refused')); - const getLogsMock = vi.fn().mockResolvedValue({ - stdout: 'wrapper output before crash', - stderr: 'illegal instruction', + expect(thrown).toBeInstanceOf(WrapperNotReadyError); + expect(thrown).toMatchObject({ + message: 'Wrapper did not become ready', + code: 'NOT_READY', }); - - (session.startProcess as ReturnType).mockResolvedValue({ - id: 'mock-process-id', - waitForPort: vi.fn().mockRejectedValue(new Error('Process exited with code 132')), - getLogs: getLogsMock, + expect((thrown as Error).cause).toBeUndefined(); + expect(getLogsMock).not.toHaveBeenCalled(); + expect(session.exec).not.toHaveBeenCalledWith(expect.stringContaining('cat ')); + const logged = JSON.stringify(loggerError.mock.calls); + for (const secret of [startupSecret, stdoutSecret, stderrSecret, wrapperLogSecret]) { + expect(String(thrown)).not.toContain(secret); + expect(logged).not.toContain(secret); + } + expect(loggerError).toHaveBeenCalledWith('Wrapper startup failed', { + port: defaultPort, + processId: 'mock-process-id', + timeoutMs: 100, }); - - const client = new WrapperClient({ session, port: defaultPort }); - - await expect( - client.ensureRunning({ - agentSessionId, - userId, - maxWaitMs: 100, - workspacePath: '/workspace/test', - }) - ).rejects.toThrow(WrapperNotReadyError); - - // getLogs should be called on the single failed attempt - expect(getLogsMock).toHaveBeenCalledTimes(1); + loggerError.mockRestore(); }); it('uses default wrapper path and calls startProcess', async () => { diff --git 
a/services/cloud-agent-next/src/kilo/wrapper-client.ts b/services/cloud-agent-next/src/kilo/wrapper-client.ts index 2b9208528b..513171e01a 100644 --- a/services/cloud-agent-next/src/kilo/wrapper-client.ts +++ b/services/cloud-agent-next/src/kilo/wrapper-client.ts @@ -25,11 +25,13 @@ import { import { KILO_AGENT_SESSION_LABEL, type DevContainerHandle } from './devcontainer.js'; import { WRAPPER_VERSION } from '../shared/wrapper-version.js'; import { shellQuote, validShellEnvEntries } from './utils.js'; -import type { - WrapperCommandRequest, - WrapperPromptRequest, - WrapperSessionReadyRequest, - WrapperSessionReadySuccessResponse, +import { + parseWrapperSessionReadyErrorResponse, + type WorkspaceFailureSubtype, + type WrapperCommandRequest, + type WrapperPromptRequest, + type WrapperSessionReadyRequest, + type WrapperSessionReadySuccessResponse, } from '../shared/wrapper-bootstrap.js'; // --------------------------------------------------------------------------- @@ -156,15 +158,28 @@ export type WrapperTransport = { // Error Classes // --------------------------------------------------------------------------- +export type WrapperErrorOptions = ErrorOptions & { + workspaceFailureSubtype?: WorkspaceFailureSubtype; + safeDetail?: string; + retryable?: boolean; +}; + export class WrapperError extends Error { + readonly workspaceFailureSubtype?: WorkspaceFailureSubtype; + readonly safeDetail?: string; + readonly retryable?: boolean; + constructor( message: string, public readonly code: string, public readonly statusCode: number, - options?: ErrorOptions + options?: WrapperErrorOptions ) { super(message, options); this.name = 'WrapperError'; + this.workspaceFailureSubtype = options?.workspaceFailureSubtype; + this.safeDetail = options?.safeDetail; + this.retryable = options?.retryable; } } @@ -179,8 +194,8 @@ export class WrapperFinalizingError extends WrapperError { } export class WrapperNotReadyError extends WrapperError { - constructor(message: string, options?: 
ErrorOptions) { - super(message, 'NOT_READY', 503, options); + constructor(message: string) { + super(message, 'NOT_READY', 503); this.name = 'WrapperNotReadyError'; } } @@ -479,13 +494,17 @@ export class WrapperClient { const parsed = JSON.parse(responseText) as T & { error?: string; message?: string; - retryable?: boolean; wrapperRunId?: string; }; // Check for error response if (parsed.error || !response.ok) { - const errorCode = parsed.error ?? `HTTP_${response.status}`; + const readyError = + path === '/session/ready' ? parseWrapperSessionReadyErrorResponse(parsed) : undefined; + if (path === '/session/ready' && parsed.error && !readyError) { + throw new Error('Invalid wrapper session-ready error response'); + } + const errorCode = readyError?.error ?? parsed.error ?? `HTTP_${response.status}`; const statusCode = ERROR_STATUS_CODES[errorCode] ?? response.status ?? 500; logger .withFields({ method, path, port: this.port, errorCode, statusCode }) @@ -499,12 +518,21 @@ export class WrapperClient { } if (errorCode === 'WRAPPER_FINALIZING') { throw new WrapperFinalizingError( - parsed.message ?? 'Wrapper batch is finalizing', - parsed.wrapperRunId + readyError?.message ?? parsed.message ?? 'Wrapper batch is finalizing', + readyError?.wrapperRunId ?? parsed.wrapperRunId ); } - throw new WrapperError(parsed.message ?? errorCode, errorCode, statusCode); + throw new WrapperError( + readyError?.message ?? parsed.message ?? errorCode, + errorCode, + statusCode, + { + workspaceFailureSubtype: readyError?.subtype, + safeDetail: readyError?.detail, + retryable: readyError?.retryable, + } + ); } return parsed; @@ -675,60 +703,19 @@ export class WrapperClient { logger.debug('WrapperClient: wrapper is ready', { port: this.port, processId: proc.id }); return { started: true }; - } catch (error) { - const startupError = error instanceof Error ? 
error : new Error(String(error)); - + } catch { if (envFileWritten && envFilePath) { try { await this.session.exec(`rm -f ${shellQuote(envFilePath)}`); - } catch (cleanupError) { - logger.warn('Failed to clean up wrapper env file after startup failure', { - envFilePath, - error: cleanupError instanceof Error ? cleanupError.message : String(cleanupError), - }); - } - } - - // Capture process stdout/stderr for diagnostics (best-effort) - let stdout: string | undefined; - let stderr: string | undefined; - if (proc) { - try { - let logsTimeoutId: ReturnType | undefined; - const logs = await Promise.race([ - proc.getLogs(), - new Promise((_, reject) => { - logsTimeoutId = setTimeout(() => reject(new Error('getLogs timed out')), 5_000); - }), - ]); - clearTimeout(logsTimeoutId); - stdout = logs.stdout; - stderr = logs.stderr; - } catch (logError) { - logger.debug('Failed to read wrapper process logs', { + } catch { + logger.warn('Wrapper startup env cleanup failed', { port: this.port, - processId: proc.id, - error: logError instanceof Error ? logError.message : String(logError), + processId: proc?.id, + timeoutMs: maxWaitMs, }); } } - // Read the wrapper's own log file for richer diagnostics (logToFile output) - let wrapperFileLog: string | undefined; - try { - const quotedWrapperLogPath = `'${wrapperLogPath.replace(/'/g, "'\\''")}'`; - const logResult = await this.session.exec(`cat ${quotedWrapperLogPath} 2>/dev/null`); - const content = logResult.stdout?.trim(); - if (content) { - wrapperFileLog = content; - } - } catch (logFileError) { - logger.debug('Failed to read wrapper log file', { - wrapperLogPath, - error: logFileError instanceof Error ? logFileError.message : String(logFileError), - }); - } - // Kill the failed process (proc.kill() is unreliable in the sandbox SDK, // so use pkill -f against the session marker). 
try { @@ -737,27 +724,13 @@ export class WrapperClient { // Process may already be dead - ignore } - const diagParts = [ - startupError.message, - stdout ? `stdout: ${stdout}` : undefined, - stderr ? `stderr: ${stderr}` : undefined, - wrapperFileLog ? `wrapperFileLog: ${wrapperFileLog}` : undefined, - ] - .filter(Boolean) - .join(' | '); - logger.error('Wrapper startup failed', { port: this.port, - error: startupError.message, - stdout, - stderr, - wrapperFileLog, + processId: proc?.id, + timeoutMs: maxWaitMs, }); - throw new WrapperNotReadyError( - `Wrapper did not become ready on port ${this.port} within ${maxWaitMs}ms: ${diagParts}`, - { cause: startupError } - ); + throw new WrapperNotReadyError('Wrapper did not become ready'); } } diff --git a/services/cloud-agent-next/src/persistence/CloudAgentSession.ts b/services/cloud-agent-next/src/persistence/CloudAgentSession.ts index 9c0b5cc389..7e97bf3e0b 100644 --- a/services/cloud-agent-next/src/persistence/CloudAgentSession.ts +++ b/services/cloud-agent-next/src/persistence/CloudAgentSession.ts @@ -132,7 +132,11 @@ import { type AgentRuntimeAcceptedDelivery, type AgentRuntimeOrchestrator, } from '../session/agent-runtime.js'; -import { createWrapperSupervisor, type WrapperSupervisor } from '../session/wrapper-supervisor.js'; +import { + createWrapperSupervisor, + type WrapperSupervisor, + type WrapperTerminalEvent, +} from '../session/wrapper-supervisor.js'; import { emitRunStateReport } from '../telemetry/queue-reports.js'; import { createAgentSandbox } from '../agent-sandbox/factory.js'; import type { @@ -2959,13 +2963,7 @@ export class CloudAgentSession extends DurableObject { logger.withFields({ sessionId, executionId }).info('Execution complete - session is idle'); } - async handleWrapperTerminalEvent(params: { - wrapperRunId: string; - status: 'completed' | 'failed' | 'interrupted'; - error?: string; - gateResult?: 'pass' | 'fail'; - messageIds?: string[]; - }): Promise { + async 
handleWrapperTerminalEvent(params: WrapperTerminalEvent): Promise { await this.resolveSessionId(); await this.getWrapperSupervisor().onTerminalEvent(params); } diff --git a/services/cloud-agent-next/src/router/schemas.ts b/services/cloud-agent-next/src/router/schemas.ts index b82632e582..8e91d6e8f7 100644 --- a/services/cloud-agent-next/src/router/schemas.ts +++ b/services/cloud-agent-next/src/router/schemas.ts @@ -18,11 +18,8 @@ import { } from '../persistence/schemas.js'; import { AgentModeSchema, BUILTIN_AGENT_MODES, Limits } from '../schema.js'; import { MESSAGE_ID_FORMAT_DESCRIPTION, MESSAGE_ID_PATTERN } from '../session/message-id.js'; -import { - SessionMessageCompletionSourceSchema, - SessionMessageFailureCodeSchema, - SessionMessageFailureStageSchema, -} from '../session/session-message-state.js'; +import { SessionMessageCompletionSourceSchema } from '../session/session-message-state.js'; +import { SafeFailureProjectionSchema } from '../session/safe-failure-projection.js'; // Re-export schemas from types.ts and persistence/schemas.ts for convenience export { sessionIdSchema, githubRepoSchema, gitUrlSchema, envVarsSchema }; @@ -873,14 +870,7 @@ export const GetMessageResultOutput = z acceptedAt: z.number().optional(), terminalAt: z.number().optional(), completionSource: SessionMessageCompletionSourceSchema.optional(), - failure: z - .object({ - stage: SessionMessageFailureStageSchema.optional(), - code: SessionMessageFailureCodeSchema.optional(), - attempts: z.number().int().nonnegative().optional(), - }) - .strict() - .optional(), + failure: SafeFailureProjectionSchema.optional(), gateResult: z.enum(['pass', 'fail']).optional(), assistant: z .object({ diff --git a/services/cloud-agent-next/src/sandbox-recovery.test.ts b/services/cloud-agent-next/src/sandbox-recovery.test.ts index 1fd319c366..49ba041b80 100644 --- a/services/cloud-agent-next/src/sandbox-recovery.test.ts +++ b/services/cloud-agent-next/src/sandbox-recovery.test.ts @@ -57,16 +57,11 @@ 
describe('sandbox recovery', () => { expect(isSandboxInternalServerError(error)).toBe(true); }); - it('classifies wrapper not-ready errors caused by sandbox startup 500s', () => { - const cause = new Error('Process exited before ready'); - Object.assign(cause, { - name: 'ProcessExitedBeforeReadyError', - httpStatus: 500, - }); - - const error = new WrapperNotReadyError('Wrapper did not become ready', { cause }); + it('does not attach sandbox startup failures to wrapper not-ready errors', () => { + const error = new WrapperNotReadyError('Wrapper did not become ready'); - expect(isSandboxInternalServerError(error)).toBe(true); + expect(error.cause).toBeUndefined(); + expect(isSandboxInternalServerError(error)).toBe(false); }); it('does not classify execution errors by wrapper message alone', () => { diff --git a/services/cloud-agent-next/src/session/agent-runtime.test.ts b/services/cloud-agent-next/src/session/agent-runtime.test.ts index bead19c619..fb9c9917f3 100644 --- a/services/cloud-agent-next/src/session/agent-runtime.test.ts +++ b/services/cloud-agent-next/src/session/agent-runtime.test.ts @@ -7,7 +7,7 @@ import type { WorkspaceReady, } from '../execution/types.js'; import { WrapperFinalizingError } from '../kilo/wrapper-client.js'; -import { createAgentRuntime } from './agent-runtime.js'; +import { createAgentRuntime, WRAPPER_NO_OUTPUT_TIMEOUT_MS } from './agent-runtime.js'; import { getWrapperLease, getWrapperRuntimeState } from './wrapper-runtime-state.js'; import type { SessionMetadata } from '../persistence/session-metadata.js'; @@ -186,8 +186,11 @@ describe('AgentRuntime', () => { ]); expect(wrapperState.wrapperRunId).toBe(result.wrapperRunId); expect(wrapperState.wrapperIdleDeadlineAt).toBeUndefined(); - expect(wrapperState.noOutputDeadlineAt).toEqual(expect.any(Number)); - expect(wrapperState.nextPingAt).toEqual(expect.any(Number)); + const [{ acceptedAt }] = accepted; + expect(acceptedAt).toEqual(expect.any(Number)); + if (acceptedAt === undefined) 
throw new Error('Expected accepted delivery timestamp'); + expect(wrapperState.noOutputDeadlineAt).toBe(acceptedAt + WRAPPER_NO_OUTPUT_TIMEOUT_MS); + expect(wrapperState.nextPingAt).toBe(acceptedAt + 60_000); }); it('fences the dispatching message until acceptance bookkeeping completes', async () => { diff --git a/services/cloud-agent-next/src/session/agent-runtime.ts b/services/cloud-agent-next/src/session/agent-runtime.ts index 6279362085..6cbd3aeff0 100644 --- a/services/cloud-agent-next/src/session/agent-runtime.ts +++ b/services/cloud-agent-next/src/session/agent-runtime.ts @@ -38,7 +38,8 @@ import { type WrapperLease, } from './wrapper-runtime-state.js'; -export const WRAPPER_NO_OUTPUT_TIMEOUT_MS = 5 * 60 * 1000; +// Allow the five-minute provider timeout to finish event delivery and stable-idle handling before our watchdog fires. +export const WRAPPER_NO_OUTPUT_TIMEOUT_MS = 330_000; export const WRAPPER_PING_INTERVAL_MS = 60_000; export const WRAPPER_STARTUP_TIMEOUT_MS = 10 * 60 * 1000; diff --git a/services/cloud-agent-next/src/session/message-result.test.ts b/services/cloud-agent-next/src/session/message-result.test.ts index 4a0bf643f7..d0daba7bc1 100644 --- a/services/cloud-agent-next/src/session/message-result.test.ts +++ b/services/cloud-agent-next/src/session/message-result.test.ts @@ -183,6 +183,32 @@ describe('resolveSessionMessageResult', () => { }); }); + it('projects a safe generic message instead of raw non-workspace failure text', async () => { + const storage = createFakeStorage(); + await putSessionMessageState( + storage, + lifecycleState(messageA, { + status: 'failed', + terminalAt: 4, + failureStage: 'agent_activity', + failureCode: 'assistant_error', + error: 'provider body token=secret', + failureReason: 'private reason', + }) + ); + + const resolved = await resolveSessionMessageResult(storage, messageA); + + expect(resolved).toMatchObject({ + type: 'found', + result: { + failure: { code: 'assistant_error', message: 'Assistant request 
failed' }, + }, + }); + expect(JSON.stringify(resolved)).not.toContain('secret'); + expect(JSON.stringify(resolved)).not.toContain('private reason'); + }); + it('projects only safe structured terminal fields without assistant lookup for failures', async () => { const storage = createFakeStorage(); await putSessionMessageState( @@ -192,9 +218,11 @@ describe('resolveSessionMessageResult', () => { terminalAt: 4, completionSource: 'wrapper_failure', failureStage: 'agent_activity', - failureCode: 'assistant_error', + failureCode: 'workspace_setup_failed', + failureSubtype: 'git_clone_timeout', + safeFailureMessage: 'Clone exceeded the safe deadline', attempts: 2, - error: 'private raw error', + error: 'private raw error token=secret', failureReason: 'private reason', terminalEffects: { event: 'pending', @@ -212,7 +240,13 @@ describe('resolveSessionMessageResult', () => { queuedAt: 1, terminalAt: 4, completionSource: 'wrapper_failure', - failure: { stage: 'agent_activity', code: 'assistant_error', attempts: 2 }, + failure: { + stage: 'agent_activity', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 2, + message: 'Repository clone timed out: Clone exceeded the safe deadline', + }, }, }); }); diff --git a/services/cloud-agent-next/src/session/message-result.ts b/services/cloud-agent-next/src/session/message-result.ts index de2c82d7e8..a4c0c9be79 100644 --- a/services/cloud-agent-next/src/session/message-result.ts +++ b/services/cloud-agent-next/src/session/message-result.ts @@ -6,11 +6,10 @@ import { import { lookupSessionMessageState, type SessionMessageCompletionSource, - type SessionMessageFailureCode, - type SessionMessageFailureStage, type SessionMessageState, type SessionMessageStorage, } from './session-message-state.js'; +import { projectSafeFailure, type SafeFailureProjection } from './safe-failure-projection.js'; export type SafeMessageResult = { messageId: string; @@ -20,11 +19,7 @@ export type SafeMessageResult = { acceptedAt?: 
number; terminalAt?: number; completionSource?: SessionMessageCompletionSource; - failure?: { - stage?: SessionMessageFailureStage; - code?: SessionMessageFailureCode; - attempts?: number; - }; + failure?: SafeFailureProjection; gateResult?: 'pass' | 'fail'; }; @@ -50,23 +45,8 @@ type ResolvedSessionMessageResult = type MessageResultStorage = SessionMessageStorage & SessionQueueStorage; -function projectFailure(state: SessionMessageState): SafeMessageResult['failure'] { - if ( - state.failureStage === undefined && - state.failureCode === undefined && - state.attempts === undefined - ) { - return undefined; - } - return { - stage: state.failureStage, - code: state.failureCode, - attempts: state.attempts, - }; -} - function projectLifecycleState(state: SessionMessageState): ResolvedSessionMessageResult { - const failure = projectFailure(state); + const failure = projectSafeFailure(state); const assistantLookup: AssistantLookup | undefined = state.status === 'completed' && state.assistantMessageId ? 
{ diff --git a/services/cloud-agent-next/src/session/message-settlement-outbox.test.ts b/services/cloud-agent-next/src/session/message-settlement-outbox.test.ts index 68f545e632..d7bcea7990 100644 --- a/services/cloud-agent-next/src/session/message-settlement-outbox.test.ts +++ b/services/cloud-agent-next/src/session/message-settlement-outbox.test.ts @@ -10,6 +10,7 @@ import type { } from '../notifications-binding.js'; import type { SessionMetadata } from '../persistence/session-metadata.js'; import { + buildCloudMessageFailedPayload, createMessageSettlementOutbox, type MessageSettlementOutboxStorage, } from './message-settlement-outbox.js'; @@ -339,6 +340,108 @@ describe('MessageSettlementOutbox', () => { }); }); + it('omits raw terminal text from live events and reconnect callback repair', async () => { + const rawError = 'provider response Bearer secret-provider-token'; + const rawReason = 'internal failure reason secret-reason'; + const harness = createHarness(); + await putSessionMessageState(harness.storage, { + ...acceptedMessageState(firstMessageId, { url: 'https://example.com/safe-failure' }), + status: 'failed', + terminalAt: 10, + completionSource: 'wrapper_failure', + failureStage: 'agent_activity', + failureCode: 'assistant_error', + safeFailureMessage: 'Assistant request timed out', + error: rawError, + failureReason: rawReason, + terminalEffects: { + event: 'pending', + callback: { disposition: 'pending', allowWithoutObservedIdle: true }, + push: { disposition: 'not-required' }, + }, + }); + + await harness.outbox.repairTerminalEffects(); + + const eventPayload = JSON.parse(harness.events[0].payload); + expect(eventPayload).toMatchObject({ + reason: 'The message failed', + error: 'Assistant request timed out', + failure: { + code: 'assistant_error', + message: 'Assistant request timed out', + }, + }); + expect(harness.callbackJobs[0].payload).toMatchObject({ + errorMessage: 'Assistant request timed out', + failure: { + code: 'assistant_error', + 
message: 'Assistant request timed out', + }, + }); + expect(JSON.stringify({ eventPayload, callback: harness.callbackJobs[0] })).not.toContain( + 'secret-' + ); + }); + + it('preserves allowlisted legacy reasons and replaces arbitrary reasons with status text', () => { + const state = { + ...acceptedMessageState(firstMessageId), + status: 'failed' as const, + failureReason: 'wrapper_protocol_error', + failureCode: 'wrapper_error_after_activity' as const, + }; + + expect(buildCloudMessageFailedPayload(state)).toMatchObject({ + reason: 'wrapper_protocol_error', + error: 'Agent wrapper failed while processing the message', + failure: { code: 'wrapper_error_after_activity' }, + }); + expect( + buildCloudMessageFailedPayload({ + ...state, + failureReason: 'private reason token=secret', + }) + ).toMatchObject({ + reason: 'The message failed', + error: 'Agent wrapper failed while processing the message', + failure: { code: 'wrapper_error_after_activity' }, + }); + }); + + it('uses only safe projected failure text in failed pushes', async () => { + const harness = createHarness({ metadata: pushMetadata }); + await putSessionMessageState(harness.storage, { + ...acceptedMessageState(firstMessageId), + status: 'failed', + terminalAt: 10, + completionSource: 'wrapper_failure', + failureReason: 'assistant_error', + failureStage: 'agent_activity', + failureCode: 'assistant_error', + safeFailureMessage: 'Assistant request timed out', + error: 'provider response Bearer push-secret', + terminalEffects: { + event: 'accounted', + callback: { disposition: 'not-required' }, + push: { disposition: 'pending' }, + }, + }); + + await harness.outbox.repairTerminalEffects(); + + expect(harness.pushJobs).toEqual([ + { + userId: 'user_outbox', + cliSessionId: 'ses_outbox', + executionId: firstMessageId, + status: 'failed', + body: 'Failed: Assistant request timed out', + }, + ]); + expect(JSON.stringify(harness.pushJobs)).not.toContain('push-secret'); + }); + it('repairs a persisted terminal 
state after terminal event insertion fails once', async () => { const harness = createHarness({ failTerminalEventOnce: true }); await putSessionMessageState(harness.storage, acceptedMessageState(firstMessageId)); @@ -489,9 +592,14 @@ describe('MessageSettlementOutbox', () => { await harness.outbox.terminalizeSessionMessageOnce(secondMessageId, { kind: 'failed', - reason: 'assistant_error', - error: 'provider failed', + reason: 'workspace setup failed internally', + error: 'raw clone output token=secret', completionSource: 'assistant_message_event', + failureStage: 'pre_dispatch', + failureCode: 'workspace_setup_failed', + failureSubtype: 'git_clone_timeout', + safeFailureMessage: 'Clone exceeded the safe deadline', + attempts: 2, }); expect(harness.callbackJobs).toHaveLength(1); @@ -501,8 +609,16 @@ describe('MessageSettlementOutbox', () => { messageId: secondMessageId, idempotencyKey: secondMessageId, status: 'failed', - errorMessage: 'provider failed', + errorMessage: 'Repository clone timed out: Clone exceeded the safe deadline', + failure: { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 2, + message: 'Repository clone timed out: Clone exceeded the safe deadline', + }, }); + expect(JSON.stringify(harness.callbackJobs[0])).not.toContain('token=secret'); }); it('finalizes a terminal wrapper-run callback while the next run remains pending', async () => { diff --git a/services/cloud-agent-next/src/session/message-settlement-outbox.ts b/services/cloud-agent-next/src/session/message-settlement-outbox.ts index 8a9ade5224..3dded0c3b8 100644 --- a/services/cloud-agent-next/src/session/message-settlement-outbox.ts +++ b/services/cloud-agent-next/src/session/message-settlement-outbox.ts @@ -18,12 +18,24 @@ import { type SessionMessageStorage, type TerminalizeParams, } from './session-message-state.js'; +import { projectSafeFailure, type SafeFailureProjection } from './safe-failure-projection.js'; import type { 
AssistantMessagePart, LatestAssistantMessage } from './types.js'; const CURRENT_IDLE_BATCH_CALLBACK_KEY = 'idle_batch_callback_current'; const IDLE_BATCH_CALLBACK_PREFIX = 'idle_batch_callback:'; const CALLBACK_ENQUEUE_RETRY_DELAY_MS = 30_000; const PUSH_DISPATCH_RETRY_DELAY_MS = 30_000; +const LEGACY_FAILURE_REASONS = new Set([ + 'exhausted', + 'wrapper_failure', + 'wrapper_disconnected', + 'wrapper_protocol_error', + 'assistant_error', + 'missing_assistant_reply', + 'startup-failed', + 'wrapper_error', + 'interrupted', +]); type IdleBatchCallbackState = { batchId: string; @@ -48,6 +60,45 @@ type PersistedMessageEvent = { entityId: string; }; +export type CloudMessageFailedPayload = { + messageId: string; + status: 'failed' | 'interrupted'; + delivery: 'sent' | 'queued'; + accepted: boolean; + completionSource?: string; + reason?: string; + attempts?: number; + error?: string; + failure?: SafeFailureProjection; +}; + +export function buildCloudMessageFailedPayload( + state: SessionMessageState +): CloudMessageFailedPayload { + const wasAccepted = state.status === 'accepted' || state.acceptedAt !== undefined; + if (state.status !== 'failed' && state.status !== 'interrupted') { + throw new Error(`Cannot build failure payload for ${state.status} message`); + } + const failure = projectSafeFailure(state); + const fallbackMessage = + state.status === 'failed' ? 'The message failed' : 'The message was interrupted'; + const reason = + state.failureReason && LEGACY_FAILURE_REASONS.has(state.failureReason) + ? state.failureReason + : fallbackMessage; + return { + messageId: state.messageId, + status: state.status, + delivery: wasAccepted ? 'sent' : 'queued', + accepted: wasAccepted, + completionSource: state.completionSource, + reason, + attempts: state.attempts, + error: failure?.message ?? 
fallbackMessage, + failure, + }; +} + export type MessageSettlementOutboxStorage = SessionQueueStorage & SessionMessageStorage; export type FinalizeIdleBatchCallbackOptions = { @@ -320,28 +371,9 @@ export function createMessageSettlementOutbox( }); } - async function emitSessionMessageFailed( - state: SessionMessageState, - extra?: { error?: string } - ): Promise { + async function emitSessionMessageFailed(state: SessionMessageState): Promise { const sessionId = await requireSessionId(); - const wasAccepted = state.status === 'accepted' || state.acceptedAt !== undefined; - const payload: Record = { - messageId: state.messageId, - status: state.status, - delivery: wasAccepted ? 'sent' : 'queued', - accepted: wasAccepted, - completionSource: state.completionSource, - reason: state.failureReason, - }; - if (state.attempts !== undefined) { - payload.attempts = state.attempts; - } - if (extra?.error !== undefined) { - payload.error = extra.error; - } else if (state.error) { - payload.error = state.error; - } + const payload = buildCloudMessageFailedPayload(state); ensureTerminalMessageEvent({ entityId: `terminal-message/${state.messageId}`, sessionId, @@ -403,13 +435,20 @@ export function createMessageSettlementOutbox( } } + const failure = projectSafeFailure(state); + const legacyErrorMessage = + status === 'completed' + ? undefined + : (failure?.message ?? + (status === 'failed' ? 'The message failed' : 'The message was interrupted')); const payload: CallbackJob['payload'] = { sessionId, cloudAgentSessionId: sessionId, executionId: state.messageId, messageId: state.messageId, status, - errorMessage: state.error, + errorMessage: legacyErrorMessage, + failure, lastSeenBranch: metadata?.repository?.upstreamBranch, kiloSessionId: metadata?.auth.kiloSessionId, gateResult: state.gateResult, @@ -510,12 +549,17 @@ export function createMessageSettlementOutbox( } try { + const failureMessage = + state.status === 'completed' + ? 
undefined + : (projectSafeFailure(state)?.message ?? + (state.status === 'failed' ? 'The message failed' : 'The message was interrupted')); const result = await sendPushNotification({ userId: metadata.identity.userId, cliSessionId, executionId: state.messageId, status: state.status, - body: buildCloudAgentPushBody(state.status, lastAssistantMessageText, state.error), + body: buildCloudAgentPushBody(state.status, lastAssistantMessageText, failureMessage), }); if (result.dispatched) { return 'accounted'; @@ -666,7 +710,7 @@ export function createMessageSettlementOutbox( if (state.status === 'completed') { await emitSessionMessageCompleted(state, { gateResult: state.gateResult }); } else if (state.status === 'failed' || state.status === 'interrupted') { - await emitSessionMessageFailed(state, { error: state.error }); + await emitSessionMessageFailed(state); } await putSessionMessageState(storage, { ...state, diff --git a/services/cloud-agent-next/src/session/pending-messages.test.ts b/services/cloud-agent-next/src/session/pending-messages.test.ts index 784808c098..3ba2ffe34b 100644 --- a/services/cloud-agent-next/src/session/pending-messages.test.ts +++ b/services/cloud-agent-next/src/session/pending-messages.test.ts @@ -467,32 +467,76 @@ describe('recordPendingFlushFailure', () => { expect(exhausted.message.lastFlushFailureCode).toBe('BAD_REQUEST'); }); - it('preserves a structured retryable delivery cause through unknown-code exhaustion', async () => { + it.each([ + { label: 'missing', code: undefined }, + { label: 'UNKNOWN', code: 'UNKNOWN' as const }, + { label: 'INTERNAL', code: 'INTERNAL' as const }, + ])( + 'preserves a structured retryable delivery cause through $label-code exhaustion', + async ({ code }) => { + const storage = createMemoryStorage(); + let message = makeMessage(); + await storePendingSessionMessage(storage, message); + + const retry = await recordPendingFlushFailure( + storage, + message, + 'workspace temporarily failed', + 100_000, + { + 
policy: 'warm-followup', + code: 'WORKSPACE_SETUP_FAILED', + subtype: 'git_clone_timeout', + safeFailureMessage: 'Repository clone timed out', + } + ); + message = retry.message; + const exhausted = await recordPendingFlushFailure( + storage, + message, + 'retry transport failed without a specific cause', + 102_000, + { policy: 'warm-followup', code } + ); + + expect(exhausted.exhausted).toBe(true); + expect(exhausted.message).toMatchObject({ + lastFlushFailureCode: 'WORKSPACE_SETUP_FAILED', + lastFlushError: 'workspace temporarily failed', + lastFlushFailureSubtype: 'git_clone_timeout', + safeFailureMessage: 'Repository clone timed out', + }); + } + ); + + it('replaces the failure code and error together for a newer specific cause', async () => { const storage = createMemoryStorage(); - let message = makeMessage(); + const message = makeMessage({ + flushAttempts: 1, + lastFlushFailureCode: 'WORKSPACE_SETUP_FAILED', + lastFlushError: 'workspace temporarily failed', + lastFlushFailureSubtype: 'git_clone_timeout', + safeFailureMessage: 'Repository clone timed out', + }); await storePendingSessionMessage(storage, message); - const retry = await recordPendingFlushFailure( + const result = await recordPendingFlushFailure( storage, message, - 'workspace temporarily failed', - 100_000, + 'wrapper failed to start', + 102_000, { policy: 'warm-followup', - code: 'WORKSPACE_SETUP_FAILED', + code: 'WRAPPER_START_FAILED', } ); - message = retry.message; - const exhausted = await recordPendingFlushFailure( - storage, - message, - 'retry transport failed without code', - 102_000, - { policy: 'warm-followup' } - ); - expect(exhausted.exhausted).toBe(true); - expect(exhausted.message.lastFlushFailureCode).toBe('WORKSPACE_SETUP_FAILED'); + expect(result.message).toMatchObject({ + lastFlushFailureCode: 'WRAPPER_START_FAILED', + lastFlushError: 'wrapper failed to start', + }); + expect(result.message.lastFlushFailureSubtype).toBeUndefined(); + 
expect(result.message.safeFailureMessage).toBeUndefined(); }); it('keeps the message in storage when not exhausted', async () => { diff --git a/services/cloud-agent-next/src/session/pending-messages.ts b/services/cloud-agent-next/src/session/pending-messages.ts index f39b35333c..0026f53f23 100644 --- a/services/cloud-agent-next/src/session/pending-messages.ts +++ b/services/cloud-agent-next/src/session/pending-messages.ts @@ -9,6 +9,11 @@ import { logger } from '../logger.js'; import { AttachmentsSchema, CallbackTargetSchema } from '../persistence/schemas.js'; import { Limits } from '../schema.js'; import { MESSAGE_ID_FORMAT_DESCRIPTION, MESSAGE_ID_PATTERN } from './message-id.js'; +import { + isWorkspaceFailureSubtype, + WRAPPER_READY_ERROR_DETAIL_MAX_LENGTH, + type WorkspaceFailureSubtype, +} from '../shared/wrapper-bootstrap.js'; export const PENDING_SESSION_MESSAGE_LIMIT = 10; export const PENDING_FLUSH_RETRY_BASE_DELAY_MS = 2_000; @@ -78,12 +83,16 @@ const PendingFlushFailureCodeSchema = z.enum([ 'UNKNOWN', ]); export type PendingFlushFailureCode = z.infer; +const WorkspaceFailureSubtypeSchema = z.custom(isWorkspaceFailureSubtype); +const SafeFailureMessageSchema = z.string().max(WRAPPER_READY_ERROR_DETAIL_MAX_LENGTH); const PendingDeliverySchema = z.object({ queuedAt: z.number(), flushAttempts: z.number().int().min(0).optional(), nextFlushAttemptAt: z.number().optional(), lastFlushError: z.string().optional(), lastFlushFailureCode: PendingFlushFailureCodeSchema.optional(), + lastFlushFailureSubtype: WorkspaceFailureSubtypeSchema.optional(), + safeFailureMessage: SafeFailureMessageSchema.optional(), disposition: PendingDeliveryDispositionSchema.optional(), }); export const PendingSessionMessageV2Schema = z.object({ @@ -130,6 +139,8 @@ const LegacyPendingSessionMessageSchema = z nextFlushAttemptAt: z.number().optional(), lastFlushError: z.string().optional(), lastFlushFailureCode: PendingFlushFailureCodeSchema.optional(), + lastFlushFailureSubtype: 
WorkspaceFailureSubtypeSchema.optional(), + safeFailureMessage: SafeFailureMessageSchema.optional(), deliveryDisposition: PendingDeliveryDispositionSchema.optional(), }) .passthrough(); @@ -149,6 +160,8 @@ export type PendingSessionMessage = { nextFlushAttemptAt?: number; lastFlushError?: string; lastFlushFailureCode?: PendingFlushFailureCode; + lastFlushFailureSubtype?: WorkspaceFailureSubtype; + safeFailureMessage?: string; deliveryDisposition?: 'terminalization-pending'; clientRequestId?: string; legacyExecutionId?: string; @@ -246,6 +259,8 @@ function decodeLegacyPendingMessage( nextFlushAttemptAt: message.nextFlushAttemptAt, lastFlushError: message.lastFlushError, lastFlushFailureCode: message.lastFlushFailureCode, + lastFlushFailureSubtype: message.lastFlushFailureSubtype, + safeFailureMessage: message.safeFailureMessage, deliveryDisposition: message.deliveryDisposition, clientRequestId: message.clientRequestId, legacyExecutionId: typeof message.executionId === 'string' ? message.executionId : undefined, @@ -275,6 +290,8 @@ function decodePendingMessage( nextFlushAttemptAt: message.delivery.nextFlushAttemptAt, lastFlushError: message.delivery.lastFlushError, lastFlushFailureCode: message.delivery.lastFlushFailureCode, + lastFlushFailureSubtype: message.delivery.lastFlushFailureSubtype, + safeFailureMessage: message.delivery.safeFailureMessage, deliveryDisposition: message.delivery.disposition, }; } @@ -312,6 +329,8 @@ export function createPendingSessionMessage(params: { nextFlushAttemptAt?: number; lastFlushError?: string; lastFlushFailureCode?: PendingFlushFailureCode; + lastFlushFailureSubtype?: WorkspaceFailureSubtype; + safeFailureMessage?: string; deliveryDisposition?: 'terminalization-pending'; }): PendingSessionMessage { const legacy = LegacyPendingSessionMessageSchema.parse(params); @@ -374,6 +393,8 @@ function serializePendingSessionMessage( nextFlushAttemptAt: normalized.nextFlushAttemptAt, lastFlushError: normalized.lastFlushError, 
lastFlushFailureCode: normalized.lastFlushFailureCode, + lastFlushFailureSubtype: normalized.lastFlushFailureSubtype, + safeFailureMessage: normalized.safeFailureMessage, disposition: normalized.deliveryDisposition, }, callbackSnapshot: normalized.callbackSnapshot, @@ -390,6 +411,8 @@ function serializePendingSessionMessage( nextFlushAttemptAt: normalized.nextFlushAttemptAt, lastFlushError: normalized.lastFlushError, lastFlushFailureCode: normalized.lastFlushFailureCode, + lastFlushFailureSubtype: normalized.lastFlushFailureSubtype, + safeFailureMessage: normalized.safeFailureMessage, deliveryDisposition: normalized.deliveryDisposition, }); } @@ -475,6 +498,9 @@ export async function recordPendingFlushFailure( | 'PENDING_QUEUE_FULL' | 'MODEL_MISSING' | 'UNKNOWN'; + subtype?: WorkspaceFailureSubtype; + safeFailureMessage?: string; + retryable?: boolean; } ): Promise { if (options.code === undefined || options.code === 'UNKNOWN') { @@ -486,10 +512,25 @@ export async function recordPendingFlushFailure( }) .warn('Pending flush failure with unknown error code; treating as retryable'); } - const flushFailureCode = - options.code === undefined || options.code === 'INTERNAL' - ? (message.lastFlushFailureCode ?? options.code ?? 'UNKNOWN') - : options.code; + const preservesExistingFailure = + message.lastFlushFailureCode !== undefined && + (options.code === undefined || + options.code === 'INTERNAL' || + (options.code === 'UNKNOWN' && message.lastFlushFailureCode !== 'INTERNAL')); + const flushFailureCode = preservesExistingFailure + ? message.lastFlushFailureCode + : (options.code ?? 'UNKNOWN'); + const flushError = preservesExistingFailure ? (message.lastFlushError ?? error) : error; + const failureSubtype = preservesExistingFailure + ? message.lastFlushFailureSubtype + : options.code === 'WORKSPACE_SETUP_FAILED' + ? options.subtype + : undefined; + const safeFailureMessage = preservesExistingFailure + ? 
message.safeFailureMessage + : options.code === 'WORKSPACE_SETUP_FAILED' + ? options.safeFailureMessage + : undefined; const attempts = flushFailureCode === 'SANDBOX_CONNECT_FAILED' && message.lastFlushFailureCode !== 'SANDBOX_CONNECT_FAILED' @@ -501,7 +542,7 @@ export async function recordPendingFlushFailure( : options.policy === 'cold-init' ? COLD_INIT_RETRY_DELAYS_MS : WARM_FOLLOWUP_RETRY_DELAYS_MS; - const retryable = isRetryableFlushCode(flushFailureCode); + const retryable = options.retryable ?? isRetryableFlushCode(flushFailureCode); const exhausted = !retryable || attempts > retryDelays.length; const retryDelay = retryDelays[attempts - 1]; const nextFlushAttemptAt = exhausted || retryDelay === undefined ? undefined : now + retryDelay; @@ -509,8 +550,10 @@ export async function recordPendingFlushFailure( ...message, flushAttempts: attempts, nextFlushAttemptAt, - lastFlushError: error, + lastFlushError: flushError, lastFlushFailureCode: flushFailureCode, + lastFlushFailureSubtype: failureSubtype, + safeFailureMessage, deliveryDisposition: exhausted ? 
'terminalization-pending' : undefined, }; await replaceStoredPendingSessionMessage(storage, message, updated); diff --git a/services/cloud-agent-next/src/session/safe-failure-projection.test.ts b/services/cloud-agent-next/src/session/safe-failure-projection.test.ts new file mode 100644 index 0000000000..94d0633fe4 --- /dev/null +++ b/services/cloud-agent-next/src/session/safe-failure-projection.test.ts @@ -0,0 +1,128 @@ +import { CLOUD_AGENT_FAILURE_CODES } from '@kilocode/worker-utils/cloud-agent-failure'; +import { describe, expect, it } from 'vitest'; +import { + SAFE_FAILURE_MESSAGE_MAX_LENGTH, + SafeFailureProjectionSchema, + classifyAssistantFailureMessage, + genericFailureMessage, + projectSafeFailure, +} from './safe-failure-projection.js'; + +describe('projectSafeFailure', () => { + it('projects structured fields while omitting raw failure text', () => { + const durableState = { + failureStage: 'agent_activity' as const, + failureCode: 'assistant_error' as const, + attempts: 2, + error: 'Bearer secret-token', + failureReason: 'provider body with secret-token', + }; + + expect(projectSafeFailure(durableState)).toStrictEqual({ + stage: 'agent_activity', + code: 'assistant_error', + attempts: 2, + message: 'Assistant request failed', + }); + }); + + it.each(CLOUD_AGENT_FAILURE_CODES)('always derives a bounded message for %s', failureCode => { + const failure = projectSafeFailure({ failureCode }); + + expect(failure?.message).toBe(genericFailureMessage(failureCode)); + expect(failure?.message?.length).toBeLessThanOrEqual(SAFE_FAILURE_MESSAGE_MAX_LENGTH); + expect(SafeFailureProjectionSchema.parse(failure)).toEqual(failure); + }); + + it.each([ + ['git_clone_timeout', 'Repository clone timed out'], + ['git_authentication_failed', 'Repository authentication failed'], + ['setup_command_failed', 'Setup command failed'], + ['workspace_setup_unknown', 'Workspace setup failed'], + ] as const)('derives an allowlisted message for %s', (failureSubtype, message) => { + 
expect(projectSafeFailure({ failureCode: 'workspace_setup_failed', failureSubtype })).toEqual({ + code: 'workspace_setup_failed', + subtype: failureSubtype, + message, + }); + }); + + it('includes bounded safe detail without duplicating the generic message', () => { + expect( + projectSafeFailure({ + failureCode: 'workspace_setup_failed', + failureSubtype: 'setup_command_failed', + safeFailureMessage: 'Setup command failed (exit code 2)', + }) + ).toEqual({ + code: 'workspace_setup_failed', + subtype: 'setup_command_failed', + message: 'Setup command failed (exit code 2)', + }); + }); + + it('combines distinct safe detail with the generic message within the public bound', () => { + const failure = projectSafeFailure({ + failureCode: 'workspace_setup_failed', + failureSubtype: 'git_clone_timeout', + safeFailureMessage: `Safe diagnostic ${'x'.repeat(SAFE_FAILURE_MESSAGE_MAX_LENGTH)}`, + }); + + expect(failure?.message).toMatch(/^Repository clone timed out: Safe diagnostic /); + expect(failure?.message?.length).toBeLessThanOrEqual(SAFE_FAILURE_MESSAGE_MAX_LENGTH); + expect(SafeFailureProjectionSchema.parse(failure)).toEqual(failure); + }); + + it('uses an explicitly supplied bounded safe message for non-workspace failures', () => { + expect( + projectSafeFailure({ + failureCode: 'assistant_error', + safeFailureMessage: `Assistant request timed out${'x'.repeat(SAFE_FAILURE_MESSAGE_MAX_LENGTH)}`, + }) + ).toEqual({ + code: 'assistant_error', + message: `Assistant request timed out${'x'.repeat( + SAFE_FAILURE_MESSAGE_MAX_LENGTH - 'Assistant request timed out'.length + )}`, + }); + }); + + it('rejects invalid subtype, attempts, message bounds, and unknown fields', () => { + expect(() => SafeFailureProjectionSchema.parse({ subtype: 'not_allowlisted' })).toThrow(); + expect(() => SafeFailureProjectionSchema.parse({ attempts: -1 })).toThrow(); + expect(() => + SafeFailureProjectionSchema.parse({ + message: 'x'.repeat(SAFE_FAILURE_MESSAGE_MAX_LENGTH + 1), + }) + ).toThrow(); 
+ expect(() => SafeFailureProjectionSchema.parse({ error: 'raw secret' })).toThrow(); + }); +}); + +describe('classifyAssistantFailureMessage', () => { + it.each([ + ['Payment Required: token=secret', 'Assistant request failed: insufficient credits'], + ['usage_limit_exceeded for account secret', 'Assistant request failed: insufficient credits'], + ['Model not found: private/provider-model', 'Assistant request failed: model not found'], + ['429 Too Many Requests: provider body', 'Assistant request was rate limited'], + ['upstream request timed out: private body', 'Assistant request timed out'], + ['403 Forbidden: private policy', 'Assistant request was not authorized'], + ['400 invalid request: prompt secret', 'Assistant request was invalid'], + ['503 Service Unavailable: internal host', 'Assistant service is unavailable'], + ['provider exploded with token=secret', 'Assistant request failed'], + ])('maps raw assistant text to allowlisted wording', (source, expected) => { + const result = classifyAssistantFailureMessage(source); + + expect(result).toBe(expected); + expect(result).not.toContain('secret'); + expect(result).not.toContain('private'); + }); + + it('classifies nested provider errors without returning their source text', () => { + expect( + classifyAssistantFailureMessage({ + data: { message: 'deadline exceeded: Bearer private-provider-token' }, + }) + ).toBe('Assistant request timed out'); + }); +}); diff --git a/services/cloud-agent-next/src/session/safe-failure-projection.ts b/services/cloud-agent-next/src/session/safe-failure-projection.ts new file mode 100644 index 0000000000..9ca3851535 --- /dev/null +++ b/services/cloud-agent-next/src/session/safe-failure-projection.ts @@ -0,0 +1,154 @@ +import { + CLOUD_AGENT_SAFE_FAILURE_MESSAGE_MAX_LENGTH, + CloudAgentSafeFailureSchema, + type CloudAgentFailureCode, + type CloudAgentSafeFailure, + type WorkspaceFailureSubtype, +} from '@kilocode/worker-utils/cloud-agent-failure'; +import type { + 
SessionMessageFailureCode, + SessionMessageFailureStage, +} from './session-message-state.js'; + +export const SAFE_FAILURE_MESSAGE_MAX_LENGTH = CLOUD_AGENT_SAFE_FAILURE_MESSAGE_MAX_LENGTH; +export const SafeFailureProjectionSchema = CloudAgentSafeFailureSchema; +export type SafeFailureProjection = CloudAgentSafeFailure; + +export type SafeFailureProjectionSource = { + failureStage?: SessionMessageFailureStage; + failureCode?: SessionMessageFailureCode; + failureSubtype?: WorkspaceFailureSubtype; + attempts?: number; + safeFailureMessage?: string; +}; + +const GENERIC_FAILURE_MESSAGES = { + sandbox_connect_failed: 'Could not connect to the sandbox', + workspace_setup_failed: 'Workspace setup failed', + kilo_server_failed: 'Kilo server failed to start', + wrapper_start_failed: 'Agent wrapper failed to start', + invalid_delivery_request: 'The message could not be delivered', + session_metadata_missing: 'Session metadata is unavailable', + model_missing: 'No model was selected', + delivery_failure_unknown: 'The message could not be delivered', + wrapper_disconnected: 'Agent wrapper disconnected', + wrapper_no_output: 'Agent wrapper produced no output', + wrapper_ping_timeout: 'Agent wrapper stopped responding', + wrapper_error_before_activity: 'Agent wrapper failed before processing the message', + assistant_error: 'Assistant request failed', + wrapper_error_after_activity: 'Agent wrapper failed while processing the message', + missing_assistant_reply: 'No assistant reply was produced', + user_interrupt: 'The message was interrupted by the user', + container_shutdown: 'The agent container shut down', + system_interrupt: 'The message was interrupted', + unclassified: 'The message failed', +} as const satisfies Record; + +const WORKSPACE_FAILURE_MESSAGES = { + git_clone_timeout: 'Repository clone timed out', + git_checkout_timeout: 'Repository checkout timed out', + git_authentication_failed: 'Repository authentication failed', + git_network_failed: 'Repository network 
request failed', + git_pack_corrupt: 'Repository data is corrupt', + git_checkout_conflict: 'Repository checkout conflict', + git_branch_missing: 'Requested repository branch was not found', + sandbox_storage_full: 'Workspace setup failed: sandbox storage full', + kilo_import_timeout: 'Session import timed out', + kilo_import_failed: 'Session import failed', + setup_command_timeout: 'Setup command timed out', + setup_command_failed: 'Setup command failed', + workspace_setup_unknown: 'Workspace setup failed', +} as const satisfies Record; + +export function genericFailureMessage(code: CloudAgentFailureCode): string { + return GENERIC_FAILURE_MESSAGES[code]; +} + +export function workspaceFailureMessage(subtype: WorkspaceFailureSubtype): string { + return WORKSPACE_FAILURE_MESSAGES[subtype]; +} + +export function classifyAssistantFailureMessage(source: unknown): string { + const message = extractErrorMessage(source).toLocaleLowerCase(); + if ( + /\b(payment required|insufficient (?:credits?|balance|funds)|usage[_ -]?limit[_ -]?exceeded)\b/.test( + message + ) + ) { + return 'Assistant request failed: insufficient credits'; + } + if (/\b(model (?:was )?not found|unknown model|invalid model)\b/.test(message)) { + return 'Assistant request failed: model not found'; + } + if (/\b(rate limit|rate_limit|too many requests|429)\b/.test(message)) { + return 'Assistant request was rate limited'; + } + if (/\b(timed? 
out|timeout|deadline exceeded)\b/.test(message)) { + return 'Assistant request timed out'; + } + if (/\b(unauthorized|forbidden|authorization|authentication|401|403)\b/.test(message)) { + return 'Assistant request was not authorized'; + } + if (/\b(invalid request|bad request|malformed request|400)\b/.test(message)) { + return 'Assistant request was invalid'; + } + if (/\b(service unavailable|temporarily unavailable|overloaded|502|503|504)\b/.test(message)) { + return 'Assistant service is unavailable'; + } + return GENERIC_FAILURE_MESSAGES.assistant_error; +} + +function extractErrorMessage(source: unknown): string { + if (typeof source === 'string') return source; + if (typeof source !== 'object' || source === null) return ''; + if ('data' in source && typeof source.data === 'object' && source.data !== null) { + if ('message' in source.data && typeof source.data.message === 'string') { + return source.data.message; + } + } + if ('message' in source && typeof source.message === 'string') return source.message; + return ''; +} + +function boundedWorkspaceMessage(subtype: WorkspaceFailureSubtype, safeDetail?: string): string { + const genericMessage = workspaceFailureMessage(subtype); + const detail = safeDetail?.trim(); + if (!detail) return genericMessage; + if (detail.toLocaleLowerCase().includes(genericMessage.toLocaleLowerCase())) { + return detail.slice(0, SAFE_FAILURE_MESSAGE_MAX_LENGTH); + } + const prefix = `${genericMessage}: `; + return `${prefix}${detail.slice(0, SAFE_FAILURE_MESSAGE_MAX_LENGTH - prefix.length)}`; +} + +export function projectSafeFailure( + source: SafeFailureProjectionSource +): SafeFailureProjection | undefined { + const subtype = + source.failureCode === 'workspace_setup_failed' ? source.failureSubtype : undefined; + const suppliedMessage = source.safeFailureMessage + ?.trim() + .slice(0, SAFE_FAILURE_MESSAGE_MAX_LENGTH); + const message = subtype + ? 
boundedWorkspaceMessage(subtype, suppliedMessage) + : suppliedMessage || + (source.failureCode === undefined ? undefined : genericFailureMessage(source.failureCode)); + + if ( + source.failureStage === undefined && + source.failureCode === undefined && + subtype === undefined && + source.attempts === undefined && + message === undefined + ) { + return undefined; + } + + return { + ...(source.failureStage === undefined ? {} : { stage: source.failureStage }), + ...(source.failureCode === undefined ? {} : { code: source.failureCode }), + ...(subtype === undefined ? {} : { subtype }), + ...(source.attempts === undefined ? {} : { attempts: source.attempts }), + ...(message === undefined ? {} : { message }), + }; +} diff --git a/services/cloud-agent-next/src/session/session-message-queue.test.ts b/services/cloud-agent-next/src/session/session-message-queue.test.ts index 005c796326..55b746e71b 100644 --- a/services/cloud-agent-next/src/session/session-message-queue.test.ts +++ b/services/cloud-agent-next/src/session/session-message-queue.test.ts @@ -8,6 +8,7 @@ import type { import type { SessionMetadata } from '../persistence/session-metadata.js'; import { SANDBOX_WORKSPACE_PROBE_TIMEOUT_MESSAGE } from '../sandbox-recovery.js'; import type { SessionId, UserId } from '../types/ids.js'; +import { buildCloudMessageFailedPayload } from './message-settlement-outbox.js'; import { createSessionMessageQueue, flushNextPendingSessionMessage, @@ -255,6 +256,37 @@ describe('recordPendingFlushFailure backoff progression', () => { expect(delays).toEqual([2_000, undefined]); }); + it('honors an explicit non-retryable override while preserving workspace failure details', async () => { + const storage = createMemoryStorage(); + const message = createPendingSessionMessage({ + messageId: 'msg_018f1e2d3c4bBranchMissingA', + role: 'user', + content: 'test', + createdAt: 1, + }); + await storePendingSessionMessage(storage, message); + + const result = await recordPendingFlushFailure(storage, 
message, 'branch missing', 100_000, { + policy: 'cold-init', + code: 'WORKSPACE_SETUP_FAILED', + subtype: 'git_branch_missing', + safeFailureMessage: 'Requested repository branch was not found', + retryable: false, + }); + + expect(result).toMatchObject({ + attempts: 1, + exhausted: true, + nextFlushAttemptAt: undefined, + message: { + lastFlushError: 'branch missing', + lastFlushFailureCode: 'WORKSPACE_SETUP_FAILED', + lastFlushFailureSubtype: 'git_branch_missing', + safeFailureMessage: 'Requested repository branch was not found', + }, + }); + }); + it('does not retry non-retryable failure codes', async () => { const storage = createMemoryStorage(); const message = createPendingSessionMessage({ @@ -1394,10 +1426,48 @@ describe('SessionMessageQueue', () => { }); }); + it('terminalizes a non-retryable branch-missing failure on its first attempt', async () => { + const error = 'Requested repository branch was not found'; + const harness = createQueueHarness({ + deliver: async () => + Promise.reject( + ExecutionError.workspaceSetupFailed(error, undefined, { + subtype: 'git_branch_missing', + safeFailureMessage: error, + retryable: false, + }) + ), + }); + await harness.queue.admitSubmittedMessage({ + userId: 'user_test' as UserId, + turn: { type: 'prompt', id: FIRST_MESSAGE_ID, prompt: 'checkout strict branch' }, + }); + + await harness.queue.drainNextPendingMessage(); + + const [pending] = await listPendingSessionMessages(harness.storage); + expect(pending).toBeUndefined(); + expect(harness.terminalizations).toHaveLength(1); + expect(harness.terminalizations[0]?.params).toMatchObject({ + kind: 'failed', + error, + failureStage: 'pre_dispatch', + failureCode: 'workspace_setup_failed', + failureSubtype: 'git_branch_missing', + safeFailureMessage: error, + }); + }); + it('preserves a thrown workspace setup failure through retry exhaustion', async () => { const error = 'Git clone failed: No space left on device'; const harness = createQueueHarness({ - deliver: async () 
=> Promise.reject(ExecutionError.workspaceSetupFailed(error)), + deliver: async () => + Promise.reject( + ExecutionError.workspaceSetupFailed(error, undefined, { + subtype: 'sandbox_storage_full', + safeFailureMessage: 'Sandbox storage is full', + }) + ), }); await harness.queue.admitSubmittedMessage({ userId: 'user_test' as UserId, @@ -1408,6 +1478,8 @@ describe('SessionMessageQueue', () => { const [pending] = await listPendingSessionMessages(harness.storage); expect(pending?.lastFlushFailureCode).toBe('WORKSPACE_SETUP_FAILED'); expect(pending?.lastFlushError).toBe(error); + expect(pending?.lastFlushFailureSubtype).toBe('sandbox_storage_full'); + expect(pending?.safeFailureMessage).toBe('Sandbox storage is full'); if (pending?.nextFlushAttemptAt === undefined) { throw new Error('Expected workspace setup failure to be retried before terminalization'); } @@ -1421,6 +1493,8 @@ describe('SessionMessageQueue', () => { error, failureStage: 'pre_dispatch', failureCode: 'workspace_setup_failed', + failureSubtype: 'sandbox_storage_full', + safeFailureMessage: 'Sandbox storage is full', }); }); @@ -1452,7 +1526,7 @@ describe('SessionMessageQueue', () => { }); }); - it('clears an earlier typed cause when exhaustion becomes ambiguous', async () => { + it('preserves an earlier workspace setup cause when exhaustion becomes ambiguous', async () => { const deliver = vi .fn<(_plan: MessageDeliveryRequest) => Promise>() .mockRejectedValueOnce( @@ -1479,8 +1553,9 @@ describe('SessionMessageQueue', () => { expect(harness.terminalizations.at(-1)?.params).toMatchObject({ kind: 'failed', + error: 'workspace temporarily unavailable', failureStage: 'pre_dispatch', - failureCode: 'delivery_failure_unknown', + failureCode: 'workspace_setup_failed', }); }); @@ -1512,11 +1587,21 @@ describe('SessionMessageQueue', () => { terminalAt: 30, completionSource: 'delivery_failure', failureReason: 'exhausted', - error: 'delivery exhausted', + error: 'delivery exhausted with token=raw-secret', + 
failureStage: 'pre_dispatch', + failureCode: 'workspace_setup_failed', + failureSubtype: 'git_clone_timeout', + safeFailureMessage: 'Clone exceeded the safe deadline', attempts: 2, }); const snapshots = await harness.queue.snapshotForStreamConnect(); + const persisted = await getSessionMessageState(harness.storage, FIRST_MESSAGE_ID); + expect(persisted).toBeDefined(); + if (!persisted) return; + const livePayload = buildCloudMessageFailedPayload(persisted); + expect(snapshots[0]?.terminalFailure).toMatchObject(livePayload); + expect(JSON.stringify({ snapshots, livePayload })).not.toContain('raw-secret'); expect(snapshots).toEqual([ { @@ -1524,11 +1609,22 @@ describe('SessionMessageQueue', () => { content: 'failed before acceptance', timestamp: 10, terminalFailure: { + messageId: FIRST_MESSAGE_ID, status: 'failed', + delivery: 'queued', + accepted: false, completionSource: 'delivery_failure', reason: 'exhausted', - error: 'delivery exhausted', + error: 'Repository clone timed out: Clone exceeded the safe deadline', attempts: 2, + + failure: { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 2, + message: 'Repository clone timed out: Clone exceeded the safe deadline', + }, timestamp: 30, }, }, diff --git a/services/cloud-agent-next/src/session/session-message-queue.ts b/services/cloud-agent-next/src/session/session-message-queue.ts index 5ddc603a77..6efab7d068 100644 --- a/services/cloud-agent-next/src/session/session-message-queue.ts +++ b/services/cloud-agent-next/src/session/session-message-queue.ts @@ -49,6 +49,7 @@ import { type SessionMessageState, } from './session-message-state.js'; import type { QueuedMessageSnapshot } from '../websocket/stream.js'; +import { buildCloudMessageFailedPayload } from './message-settlement-outbox.js'; export const PENDING_FLUSH_DEBOUNCE_MS = 1_000; @@ -207,7 +208,7 @@ function classifyDeliveryFailure(code: PendingFlushFailureCode | undefined): { } function 
knownPreDispatchExecutionFailureCode(error: unknown): RetryableResultCode | undefined { - if (!isExecutionError(error) || !error.retryable) return undefined; + if (!isExecutionError(error)) return undefined; switch (error.code) { case 'SANDBOX_CONNECT_FAILED': case 'WORKSPACE_SETUP_FAILED': @@ -472,7 +473,13 @@ export async function flushNextPendingSessionMessage(params: { message, error instanceof Error ? error.message : String(error), Date.now(), - { policy, code: code ?? 'UNKNOWN' } + { + policy, + code: code ?? 'UNKNOWN', + subtype: isExecutionError(error) ? error.workspaceFailureSubtype : undefined, + safeFailureMessage: isExecutionError(error) ? error.safeFailureMessage : undefined, + retryable: isExecutionError(error) ? error.retryable : undefined, + } ); return toFailureResult(failure, totalCount); } @@ -954,6 +961,8 @@ export function createSessionMessageQueue( error: flushResult.message.lastFlushError ?? 'Pending message delivery failed', completionSource: 'delivery_failure', ...failure, + failureSubtype: flushResult.message.lastFlushFailureSubtype, + safeFailureMessage: flushResult.message.safeFailureMessage, attempts: flushResult.attempts, }, { allowIdleBatchWithoutObservedIdle: true } @@ -1011,11 +1020,7 @@ export function createSessionMessageQueue( content: state.prompt, timestamp: state.queuedAt ?? state.createdAt, terminalFailure: { - status: state.status, - completionSource: state.completionSource, - reason: state.failureReason, - error: state.error, - attempts: state.attempts, + ...buildCloudMessageFailedPayload(state), timestamp: state.terminalAt ?? 
state.createdAt, }, })), diff --git a/services/cloud-agent-next/src/session/session-message-state.test.ts b/services/cloud-agent-next/src/session/session-message-state.test.ts index e696d8c732..0eab8b1ab6 100644 --- a/services/cloud-agent-next/src/session/session-message-state.test.ts +++ b/services/cloud-agent-next/src/session/session-message-state.test.ts @@ -569,6 +569,48 @@ describe('terminalizeMessageOnce', () => { expect(result.state?.failureReason).toBe('interrupted'); }); + it('stores validated workspace failure diagnostics without changing the raw error', async () => { + const storage = createFakeStorage(); + await putSessionMessageState( + storage, + createQueuedSessionMessageState(createIntent(VALID_MESSAGE_ID, 'hello'), undefined, 1000) + ); + + const result = await terminalizeMessageOnce(storage, VALID_MESSAGE_ID, { + kind: 'failed', + reason: 'exhausted', + error: 'raw compatibility error', + completionSource: 'delivery_failure', + failureCode: 'workspace_setup_failed', + failureSubtype: 'git_clone_timeout', + safeFailureMessage: 'Repository clone timed out', + }); + + expect(result.state).toMatchObject({ + error: 'raw compatibility error', + failureSubtype: 'git_clone_timeout', + safeFailureMessage: 'Repository clone timed out', + }); + }); + + it('rejects workspace failure subtypes on non-workspace failures', async () => { + const storage = createFakeStorage(); + await putSessionMessageState( + storage, + createQueuedSessionMessageState(createIntent(VALID_MESSAGE_ID, 'hello'), undefined, 1000) + ); + + await expect( + terminalizeMessageOnce(storage, VALID_MESSAGE_ID, { + kind: 'failed', + reason: 'exhausted', + completionSource: 'delivery_failure', + failureCode: 'wrapper_start_failed', + failureSubtype: 'git_clone_timeout', + }) + ).rejects.toThrow('Workspace failure subtype requires workspace_setup_failed failure code'); + }); + it('returns changed=false for unknown messageId', async () => { const storage = createFakeStorage(); const result = 
await terminalizeMessageOnce( diff --git a/services/cloud-agent-next/src/session/session-message-state.ts b/services/cloud-agent-next/src/session/session-message-state.ts index cf5babf0a7..2f626491f0 100644 --- a/services/cloud-agent-next/src/session/session-message-state.ts +++ b/services/cloud-agent-next/src/session/session-message-state.ts @@ -1,10 +1,21 @@ import { z } from 'zod'; +import { + CloudAgentFailureCodeSchema, + CloudAgentFailureStageSchema, + WorkspaceFailureSubtypeSchema, + type CloudAgentFailureCode, + type CloudAgentFailureStage, +} from '@kilocode/worker-utils/cloud-agent-failure'; import type { CloudAgentRunStateReport } from '@kilocode/worker-utils/cloud-agent-queue-report'; import type { CallbackTarget } from '../callbacks/index.js'; import type { ExecutionMode, SessionMessageIntent } from '../execution/types.js'; import { renderExecutionTurnContent } from '../execution/types.js'; import { AttachmentsSchema } from '../persistence/schemas.js'; import { MESSAGE_ID_FORMAT_DESCRIPTION, MESSAGE_ID_PATTERN } from './message-id.js'; +import { + WRAPPER_READY_ERROR_DETAIL_MAX_LENGTH, + type WorkspaceFailureSubtype, +} from '../shared/wrapper-bootstrap.js'; import { getWrapperRuntimeState, hasCompleteWrapperRunMessageIndex, @@ -28,50 +39,22 @@ export type SessionMessageCompletionSource = z.infer = T; type CloudAgentRunFailureStage = NonNullable; type CloudAgentRunFailureCode = NonNullable; - -export const SessionMessageFailureStageSchema = z.enum([ - 'pre_dispatch', - 'post_dispatch_no_activity', - 'agent_activity', - 'interruption', - 'unknown', -] as const satisfies readonly CloudAgentRunFailureStage[]); -export type SessionMessageFailureStage = - AssertTrue< - CloudAgentRunFailureStage extends z.infer - ? true - : false - > extends true - ? 
z.infer - : never; - -export const SessionMessageFailureCodeSchema = z.enum([ - 'sandbox_connect_failed', - 'workspace_setup_failed', - 'kilo_server_failed', - 'wrapper_start_failed', - 'invalid_delivery_request', - 'session_metadata_missing', - 'model_missing', - 'delivery_failure_unknown', - 'wrapper_disconnected', - 'wrapper_no_output', - 'wrapper_ping_timeout', - 'wrapper_error_before_activity', - 'assistant_error', - 'wrapper_error_after_activity', - 'missing_assistant_reply', - 'user_interrupt', - 'container_shutdown', - 'system_interrupt', - 'unclassified', -] as const satisfies readonly CloudAgentRunFailureCode[]); -export type SessionMessageFailureCode = - AssertTrue< - CloudAgentRunFailureCode extends z.infer ? true : false - > extends true - ? z.infer - : never; +type StageContractMatchesReport = AssertTrue< + CloudAgentRunFailureStage extends CloudAgentFailureStage ? true : false +>; +type CodeContractMatchesReport = AssertTrue< + CloudAgentRunFailureCode extends CloudAgentFailureCode ? true : false +>; + +export const SessionMessageFailureStageSchema = CloudAgentFailureStageSchema; +export type SessionMessageFailureStage = StageContractMatchesReport extends true + ? CloudAgentFailureStage + : never; + +export const SessionMessageFailureCodeSchema = CloudAgentFailureCodeSchema; +export type SessionMessageFailureCode = CodeContractMatchesReport extends true + ? 
CloudAgentFailureCode + : never; export type SessionMessageDispatchAcceptanceKind = 'observed' | 'inferred_from_terminal'; export type LegacyAdmissionConstraints = { @@ -116,6 +99,8 @@ export type SessionMessageState = { completionSource?: SessionMessageCompletionSource; failureStage?: SessionMessageFailureStage; failureCode?: SessionMessageFailureCode; + failureSubtype?: WorkspaceFailureSubtype; + safeFailureMessage?: string; error?: string; failureReason?: string; attempts?: number; @@ -219,6 +204,8 @@ export const SessionMessageStateSchema = z completionSource: SessionMessageCompletionSourceSchema.optional(), failureStage: SessionMessageFailureStageSchema.optional(), failureCode: SessionMessageFailureCodeSchema.optional(), + failureSubtype: WorkspaceFailureSubtypeSchema.optional(), + safeFailureMessage: z.string().max(WRAPPER_READY_ERROR_DETAIL_MAX_LENGTH).optional(), error: z.string().optional(), failureReason: z.string().optional(), attempts: z.number().int().nonnegative().optional(), @@ -264,7 +251,14 @@ export const SessionMessageStateSchema = z }) .optional(), }) - .passthrough(); + .passthrough() + .refine( + state => state.failureSubtype === undefined || state.failureCode === 'workspace_setup_failed', + { + message: 'Workspace failure subtype requires workspace_setup_failed failure code', + path: ['failureSubtype'], + } + ); export type SessionMessageStorage = { get(key: string): Promise; @@ -514,6 +508,8 @@ export type MarkMessageFailedParams = { completionSource: SessionMessageCompletionSource; failureStage?: SessionMessageFailureStage; failureCode?: SessionMessageFailureCode; + failureSubtype?: WorkspaceFailureSubtype; + safeFailureMessage?: string; attempts?: number; }; @@ -535,6 +531,8 @@ export async function markMessageFailed( completionSource: params.completionSource, failureStage: params.failureStage, failureCode: params.failureCode, + failureSubtype: params.failureSubtype, + safeFailureMessage: params.safeFailureMessage, attempts: 
params.attempts, }; await putSessionMessageState(storage, updated); @@ -722,6 +720,8 @@ export type TerminalizeParams = completionSource: SessionMessageCompletionSource; failureStage?: SessionMessageFailureStage; failureCode?: SessionMessageFailureCode; + failureSubtype?: WorkspaceFailureSubtype; + safeFailureMessage?: string; attempts?: number; } | { @@ -782,6 +782,8 @@ export async function terminalizeMessageOnce( completionSource: params.completionSource, failureStage: params.failureStage, failureCode: params.failureCode, + failureSubtype: params.failureSubtype, + safeFailureMessage: params.safeFailureMessage, attempts: params.attempts, terminalEffects, }; diff --git a/services/cloud-agent-next/src/session/wrapper-supervisor.test.ts b/services/cloud-agent-next/src/session/wrapper-supervisor.test.ts index a981375b32..05530266d6 100644 --- a/services/cloud-agent-next/src/session/wrapper-supervisor.test.ts +++ b/services/cloud-agent-next/src/session/wrapper-supervisor.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it, vi } from 'vitest'; import type { CallbackJob } from '../callbacks/types.js'; +import { WRAPPER_NO_OUTPUT_TIMEOUT_MS } from './agent-runtime.js'; import type { SessionMetadata } from '../persistence/session-metadata.js'; import { createMessageSettlementOutbox, @@ -572,6 +573,134 @@ describe('WrapperSupervisor', () => { } ); + it.each([ + 'Payment Required', + 'usage_limit_exceeded', + 'Too Many Requests', + 'Model not found: kilo/anthropic/claude-haiku-4.5', + ])('classifies an explicit assistant request failure as assistant_error: %s', async error => { + const harness = createHarness([liveRuntimeState(), OWNED_WRAPPER_LEASE]); + await putSessionMessageState(harness.storage, acceptedMessage()); + + await harness.supervisor.onTerminalEvent({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'failed', + error, + errorSource: 'assistant', + }); + + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'failed', 
+ failureReason: 'assistant_error', + error, + completionSource: 'wrapper_failure', + failureStage: 'agent_activity', + failureCode: 'assistant_error', + safeFailureMessage: + error === 'Payment Required' || error === 'usage_limit_exceeded' + ? 'Assistant request failed: insufficient credits' + : error === 'Too Many Requests' + ? 'Assistant request was rate limited' + : 'Assistant request failed: model not found', + }); + }); + + it.each([ + { + label: 'before activity', + message: acceptedMessage(), + failureStage: 'post_dispatch_no_activity' as const, + failureCode: 'wrapper_error_before_activity' as const, + }, + { + label: 'after activity', + message: { ...acceptedMessage(), agentActivityObservedAt: 2_500 }, + failureStage: 'agent_activity' as const, + failureCode: 'wrapper_error_after_activity' as const, + }, + ])( + 'keeps a genuine wrapper terminal failure classified as wrapper error $label', + async testCase => { + const harness = createHarness([liveRuntimeState(), OWNED_WRAPPER_LEASE]); + await putSessionMessageState(harness.storage, testCase.message); + + await harness.supervisor.onTerminalEvent({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'failed', + error: 'Wrapper process exited unexpectedly', + }); + + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'failed', + failureReason: 'wrapper_error', + failureStage: testCase.failureStage, + failureCode: testCase.failureCode, + }); + } + ); + + it.each(['SIGTERM', 'SIGINT'])( + 'classifies container shutdown %s separately from other system interruptions', + async signal => { + const harness = createHarness([liveRuntimeState(), OWNED_WRAPPER_LEASE]); + await putSessionMessageState(harness.storage, acceptedMessage()); + + await harness.supervisor.onTerminalEvent({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'interrupted', + error: `Container shutdown: ${signal}`, + }); + + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ 
+ status: 'interrupted', + error: `Container shutdown: ${signal}`, + completionSource: 'interrupt', + failureStage: 'interruption', + failureCode: 'container_shutdown', + }); + } + ); + + it('classifies structured container shutdown without parsing the error text', async () => { + const harness = createHarness([liveRuntimeState(), OWNED_WRAPPER_LEASE]); + await putSessionMessageState(harness.storage, acceptedMessage()); + + await harness.supervisor.onTerminalEvent({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'interrupted', + error: 'Wrapper received a termination signal', + interruptionSource: 'container_shutdown', + }); + + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'interrupted', + error: 'Wrapper received a termination signal', + failureStage: 'interruption', + failureCode: 'container_shutdown', + }); + }); + + it.each(['aborted via API', 'Session stopped'])( + 'keeps unrelated wrapper interruption as system_interrupt: %s', + async error => { + const harness = createHarness([liveRuntimeState(), OWNED_WRAPPER_LEASE]); + await putSessionMessageState(harness.storage, acceptedMessage()); + + await harness.supervisor.onTerminalEvent({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'interrupted', + error, + }); + + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'interrupted', + error, + failureStage: 'interruption', + failureCode: 'system_interrupt', + }); + } + ); + it.each([ { status: 'failed' as const, expected: 'failed' as const }, { status: 'interrupted' as const, expected: 'interrupted' as const }, @@ -640,14 +769,64 @@ describe('WrapperSupervisor', () => { expect(observedStopBeforeEffects).toBe(true); }); - it('fails accepted current work and requests durable cleanup on liveness expiry', async () => { + it('lets stable-idle assistant failure settle before no-output maintenance can overwrite it', async () => { + const acceptedAt = 2_000; + const 
providerErrorAt = acceptedAt + 300_800; + const stableIdleAt = providerErrorAt + 3_000; + const noOutputDeadlineAt = acceptedAt + WRAPPER_NO_OUTPUT_TIMEOUT_MS; const harness = createHarness([ - liveRuntimeState({ noOutputDeadlineAt: 9_000, nextPingAt: 30_000 }), + liveRuntimeState({ noOutputDeadlineAt, nextPingAt: noOutputDeadlineAt + 1 }), OWNED_WRAPPER_LEASE, ]); await putSessionMessageState(harness.storage, acceptedMessage()); - await harness.supervisor.runMaintenance(10_000); + await harness.supervisor.runMaintenance(providerErrorAt); + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'accepted', + }); + + await harness.supervisor.runMaintenance(stableIdleAt - 1); + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'accepted', + }); + + await harness.supervisor.onTerminalEvent({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'failed', + error: 'Provider request timed out', + errorSource: 'assistant', + }); + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'failed', + failureReason: 'assistant_error', + error: 'Provider request timed out', + failureCode: 'assistant_error', + }); + + await harness.supervisor.runMaintenance(noOutputDeadlineAt); + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'failed', + failureReason: 'assistant_error', + error: 'Provider request timed out', + failureCode: 'assistant_error', + }); + }); + + it('fails genuinely silent accepted work at the no-output deadline', async () => { + const acceptedAt = 2_000; + const noOutputDeadlineAt = acceptedAt + WRAPPER_NO_OUTPUT_TIMEOUT_MS; + const harness = createHarness([ + liveRuntimeState({ noOutputDeadlineAt, nextPingAt: noOutputDeadlineAt + 1 }), + OWNED_WRAPPER_LEASE, + ]); + await putSessionMessageState(harness.storage, acceptedMessage()); + + await 
harness.supervisor.runMaintenance(noOutputDeadlineAt - 1); + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'accepted', + }); + + await harness.supervisor.runMaintenance(noOutputDeadlineAt); const state = await getSessionMessageState(harness.storage, MESSAGE_ID); const runtimeState = await getWrapperRuntimeState(harness.storage); @@ -669,6 +848,24 @@ describe('WrapperSupervisor', () => { expect(harness.events.map(event => event.streamEventType)).toEqual(['cloud.message.failed']); }); + it('terminates an unresponsive wrapper on ping timeout before no-output expires', async () => { + const pingDeadlineAt = 92_000; + const noOutputDeadlineAt = 332_000; + const harness = createHarness([ + liveRuntimeState({ pingDeadlineAt, noOutputDeadlineAt }), + OWNED_WRAPPER_LEASE, + ]); + await putSessionMessageState(harness.storage, acceptedMessage()); + + await harness.supervisor.runMaintenance(pingDeadlineAt); + + await expect(getSessionMessageState(harness.storage, MESSAGE_ID)).resolves.toMatchObject({ + status: 'failed', + error: 'Wrapper did not respond to liveness ping', + failureCode: 'wrapper_ping_timeout', + }); + }); + it('releases a finalizing callback on liveness expiry but holds a queued follow-up until physical absence', async () => { const harness = createHarness( [ @@ -734,6 +931,18 @@ describe('WrapperSupervisor', () => { expect(harness.requestPendingDrainIfNeeded).toHaveBeenCalledOnce(); }); + it('schedules the updated no-output deadline when it is the next liveness deadline', async () => { + const noOutputDeadlineAt = 332_000; + const harness = createHarness([ + liveRuntimeState({ noOutputDeadlineAt, nextPingAt: noOutputDeadlineAt + 1 }), + ]); + await putSessionMessageState(harness.storage, acceptedMessage()); + + await expect(harness.supervisor.nextMaintenanceDeadlines()).resolves.toContain( + noOutputDeadlineAt + ); + }); + it('aggregates concurrent physical, liveness, disconnect, and cleanup deadlines', 
async () => { const harness = createHarness([ liveRuntimeState({ @@ -1355,7 +1564,7 @@ describe('WrapperSupervisor', () => { finalizingWrapperRunId: WRAPPER_RUN_ID, wrapperIdleDeadlineAt: 50_000, lastWrapperMessageAt: 2_000, - noOutputDeadlineAt: 302_000, + noOutputDeadlineAt: 332_000, }); }); diff --git a/services/cloud-agent-next/src/session/wrapper-supervisor.ts b/services/cloud-agent-next/src/session/wrapper-supervisor.ts index 57f36b12ef..5869fe919e 100644 --- a/services/cloud-agent-next/src/session/wrapper-supervisor.ts +++ b/services/cloud-agent-next/src/session/wrapper-supervisor.ts @@ -9,6 +9,7 @@ import type { import type { AgentRuntime } from './agent-runtime.js'; import { WRAPPER_NO_OUTPUT_TIMEOUT_MS, WRAPPER_PING_INTERVAL_MS } from './agent-runtime.js'; import type { MessageSettlementOutbox } from './message-settlement-outbox.js'; +import { classifyAssistantFailureMessage } from './safe-failure-projection.js'; import { countPendingSessionMessages, type SessionQueueStorage } from './pending-messages.js'; import type { SessionMessageQueue } from './session-message-queue.js'; import { @@ -87,6 +88,8 @@ export type WrapperTerminalEvent = { wrapperRunId: string; status: 'completed' | 'failed' | 'interrupted'; error?: string; + errorSource?: 'assistant'; + interruptionSource?: 'container_shutdown'; gateResult?: 'pass' | 'fail'; messageIds?: string[]; }; @@ -201,6 +204,18 @@ function getAssistantErrorMessage(error: unknown): string | undefined { return 'Assistant message failed'; } +function getWrapperInterruptionFailureCode( + interruptionSource: WrapperTerminalEvent['interruptionSource'], + error: string | undefined +): 'container_shutdown' | 'system_interrupt' { + if (interruptionSource === 'container_shutdown') return 'container_shutdown'; + + // Preserve classification for wrappers already running during deployment. + return error === 'Container shutdown: SIGTERM' || error === 'Container shutdown: SIGINT' + ? 
'container_shutdown' + : 'system_interrupt'; +} + export function createWrapperSupervisor( dependencies: WrapperSupervisorDependencies ): WrapperSupervisor { @@ -752,6 +767,7 @@ export function createWrapperSupervisor( completionSource: 'idle_reconciliation', failureStage: 'agent_activity', failureCode: 'assistant_error', + safeFailureMessage: classifyAssistantFailureMessage(assistantError), }); } else if (assistantMessage) { await observeCorrelatedAgentActivity?.(messageId); @@ -910,7 +926,8 @@ export function createWrapperSupervisor( } async function onTerminalEvent(params: WrapperTerminalEvent): Promise { - const { wrapperRunId, status, error, gateResult, messageIds } = params; + const { wrapperRunId, status, error, errorSource, interruptionSource, gateResult, messageIds } = + params; const sessionId = getSessionIdForLogs(); const state = await getWrapperRuntimeState(storage); if ( @@ -930,7 +947,8 @@ export function createWrapperSupervisor( sessionId, wrapperRunId, status, - error, + errorSource, + interruptionSource, gateResult, messageCount: messageIds?.length, }) @@ -946,6 +964,19 @@ export function createWrapperSupervisor( const acceptedMessages = await listNonTerminalAcceptedMessages(storage, wrapperRunId); for (const message of acceptedMessages) { if (status === 'failed') { + if (errorSource === 'assistant') { + await messageSettlementOutbox.terminalizeSessionMessageOnce(message.messageId, { + kind: 'failed', + reason: 'assistant_error', + error: error ?? 'Assistant request failed', + completionSource: 'wrapper_failure', + failureStage: 'agent_activity', + failureCode: 'assistant_error', + safeFailureMessage: classifyAssistantFailureMessage(error), + }); + continue; + } + const activityObserved = message.agentActivityObservedAt !== undefined; await messageSettlementOutbox.terminalizeSessionMessageOnce(message.messageId, { kind: 'failed', @@ -965,7 +996,7 @@ export function createWrapperSupervisor( error: error ?? 
'Wrapper interrupted', completionSource: 'interrupt', failureStage: 'interruption', - failureCode: 'system_interrupt', + failureCode: getWrapperInterruptionFailureCode(interruptionSource, error), }); } } diff --git a/services/cloud-agent-next/src/shared/protocol.ts b/services/cloud-agent-next/src/shared/protocol.ts index 3d417ac574..cae3a92484 100644 --- a/services/cloud-agent-next/src/shared/protocol.ts +++ b/services/cloud-agent-next/src/shared/protocol.ts @@ -19,8 +19,8 @@ export type StreamEventType = | 'status' // Status message (e.g., "Auto-committing...") | 'heartbeat' // Keep-alive during idle periods | 'pong' // Response to ping command from DO - | 'error' // Error occurred { error: string, fatal: boolean } - | 'interrupted' // User/signal interrupt + | 'error' // Error occurred { error: string, fatal: boolean, errorSource?: 'assistant' } + | 'interrupted' // User/signal interrupt { reason?: string, interruptionSource?: 'container_shutdown' } | 'complete' // Execution finished { exitCode, currentBranch?, messageIds } | 'wrapper_finalizing' // Wrapper sealed the current run batch before post-processing | 'wrapper_resumed' // Wrapper reconnected after disconnect (may have lost events) diff --git a/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts b/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts index 3894556fc8..7b5921e7a9 100644 --- a/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts +++ b/services/cloud-agent-next/src/shared/wrapper-bootstrap.ts @@ -1,3 +1,12 @@ +import { isWorkspaceFailureSubtype } from '@kilocode/worker-utils/cloud-agent-failure'; + +export { + isWorkspaceFailureSubtype, + WORKSPACE_FAILURE_SUBTYPES, + type WorkspaceFailureSubtype, +} from '@kilocode/worker-utils/cloud-agent-failure'; +import type { WorkspaceFailureSubtype } from '@kilocode/worker-utils/cloud-agent-failure'; + export type WrapperCommitCoAuthor = { name: string; email: string; @@ -145,6 +154,9 @@ export type WrapperSessionReadySuccessResponse = 
{ workspaceReady: WrapperWorkspaceReady; }; +export const WRAPPER_READY_ERROR_MESSAGE_MAX_LENGTH = 4_096; +export const WRAPPER_READY_ERROR_DETAIL_MAX_LENGTH = 8_192; + export type WrapperSessionReadyErrorResponse = { status: 'error'; error: { @@ -153,7 +165,9 @@ export type WrapperSessionReadyErrorResponse = { | 'WRAPPER_FINALIZING' | 'WORKSPACE_SETUP_FAILED' | 'KILO_SERVER_FAILED'; + subtype?: WorkspaceFailureSubtype; message: string; + detail?: string; retryable?: boolean; wrapperRunId?: string; }; @@ -171,6 +185,56 @@ function hasString(value: Record, key: string): boolean { return typeof value[key] === 'string' && value[key].length > 0; } +export type FlattenedWrapperSessionReadyError = { + error: WrapperSessionReadyErrorResponse['error']['code']; + message: string; +} & Omit; + +export function parseWrapperSessionReadyErrorResponse( + value: unknown +): FlattenedWrapperSessionReadyError | undefined { + if (!isRecord(value) || !hasString(value, 'error') || !hasString(value, 'message')) + return undefined; + if ( + typeof value.message !== 'string' || + value.message.length > WRAPPER_READY_ERROR_MESSAGE_MAX_LENGTH + ) { + return undefined; + } + if (value.subtype !== undefined && !isWorkspaceFailureSubtype(value.subtype)) return undefined; + if ( + value.detail !== undefined && + (typeof value.detail !== 'string' || + value.detail.length > WRAPPER_READY_ERROR_DETAIL_MAX_LENGTH) + ) { + return undefined; + } + if (value.retryable !== undefined && typeof value.retryable !== 'boolean') return undefined; + if (value.wrapperRunId !== undefined && typeof value.wrapperRunId !== 'string') return undefined; + if ( + value.error !== 'INVALID_REQUEST' && + value.error !== 'WRAPPER_FINALIZING' && + value.error !== 'WORKSPACE_SETUP_FAILED' && + value.error !== 'KILO_SERVER_FAILED' + ) { + return undefined; + } + return { + error: value.error, + message: value.message, + ...(value.subtype !== undefined ? { subtype: value.subtype } : {}), + ...(value.detail !== undefined ? 
{ detail: value.detail } : {}), + ...(value.retryable !== undefined ? { retryable: value.retryable } : {}), + ...(value.wrapperRunId !== undefined ? { wrapperRunId: value.wrapperRunId } : {}), + }; +} + +export function isWrapperSessionReadyErrorResponse( + value: unknown +): value is FlattenedWrapperSessionReadyError { + return parseWrapperSessionReadyErrorResponse(value) !== undefined; +} + function isWrapperDevContainerMetadata(value: unknown): value is WrapperDevContainerMetadata { if (!isRecord(value)) return false; if (!hasString(value, 'workspacePath')) return false; diff --git a/services/cloud-agent-next/src/telemetry/queue-reports.test.ts b/services/cloud-agent-next/src/telemetry/queue-reports.test.ts index 7e2399a5a1..3a92a6a447 100644 --- a/services/cloud-agent-next/src/telemetry/queue-reports.test.ts +++ b/services/cloud-agent-next/src/telemetry/queue-reports.test.ts @@ -2,6 +2,23 @@ import { describe, expect, it, vi } from 'vitest'; import type { CloudAgentQueueReport } from '@kilocode/worker-utils/cloud-agent-queue-report'; import { emitRunStateReport } from './queue-reports.js'; import type { SessionMessageState } from '../session/session-message-state.js'; +import type { WorkspaceFailureSubtype } from '../shared/wrapper-bootstrap.js'; + +const WORKSPACE_FAILURE_DIAGNOSTICS = [ + ['git_clone_timeout', 'Repository clone timed out'], + ['git_checkout_timeout', 'Repository checkout timed out'], + ['git_authentication_failed', 'Repository authentication failed'], + ['git_network_failed', 'Repository network request failed'], + ['git_pack_corrupt', 'Repository data is corrupt'], + ['git_checkout_conflict', 'Repository checkout conflict'], + ['git_branch_missing', 'Requested repository branch was not found'], + ['sandbox_storage_full', 'Workspace setup failed: sandbox storage full'], + ['kilo_import_timeout', 'Session import timed out'], + ['kilo_import_failed', 'Session import failed'], + ['setup_command_timeout', 'Setup command timed out'], + 
['setup_command_failed', 'Setup command failed'], + ['workspace_setup_unknown', 'Workspace setup failed'], +] satisfies ReadonlyArray; const state: SessionMessageState = { messageId: 'msg_018f1e2d3c4bReportMsgAbCdEF', @@ -63,29 +80,78 @@ describe('Cloud Agent report emitter', () => { expect(JSON.stringify(reports)).not.toContain('model/test'); }); - it('emits a safe disk-full diagnostic for a workspace setup failure', async () => { + it.each(WORKSPACE_FAILURE_DIAGNOSTICS)( + 'emits the allowlisted diagnostic for workspace subtype %s', + async (failureSubtype, expectedDiagnostic) => { + const reports: CloudAgentQueueReport[] = []; + await emitRunStateReport({ + queue: { send: async report => void reports.push(report) }, + cloudAgentSessionId: 'agent_report', + state: { + ...state, + acceptedAt: undefined, + dispatchAcceptanceKind: undefined, + agentActivityObservedAt: undefined, + wrapperRunId: undefined, + failureStage: 'pre_dispatch', + failureCode: 'workspace_setup_failed', + failureSubtype, + error: 'raw error with credential password=hunter2 and process output', + safeFailureMessage: 'bounded but secret-bearing token=super-secret', + }, + }); + + expect(reports[0]?.run.diagnostic).toEqual({ + errorMessageRedacted: expectedDiagnostic, + errorExpiresAt: new Date(5 + 30 * 24 * 60 * 60 * 1000).toISOString(), + }); + expect(JSON.stringify(reports)).not.toContain('hunter2'); + expect(JSON.stringify(reports)).not.toContain('super-secret'); + expect(JSON.stringify(reports)).not.toContain('process output'); + } + ); + + it.each([ + ['absent', undefined], + ['unknown', 'future_workspace_subtype'], + ])('falls back for an %s workspace subtype', async (_name, failureSubtype) => { const reports: CloudAgentQueueReport[] = []; await emitRunStateReport({ queue: { send: async report => void reports.push(report) }, cloudAgentSessionId: 'agent_report', state: { ...state, - acceptedAt: undefined, - dispatchAcceptanceKind: undefined, - agentActivityObservedAt: undefined, - 
wrapperRunId: undefined, failureStage: 'pre_dispatch', failureCode: 'workspace_setup_failed', - error: 'Git clone failed: No space left on device while checking out secret-repository', + failureSubtype: failureSubtype as WorkspaceFailureSubtype | undefined, + error: 'credential=raw-secret', + safeFailureMessage: 'provider body with token=safe-message-secret', }, }); - expect(reports[0]?.run.diagnostic).toEqual({ - errorMessageRedacted: 'Workspace setup failed: sandbox storage full', - errorExpiresAt: new Date(5 + 30 * 24 * 60 * 60 * 1000).toISOString(), + expect(reports[0]?.run.diagnostic?.errorMessageRedacted).toBe('Workspace setup failed'); + expect(JSON.stringify(reports)).not.toContain('raw-secret'); + expect(JSON.stringify(reports)).not.toContain('safe-message-secret'); + }); + + it('classifies storage-full from subtype independently of raw error text', async () => { + const reports: CloudAgentQueueReport[] = []; + await emitRunStateReport({ + queue: { send: async report => void reports.push(report) }, + cloudAgentSessionId: 'agent_report', + state: { + ...state, + failureStage: 'pre_dispatch', + failureCode: 'workspace_setup_failed', + failureSubtype: 'sandbox_storage_full', + error: 'unrelated secret-bearing failure text', + }, }); - expect(JSON.stringify(reports)).not.toContain('secret-repository'); - expect(JSON.stringify(reports)).not.toContain('No space left on device'); + + expect(reports[0]?.run.diagnostic?.errorMessageRedacted).toBe( + 'Workspace setup failed: sandbox storage full' + ); + expect(JSON.stringify(reports)).not.toContain('unrelated secret-bearing failure text'); }); it('emits a safe insufficient-credit diagnostic for the wrapper terminal text', async () => { diff --git a/services/cloud-agent-next/src/telemetry/queue-reports.ts b/services/cloud-agent-next/src/telemetry/queue-reports.ts index 558c1044fd..0f6f7e678b 100644 --- a/services/cloud-agent-next/src/telemetry/queue-reports.ts +++ 
b/services/cloud-agent-next/src/telemetry/queue-reports.ts @@ -5,7 +5,9 @@ import { type CloudAgentRunStateReport, } from '@kilocode/worker-utils/cloud-agent-queue-report'; import { logger } from '../logger.js'; +import { workspaceFailureMessage } from '../session/safe-failure-projection.js'; import type { SessionMessageState } from '../session/session-message-state.js'; +import { isWorkspaceFailureSubtype } from '../shared/wrapper-bootstrap.js'; type ReportQueue = { send(report: CloudAgentQueueReport): Promise; @@ -68,11 +70,10 @@ function diagnosticForFailedRun( let errorMessageRedacted = state.failureCode === undefined ? undefined : FAILED_RUN_DIAGNOSTIC_MESSAGES[state.failureCode]; - if ( - state.failureCode === 'workspace_setup_failed' && - state.error?.toLowerCase().includes('no space left on device') - ) { - errorMessageRedacted = 'Workspace setup failed: sandbox storage full'; + if (state.failureCode === 'workspace_setup_failed') { + errorMessageRedacted = isWorkspaceFailureSubtype(state.failureSubtype) + ? 
workspaceFailureMessage(state.failureSubtype) + : 'Workspace setup failed'; } else if (isKnownInsufficientCreditFailure(state)) { errorMessageRedacted = 'Model request failed: insufficient credits'; } diff --git a/services/cloud-agent-next/src/websocket/ingest.test.ts b/services/cloud-agent-next/src/websocket/ingest.test.ts index 0fcaf2d547..22530aac49 100644 --- a/services/cloud-agent-next/src/websocket/ingest.test.ts +++ b/services/cloud-agent-next/src/websocket/ingest.test.ts @@ -982,26 +982,19 @@ describe('createIngestHandler', () => { await handler.handleIngestMessage(ws, message); expect(doContext.terminalizeSessionMessageOnce).not.toHaveBeenCalled(); - expect(warn).toHaveBeenCalledWith( - 'Invalid cloud.message.completed event payload', - expect.anything() - ); + expect(warn).toHaveBeenCalledWith('Invalid cloud.message.completed event payload'); warn.mockRestore(); }); - it('terminalizes as failed on assistant message.updated with error', async () => { + it('publishes and persists a safe assistant message error while terminalizing with raw data', async () => { const state = createFakeState(); const doContext = createNewPathDOContext(); - const handler = createIngestHandler( - state, - createFakeEventQueries(), - SESSION_ID, - vi.fn(), - doContext - ); + const eventQueries = createFakeEventQueries(); + const broadcast = vi.fn(); + const handler = createIngestHandler(state, eventQueries, SESSION_ID, broadcast, doContext); const ws = createFakeWebSocket(makeNewPathAttachment()); - + const secretError = '429 rate limit exceeded provider-body=secret-token'; const message = JSON.stringify({ streamEventType: 'kilocode', data: { @@ -1009,9 +1002,10 @@ describe('createIngestHandler', () => { properties: { info: { id: 'asst_333', + sessionID: 'kilo_session_333', role: 'assistant', parentID: 'msg_user_333', - error: 'rate limit exceeded', + error: { data: { message: secretError, responseBody: 'secret-response' } }, }, }, }, @@ -1020,18 +1014,78 @@ 
describe('createIngestHandler', () => { await handler.handleIngestMessage(ws, message); + const safePayload = JSON.stringify({ + event: 'message.updated', + properties: { + info: { + id: 'asst_333', + sessionID: 'kilo_session_333', + role: 'assistant', + parentID: 'msg_user_333', + error: 'Assistant request was rate limited', + }, + }, + }); + expect(eventQueries.upsert).toHaveBeenCalledWith( + expect.objectContaining({ entityId: 'message/asst_333', payload: safePayload }) + ); + expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({ payload: safePayload })); + expect(JSON.stringify(vi.mocked(broadcast).mock.calls)).not.toContain('secret-token'); + expect(JSON.stringify(vi.mocked(eventQueries.upsert).mock.calls)).not.toContain( + 'secret-response' + ); expect(doContext.terminalizeSessionMessageOnce).toHaveBeenCalledWith( 'msg_user_333', expect.objectContaining({ kind: 'failed', assistantMessageId: 'asst_333', completionSource: 'assistant_message_event', - error: 'rate limit exceeded', + error: secretError, + safeFailureMessage: 'Assistant request was rate limited', }), WRAPPER_RUN_ID ); }); + it('publishes and persists a safe session error with session correlation intact', async () => { + const eventQueries = createFakeEventQueries(); + const broadcast = vi.fn(); + const handler = createIngestHandler( + createFakeState(), + eventQueries, + SESSION_ID, + broadcast, + createNewPathDOContext() + ); + const ws = createFakeWebSocket(makeNewPathAttachment()); + + await handler.handleIngestMessage( + ws, + makeStreamMessage('kilocode', { + event: 'session.error', + properties: { + sessionID: 'kilo_session_error', + error: { data: { message: 'Payment Required api-key=secret-session-key' } }, + }, + }) + ); + + const safePayload = JSON.stringify({ + event: 'session.error', + properties: { + sessionID: 'kilo_session_error', + error: 'Assistant request failed: insufficient credits', + }, + }); + expect(eventQueries.insert).toHaveBeenCalledWith( + 
expect.objectContaining({ payload: safePayload }) + ); + expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({ payload: safePayload })); + expect(JSON.stringify(vi.mocked(eventQueries.insert).mock.calls)).not.toContain( + 'secret-session-key' + ); + }); + it('terminalizes object-shaped assistant errors with completion as failed', async () => { const state = createFakeState(); const doContext = createNewPathDOContext(); @@ -1070,6 +1124,7 @@ describe('createIngestHandler', () => { assistantMessageId: 'asst_object_completed', completionSource: 'assistant_message_event', error: 'provider failed', + safeFailureMessage: 'Assistant request failed', }), WRAPPER_RUN_ID ); @@ -1112,6 +1167,7 @@ describe('createIngestHandler', () => { assistantMessageId: 'asst_object_pending', completionSource: 'assistant_message_event', error: 'provider failed early', + safeFailureMessage: 'Assistant request failed', }), WRAPPER_RUN_ID ); @@ -1219,6 +1275,141 @@ describe('createIngestHandler', () => { messageIds: undefined, }); }); + + it('publishes and persists a safe fatal assistant wrapper error while forwarding raw data', async () => { + const doContext = createNewPathDOContext(); + const eventQueries = createFakeEventQueries(); + const broadcast = vi.fn(); + const handler = createIngestHandler( + createFakeState(), + eventQueries, + SESSION_ID, + broadcast, + doContext + ); + const ws = createFakeWebSocket(makeNewPathAttachment()); + const rawError = 'Payment Required provider-body=secret-wrapper-token'; + + await handler.handleIngestMessage( + ws, + makeStreamMessage('error', { + fatal: true, + error: rawError, + message: 'another secret', + errorSource: 'assistant', + arbitrary: 'must be dropped', + }) + ); + + const safeMessage = 'Assistant request failed: insufficient credits'; + const safePayload = JSON.stringify({ + fatal: true, + errorSource: 'assistant', + error: safeMessage, + message: safeMessage, + }); + expect(eventQueries.insert).toHaveBeenCalledWith( + 
expect.objectContaining({ payload: safePayload }) + ); + expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({ payload: safePayload })); + expect(broadcast).toHaveBeenCalledWith( + expect.objectContaining({ + stream_event_type: 'cloud.status', + payload: JSON.stringify({ cloudStatus: { type: 'error', message: safeMessage } }), + }) + ); + expect(JSON.stringify(vi.mocked(broadcast).mock.calls)).not.toContain('secret-wrapper-token'); + expect(JSON.stringify(vi.mocked(eventQueries.insert).mock.calls)).not.toContain( + 'must be dropped' + ); + expect(doContext.wrapperSupervisor.onTerminalEvent).toHaveBeenCalledWith({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'failed', + error: rawError, + errorSource: 'assistant', + }); + }); + + it('keeps unclassified fatal events as wrapper failures', async () => { + const doContext = createNewPathDOContext(); + const handler = createIngestHandler( + createFakeState(), + createFakeEventQueries(), + SESSION_ID, + vi.fn(), + doContext + ); + const ws = createFakeWebSocket(makeNewPathAttachment()); + + await handler.handleIngestMessage( + ws, + makeStreamMessage('error', { + fatal: true, + error: 'Wrapper process exited unexpectedly', + }) + ); + + expect(doContext.wrapperSupervisor.onTerminalEvent).toHaveBeenCalledWith({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'failed', + error: 'Wrapper process exited unexpectedly', + errorSource: undefined, + }); + }); + + it.each([ + { + name: 'structured container shutdown', + input: { + reason: 'Container shutdown: SIGTERM secret-container-reason', + exitCode: 143, + interruptionSource: 'container_shutdown' as const, + arbitrary: 'drop-me', + }, + publicReason: 'Container shutdown', + }, + { + name: 'untrusted wrapper interruption', + input: { + reason: 'user token=secret-interruption-reason', + exitCode: 1, + arbitrary: 'drop-me', + }, + publicReason: 'Wrapper interrupted', + }, + ])('publishes and persists a bounded $name reason', async ({ input, publicReason }) => { + const 
doContext = createNewPathDOContext(); + const eventQueries = createFakeEventQueries(); + const broadcast = vi.fn(); + const handler = createIngestHandler( + createFakeState(), + eventQueries, + SESSION_ID, + broadcast, + doContext + ); + const ws = createFakeWebSocket(makeNewPathAttachment()); + + await handler.handleIngestMessage(ws, makeStreamMessage('interrupted', input)); + + const safePayload = JSON.stringify({ + reason: publicReason, + exitCode: input.exitCode, + ...(input.interruptionSource ? { interruptionSource: input.interruptionSource } : {}), + }); + expect(eventQueries.insert).toHaveBeenCalledWith( + expect.objectContaining({ payload: safePayload }) + ); + expect(broadcast).toHaveBeenCalledWith(expect.objectContaining({ payload: safePayload })); + expect(JSON.stringify(vi.mocked(broadcast).mock.calls)).not.toContain('secret-'); + expect(JSON.stringify(vi.mocked(eventQueries.insert).mock.calls)).not.toContain('drop-me'); + expect(doContext.wrapperSupervisor.onTerminalEvent).toHaveBeenCalledWith({ + wrapperRunId: WRAPPER_RUN_ID, + status: 'interrupted', + error: input.reason, + interruptionSource: input.interruptionSource, + }); + }); }); describe('handleIngestRequest — new-path sessionId validation', () => { diff --git a/services/cloud-agent-next/src/websocket/ingest.ts b/services/cloud-agent-next/src/websocket/ingest.ts index 7e88e28934..0e656ccb60 100644 --- a/services/cloud-agent-next/src/websocket/ingest.ts +++ b/services/cloud-agent-next/src/websocket/ingest.ts @@ -27,8 +27,9 @@ import { import type { CompleteEventData, KilocodeEventData, CloudStatusData } from '../shared/protocol.js'; import type { SlashCommandInfo } from '../shared/slash-commands.js'; import { logger } from '../logger.js'; -import type { WrapperSupervisor } from '../session/wrapper-supervisor.js'; +import type { WrapperSupervisor, WrapperTerminalEvent } from '../session/wrapper-supervisor.js'; import type { TerminalizeParams } from '../session/session-message-state.js'; +import { 
classifyAssistantFailureMessage } from '../session/safe-failure-projection.js'; // --------------------------------------------------------------------------- // Ingest Attachment @@ -54,12 +55,14 @@ const kilocodeEventSchema = z const interruptedEventSchema = z.object({ reason: z.string().optional(), exitCode: z.number().optional(), + interruptionSource: z.literal('container_shutdown').optional(), }); const errorEventSchema = z.object({ fatal: z.boolean().optional(), error: z.string().optional(), message: z.string().optional(), + errorSource: z.literal('assistant').optional(), }); const cloudMessageCompletedEventSchema = z.object({ @@ -86,6 +89,78 @@ function getAssistantErrorMessage(error: unknown): string | undefined { return 'Assistant message failed'; } +function sanitizeKilocodeEventData(data: unknown): unknown { + if (typeof data !== 'object' || data === null) return data; + const eventData = data as Record; + if (typeof eventData.properties !== 'object' || eventData.properties === null) return data; + const properties = eventData.properties as Record; + + if (eventData.event === 'message.updated') { + if (typeof properties.info !== 'object' || properties.info === null) return data; + const info = properties.info as Record; + if (info.role !== 'assistant' || info.error === undefined || info.error === null) return data; + return { + ...eventData, + properties: { + ...properties, + info: { + ...info, + error: classifyAssistantFailureMessage(info.error), + }, + }, + }; + } + + if (eventData.event === 'session.error') { + return { + ...eventData, + properties: { + ...properties, + error: classifyAssistantFailureMessage(properties.error), + }, + }; + } + + return data; +} + +function sanitizePublicEventData(eventType: string, data: unknown): unknown { + if (eventType === 'kilocode') return sanitizeKilocodeEventData(data); + + if (eventType === 'error') { + const parsed = errorEventSchema.safeParse(data); + if (!parsed.success) return {}; + const rawError = 
parsed.data.error ?? parsed.data.message; + const safeMessage = + parsed.data.errorSource === 'assistant' + ? classifyAssistantFailureMessage(rawError) + : 'Agent wrapper failed'; + return { + fatal: parsed.data.fatal, + ...(parsed.data.errorSource ? { errorSource: parsed.data.errorSource } : {}), + error: safeMessage, + message: safeMessage, + }; + } + + if (eventType === 'interrupted') { + const parsed = interruptedEventSchema.safeParse(data); + if (!parsed.success) return {}; + return { + reason: + parsed.data.interruptionSource === 'container_shutdown' + ? 'Container shutdown' + : 'Wrapper interrupted', + ...(parsed.data.exitCode === undefined ? {} : { exitCode: parsed.data.exitCode }), + ...(parsed.data.interruptionSource + ? { interruptionSource: parsed.data.interruptionSource } + : {}), + }; + } + + return data; +} + // --------------------------------------------------------------------------- // Persistence Allowlists // --------------------------------------------------------------------------- @@ -188,13 +263,7 @@ export function createIngestHandler( broadcastFn: (event: StoredEvent) => void, doContext: IngestDOContext ) { - async function forwardIngestTerminalEvent(params: { - wrapperRunId: string; - status: 'completed' | 'failed' | 'interrupted'; - error?: string; - gateResult?: 'pass' | 'fail'; - messageIds?: string[]; - }): Promise { + async function forwardIngestTerminalEvent(params: WrapperTerminalEvent): Promise { await doContext.wrapperSupervisor.onTerminalEvent(params); } @@ -451,7 +520,8 @@ export function createIngestHandler( : Date.now(); const eventType = ingestEvent.streamEventType; - const payload = JSON.stringify(ingestEvent.data ?? {}); + const publicEventData = sanitizePublicEventData(eventType, ingestEvent.data ?? 
{}); + const payload = JSON.stringify(publicEventData); const eventTypeStr: string = eventType; const now = Date.now(); @@ -488,10 +558,7 @@ export function createIngestHandler( ingestEvent.data ); if (!parsedCloudMessageCompleted.success) { - console.warn( - 'Invalid cloud.message.completed event payload', - parsedCloudMessageCompleted.error - ); + console.warn('Invalid cloud.message.completed event payload'); return; } @@ -612,7 +679,7 @@ export function createIngestHandler( ); ws.serializeAttachment(attachment); } else { - console.warn('Invalid kilocode event payload', parsedKilocode.error); + console.warn('Invalid kilocode event payload'); } } @@ -639,6 +706,7 @@ export function createIngestHandler( assistantMessageId: typeof info?.id === 'string' ? info.id : undefined, reason: 'assistant_error', error: assistantError, + safeFailureMessage: classifyAssistantFailureMessage(assistantError), completionSource: 'assistant_message_event', }, wrapperRunId @@ -695,7 +763,7 @@ export function createIngestHandler( const parsedComplete = completeEventSchema.safeParse(ingestEvent.data); if (!parsedComplete.success) { - console.warn('Invalid complete event payload', parsedComplete.error); + console.warn('Invalid complete event payload'); return; } await handleBranchCapture(parsedComplete.data as CompleteEventData, { @@ -723,7 +791,7 @@ export function createIngestHandler( if (eventType === 'interrupted') { const parsedInterrupted = interruptedEventSchema.safeParse(ingestEvent.data); if (!parsedInterrupted.success) { - console.warn('Invalid interrupted event payload', parsedInterrupted.error); + console.warn('Invalid interrupted event payload'); return; } const interruptedError = parsedInterrupted.data.reason ?? 
'User interrupted'; @@ -731,6 +799,7 @@ export function createIngestHandler( wrapperRunId, status: 'interrupted', error: interruptedError, + interruptionSource: parsedInterrupted.data.interruptionSource, }); logger .withFields({ @@ -745,19 +814,23 @@ export function createIngestHandler( if (eventType === 'error') { const parsedError = errorEventSchema.safeParse(ingestEvent.data); if (!parsedError.success) { - console.warn('Invalid error event payload', parsedError.error); + console.warn('Invalid error event payload'); return; } const errorData = parsedError.data; if (errorData.fatal) { const fatalMessage = errorData.error ?? errorData.message ?? 'Fatal error'; + const safeFatalMessage = + errorData.errorSource === 'assistant' + ? classifyAssistantFailureMessage(fatalMessage) + : 'Agent wrapper failed'; broadcastFn({ id: 0 as EventId, execution_id: eventSourceId, session_id: sessionId, stream_event_type: 'cloud.status', payload: JSON.stringify({ - cloudStatus: { type: 'error', message: fatalMessage }, + cloudStatus: { type: 'error', message: safeFatalMessage }, } satisfies CloudStatusData), timestamp, }); @@ -765,6 +838,7 @@ export function createIngestHandler( wrapperRunId, status: 'failed', error: fatalMessage, + errorSource: errorData.errorSource, }); logger .withFields({ @@ -776,21 +850,9 @@ export function createIngestHandler( .warn('Fatal wrapper error event forwarded to session coordinator'); } } - } catch (error) { - logger - .withFields({ - sessionId, - error: error instanceof Error ? error.message : String(error), - }) - .error('Error processing wrapper ingest message'); - ws.send( - JSON.stringify( - createErrorMessage( - 'WS_INTERNAL_ERROR', - error instanceof Error ? 
error.message : 'Failed to process event' - ) - ) - ); + } catch { + logger.withFields({ sessionId }).error('Error processing wrapper ingest message'); + ws.send(JSON.stringify(createErrorMessage('WS_INTERNAL_ERROR', 'Failed to process event'))); } }, diff --git a/services/cloud-agent-next/src/websocket/stream.ts b/services/cloud-agent-next/src/websocket/stream.ts index f7d8d8a379..4f24b07437 100644 --- a/services/cloud-agent-next/src/websocket/stream.ts +++ b/services/cloud-agent-next/src/websocket/stream.ts @@ -25,6 +25,7 @@ import type { } from '../shared/protocol.js'; import type { SlashCommandInfo } from '../shared/slash-commands.js'; import { logger } from '../logger.js'; +import type { CloudMessageFailedPayload } from '../session/message-settlement-outbox.js'; /** * Approximate byte budget per replay round. @@ -88,14 +89,7 @@ export type QueuedMessageSnapshot = { messageId: string; content: string; timestamp: number; - terminalFailure?: { - status: 'failed' | 'interrupted'; - completionSource?: string; - reason?: string; - error?: string; - attempts?: number; - timestamp: number; - }; + terminalFailure?: CloudMessageFailedPayload & { timestamp: number }; }; /** Options for deriving current session state in the `connected` event. 
*/ @@ -283,14 +277,8 @@ export function createStreamHandler( streamEventType: 'cloud.message.failed' as const, timestamp: new Date(msg.terminalFailure.timestamp).toISOString(), data: { - messageId: msg.messageId, - status: msg.terminalFailure.status, - delivery: 'queued', - accepted: false, - completionSource: msg.terminalFailure.completionSource, - reason: msg.terminalFailure.reason, - attempts: msg.terminalFailure.attempts, - error: msg.terminalFailure.error, + ...msg.terminalFailure, + timestamp: undefined, }, }) ); diff --git a/services/cloud-agent-next/test/integration/session/callback-outbox.test.ts b/services/cloud-agent-next/test/integration/session/callback-outbox.test.ts index 5a31f3e031..98c77ad829 100644 --- a/services/cloud-agent-next/test/integration/session/callback-outbox.test.ts +++ b/services/cloud-agent-next/test/integration/session/callback-outbox.test.ts @@ -316,6 +316,9 @@ describe('callback outbox — missing target or queue', () => { reason: 'assistant_error', error: 'provider failed', completionSource: 'assistant_message_event', + failureStage: 'agent_activity', + failureCode: 'assistant_error', + safeFailureMessage: 'Assistant request failed', }); await (instance as any).finalizeIdleBatchCallbackIfReady({ allowWithoutObservedIdle: true, @@ -334,7 +337,12 @@ describe('callback outbox — missing target or queue', () => { expect(queue.captured[0].payload.executionId).toBe(result.secondMessageId); expect(queue.captured[0].payload.idempotencyKey).toBe(result.secondMessageId); expect(queue.captured[0].payload.status).toBe('failed'); - expect(queue.captured[0].payload.errorMessage).toBe('provider failed'); + expect(queue.captured[0].payload.errorMessage).toBe('Assistant request failed'); + expect(queue.captured[0].payload.failure).toEqual({ + stage: 'agent_activity', + code: 'assistant_error', + message: 'Assistant request failed', + }); }); it('includes idempotencyKey set to messageId in callback payload', async () => { diff --git 
a/services/cloud-agent-next/test/integration/session/derive-queued-messages.test.ts b/services/cloud-agent-next/test/integration/session/derive-queued-messages.test.ts index 4fdf3878c8..2aea6c85f4 100644 --- a/services/cloud-agent-next/test/integration/session/derive-queued-messages.test.ts +++ b/services/cloud-agent-next/test/integration/session/derive-queued-messages.test.ts @@ -206,11 +206,15 @@ describe('deriveQueuedMessages (/stream connect catch-up)', () => { content: 'prompt that never reached the wrapper', timestamp: 1700000003000, terminalFailure: { + messageId: MSG_FOLLOWUP, status: 'failed', + delivery: 'queued', + accepted: false, completionSource: 'delivery_failure', reason: 'exhausted', - error: 'Pending message delivery failed', attempts: 4, + error: 'The message failed', + failure: { attempts: 4 }, timestamp: 1700000004000, }, }, diff --git a/services/cloud-agent-next/test/integration/session/execute-directly-failure.test.ts b/services/cloud-agent-next/test/integration/session/execute-directly-failure.test.ts index 51f9538712..0c62b7405d 100644 --- a/services/cloud-agent-next/test/integration/session/execute-directly-failure.test.ts +++ b/services/cloud-agent-next/test/integration/session/execute-directly-failure.test.ts @@ -444,9 +444,14 @@ describe('new-path liveness without executionId', () => { expect(failedPayload).toMatchObject({ messageId: 'msg_018f1e2d3c4bnewlivabcdefgh', status: 'failed', - error: 'Wrapper accepted the message but produced no output', + error: 'Agent wrapper produced no output', delivery: 'sent', accepted: true, + failure: { + stage: 'post_dispatch_no_activity', + code: 'wrapper_no_output', + message: 'Agent wrapper produced no output', + }, }); // Liveness deadlines must be cleared @@ -521,9 +526,14 @@ describe('new-path liveness without executionId', () => { expect(failedPayload).toMatchObject({ messageId: 'msg_018f1e2d3c4bnewpingabcdefg', status: 'failed', - error: 'Wrapper did not respond to liveness ping', + error: 
'Agent wrapper stopped responding', delivery: 'sent', accepted: true, + failure: { + stage: 'post_dispatch_no_activity', + code: 'wrapper_ping_timeout', + message: 'Agent wrapper stopped responding', + }, }); expect(result.wrapperRuntimeState.pingDeadlineAt).toBeUndefined(); diff --git a/services/cloud-agent-next/test/integration/session/message-terminalization.test.ts b/services/cloud-agent-next/test/integration/session/message-terminalization.test.ts index d498a0a7e4..00051748a4 100644 --- a/services/cloud-agent-next/test/integration/session/message-terminalization.test.ts +++ b/services/cloud-agent-next/test/integration/session/message-terminalization.test.ts @@ -581,6 +581,8 @@ describe('message terminalization and stream events', () => { reason: 'missing_assistant_reply', error: 'No reply', completionSource: 'idle_reconciliation', + failureStage: 'agent_activity', + failureCode: 'missing_assistant_reply', }); const db = drizzle(state.storage, { logger: false }); @@ -596,7 +598,12 @@ describe('message terminalization and stream events', () => { const payload = JSON.parse(result.failedEvents[0].payload); expect(payload.messageId).toBe(result.messageId); expect(payload.status).toBe('failed'); - expect(payload.error).toBe('No reply'); + expect(payload.error).toBe('No assistant reply was produced'); + expect(payload.failure).toEqual({ + stage: 'agent_activity', + code: 'missing_assistant_reply', + message: 'No assistant reply was produced', + }); expect(payload.completionSource).toBe('idle_reconciliation'); }); @@ -842,6 +849,8 @@ describe('message terminalization and stream events', () => { kind: 'interrupted', error: 'User interrupted', completionSource: 'interrupt', + failureStage: 'interruption', + failureCode: 'user_interrupt', }); const db = drizzle(state.storage, { logger: false }); @@ -857,7 +866,12 @@ describe('message terminalization and stream events', () => { const payload = JSON.parse(result.failedEvents[0].payload); 
expect(payload.messageId).toBe(result.messageId); expect(payload.status).toBe('interrupted'); - expect(payload.error).toBe('User interrupted'); + expect(payload.error).toBe('The message was interrupted by the user'); + expect(payload.failure).toEqual({ + stage: 'interruption', + code: 'user_interrupt', + message: 'The message was interrupted by the user', + }); expect(payload.completionSource).toBe('interrupt'); }); diff --git a/services/cloud-agent-next/test/integration/session/pending-messages.test.ts b/services/cloud-agent-next/test/integration/session/pending-messages.test.ts index e5a5102977..5162eed2ac 100644 --- a/services/cloud-agent-next/test/integration/session/pending-messages.test.ts +++ b/services/cloud-agent-next/test/integration/session/pending-messages.test.ts @@ -744,10 +744,16 @@ describe('pending session messages', () => { const payload = failedEvent?.payload ?? {}; expect(payload).toMatchObject({ messageId: 'msg_018f1e2d3c4bAAAAAAAAAAAAAA', - error: 'wrapper still unavailable', + error: 'The message could not be delivered', delivery: 'queued', accepted: false, completionSource: 'delivery_failure', + failure: { + stage: 'pre_dispatch', + code: 'delivery_failure_unknown', + attempts: 2, + message: 'The message could not be delivered', + }, }); }); @@ -822,7 +828,12 @@ describe('pending session messages', () => { accepted: false, completionSource: 'interrupt', reason: 'interrupted', - error: 'Pending queued message interrupted by user', + error: 'The message was interrupted by the user', + failure: { + stage: 'interruption', + code: 'user_interrupt', + message: 'The message was interrupted by the user', + }, }, { messageId: 'msg_018f1e2d3c4bIntrBMsgAbCdEf', @@ -831,7 +842,12 @@ describe('pending session messages', () => { accepted: false, completionSource: 'interrupt', reason: 'interrupted', - error: 'Pending queued message interrupted by user', + error: 'The message was interrupted by the user', + failure: { + stage: 'interruption', + code: 
'user_interrupt', + message: 'The message was interrupted by the user', + }, }, ]); }); diff --git a/services/cloud-agent-next/test/integration/session/push-notifications.test.ts b/services/cloud-agent-next/test/integration/session/push-notifications.test.ts index abce75258b..4da25b07cf 100644 --- a/services/cloud-agent-next/test/integration/session/push-notifications.test.ts +++ b/services/cloud-agent-next/test/integration/session/push-notifications.test.ts @@ -179,7 +179,7 @@ describe('CloudAgentSession push notification producer', () => { cliSessionId: KILO_SESSION_ID, executionId: FAILED_MESSAGE_ID, status: 'failed', - body: 'Failed: Provider unavailable', + body: 'Failed: The message failed', }, ]); }); @@ -246,14 +246,14 @@ describe('CloudAgentSession push notification producer', () => { cliSessionId: KILO_SESSION_ID, executionId: RETRY_MESSAGE_ID, status: 'failed', - body: 'Failed: Transient provider failure', + body: 'Failed: The message failed', }, { userId, cliSessionId: KILO_SESSION_ID, executionId: RETRY_MESSAGE_ID, status: 'failed', - body: 'Failed: Transient provider failure', + body: 'Failed: The message failed', }, ]); }); diff --git a/services/cloud-agent-next/test/unit/wrapper/reconnection.test.ts b/services/cloud-agent-next/test/unit/wrapper/reconnection.test.ts index ea704b13d3..a21f5ce652 100644 --- a/services/cloud-agent-next/test/unit/wrapper/reconnection.test.ts +++ b/services/cloud-agent-next/test/unit/wrapper/reconnection.test.ts @@ -1117,7 +1117,91 @@ describe('ingest WS reconnection', () => { event => event.streamEventType === 'kilocode' && event.data.event === 'session.error' ); expect(sessionErrors).toHaveLength(1); - expect(callbacks.onTerminalError).toHaveBeenCalledWith('Model not found: kilo/does-not-exist.'); + expect(callbacks.onTerminalError).toHaveBeenCalledWith({ + error: 'Model not found: kilo/does-not-exist.', + errorSource: 'assistant', + }); + }); + + it.each(['usage_limit_exceeded', 'Too Many Requests'])( + 'surfaces 
explicit assistant request failures as terminal errors: %s', + async errorMessage => { + const kiloClient = createMockKiloClient({ + subscribeEvents: vi.fn().mockResolvedValue({ + stream: createEventStream([ + { + type: 'session.error', + properties: { + sessionID: 'kilo_sess_456', + error: { + name: 'ProviderError', + data: { message: errorMessage }, + }, + }, + }, + ]), + }), + }); + + const manager = createManagerWithClient(kiloClient); + await openConnection(manager); + await vi.advanceTimersByTimeAsync(0); + + expect(callbacks.onTerminalError).toHaveBeenCalledWith({ + error: errorMessage, + errorSource: 'assistant', + }); + } + ); + + it.each(['usage_limit_exceeded', 'Too Many Requests'])( + 'does not terminate the root session for a child session rate-limit error: %s', + async errorMessage => { + const kiloClient = createMockKiloClient({ + subscribeEvents: vi.fn().mockResolvedValue({ + stream: createEventStream([ + { + type: 'session.error', + properties: { + sessionID: 'kilo_child_789', + error: { + name: 'ProviderError', + data: { message: errorMessage }, + }, + }, + }, + ]), + }), + }); + + const manager = createManagerWithClient(kiloClient); + await openConnection(manager); + await vi.advanceTimersByTimeAsync(0); + + expect(callbacks.onTerminalError).not.toHaveBeenCalled(); + } + ); + + it('does not treat incidental rate-limit text on non-error events as terminal', async () => { + const kiloClient = createMockKiloClient({ + subscribeEvents: vi.fn().mockResolvedValue({ + stream: createEventStream([ + { + type: 'session.updated', + properties: { + sessionID: 'kilo_sess_456', + error: 'Too Many Requests', + }, + }, + ]), + }), + }); + + const manager = createManagerWithClient(kiloClient); + await openConnection(manager); + await vi.advanceTimersByTimeAsync(0); + + expect(callbacks.onTerminalError).not.toHaveBeenCalled(); }); it('records explicit Kilo gate results from event properties', async () => { @@ -1337,33 +1421,51 @@ describe('ingest WS reconnection', 
() => { expect(kiloClient.rejectQuestion).not.toHaveBeenCalled(); }); - it('forwards payment-style events and reports terminal errors', async () => { - const kiloClient = createMockKiloClient({ - subscribeEvents: vi.fn().mockResolvedValue({ - stream: createEventStream([ - { - type: 'payment_required', - properties: { error: 'Insufficient credits', sessionID: 'kilo_sess_456' }, - }, - ]), - }), - }); + it.each([ + { eventType: 'payment_required', sessionID: 'kilo_child_789', terminal: false }, + { eventType: 'insufficient_funds', sessionID: 'kilo_child_789', terminal: false }, + { eventType: 'usage_limit_exceeded', sessionID: 'kilo_child_789', terminal: false }, + { eventType: 'payment_required', sessionID: 'kilo_sess_456', terminal: true }, + { eventType: 'insufficient_funds', sessionID: 'kilo_sess_456', terminal: true }, + { eventType: 'usage_limit_exceeded', sessionID: 'kilo_sess_456', terminal: true }, + { eventType: 'payment_required', sessionID: undefined, terminal: true }, + { eventType: 'insufficient_funds', sessionID: undefined, terminal: true }, + { eventType: 'usage_limit_exceeded', sessionID: undefined, terminal: true }, + ])( + 'scopes named terminal event $eventType with session $sessionID', + async ({ eventType, sessionID, terminal }) => { + const kiloClient = createMockKiloClient({ + subscribeEvents: vi.fn().mockResolvedValue({ + stream: createEventStream([ + { + type: eventType, + properties: { + error: 'Insufficient credits', + ...(sessionID ? 
{ sessionID } : {}), + }, + }, + ]), + }), + }); - const manager = createManagerWithClient(kiloClient); - const ws = await openConnection(manager); - await vi.advanceTimersByTimeAsync(0); + const manager = createManagerWithClient(kiloClient); + const ws = await openConnection(manager); + await vi.advanceTimersByTimeAsync(0); - const paymentEvents = parseSentMessages(ws).filter( - event => event.streamEventType === 'kilocode' && event.data.event === 'payment_required' - ); - expect(paymentEvents).toHaveLength(1); - expect(paymentEvents[0].data).toMatchObject({ - event: 'payment_required', - error: 'Insufficient credits', - }); - expect(callbacks.onTerminalError).toHaveBeenCalledWith('Insufficient credits'); - expect(callbacks.onDisconnect).not.toHaveBeenCalled(); - }); + const terminalEvents = parseSentMessages(ws).filter( + event => event.streamEventType === 'kilocode' && event.data.event === eventType + ); + expect(terminalEvents).toHaveLength(1); + expect(callbacks.onTerminalError).toHaveBeenCalledTimes(terminal ? 
1 : 0); + if (terminal) { + expect(callbacks.onTerminalError).toHaveBeenCalledWith({ + error: 'Insufficient credits', + errorSource: 'assistant', + }); + } + expect(callbacks.onDisconnect).not.toHaveBeenCalled(); + } + ); // ------------------------------------------------------------------------- // Test: close() clears event buffer to prevent stale events leaking diff --git a/services/cloud-agent-next/test/unit/wrapper/utils.test.ts b/services/cloud-agent-next/test/unit/wrapper/utils.test.ts index 6ddf9e69e2..573a418a2c 100644 --- a/services/cloud-agent-next/test/unit/wrapper/utils.test.ts +++ b/services/cloud-agent-next/test/unit/wrapper/utils.test.ts @@ -2,7 +2,7 @@ import { mkdtemp, readFile, rm, writeFile } from 'fs/promises'; import { tmpdir } from 'os'; import { join } from 'path'; import { afterEach, describe, expect, it } from 'vitest'; -import { git, runProcess } from '../../../wrapper/src/utils.js'; +import { createSafeProcessDiagnostic, git, runProcess } from '../../../wrapper/src/utils.js'; const createdRepos: string[] = []; @@ -16,12 +16,72 @@ async function createRepo(): Promise { } describe('runProcess', () => { - it('runs non-git commands with captured output', async () => { + it('runs non-git commands with captured output and elapsed time', async () => { const result = await runProcess(process.execPath, ['-e', 'console.log("hello")'], { timeoutMs: 5_000, }); - expect(result).toEqual({ stdout: 'hello\n', stderr: '', exitCode: 0 }); + expect(result.stdout).toBe('hello\n'); + expect(result.stderr).toBe(''); + expect(result.exitCode).toBe(0); + expect(result.elapsedMs).toBeGreaterThanOrEqual(0); + }); + + it('bounds output while retaining the most recent tail', async () => { + const result = await runProcess( + process.execPath, + ['-e', 'process.stderr.write("a".repeat(20000) + "latest-error")'], + { timeoutMs: 5_000, maxOutputBytes: 1_024 } + ); + + expect(Buffer.byteLength(result.stderr)).toBeLessThanOrEqual(1_024); + 
expect(result.stderr).toContain('latest-error'); + expect(result.stderrTruncated).toBe(true); + }); +}); + +describe('createSafeProcessDiagnostic', () => { + it('returns only allowlisted process metadata', () => { + const sensitiveValues = [ + 'bare-unlabeled-token', + 'private-file-content', + 'url-secret', + 'bearer-secret', + 'cookie-secret', + 'env-secret', + ]; + const detail = createSafeProcessDiagnostic({ + stdout: sensitiveValues.slice(0, 2).join('\n'), + stderr: [ + 'https://user:url-secret@example.com/repo.git', + 'Authorization: Bearer bearer-secret', + 'Cookie: session=cookie-secret', + 'SECRET_VALUE=env-secret', + ].join('\n'), + exitCode: 2, + elapsedMs: 42, + stdoutTruncated: true, + }); + + expect(detail).toBe('termination nonzero exit, exit code 2, elapsed 42ms, output truncated'); + for (const sensitiveValue of sensitiveValues) expect(detail).not.toContain(sensitiveValue); + }); + + it.each([ + { + result: { stdout: '', stderr: '', exitCode: 124, terminationReason: 'timeout' as const }, + expected: 'termination timeout', + }, + { + result: { stdout: '', stderr: '', exitCode: 124, terminationReason: 'abort' as const }, + expected: 'termination abort', + }, + { + result: { stdout: '', stderr: '', exitCode: 0, elapsedMs: 7 }, + expected: 'termination completed, elapsed 7ms', + }, + ])('reports structured termination metadata', ({ result, expected }) => { + expect(createSafeProcessDiagnostic(result)).toBe(expected); }); }); diff --git a/services/cloud-agent-next/wrapper/src/bootstrap-error.test.ts b/services/cloud-agent-next/wrapper/src/bootstrap-error.test.ts new file mode 100644 index 0000000000..d9a6ccff34 --- /dev/null +++ b/services/cloud-agent-next/wrapper/src/bootstrap-error.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from 'bun:test'; +import { kiloServerStartupError } from './bootstrap-error'; + +describe('kiloServerStartupError', () => { + it('classifies startup failures without exposing raw secrets', () => { + const error = 
kiloServerStartupError(); + + expect(error).toMatchObject({ + code: 'KILO_SERVER_FAILED', + message: 'Failed to start Kilo server', + retryable: true, + }); + expect(error.detail).toBeUndefined(); + expect(error.message).not.toContain('startup-secret'); + }); +}); diff --git a/services/cloud-agent-next/wrapper/src/bootstrap-error.ts b/services/cloud-agent-next/wrapper/src/bootstrap-error.ts new file mode 100644 index 0000000000..6be6d89fe7 --- /dev/null +++ b/services/cloud-agent-next/wrapper/src/bootstrap-error.ts @@ -0,0 +1,53 @@ +import type { WorkspaceFailureSubtype } from '../../src/shared/wrapper-bootstrap.js'; + +export type WrapperBootstrapErrorCode = 'WORKSPACE_SETUP_FAILED' | 'KILO_SERVER_FAILED'; + +export class WrapperBootstrapError extends Error { + readonly code: WrapperBootstrapErrorCode; + readonly subtype?: WorkspaceFailureSubtype; + readonly detail?: string; + readonly retryable: boolean; + + constructor(options: { + code: WrapperBootstrapErrorCode; + subtype?: WorkspaceFailureSubtype; + message: string; + detail?: string; + retryable: boolean; + }) { + super(options.message); + this.name = 'WrapperBootstrapError'; + this.code = options.code; + this.subtype = options.subtype; + this.detail = options.detail; + this.retryable = options.retryable; + } +} + +export function workspaceBootstrapError( + subtype: WorkspaceFailureSubtype, + message: string, + detail?: string, + retryable = true +): WrapperBootstrapError { + return new WrapperBootstrapError({ + code: 'WORKSPACE_SETUP_FAILED', + subtype, + message, + detail, + retryable, + }); +} + +export function kiloServerBootstrapError(message: string, detail?: string): WrapperBootstrapError { + return new WrapperBootstrapError({ + code: 'KILO_SERVER_FAILED', + message, + detail, + retryable: true, + }); +} + +export function kiloServerStartupError(): WrapperBootstrapError { + return kiloServerBootstrapError('Failed to start Kilo server'); +} diff --git 
a/services/cloud-agent-next/wrapper/src/connection.ts b/services/cloud-agent-next/wrapper/src/connection.ts index 3686c2e9e8..12f20e444b 100644 --- a/services/cloud-agent-next/wrapper/src/connection.ts +++ b/services/cloud-agent-next/wrapper/src/connection.ts @@ -162,9 +162,14 @@ export type ConnectionConfig = { kiloClient: WrapperKiloClient; }; +export type AssistantTerminalError = { + error: string; + errorSource: 'assistant'; +}; + export type ConnectionCallbacks = { - /** Called when a terminal error is detected */ - onTerminalError: (reason: string) => void; + /** Called when a terminal assistant request error is detected */ + onTerminalError: (error: AssistantTerminalError) => void; /** Called when a command is received from DO */ onCommand: (cmd: WrapperCommand) => void; /** Called when the connection unexpectedly closes */ @@ -737,7 +742,23 @@ export function createConnectionManager( * Check if an event represents a terminal error (payment/billing/quota/model resolution). */ function isTerminalError(eventType: string, properties: Record): boolean { - if (eventType === 'payment_required' || eventType === 'insufficient_funds') { + const eventSessionID = + typeof properties.sessionID === 'string' ? 
properties.sessionID : undefined; + if (eventSessionID && eventSessionID !== state.currentSession?.kiloSessionId) { + return false; + } + if ( + eventType === 'session.error' && + properties.sessionID !== state.currentSession?.kiloSessionId + ) { + return false; + } + + if ( + eventType === 'payment_required' || + eventType === 'insufficient_funds' || + eventType === 'usage_limit_exceeded' + ) { return true; } const error = properties.error; @@ -749,7 +770,10 @@ export function createConnectionManager( normalizedError.includes('credit') || normalizedError.includes('balance') || normalizedError.includes('quota') || - (eventType === 'session.error' && normalizedError.includes('model not found')) + (eventType === 'session.error' && + (normalizedError.includes('usage_limit_exceeded') || + normalizedError.includes('too many requests') || + normalizedError.includes('model not found'))) ) { return true; } @@ -1001,7 +1025,10 @@ export function createConnectionManager( // Terminal error detection if (isTerminalError(eventType, properties)) { - callbacks.onTerminalError(getTerminalErrorText(eventType, properties)); + callbacks.onTerminalError({ + error: getTerminalErrorText(eventType, properties), + errorSource: 'assistant', + }); return; } diff --git a/services/cloud-agent-next/wrapper/src/lifecycle.test.ts b/services/cloud-agent-next/wrapper/src/lifecycle.test.ts index 234821f6be..9ad2363204 100644 --- a/services/cloud-agent-next/wrapper/src/lifecycle.test.ts +++ b/services/cloud-agent-next/wrapper/src/lifecycle.test.ts @@ -56,4 +56,67 @@ describe('wrapper lifecycle drain races', () => { expect(events.map(event => event.streamEventType)).toContain('complete'); }); + + it('waits for three seconds of stable root idle before completing', async () => { + const state = new WrapperState(); + const events: IngestEvent[] = []; + state.bindSession(sessionContext); + state.setSendToIngestFn(event => events.push(event)); + state.acceptMessage('message-1', { + autoCommit: false, + 
condenseOnComplete: false, + }); + const lifecycle = createLifecycleManager( + { workspacePath: '/tmp' }, + { + state, + kiloClient: {} as WrapperKiloClient, + closeConnections: async () => {}, + isConnected: () => true, + reconnectEventSubscription: () => {}, + } + ); + + lifecycle.onSessionIdle(); + await wait(2_950); + expect(events.map(event => event.streamEventType)).not.toContain('complete'); + + await wait(150); + expect(events.map(event => event.streamEventType)).toContain('complete'); + }); + + it('requires a fresh stable idle interval after root activity', async () => { + const state = new WrapperState(); + const events: IngestEvent[] = []; + state.bindSession(sessionContext); + state.setSendToIngestFn(event => events.push(event)); + state.acceptMessage('message-1', { + autoCommit: false, + condenseOnComplete: false, + }); + const lifecycle = createLifecycleManager( + { workspacePath: '/tmp' }, + { + state, + kiloClient: {} as WrapperKiloClient, + closeConnections: async () => {}, + isConnected: () => true, + reconnectEventSubscription: () => {}, + } + ); + + lifecycle.onSessionIdle(); + await wait(2_900); + lifecycle.onRootSessionActivity(); + + await wait(200); + expect(events.map(event => event.streamEventType)).not.toContain('complete'); + + lifecycle.onSessionIdle(); + await wait(2_900); + expect(events.map(event => event.streamEventType)).not.toContain('complete'); + + await wait(500); + expect(events.filter(event => event.streamEventType === 'complete')).toHaveLength(1); + }, 10_000); }); diff --git a/services/cloud-agent-next/wrapper/src/main.ts b/services/cloud-agent-next/wrapper/src/main.ts index 1040da4545..82a6765cc5 100644 --- a/services/cloud-agent-next/wrapper/src/main.ts +++ b/services/cloud-agent-next/wrapper/src/main.ts @@ -22,6 +22,11 @@ import { bindSessionContext, createServer } from './server.js'; import { openKiloGlobalFeed } from './global-feed.js'; import { createGlobalFeedManager, type SessionBoundFeedPolicy } from 
'./global-feed-manager.js'; import { logToFile } from './utils.js'; +import { + kiloServerBootstrapError, + kiloServerStartupError, + WrapperBootstrapError, +} from './bootstrap-error.js'; import type { WrapperCommand } from '../../src/shared/protocol.js'; import type { WrapperSessionReadyRequest, @@ -286,7 +291,7 @@ async function main() { ); } catch (error) { logToFile( - `post-bootstrap kilo session lookup end runtime=${runtime} outcome=error expectedSessionId=${expectedSessionId} elapsedMs=${Date.now() - lookupStartedAt} error=${error instanceof Error ? error.message : String(error)}` + `post-bootstrap kilo session lookup end runtime=${runtime} outcome=error expectedSessionId=${expectedSessionId} elapsedMs=${Date.now() - lookupStartedAt}` ); throw error; } @@ -300,17 +305,11 @@ async function main() { } return openKiloGlobalFeed({ state, kiloClient }); }, - onConnectionError: error => { - logToFile( - `kilo global feed failed: ${error instanceof Error ? error.message : String(error)}` - ); + onConnectionError: () => { + logToFile('kilo global feed failed'); }, - onOpenError: error => { - logToFile( - `failed to start kilo global feed: ${ - error instanceof Error ? error.message : String(error) - }` - ); + onOpenError: () => { + logToFile('failed to start kilo global feed'); }, }); @@ -363,10 +362,10 @@ async function main() { logToFile(`kilo server started at ${realKiloServer.url}`); nextKiloClient = createWrapperKiloClient(result.client, realKiloServer.url, workspacePath); closeKiloServer = () => realKiloServer.close(); - } catch (error) { - const msg = error instanceof Error ? 
error.message : String(error); - logToFile(`failed to start kilo server: ${msg}`); - throw new Error(`Failed to start kilo server: ${msg}`); + } catch { + const startupError = kiloServerStartupError(); + logToFile(`failed to start kilo server: ${startupError.message}`); + throw startupError; } if (expectedSessionId) { @@ -393,11 +392,11 @@ async function main() { state, { kiloClient: nextKiloClient }, { - onTerminalError: (reason: string) => { - logToFile(`terminal error: ${reason}`); + onTerminalError: terminalError => { + logToFile(`terminal error: ${terminalError.error}`); state.sendToIngest({ streamEventType: 'error', - data: { error: reason, fatal: true }, + data: { ...terminalError, fatal: true }, timestamp: new Date().toISOString(), }); const session = state.currentSession; @@ -569,7 +568,7 @@ async function main() { await startKiloRuntime(request.workspace.workspacePath, request.kiloSessionId); if (!kiloClient) { - throw new Error('Kilo server did not start'); + throw kiloServerBootstrapError('Kilo server did not start'); } logToFile( `session/ready complete kiloSessionId=${request.kiloSessionId} elapsedMs=${Date.now() - readyStartedAt}` @@ -587,16 +586,26 @@ async function main() { }, }; } catch (error) { - const message = error instanceof Error ? error.message : String(error); + const bootstrapError = + error instanceof WrapperBootstrapError + ? error + : new WrapperBootstrapError({ + code: 'WORKSPACE_SETUP_FAILED', + subtype: 'workspace_setup_unknown', + message: 'Workspace setup failed', + retryable: true, + }); logToFile( - `session/ready failed kiloSessionId=${request.kiloSessionId} elapsedMs=${Date.now() - readyStartedAt} error=${message}` + `session/ready failed kiloSessionId=${request.kiloSessionId} elapsedMs=${Date.now() - readyStartedAt} code=${bootstrapError.code} subtype=${bootstrapError.subtype ?? '(none)'} error=${bootstrapError.message}${bootstrapError.detail ? 
` detail=${bootstrapError.detail}` : ''}` ); return { status: 'error', error: { - code: message.includes('Kilo server') ? 'KILO_SERVER_FAILED' : 'WORKSPACE_SETUP_FAILED', - message, - retryable: true, + code: bootstrapError.code, + ...(bootstrapError.subtype ? { subtype: bootstrapError.subtype } : {}), + message: bootstrapError.message, + ...(bootstrapError.detail ? { detail: bootstrapError.detail } : {}), + retryable: bootstrapError.retryable, }, }; } finally { @@ -635,7 +644,10 @@ async function main() { // Send interrupted event if connected state.sendToIngest({ streamEventType: 'interrupted', - data: { reason: `Container shutdown: ${signal}` }, + data: { + reason: `Container shutdown: ${signal}`, + interruptionSource: 'container_shutdown', + }, timestamp: new Date().toISOString(), }); @@ -672,8 +684,8 @@ async function main() { if (closeKiloServer) { logToFile('kilo server closed'); } - } catch (err) { - logToFile(`kilo server close error: ${err instanceof Error ? err.message : String(err)}`); + } catch { + logToFile('kilo server close failed'); } // Stop HTTP server @@ -692,12 +704,11 @@ async function main() { // --------------------------------------------------------------------------- // Crash handlers — best-effort log upload on unexpected crashes // --------------------------------------------------------------------------- - function handleCrash(label: string, error: unknown): void { + function handleCrash(label: string): void { if (isShuttingDown) return; - const message = error instanceof Error ? (error.stack ?? 
error.message) : String(error); - logToFile(`${label}: ${message}`); - console.error(`Wrapper ${label}:`, error); + logToFile(label); + console.error(`Wrapper ${label}`); const uploader = state.logUploader; if (uploader) { @@ -711,12 +722,12 @@ async function main() { } } - process.on('uncaughtException', err => handleCrash('uncaught exception', err)); - process.on('unhandledRejection', reason => handleCrash('unhandled rejection', reason)); + process.on('uncaughtException', () => handleCrash('uncaught exception')); + process.on('unhandledRejection', () => handleCrash('unhandled rejection')); } -main().catch(err => { - logToFile(`fatal error: ${err instanceof Error ? err.message : String(err)}`); - console.error('Wrapper fatal error:', err); +main().catch(() => { + logToFile('fatal error'); + console.error('Wrapper fatal error'); process.exit(1); }); diff --git a/services/cloud-agent-next/wrapper/src/restore-session.test.ts b/services/cloud-agent-next/wrapper/src/restore-session.test.ts index a7ce55efed..fa103672d5 100644 --- a/services/cloud-agent-next/wrapper/src/restore-session.test.ts +++ b/services/cloud-agent-next/wrapper/src/restore-session.test.ts @@ -257,13 +257,14 @@ describe('restoreSession', () => { } }); - it('returns download error when fetch throws', async () => { - globalThis.fetch = asFetch(() => Promise.reject(new Error('network failure'))); + it('returns a fixed download error when fetch throws', async () => { + globalThis.fetch = asFetch(() => Promise.reject(new Error('network token secret'))); const result = await restoreSession(SESSION_ID, workspace); expect(result.ok).toBe(false); if (!result.ok) { - expect(result.error).toContain('network failure'); + expect(result.error).toBe('snapshot download failed'); + expect(result.error).not.toContain('network token secret'); expect(result.code).toBeNull(); expect(result.step).toBe('download'); } @@ -346,7 +347,9 @@ describe('restoreSession', () => { expect(result.ok).toBe(false); if (!result.ok) { 
expect(result.step).toBe('import'); + expect(result.subtype).toBe('kilo_import_failed'); expect(result.error).toContain('kilo import failed'); + expect(result.detail).toContain('exit code 1'); } }); @@ -359,7 +362,9 @@ describe('restoreSession', () => { expect(result.ok).toBe(false); if (!result.ok) { expect(result.step).toBe('import'); + expect(result.subtype).toBe('kilo_import_timeout'); expect(result.error).toContain('kilo import timed out'); + expect(result.detail).toContain('timeout'); } expect(fs.existsSync(TMP_PATH)).toBe(false); }); diff --git a/services/cloud-agent-next/wrapper/src/restore-session.ts b/services/cloud-agent-next/wrapper/src/restore-session.ts index e48fc40477..77e232e6f4 100644 --- a/services/cloud-agent-next/wrapper/src/restore-session.ts +++ b/services/cloud-agent-next/wrapper/src/restore-session.ts @@ -1,7 +1,8 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; -import { logToFile, runProcess } from './utils.js'; +import type { WorkspaceFailureSubtype } from '../../src/shared/wrapper-bootstrap.js'; +import { createSafeProcessDiagnostic, logToFile, runProcess } from './utils.js'; // --------------------------------------------------------------------------- // Types @@ -14,7 +15,14 @@ export type RestoreResult = imported: true; diffs: { applied: number; skipped: number; total: number }; } - | { ok: false; error: string; code: number | null; step: 'download' | 'import' | 'diffs' }; + | { + ok: false; + error: string; + code: number | null; + step: 'download' | 'import' | 'diffs'; + subtype?: WorkspaceFailureSubtype; + detail?: string; + }; type SnapshotDiff = { file: string; @@ -43,9 +51,18 @@ function log(msg: string): void { function fail( error: string, code: number | null, - step: Extract['step'] + step: Extract['step'], + subtype?: WorkspaceFailureSubtype, + detail?: string ): RestoreResult { - return { ok: false, error, code, step }; + return { + ok: false, + error, + code, + step, + ...(subtype ? 
{ subtype } : {}), + ...(detail ? { detail } : {}), + }; } function tryUnlink(filePath: string): void { @@ -360,23 +377,21 @@ export async function extractDiffs(snapshotPath: string): Promise(); @@ -432,8 +447,7 @@ function applyPatch(workspacePath: string, diff: SnapshotDiff): boolean { stderr: 'pipe', }); if (proc.exitCode === 0) return true; - const stderr = new TextDecoder().decode(proc.stderr).trim(); - log(`git apply failed file=${diff.file} stderr=${stderr}`); + log(`git apply failed file=${diff.file} exitCode=${proc.exitCode}`); return false; } finally { fs.rmSync(dir, { recursive: true, force: true }); @@ -463,9 +477,8 @@ export async function restoreSession( let token: string | undefined; try { token = resolveKilocodeToken(); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return fail(`failed to read KILOCODE_TOKEN_FILE: ${message}`, null, 'download'); + } catch { + return fail('failed to read KILOCODE_TOKEN_FILE', null, 'download'); } if (!ingestUrl || !token) { @@ -519,10 +532,9 @@ export async function restoreSession( 'download' ); } - } catch (err) { + } catch { tryUnlink(tmpPath); - const message = err instanceof Error ? err.message : String(err); - return fail(message, null, 'download'); + return fail('snapshot download failed', null, 'download'); } } else { log(`using provided file=${filePath}`); @@ -531,10 +543,8 @@ export async function restoreSession( log( `provided snapshot metadata inspected status=${providedInfoValidation.validation} expectedKiloSessionId=${kiloSessionId} snapshotInfoId=${providedInfoValidation.infoId ?? '(missing)'} idMatchesExpected=${providedInfoValidation.infoId === kiloSessionId}` ); - } catch (err) { - log( - `provided snapshot metadata inspection failed expectedKiloSessionId=${kiloSessionId} error=${err instanceof Error ? 
err.message : String(err)}` - ); + } catch { + log(`provided snapshot metadata inspection failed expectedKiloSessionId=${kiloSessionId}`); } } @@ -555,14 +565,26 @@ export async function restoreSession( log( `kilo import finished outcome=timeout kiloSessionId=${kiloSessionId} input=${downloaded ? 'downloaded' : 'provided'} cwd=${workspacePath} home=${process.env.HOME ?? '(unset)'} elapsedMs=${importElapsedMs} timeoutMs=${importTimeoutMs}` ); - return fail(`kilo import timed out after ${importTimeoutMs}ms`, null, 'import'); + return fail( + `kilo import timed out after ${importTimeoutMs}ms`, + null, + 'import', + 'kilo_import_timeout', + createSafeProcessDiagnostic(importResult) + ); } if (importResult.exitCode !== 0) { log( `kilo import finished outcome=error exitCode=${importResult.exitCode} kiloSessionId=${kiloSessionId} input=${downloaded ? 'downloaded' : 'provided'} cwd=${workspacePath} home=${process.env.HOME ?? '(unset)'} elapsedMs=${importElapsedMs}` ); - return fail(`kilo import failed exitCode=${importResult.exitCode}`, null, 'import'); + return fail( + `kilo import failed exitCode=${importResult.exitCode}`, + null, + 'import', + 'kilo_import_failed', + createSafeProcessDiagnostic(importResult) + ); } log( `kilo import finished outcome=ok exitCode=${importResult.exitCode} kiloSessionId=${kiloSessionId} input=${downloaded ? 'downloaded' : 'provided'} cwd=${workspacePath} home=${process.env.HOME ?? 
'(unset)'} elapsedMs=${importElapsedMs}` diff --git a/services/cloud-agent-next/wrapper/src/server.test.ts b/services/cloud-agent-next/wrapper/src/server.test.ts index 62a824c2b8..0610773281 100644 --- a/services/cloud-agent-next/wrapper/src/server.test.ts +++ b/services/cloud-agent-next/wrapper/src/server.test.ts @@ -5,9 +5,11 @@ import { bindSessionContext, createFetchHandler, createServer, + createSessionReadyHandler, resolvePtyClientClose, type WrapperServer, } from './server'; +import { isWrapperSessionReadyErrorResponse } from '../../src/shared/wrapper-bootstrap'; import type { WrapperKiloClient, WrapperPty, WrapperPtySize } from './kilo-api'; type PtyCall = { @@ -104,6 +106,102 @@ afterEach(async () => { await Promise.all(servers.splice(0).map(server => server.stop())); }); +describe('session readiness errors', () => { + it('forwards validated workspace subtype and safe diagnostic fields', async () => { + const { fetchHandler } = createTestFetch(); + const handler = createSessionReadyHandler({ + state: new WrapperState(), + kiloClient: {} as WrapperKiloClient, + openConnection: async () => {}, + closeConnection: async () => {}, + setAborted: () => {}, + resetLifecycle: () => {}, + readySession: async () => ({ + status: 'error', + error: { + code: 'WORKSPACE_SETUP_FAILED', + subtype: 'git_clone_timeout', + message: 'Repository clone timed out', + detail: 'termination timeout, elapsed 120000ms, output truncated', + retryable: true, + }, + }), + }); + const request = new Request('http://wrapper.test/session/ready', { + method: 'POST', + body: JSON.stringify({ + agentSessionId: 'agent_00000000-0000-0000-0000-000000000000', + userId: 'user_test', + sandboxId: 'sandbox_test', + kiloSessionId: 'kilo_test', + workspace: { + workspacePath: '/workspace/repo', + sessionHome: '/home/session', + branchName: 'main', + }, + materialized: { env: {} }, + session: { + ingestUrl: 'wss://example.test/ingest', + workerAuthToken: 'secret', + wrapperRunId: 'wr_test', + 
wrapperGeneration: 1, + wrapperConnectionId: 'conn_test', + }, + }), + }); + + const response = await handler(request); + const body: unknown = await response.json(); + + expect(body).toMatchObject({ + error: 'WORKSPACE_SETUP_FAILED', + subtype: 'git_clone_timeout', + message: 'Repository clone timed out', + detail: 'termination timeout, elapsed 120000ms, output truncated', + retryable: true, + }); + expect(isWrapperSessionReadyErrorResponse(body)).toBe(true); + expect( + isWrapperSessionReadyErrorResponse({ error: 'WORKSPACE_SETUP_FAILED', message: 'old' }) + ).toBe(true); + expect( + isWrapperSessionReadyErrorResponse({ + error: 'WORKSPACE_SETUP_FAILED', + subtype: 'not_allowed', + message: 'bad', + }) + ).toBe(false); + expect( + isWrapperSessionReadyErrorResponse({ + error: 'WORKSPACE_SETUP_FAILED', + message: 'm'.repeat(4_097), + }) + ).toBe(false); + expect( + isWrapperSessionReadyErrorResponse({ + error: 'WORKSPACE_SETUP_FAILED', + message: 'bounded', + detail: 'd'.repeat(8_193), + }) + ).toBe(false); + expect( + isWrapperSessionReadyErrorResponse({ + error: 'WORKSPACE_SETUP_FAILED', + message: 'bounded', + retryable: 'false', + }) + ).toBe(false); + expect( + isWrapperSessionReadyErrorResponse({ + error: 'WORKSPACE_SETUP_FAILED', + message: 'bounded', + wrapperRunId: 42, + }) + ).toBe(false); + expect(fetchHandler).toBeDefined(); + }); +}); + describe('wrapper health', () => { it('reports leased physical wrapper identity separately from session identity', async () => { const { fetchHandler } = createTestFetch(); diff --git a/services/cloud-agent-next/wrapper/src/server.ts b/services/cloud-agent-next/wrapper/src/server.ts index 4b83f1d81b..cd8fb5ef28 100644 --- a/services/cloud-agent-next/wrapper/src/server.ts +++ b/services/cloud-agent-next/wrapper/src/server.ts @@ -988,6 +988,8 @@ export function createSessionReadyHandler(deps: ServerDependencies) { { error: result.error.code, message: result.error.message, + ...(result.error.subtype ? 
{ subtype: result.error.subtype } : {}), + ...(result.error.detail ? { detail: result.error.detail } : {}), ...(result.error.retryable !== undefined ? { retryable: result.error.retryable } : {}), ...(result.error.wrapperRunId ? { wrapperRunId: result.error.wrapperRunId } : {}), }, diff --git a/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts b/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts index 4095bd39b3..c60befd0b6 100644 --- a/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts +++ b/services/cloud-agent-next/wrapper/src/session-bootstrap.test.ts @@ -235,8 +235,188 @@ describe('prepareWrapperBootstrapWorkspace', () => { }); }); - it('still fails fresh cold bootstraps when a setup command fails', async () => { + it.each([ + { + name: 'clone timeout', + stage: 'clone', + result: { stdout: '', stderr: '', exitCode: 124, terminationReason: 'timeout' as const }, + subtype: 'git_clone_timeout', + }, + { + name: 'clone authentication failure', + stage: 'clone', + result: { + stdout: '', + stderr: 'fatal: Authentication failed for credentialed repository', + exitCode: 128, + }, + subtype: 'git_authentication_failed', + }, + { + name: 'clone network failure', + stage: 'clone', + result: { stdout: '', stderr: 'fatal: the remote end hung up unexpectedly', exitCode: 128 }, + subtype: 'git_network_failed', + }, + { + name: 'clone corrupt pack', + stage: 'clone', + result: { stdout: '', stderr: 'fatal: pack has bad object at offset', exitCode: 128 }, + subtype: 'git_pack_corrupt', + }, + { + name: 'clone storage exhaustion', + stage: 'clone', + result: { stdout: '', stderr: 'fatal: No space left on device', exitCode: 128 }, + subtype: 'sandbox_storage_full', + }, + { + name: 'checkout timeout', + stage: 'checkout', + result: { stdout: '', stderr: '', exitCode: 124, terminationReason: 'timeout' as const }, + subtype: 'git_checkout_timeout', + }, + { + name: 'checkout conflict', + stage: 'checkout', + result: { + stdout: '', + 
stderr: 'untracked working tree files would be overwritten by checkout', + exitCode: 1, + }, + subtype: 'git_checkout_conflict', + }, + ])('classifies $name without exposing credentials', async ({ stage, result, subtype }) => { const request = makeRequest(tmpDir); + request.materialized.setupCommands = []; + const deps: WrapperBootstrapDeps = { + git: async args => { + if (args[0] === 'clone') { + if (stage === 'clone') return result; + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { recursive: true }); + } + if (args[0] === 'rev-parse') return { stdout: 'main', stderr: '', exitCode: 0 }; + if (args[0] === 'checkout' && stage === 'checkout') return result; + return { stdout: '', stderr: '', exitCode: 0 }; + }, + restoreSession: async () => ({ + ok: true, + downloaded: false, + imported: true, + diffs: { applied: 0, skipped: 0, total: 0 }, + }), + }; + + expect(prepareWrapperBootstrapWorkspace(request, undefined, deps)).rejects.toMatchObject({ + code: 'WORKSPACE_SETUP_FAILED', + subtype, + retryable: true, + }); + }); + + it('keeps strict-branch fetch timeouts retryable', async () => { + const request = makeRequest(tmpDir); + request.workspace.strictBranch = true; + request.materialized.setupCommands = []; + + expect( + prepareWrapperBootstrapWorkspace(request, undefined, { + git: async args => { + if (args[0] === 'clone') { + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { + recursive: true, + }); + return { stdout: '', stderr: '', exitCode: 0 }; + } + if (args[0] === 'fetch') { + return { + stdout: '', + stderr: '', + exitCode: 124, + terminationReason: 'timeout', + }; + } + return { stdout: '', stderr: '', exitCode: 0 }; + }, + restoreSession: async () => ({ + ok: true, + downloaded: false, + imported: true, + diffs: { applied: 0, skipped: 0, total: 0 }, + }), + }) + ).rejects.toMatchObject({ + subtype: 'git_checkout_timeout', + retryable: true, + }); + }); + + it('keeps strict-branch reference probe timeouts 
retryable', async () => { + const request = makeRequest(tmpDir); + request.workspace.strictBranch = true; + request.materialized.setupCommands = []; + + expect( + prepareWrapperBootstrapWorkspace(request, undefined, { + git: async args => { + if (args[0] === 'clone') { + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { + recursive: true, + }); + } + if (args[0] === 'rev-parse') { + return { + stdout: '', + stderr: '', + exitCode: 124, + terminationReason: 'timeout', + }; + } + return { stdout: '', stderr: '', exitCode: 0 }; + }, + restoreSession: async () => ({ + ok: true, + downloaded: false, + imported: true, + diffs: { applied: 0, skipped: 0, total: 0 }, + }), + }) + ).rejects.toMatchObject({ + subtype: 'git_checkout_timeout', + retryable: true, + }); + }); + + it('classifies strict missing branches', async () => { + const request = makeRequest(tmpDir); + request.workspace.strictBranch = true; + request.materialized.setupCommands = []; + expect( + prepareWrapperBootstrapWorkspace(request, undefined, { + git: async args => { + if (args[0] === 'clone') { + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { + recursive: true, + }); + } + return { stdout: '', stderr: '', exitCode: args[0] === 'rev-parse' ? 
1 : 0 }; + }, + restoreSession: async () => ({ + ok: true, + downloaded: false, + imported: true, + diffs: { applied: 0, skipped: 0, total: 0 }, + }), + }) + ).rejects.toMatchObject({ + subtype: 'git_branch_missing', + retryable: false, + }); + }); + + it('still fails fresh cold bootstraps without exposing setup command or output', async () => { + const request = makeRequest(tmpDir); + request.materialized.setupCommands = ['private-tool --token argv-secret']; const deps: WrapperBootstrapDeps = { git: async args => { if (args[0] === 'clone') { @@ -247,7 +427,19 @@ describe('prepareWrapperBootstrapWorkspace', () => { } return { stdout: '', stderr: '', exitCode: 0 }; }, - runProcess: async () => ({ stdout: '', stderr: 'install failed', exitCode: 1 }), + runProcess: async () => ({ + stdout: 'private-file-content', + stderr: [ + 'bare-unlabeled-token', + 'https://user:url-secret@example.com/repo.git', + 'Authorization: Bearer bearer-secret', + 'Cookie: session=cookie-secret', + 'SECRET_VALUE=env-secret', + ].join('\n'), + exitCode: 1, + elapsedMs: 17, + stderrTruncated: true, + }), restoreSession: async () => ({ ok: true, downloaded: false, @@ -267,7 +459,76 @@ describe('prepareWrapperBootstrapWorkspace', () => { throw new Error('Expected setup command failure'); } - expect(setupError.message).toContain('Setup command failed: pnpm install (exit code 1)'); + expect(setupError).toMatchObject({ + code: 'WORKSPACE_SETUP_FAILED', + subtype: 'setup_command_failed', + retryable: true, + }); + expect(setupError.message).toBe('Setup command 1 failed'); + expect(setupError).toMatchObject({ + detail: 'termination nonzero exit, exit code 1, elapsed 17ms, output truncated', + }); + const projectedError = JSON.stringify(setupError); + for (const sensitiveValue of [ + 'private-tool', + 'argv-secret', + 'private-file-content', + 'bare-unlabeled-token', + 'url-secret', + 'bearer-secret', + 'cookie-secret', + 'env-secret', + ]) { + expect(projectedError).not.toContain(sensitiveValue); + 
} + }); + + it('classifies setup command timeouts with a safe command index', async () => { + const request = makeRequest(tmpDir); + expect( + prepareWrapperBootstrapWorkspace(request, undefined, { + git: async args => { + if (args[0] === 'clone') { + await fsp.mkdir(path.join(request.workspace.workspacePath, '.git'), { + recursive: true, + }); + } + if (args[0] === 'rev-parse') return { stdout: '', stderr: '', exitCode: 1 }; + return { stdout: '', stderr: '', exitCode: 0 }; + }, + runProcess: async () => ({ + stdout: '', + stderr: 'Authorization: Bearer setup-secret', + exitCode: 124, + terminationReason: 'timeout', + elapsedMs: 300_000, + }), + restoreSession: async () => ({ + ok: true, + downloaded: false, + imported: true, + diffs: { applied: 0, skipped: 0, total: 0 }, + }), + }) + ).rejects.toMatchObject({ + subtype: 'setup_command_timeout', + message: expect.not.stringContaining('setup-secret'), + detail: expect.not.stringContaining('setup-secret'), + }); + }); + + it('uses an unknown workspace subtype for untyped failures', async () => { + const request = makeRequest(tmpDir); + expect( + prepareWrapperBootstrapWorkspace(request, undefined, { + git: async () => { + throw new Error('unexpected internal failure'); + }, + }) + ).rejects.toMatchObject({ + code: 'WORKSPACE_SETUP_FAILED', + subtype: 'workspace_setup_unknown', + }); }); it('resumes unfinished cold bootstraps when a prior attempt left a git workspace behind', async () => { diff --git a/services/cloud-agent-next/wrapper/src/session-bootstrap.ts b/services/cloud-agent-next/wrapper/src/session-bootstrap.ts index fa97539a5c..0072e74ce5 100644 --- a/services/cloud-agent-next/wrapper/src/session-bootstrap.ts +++ b/services/cloud-agent-next/wrapper/src/session-bootstrap.ts @@ -7,8 +7,15 @@ import { type WrapperSessionReadyRequest, } from '../../src/shared/wrapper-bootstrap.js'; import { buildCloudAgentRules } from '../../src/shared/cloud-agent-rules.js'; -import { git, logToFile, runProcess, type ExecResult 
} from './utils.js'; +import { + createSafeProcessDiagnostic, + git, + logToFile, + runProcess, + type ExecResult, +} from './utils.js'; import { restoreSession } from './restore-session.js'; +import { WrapperBootstrapError, workspaceBootstrapError } from './bootstrap-error.js'; const SETUP_COMMAND_TIMEOUT_MS = 300_000; const GIT_COMMAND_TIMEOUT_MS = 120_000; @@ -46,8 +53,48 @@ export type WrapperBootstrapDeps = { restoreSession?: typeof restoreSession; }; -function sanitizeGitOutput(output: string): string { - return output.replace(/(oauth2|x-access-token|x-token-auth):([^@]+)@/gi, '$1:***@'); +const GIT_FAILURE_PATTERNS = [ + { subtype: 'sandbox_storage_full', pattern: /no space left on device|disk quota exceeded/i }, + { + subtype: 'git_authentication_failed', + pattern: /authentication failed|could not read username|http 401|http 403/i, + }, + { + subtype: 'git_network_failed', + pattern: + /remote end hung up|connection (?:reset|timed out)|could not resolve host|failed to connect/i, + }, + { + subtype: 'git_pack_corrupt', + pattern: /bad object|pack.*corrupt|invalid index-pack output|early eof/i, + }, +] as const; + +function classifyGitFailure(result: ExecResult, operation: 'clone' | 'checkout') { + if (result.terminationReason === 'timeout') { + return operation === 'clone' ? 'git_clone_timeout' : 'git_checkout_timeout'; + } + const output = `${result.stderr}\n${result.stdout}`; + if ( + operation === 'checkout' && + /would be overwritten|index\.lock.*exists|unable to create.*index\.lock/i.test(output) + ) { + return 'git_checkout_conflict'; + } + return ( + GIT_FAILURE_PATTERNS.find(entry => entry.pattern.test(output))?.subtype ?? + 'workspace_setup_unknown' + ); +} + +function gitOperationError( + result: ExecResult, + operation: 'clone' | 'checkout' +): WrapperBootstrapError { + const label = operation === 'clone' ? 
'Repository clone' : 'Repository checkout'; + const subtype = classifyGitFailure(result, operation); + const message = result.terminationReason === 'timeout' ? `${label} timed out` : `${label} failed`; + return workspaceBootstrapError(subtype, message, createSafeProcessDiagnostic(result)); } function authenticatedUrl( @@ -106,7 +153,7 @@ async function cloneRepository( const result = await runGit(args, { timeoutMs: GIT_COMMAND_TIMEOUT_MS }); if (result.exitCode !== 0) { - throw new Error(`Git clone failed: ${sanitizeGitOutput(result.stderr || result.stdout)}`); + throw gitOperationError(result, 'clone'); } const authorName = @@ -133,11 +180,15 @@ async function branchExists( remote: boolean ): Promise { const ref = remote ? `origin/${branch}` : branch; - const result = await runGit(['rev-parse', '--verify', ref], { + const result = await runGit(['rev-parse', '--verify', '--quiet', ref], { cwd: workspacePath, timeoutMs: GIT_COMMAND_TIMEOUT_MS, }); - return result.exitCode === 0; + if (result.exitCode === 0) return true; + if (result.exitCode !== 1 || result.terminationReason !== undefined) { + throw gitOperationError(result, 'checkout'); + } + return false; } const GITHUB_PULL_REF_PATTERN = /^refs\/pull\/\d+\/head$/; @@ -157,9 +208,7 @@ async function fetchSyntheticReviewRef( timeoutMs: GIT_COMMAND_TIMEOUT_MS, }); if (fetchResult.exitCode !== 0) { - throw new Error( - `Failed to fetch pull ref ${branchName}: ${sanitizeGitOutput(fetchResult.stderr || fetchResult.stdout)}` - ); + throw gitOperationError(fetchResult, 'checkout'); } const checkoutResult = await runGit(['checkout', '-B', branchName, 'FETCH_HEAD'], { @@ -167,9 +216,7 @@ async function fetchSyntheticReviewRef( timeoutMs: GIT_COMMAND_TIMEOUT_MS, }); if (checkoutResult.exitCode !== 0) { - throw new Error( - `Failed to checkout pull ref ${branchName}: ${sanitizeGitOutput(checkoutResult.stderr || checkoutResult.stdout)}` - ); + throw gitOperationError(checkoutResult, 'checkout'); } } @@ -183,7 +230,13 @@ async 
function prepareBranch( return; } - await runGit(['fetch', 'origin'], { cwd: workspacePath, timeoutMs: GIT_COMMAND_TIMEOUT_MS }); + const fetchResult = await runGit(['fetch', 'origin'], { + cwd: workspacePath, + timeoutMs: GIT_COMMAND_TIMEOUT_MS, + }); + if (fetchResult.exitCode !== 0) { + throw gitOperationError(fetchResult, 'checkout'); + } if (await branchExists(runGit, workspacePath, branchName, false)) { const result = await runGit(['checkout', branchName], { @@ -191,7 +244,7 @@ async function prepareBranch( timeoutMs: GIT_COMMAND_TIMEOUT_MS, }); if (result.exitCode !== 0) { - throw new Error(`Failed to checkout branch ${branchName}`); + throw gitOperationError(result, 'checkout'); } return; } @@ -202,13 +255,18 @@ async function prepareBranch( timeoutMs: GIT_COMMAND_TIMEOUT_MS, }); if (result.exitCode !== 0) { - throw new Error(`Failed to checkout branch ${branchName}`); + throw gitOperationError(result, 'checkout'); } return; } if (strictBranch) { - throw new Error(`Branch "${branchName}" not found in repository`); + throw workspaceBootstrapError( + 'git_branch_missing', + 'Requested repository branch was not found', + undefined, + false + ); } const result = await runGit(['checkout', '-b', branchName], { @@ -328,9 +386,15 @@ async function bootstrapEmptyKiloSession( ); if (!result.ok) { logToFile( - `bootstrap empty kilo session failed kiloSessionId=${request.kiloSessionId} step=${result.step} code=${result.code ?? '(none)'} error=${result.error}` + `bootstrap empty kilo session failed kiloSessionId=${request.kiloSessionId} step=${result.step} code=${result.code ?? '(none)'} subtype=${result.subtype ?? '(none)'}` + ); + throw workspaceBootstrapError( + result.subtype ?? 'workspace_setup_unknown', + result.subtype === 'kilo_import_timeout' + ? 
'Session import timed out' + : 'Session import failed', + result.detail ); - throw new Error(`Session bootstrap failed: ${result.error}`); } logToFile( `bootstrap empty kilo session ready kiloSessionId=${request.kiloSessionId} diffsApplied=${result.diffs.applied} diffsSkipped=${result.diffs.skipped} diffsTotal=${result.diffs.total}` @@ -353,10 +417,16 @@ async function restoreOrBootstrapKiloSession( return; } logToFile( - `bootstrap snapshot restore failed kiloSessionId=${request.kiloSessionId} step=${result.step} code=${result.code ?? '(none)'} error=${result.error}` + `bootstrap snapshot restore failed kiloSessionId=${request.kiloSessionId} step=${result.step} code=${result.code ?? '(none)'} subtype=${result.subtype ?? '(none)'}` ); if (result.code !== 404) { - throw new Error(`Session snapshot restore failed: ${result.error}`); + throw workspaceBootstrapError( + result.subtype ?? 'workspace_setup_unknown', + result.subtype === 'kilo_import_timeout' + ? 'Session import timed out' + : 'Session restore failed', + result.detail + ); } logToFile( `bootstrap snapshot missing; falling back to empty import kiloSessionId=${request.kiloSessionId}` @@ -376,13 +446,18 @@ async function runSetupCommands( logToFile( `bootstrap setup commands starting kiloSessionId=${request.kiloSessionId} count=${setupCommands.length} failFast=${failFast} workspacePath=${request.workspace.workspacePath}` ); - for (const command of setupCommands) { + for (const [commandIndex, command] of setupCommands.entries()) { const result = await run('sh', ['-lc', command], { cwd: request.workspace.workspacePath, timeoutMs: SETUP_COMMAND_TIMEOUT_MS, }); if (result.exitCode !== 0 && failFast) { - throw new Error(`Setup command failed: ${command} (exit code ${result.exitCode})`); + const timedOut = result.terminationReason === 'timeout'; + throw workspaceBootstrapError( + timedOut ? 'setup_command_timeout' : 'setup_command_failed', + `Setup command ${commandIndex + 1} ${timedOut ? 
'timed out' : 'failed'}`, + createSafeProcessDiagnostic(result) + ); } } logToFile( @@ -513,14 +588,17 @@ export async function prepareWrapperBootstrapWorkspace( ); return { workspaceWasWarm }; } catch (error) { - const message = error instanceof Error ? error.message : String(error); + const bootstrapError = + error instanceof WrapperBootstrapError + ? error + : workspaceBootstrapError('workspace_setup_unknown', 'Workspace setup failed'); logToFile( - `bootstrap workspace failed kiloSessionId=${request.kiloSessionId} workspaceWasWarm=${workspaceWasWarm} workspaceNeedsBootstrap=${workspaceNeedsBootstrap} willCleanup=${workspaceNeedsBootstrap} error=${message}` + `bootstrap workspace failed kiloSessionId=${request.kiloSessionId} workspaceWasWarm=${workspaceWasWarm} workspaceNeedsBootstrap=${workspaceNeedsBootstrap} willCleanup=${workspaceNeedsBootstrap} code=${bootstrapError.code} subtype=${bootstrapError.subtype ?? '(none)'}` ); if (workspaceNeedsBootstrap) { await cleanupWorkspace(request); logToFile(`bootstrap workspace cleanup finished kiloSessionId=${request.kiloSessionId}`); } - throw error; + throw bootstrapError; } } diff --git a/services/cloud-agent-next/wrapper/src/utils.ts b/services/cloud-agent-next/wrapper/src/utils.ts index 6b6f49af40..3e618b1708 100644 --- a/services/cloud-agent-next/wrapper/src/utils.ts +++ b/services/cloud-agent-next/wrapper/src/utils.ts @@ -5,7 +5,10 @@ export type ExecResult = { stdout: string; stderr: string; exitCode: number; + elapsedMs?: number; terminationReason?: TerminationReason; + stdoutTruncated?: boolean; + stderrTruncated?: boolean; }; export type ProcessOptions = { @@ -13,6 +16,7 @@ export type ProcessOptions = { timeoutMs?: number; signal?: AbortSignal; terminationGraceMs?: number; + maxOutputBytes?: number; }; export type GitOptions = ProcessOptions; @@ -29,11 +33,45 @@ const EXEC_TERMINATION_GRACE_MS = 2_000; const EXEC_TERMINATION_POLL_MS = 25; const EXEC_TIMEOUT_MESSAGE = 'exec timeout reached'; const 
EXEC_ABORTED_MESSAGE = 'exec aborted'; +const DEFAULT_MAX_OUTPUT_BYTES = 64 * 1_024; +const TRUNCATION_MARKER = 'output truncated'; export type TerminationReason = 'timeout' | 'abort'; -function withStderrSuffix(stderr: string, suffix: string): string { - return `${stderr}${stderr.endsWith('\n') || stderr.length === 0 ? '' : '\n'}${suffix}`; +function utf8Tail(value: string, maxBytes: number): string { + const bytes = Buffer.from(value); + if (bytes.length <= maxBytes) return value; + return bytes + .subarray(bytes.length - maxBytes) + .toString('utf8') + .replace(/^\uFFFD/, ''); +} + +function appendBoundedTail( + current: string, + chunk: Buffer | string, + maxBytes: number +): { value: string; truncated: boolean } { + const next = current + chunk.toString(); + const truncated = Buffer.byteLength(next) > maxBytes; + return { value: truncated ? utf8Tail(next, maxBytes) : next, truncated }; +} + +export function createSafeProcessDiagnostic(result: ExecResult): string { + const termination = + result.terminationReason ?? (result.exitCode === 0 ? 'completed' : 'nonzero exit'); + return [ + `termination ${termination}`, + result.terminationReason === undefined && result.exitCode !== 0 + ? `exit code ${result.exitCode}` + : undefined, + result.elapsedMs === undefined ? undefined : `elapsed ${result.elapsedMs}ms`, + result.stdoutTruncated === true || result.stderrTruncated === true + ? 
TRUNCATION_MARKER + : undefined, + ] + .filter(value => value !== undefined) + .join(', '); } export function runProcess( @@ -41,11 +79,13 @@ export function runProcess( args: string[], opts?: ProcessOptions ): Promise { + const startedAt = Date.now(); if (opts?.signal?.aborted) { return Promise.resolve({ stdout: '', stderr: EXEC_ABORTED_MESSAGE, exitCode: EXEC_TIMEOUT_EXIT_CODE, + elapsedMs: 0, terminationReason: 'abort', }); } @@ -58,6 +98,9 @@ export function runProcess( }); let stdout = ''; let stderr = ''; + let stdoutTruncated = false; + let stderrTruncated = false; + const maxOutputBytes = opts?.maxOutputBytes ?? DEFAULT_MAX_OUTPUT_BYTES; let settled = false; let terminationReason: TerminationReason | null = null; let terminationTimer: ReturnType | undefined; @@ -89,14 +132,19 @@ export function runProcess( clearTimers(); removeAbortHandler(); if (destroyOpenPipes) destroyPipes(); + const boundedStderr = appendBoundedTail( + stderr, + `${stderr.endsWith('\n') || stderr.length === 0 ? '' : '\n'}${reason === 'timeout' ? EXEC_TIMEOUT_MESSAGE : EXEC_ABORTED_MESSAGE}`, + maxOutputBytes + ); resolve({ stdout, - stderr: withStderrSuffix( - stderr, - reason === 'timeout' ? EXEC_TIMEOUT_MESSAGE : EXEC_ABORTED_MESSAGE - ), + stderr: boundedStderr.value, exitCode: EXEC_TIMEOUT_EXIT_CODE, + elapsedMs: Date.now() - startedAt, terminationReason: reason, + ...(stdoutTruncated ? { stdoutTruncated: true } : {}), + ...(stderrTruncated || boundedStderr.truncated ? { stderrTruncated: true } : {}), }); }; @@ -145,8 +193,16 @@ export function runProcess( ? 
setTimeout(() => terminate('timeout'), opts.timeoutMs) : undefined; - proc.stdout.on('data', d => (stdout += d)); - proc.stderr.on('data', d => (stderr += d)); + proc.stdout.on('data', (chunk: Buffer) => { + const bounded = appendBoundedTail(stdout, chunk, maxOutputBytes); + stdout = bounded.value; + stdoutTruncated ||= bounded.truncated; + }); + proc.stderr.on('data', (chunk: Buffer) => { + const bounded = appendBoundedTail(stderr, chunk, maxOutputBytes); + stderr = bounded.value; + stderrTruncated ||= bounded.truncated; + }); if (opts?.signal) { if (opts.signal.aborted) { @@ -164,7 +220,14 @@ export function runProcess( settled = true; clearTimers(); removeAbortHandler(); - resolve({ stdout, stderr, exitCode: code ?? (signal === null ? 0 : 1) }); + resolve({ + stdout, + stderr, + exitCode: code ?? (signal === null ? 0 : 1), + elapsedMs: Date.now() - startedAt, + ...(stdoutTruncated ? { stdoutTruncated: true } : {}), + ...(stderrTruncated ? { stderrTruncated: true } : {}), + }); }); proc.on('error', err => { if (!settled) { diff --git a/services/security-auto-analysis/src/callbacks.test.ts b/services/security-auto-analysis/src/callbacks.test.ts index 051a9bc8cf..3f4ecd0a32 100644 --- a/services/security-auto-analysis/src/callbacks.test.ts +++ b/services/security-auto-analysis/src/callbacks.test.ts @@ -7,6 +7,7 @@ import { finalizeFailedAnalysisCallback, mapAnalysisCallbackFailure, resolveCompletedCallbackMarkdown, + SecurityAnalysisCallbackPayloadSchema, type SecurityAnalysisCallbackPayload, } from './callbacks.js'; @@ -647,6 +648,57 @@ describe('finalizeFailedAnalysisCallback', () => { }); }); + it('persists a valid structured safe message instead of the raw legacy error', async () => { + const db = { + select: () => ({ + from: () => ({ + where: () => ({ + limit: async () => [ + { + session_id: 'agent-123', + cli_session_id: null, + ignored_reason: null, + analysis_status: 'running', + claimToken: ATTEMPT_TOKEN, + }, + ], + }), + }), + }), + }; + + await expect( 
+ finalizeFailedAnalysisCallback({ + db: db as never, + findingId: 'bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb', + attemptToken: ATTEMPT_TOKEN, + payload: { + ...failedPayload, + errorMessage: 'legacy raw token=secret', + failure: { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + message: 'Repository clone timed out', + }, + }, + }) + ).resolves.toEqual({ status: 'failed-finalized' }); + + expect(transitionAnalysisCallbackLifecycle).toHaveBeenCalledWith(db, { + findingId: 'bbbbbbbb-bbbb-4bbb-8bbb-bbbbbbbbbbbb', + attemptToken: ATTEMPT_TOKEN, + outcome: { + type: 'failed', + errorMessage: 'Repository clone timed out', + failureCode: 'NETWORK_TIMEOUT', + }, + }); + expect( + JSON.stringify(vi.mocked(transitionAnalysisCallbackLifecycle).mock.calls.at(-1)) + ).not.toContain('token=secret'); + }); + it('resolves the active claim token for legacy failed callback messages', async () => { const db = { select: () => ({ @@ -686,6 +738,45 @@ describe('finalizeFailedAnalysisCallback', () => { }); }); +describe('SecurityAnalysisCallbackPayloadSchema', () => { + it('accepts structured failure payloads and completed callbacks without failure', () => { + expect( + SecurityAnalysisCallbackPayloadSchema.safeParse({ + ...failedPayload, + failure: { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 2, + message: 'Repository clone timed out', + }, + }).success + ).toBe(true); + expect( + SecurityAnalysisCallbackPayloadSchema.safeParse({ + ...failedPayload, + status: 'completed', + errorMessage: undefined, + }).success + ).toBe(true); + }); + + it.each([ + { failure: { code: 'future_failure_code' } }, + { failure: { subtype: 'unknown_workspace_failure' } }, + { failure: { extra: true } }, + { failure: { attempts: -1 } }, + { failure: { message: 'x'.repeat(4_097) } }, + ])('discards incompatible failure while retaining the legacy payload: %o', extension => { + 
expect(SecurityAnalysisCallbackPayloadSchema.parse({ ...failedPayload, ...extension })).toEqual( + { + ...failedPayload, + failure: undefined, + } + ); + }); +}); + describe('mapAnalysisCallbackFailure', () => { it('maps interrupted callbacks to state guard rejection', () => { expect( @@ -696,7 +787,25 @@ describe('mapAnalysisCallbackFailure', () => { }); }); - it('maps transient upstream failures to UPSTREAM_5XX', () => { + it('prefers structured timeout classification and message', () => { + expect( + mapAnalysisCallbackFailure({ + ...failedPayload, + errorMessage: 'legacy upstream 503', + failure: { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + message: 'Repository clone timed out', + }, + }) + ).toEqual({ + errorMessage: 'Repository clone timed out', + failureCode: 'NETWORK_TIMEOUT', + }); + }); + + it('preserves legacy text classification when failure is absent', () => { expect(mapAnalysisCallbackFailure(failedPayload)).toEqual({ errorMessage: 'upstream 503', failureCode: 'UPSTREAM_5XX', diff --git a/services/security-auto-analysis/src/callbacks.ts b/services/security-auto-analysis/src/callbacks.ts index c74f804723..98ecb79a73 100644 --- a/services/security-auto-analysis/src/callbacks.ts +++ b/services/security-auto-analysis/src/callbacks.ts @@ -1,6 +1,10 @@ import { getWorkerDb, type WorkerDb } from '@kilocode/db/client'; import { security_audit_log } from '@kilocode/db/schema'; import { SecurityAuditLogAction } from '@kilocode/db/schema-types'; +import { + CloudAgentCallbackFailureSchema, + type CloudAgentSafeFailure, +} from '@kilocode/worker-utils/cloud-agent-failure'; import { z } from 'zod'; import { getActiveAnalysisAttemptToken, @@ -19,12 +23,15 @@ import type { SecurityFindingSandboxAnalysis, } from './types.js'; +type CallbackFailure = CloudAgentSafeFailure; + export const SecurityAnalysisCallbackPayloadSchema = z.object({ sessionId: z.string().min(1), cloudAgentSessionId: z.string().min(1), executionId: 
z.string().min(1), status: z.enum(['completed', 'failed', 'interrupted']), errorMessage: z.string().optional(), + failure: CloudAgentCallbackFailureSchema, kiloSessionId: z.string().optional(), lastSeenBranch: z.string().optional(), lastAssistantMessageText: z.string().optional(), @@ -79,21 +86,11 @@ export function classifyAnalysisCallback( return 'process'; } -export function mapAnalysisCallbackFailure(params: { - status: 'failed' | 'interrupted'; - errorMessage?: string; -}): { errorMessage: string; failureCode: AutoAnalysisFailureCode } { - if (params.status === 'interrupted') { - return { - errorMessage: `Analysis interrupted: ${params.errorMessage ?? 'unknown reason'}`, - failureCode: 'STATE_GUARD_REJECTED', - }; - } - - const errorMessage = params.errorMessage ?? 'Analysis failed'; - const normalized = errorMessage.toLowerCase(); +function classifyAnalysisFailureText(message?: string): AutoAnalysisFailureCode | undefined { + const normalized = message?.toLowerCase(); + if (!normalized) return undefined; if (normalized.includes('timeout') || normalized.includes('timed out')) { - return { errorMessage, failureCode: 'NETWORK_TIMEOUT' }; + return 'NETWORK_TIMEOUT'; } if ( normalized.includes('502') || @@ -102,9 +99,42 @@ export function mapAnalysisCallbackFailure(params: { normalized.includes('upstream') || normalized.includes('5xx') ) { - return { errorMessage, failureCode: 'UPSTREAM_5XX' }; + return 'UPSTREAM_5XX'; } - return { errorMessage, failureCode: 'START_CALL_AMBIGUOUS' }; + return undefined; +} + +function classifyStructuredAnalysisFailure( + failure?: CallbackFailure +): AutoAnalysisFailureCode | undefined { + if ( + failure?.code === 'wrapper_no_output' || + failure?.code === 'wrapper_ping_timeout' || + failure?.subtype?.endsWith('_timeout') + ) { + return 'NETWORK_TIMEOUT'; + } + return classifyAnalysisFailureText(failure?.message); +} + +export function mapAnalysisCallbackFailure(params: { + status: 'failed' | 'interrupted'; + errorMessage?: 
string; + failure?: CallbackFailure; +}): { errorMessage: string; failureCode: AutoAnalysisFailureCode } { + const errorMessage = params.failure?.message ?? params.errorMessage; + if (params.status === 'interrupted') { + return { + errorMessage: `Analysis interrupted: ${errorMessage ?? 'unknown reason'}`, + failureCode: 'STATE_GUARD_REJECTED', + }; + } + + const failureCode = + classifyStructuredAnalysisFailure(params.failure) ?? + classifyAnalysisFailureText(params.errorMessage) ?? + 'START_CALL_AMBIGUOUS'; + return { errorMessage: errorMessage ?? 'Analysis failed', failureCode }; } type ExtractSandboxAnalysis = (params: { @@ -324,6 +354,7 @@ export async function finalizeFailedAnalysisCallback(params: { ? mapAnalysisCallbackFailure({ status: params.payload.status === 'interrupted' ? 'interrupted' : 'failed', errorMessage: params.payload.errorMessage, + failure: params.payload.failure, }) : null; if (!attemptToken) return { status: disposition }; @@ -353,6 +384,7 @@ export async function finalizeFailedAnalysisCallback(params: { const failure = mapAnalysisCallbackFailure({ status: params.payload.status === 'interrupted' ? 
'interrupted' : 'failed', errorMessage: params.payload.errorMessage, + failure: params.payload.failure, }); const lifecycleTransition = await transitionAnalysisCallbackLifecycle(params.db, { findingId: params.findingId, diff --git a/services/webhook-agent-ingest/src/routes/callbacks.test.ts b/services/webhook-agent-ingest/src/routes/callbacks.test.ts index e82e39c6e0..5f5eb35b5e 100644 --- a/services/webhook-agent-ingest/src/routes/callbacks.test.ts +++ b/services/webhook-agent-ingest/src/routes/callbacks.test.ts @@ -63,13 +63,17 @@ function callbackHeaders(headers: CallbackHeaders): HeadersInit { return result; } -async function requestCallback(env: Env, headers: CallbackHeaders) { +async function requestCallback( + env: Env, + headers: CallbackHeaders, + payload: Record = callbackPayload +) { return callbacks.request( '/execution', { method: 'POST', headers: callbackHeaders(headers), - body: JSON.stringify(callbackPayload), + body: JSON.stringify(payload), }, env ); @@ -97,6 +101,95 @@ describe('webhook execution callback auth', () => { ); }); + it('prefers a validated structured failure message over the legacy message', async () => { + const { env, updateRequest } = createRouteHarness(); + const callbackToken = await deriveCallbackToken({ + secret: CALLBACK_SECRET, + scope: 'webhook-execution-callback', + resourceParts: [NAMESPACE, TRIGGER_ID, REQUEST_ID], + }); + + const response = await requestCallback( + env, + { callbackToken }, + { + ...callbackPayload, + status: 'failed', + errorMessage: 'legacy error', + failure: { + stage: 'pre_dispatch', + code: 'workspace_setup_failed', + subtype: 'git_clone_timeout', + attempts: 2, + message: 'Repository clone timed out', + }, + } + ); + + expect(response.status).toBe(200); + expect(updateRequest).toHaveBeenCalledWith( + REQUEST_ID, + expect.objectContaining({ error_message: 'Repository clone timed out' }) + ); + }); + + it('falls back to the legacy error message when failure is absent', async () => { + const { env, 
updateRequest } = createRouteHarness(); + const callbackToken = await deriveCallbackToken({ + secret: CALLBACK_SECRET, + scope: 'webhook-execution-callback', + resourceParts: [NAMESPACE, TRIGGER_ID, REQUEST_ID], + }); + + const response = await requestCallback( + env, + { callbackToken }, + { + ...callbackPayload, + status: 'failed', + errorMessage: 'legacy error', + } + ); + + expect(response.status).toBe(200); + expect(updateRequest).toHaveBeenCalledWith( + REQUEST_ID, + expect.objectContaining({ error_message: 'legacy error' }) + ); + }); + + it.each([ + { failure: { code: 'future_failure_code' } }, + { failure: { subtype: 'unknown_workspace_failure' } }, + { failure: { extra: true } }, + { failure: { attempts: -1 } }, + { failure: { message: 'x'.repeat(4_097) } }, + ])('discards incompatible failure and uses the legacy message: %o', async payloadExtension => { + const { env, updateRequest } = createRouteHarness(); + const callbackToken = await deriveCallbackToken({ + secret: CALLBACK_SECRET, + scope: 'webhook-execution-callback', + resourceParts: [NAMESPACE, TRIGGER_ID, REQUEST_ID], + }); + + const response = await requestCallback( + env, + { callbackToken }, + { + ...callbackPayload, + status: 'failed', + errorMessage: 'legacy error', + ...payloadExtension, + } + ); + + expect(response.status).toBe(200); + expect(updateRequest).toHaveBeenCalledWith( + REQUEST_ID, + expect.objectContaining({ error_message: 'legacy error' }) + ); + }); + it.each([ { namespace: 'user/tampered', triggerId: TRIGGER_ID, requestId: REQUEST_ID }, { namespace: NAMESPACE, triggerId: 'trigger-tampered', requestId: REQUEST_ID }, diff --git a/services/webhook-agent-ingest/src/routes/callbacks.ts b/services/webhook-agent-ingest/src/routes/callbacks.ts index ffef89e29a..efb8a044d3 100644 --- a/services/webhook-agent-ingest/src/routes/callbacks.ts +++ b/services/webhook-agent-ingest/src/routes/callbacks.ts @@ -3,6 +3,7 @@ import { z } from 'zod'; import type { HonoContext } from '../index'; 
import { logger } from '../util/logger'; import { resError, resSuccess, verifyCallbackToken } from '@kilocode/worker-utils'; +import { CloudAgentCallbackFailureSchema } from '@kilocode/worker-utils/cloud-agent-failure'; import { withDORetry } from '../util/do-retry'; const callbacks = new Hono(); @@ -13,6 +14,7 @@ const ExecutionCallbackPayloadSchema = z.object({ executionId: z.string(), status: z.enum(['completed', 'failed', 'interrupted']), errorMessage: z.string().optional(), + failure: CloudAgentCallbackFailureSchema, lastSeenBranch: z.string().optional(), kiloSessionId: z.string().optional(), }); @@ -95,7 +97,7 @@ callbacks.post('/execution', async c => { doStub.updateRequest(requestId, { process_status: payload.status === 'completed' ? 'success' : 'failed', completed_at: new Date().toISOString(), - error_message: payload.errorMessage, + error_message: payload.failure?.message ?? payload.errorMessage, }), 'updateRequest' );