diff --git a/apps/sim/app/api/billing/update-cost/route.ts b/apps/sim/app/api/billing/update-cost/route.ts index f01ec13f939..eef3ac31be0 100644 --- a/apps/sim/app/api/billing/update-cost/route.ts +++ b/apps/sim/app/api/billing/update-cost/route.ts @@ -4,7 +4,11 @@ import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { recordUsage } from '@/lib/billing/core/usage-log' import { checkAndBillOverageThreshold } from '@/lib/billing/threshold-billing' +import { BillingRouteOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' import { checkInternalApiKey } from '@/lib/copilot/request/http' +import { withIncomingGoSpan } from '@/lib/copilot/request/otel' import { isBillingEnabled } from '@/lib/core/config/feature-flags' import { type AtomicClaimResult, billingIdempotency } from '@/lib/core/idempotency/service' import { generateRequestId } from '@/lib/core/utils/request' @@ -26,8 +30,28 @@ const UpdateCostSchema = z.object({ /** * POST /api/billing/update-cost * Update user cost with a pre-calculated cost value (internal API key auth required) + * + * Parented under the Go-side `sim.update_cost` span via W3C traceparent + * propagation. Every mothership request that bills should therefore show + * the Go client span AND this Sim server span sharing one trace, with + * the actual usage/overage work nested below. 
*/ export async function POST(req: NextRequest) { + return withIncomingGoSpan( + req.headers, + TraceSpan.CopilotBillingUpdateCost, + { + [TraceAttr.HttpMethod]: 'POST', + [TraceAttr.HttpRoute]: '/api/billing/update-cost', + }, + async (span) => updateCostInner(req, span) + ) +} + +async function updateCostInner( + req: NextRequest, + span: import('@opentelemetry/api').Span +): Promise { const requestId = generateRequestId() const startTime = Date.now() let claim: AtomicClaimResult | null = null @@ -37,6 +61,8 @@ export async function POST(req: NextRequest) { logger.info(`[${requestId}] Update cost request started`) if (!isBillingEnabled) { + span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.BillingDisabled) + span.setAttribute(TraceAttr.HttpStatusCode, 200) return NextResponse.json({ success: true, message: 'Billing disabled, cost update skipped', @@ -52,6 +78,8 @@ export async function POST(req: NextRequest) { const authResult = checkInternalApiKey(req) if (!authResult.success) { logger.warn(`[${requestId}] Authentication failed: ${authResult.error}`) + span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.AuthFailed) + span.setAttribute(TraceAttr.HttpStatusCode, 401) return NextResponse.json( { success: false, @@ -67,8 +95,9 @@ export async function POST(req: NextRequest) { if (!validation.success) { logger.warn(`[${requestId}] Invalid request body`, { errors: validation.error.issues, - body, }) + span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.InvalidBody) + span.setAttribute(TraceAttr.HttpStatusCode, 400) return NextResponse.json( { success: false, @@ -83,6 +112,17 @@ export async function POST(req: NextRequest) { validation.data const isMcp = source === 'mcp_copilot' + span.setAttributes({ + [TraceAttr.UserId]: userId, + [TraceAttr.GenAiRequestModel]: model, + [TraceAttr.BillingSource]: source, + [TraceAttr.BillingCostUsd]: cost, + [TraceAttr.GenAiUsageInputTokens]: inputTokens, + [TraceAttr.GenAiUsageOutputTokens]: 
outputTokens, + [TraceAttr.BillingIsMcp]: isMcp, + ...(idempotencyKey ? { [TraceAttr.BillingIdempotencyKey]: idempotencyKey } : {}), + }) + claim = idempotencyKey ? await billingIdempotency.atomicallyClaim('update-cost', idempotencyKey) : null @@ -93,6 +133,8 @@ export async function POST(req: NextRequest) { userId, source, }) + span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.DuplicateIdempotencyKey) + span.setAttribute(TraceAttr.HttpStatusCode, 409) return NextResponse.json( { success: false, @@ -157,6 +199,9 @@ export async function POST(req: NextRequest) { cost, }) + span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.Billed) + span.setAttribute(TraceAttr.HttpStatusCode, 200) + span.setAttribute(TraceAttr.BillingDurationMs, duration) return NextResponse.json({ success: true, data: { @@ -191,6 +236,9 @@ export async function POST(req: NextRequest) { ) } + span.setAttribute(TraceAttr.BillingOutcome, BillingRouteOutcome.InternalError) + span.setAttribute(TraceAttr.HttpStatusCode, 500) + span.setAttribute(TraceAttr.BillingDurationMs, duration) return NextResponse.json( { success: false, diff --git a/apps/sim/app/api/copilot/api-keys/generate/route.ts b/apps/sim/app/api/copilot/api-keys/generate/route.ts index 27971cede75..950bb9f16cb 100644 --- a/apps/sim/app/api/copilot/api-keys/generate/route.ts +++ b/apps/sim/app/api/copilot/api-keys/generate/route.ts @@ -2,6 +2,8 @@ import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { getSession } from '@/lib/auth' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { fetchGo } from '@/lib/copilot/request/go/fetch' import { env } from '@/lib/core/config/env' const GenerateApiKeySchema = z.object({ @@ -32,13 +34,16 @@ export async function POST(req: NextRequest) { const { name } = validationResult.data - const res = await 
fetch(`${SIM_AGENT_API_URL}/api/validate-key/generate`, { + const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/generate`, { method: 'POST', headers: { 'Content-Type': 'application/json', ...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}), }, body: JSON.stringify({ userId, name }), + spanName: 'sim → go /api/validate-key/generate', + operation: 'generate_api_key', + attributes: { [TraceAttr.UserId]: userId }, }) if (!res.ok) { diff --git a/apps/sim/app/api/copilot/api-keys/route.ts b/apps/sim/app/api/copilot/api-keys/route.ts index 02d0d5be2b0..89a88d381dc 100644 --- a/apps/sim/app/api/copilot/api-keys/route.ts +++ b/apps/sim/app/api/copilot/api-keys/route.ts @@ -1,6 +1,8 @@ import { type NextRequest, NextResponse } from 'next/server' import { getSession } from '@/lib/auth' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { fetchGo } from '@/lib/copilot/request/go/fetch' import { env } from '@/lib/core/config/env' export async function GET(request: NextRequest) { @@ -12,13 +14,16 @@ export async function GET(request: NextRequest) { const userId = session.user.id - const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/get-api-keys`, { + const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/get-api-keys`, { method: 'POST', headers: { 'Content-Type': 'application/json', ...(env.COPILOT_API_KEY ? 
{ 'x-api-key': env.COPILOT_API_KEY } : {}), }, body: JSON.stringify({ userId }), + spanName: 'sim → go /api/validate-key/get-api-keys', + operation: 'get_api_keys', + attributes: { [TraceAttr.UserId]: userId }, }) if (!res.ok) { @@ -66,13 +71,16 @@ export async function DELETE(request: NextRequest) { return NextResponse.json({ error: 'id is required' }, { status: 400 }) } - const res = await fetch(`${SIM_AGENT_API_URL}/api/validate-key/delete`, { + const res = await fetchGo(`${SIM_AGENT_API_URL}/api/validate-key/delete`, { method: 'POST', headers: { 'Content-Type': 'application/json', ...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}), }, body: JSON.stringify({ userId, apiKeyId: id }), + spanName: 'sim → go /api/validate-key/delete', + operation: 'delete_api_key', + attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ApiKeyId]: id }, }) if (!res.ok) { diff --git a/apps/sim/app/api/copilot/api-keys/validate/route.ts b/apps/sim/app/api/copilot/api-keys/validate/route.ts index 1c1df540132..fffc2b9e534 100644 --- a/apps/sim/app/api/copilot/api-keys/validate/route.ts +++ b/apps/sim/app/api/copilot/api-keys/validate/route.ts @@ -5,7 +5,11 @@ import { eq } from 'drizzle-orm' import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { checkServerSideUsageLimits } from '@/lib/billing/calculations/usage-monitor' +import { CopilotValidateOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' import { checkInternalApiKey } from '@/lib/copilot/request/http' +import { withIncomingGoSpan } from '@/lib/copilot/request/otel' const logger = createLogger('CopilotApiKeysValidate') @@ -14,54 +18,86 @@ const ValidateApiKeySchema = z.object({ }) export async function POST(req: NextRequest) { - try { - const auth = checkInternalApiKey(req) - if (!auth.success) { - return new 
NextResponse(null, { status: 401 }) - } - - const body = await req.json().catch(() => null) - - const validationResult = ValidateApiKeySchema.safeParse(body) + // Incoming-from-Go: extracts traceparent so this handler's work shows + // up as a child of the Go-side `sim.validate_api_key` span in the same + // trace. If there's no traceparent (manual curl / browser), the helper + // falls back to a new root span. + return withIncomingGoSpan( + req.headers, + TraceSpan.CopilotAuthValidateApiKey, + { + [TraceAttr.HttpMethod]: 'POST', + [TraceAttr.HttpRoute]: '/api/copilot/api-keys/validate', + }, + async (span) => { + try { + const auth = checkInternalApiKey(req) + if (!auth.success) { + span.setAttribute( + TraceAttr.CopilotValidateOutcome, + CopilotValidateOutcome.InternalAuthFailed + ) + span.setAttribute(TraceAttr.HttpStatusCode, 401) + return new NextResponse(null, { status: 401 }) + } - if (!validationResult.success) { - logger.warn('Invalid validation request', { errors: validationResult.error.errors }) - return NextResponse.json( - { - error: 'userId is required', - details: validationResult.error.errors, - }, - { status: 400 } - ) - } + const body = await req.json().catch(() => null) + const validationResult = ValidateApiKeySchema.safeParse(body) + if (!validationResult.success) { + logger.warn('Invalid validation request', { errors: validationResult.error.errors }) + span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.InvalidBody) + span.setAttribute(TraceAttr.HttpStatusCode, 400) + return NextResponse.json( + { + error: 'userId is required', + details: validationResult.error.errors, + }, + { status: 400 } + ) + } - const { userId } = validationResult.data + const { userId } = validationResult.data + span.setAttribute(TraceAttr.UserId, userId) - const [existingUser] = await db.select().from(user).where(eq(user.id, userId)).limit(1) - if (!existingUser) { - logger.warn('[API VALIDATION] userId does not exist', { userId }) - return 
NextResponse.json({ error: 'User not found' }, { status: 403 }) - } + const [existingUser] = await db.select().from(user).where(eq(user.id, userId)).limit(1) + if (!existingUser) { + logger.warn('[API VALIDATION] userId does not exist', { userId }) + span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.UserNotFound) + span.setAttribute(TraceAttr.HttpStatusCode, 403) + return NextResponse.json({ error: 'User not found' }, { status: 403 }) + } - logger.info('[API VALIDATION] Validating usage limit', { userId }) + logger.info('[API VALIDATION] Validating usage limit', { userId }) + const { isExceeded, currentUsage, limit } = await checkServerSideUsageLimits(userId) + span.setAttributes({ + [TraceAttr.BillingUsageCurrent]: currentUsage, + [TraceAttr.BillingUsageLimit]: limit, + [TraceAttr.BillingUsageExceeded]: isExceeded, + }) - const { isExceeded, currentUsage, limit } = await checkServerSideUsageLimits(userId) + logger.info('[API VALIDATION] Usage limit validated', { + userId, + currentUsage, + limit, + isExceeded, + }) - logger.info('[API VALIDATION] Usage limit validated', { - userId, - currentUsage, - limit, - isExceeded, - }) + if (isExceeded) { + logger.info('[API VALIDATION] Usage exceeded', { userId, currentUsage, limit }) + span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.UsageExceeded) + span.setAttribute(TraceAttr.HttpStatusCode, 402) + return new NextResponse(null, { status: 402 }) + } - if (isExceeded) { - logger.info('[API VALIDATION] Usage exceeded', { userId, currentUsage, limit }) - return new NextResponse(null, { status: 402 }) + span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.Ok) + span.setAttribute(TraceAttr.HttpStatusCode, 200) + return new NextResponse(null, { status: 200 }) + } catch (error) { + logger.error('Error validating usage limit', { error }) + span.setAttribute(TraceAttr.CopilotValidateOutcome, CopilotValidateOutcome.InternalError) + 
span.setAttribute(TraceAttr.HttpStatusCode, 500) + return NextResponse.json({ error: 'Failed to validate usage' }, { status: 500 }) + } } - - return new NextResponse(null, { status: 200 }) - } catch (error) { - logger.error('Error validating usage limit', { error }) - return NextResponse.json({ error: 'Failed to validate usage' }, { status: 500 }) - } + ) } diff --git a/apps/sim/app/api/copilot/auto-allowed-tools/route.ts b/apps/sim/app/api/copilot/auto-allowed-tools/route.ts index 61343d7541b..a00fd80e5bc 100644 --- a/apps/sim/app/api/copilot/auto-allowed-tools/route.ts +++ b/apps/sim/app/api/copilot/auto-allowed-tools/route.ts @@ -2,6 +2,8 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { getSession } from '@/lib/auth' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { fetchGo } from '@/lib/copilot/request/go/fetch' import { env } from '@/lib/core/config/env' const logger = createLogger('CopilotAutoAllowedToolsAPI') @@ -30,9 +32,15 @@ export async function GET() { const userId = session.user.id - const res = await fetch( + const res = await fetchGo( `${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed?userId=${encodeURIComponent(userId)}`, - { method: 'GET', headers: copilotHeaders() } + { + method: 'GET', + headers: copilotHeaders(), + spanName: 'sim → go /api/tool-preferences/auto-allowed', + operation: 'list_auto_allowed_tools', + attributes: { [TraceAttr.UserId]: userId }, + } ) if (!res.ok) { @@ -66,10 +74,13 @@ export async function POST(request: NextRequest) { return NextResponse.json({ error: 'toolId must be a string' }, { status: 400 }) } - const res = await fetch(`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed`, { + const res = await fetchGo(`${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed`, { method: 'POST', headers: copilotHeaders(), body: JSON.stringify({ userId, toolId: 
body.toolId }), + spanName: 'sim → go /api/tool-preferences/auto-allowed', + operation: 'add_auto_allowed_tool', + attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ToolId]: body.toolId }, }) if (!res.ok) { @@ -107,9 +118,15 @@ export async function DELETE(request: NextRequest) { return NextResponse.json({ error: 'toolId query parameter is required' }, { status: 400 }) } - const res = await fetch( + const res = await fetchGo( `${SIM_AGENT_API_URL}/api/tool-preferences/auto-allowed?userId=${encodeURIComponent(userId)}&toolId=${encodeURIComponent(toolId)}`, - { method: 'DELETE', headers: copilotHeaders() } + { + method: 'DELETE', + headers: copilotHeaders(), + spanName: 'sim → go /api/tool-preferences/auto-allowed', + operation: 'remove_auto_allowed_tool', + attributes: { [TraceAttr.UserId]: userId, [TraceAttr.ToolId]: toolId }, + } ) if (!res.ok) { diff --git a/apps/sim/app/api/copilot/chat/abort/route.ts b/apps/sim/app/api/copilot/chat/abort/route.ts index 375065eb418..971c00193da 100644 --- a/apps/sim/app/api/copilot/chat/abort/route.ts +++ b/apps/sim/app/api/copilot/chat/abort/route.ts @@ -2,7 +2,12 @@ import { createLogger } from '@sim/logger' import { NextResponse } from 'next/server' import { getLatestRunForStream } from '@/lib/copilot/async-runs/repository' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { CopilotAbortOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { fetchGo } from '@/lib/copilot/request/go/fetch' import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http' +import { withCopilotSpan, withIncomingGoSpan } from '@/lib/copilot/request/otel' import { abortActiveStream, waitForPendingChatStream } from '@/lib/copilot/request/session' import { env } from '@/lib/core/config/env' @@ -10,81 +15,136 @@ const logger = 
createLogger('CopilotChatAbortAPI') const GO_EXPLICIT_ABORT_TIMEOUT_MS = 3000 const STREAM_ABORT_SETTLE_TIMEOUT_MS = 8000 +// POST /api/copilot/chat/abort — fires on user Stop; marks the Go +// side aborted then waits for the prior stream to settle. export async function POST(request: Request) { - const { userId: authenticatedUserId, isAuthenticated } = - await authenticateCopilotRequestSessionOnly() + return withIncomingGoSpan( + request.headers, + TraceSpan.CopilotChatAbortStream, + undefined, + async (rootSpan) => { + const { userId: authenticatedUserId, isAuthenticated } = + await authenticateCopilotRequestSessionOnly() - if (!isAuthenticated || !authenticatedUserId) { - return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) - } + if (!isAuthenticated || !authenticatedUserId) { + rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.Unauthorized) + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) + } - const body = await request.json().catch((err) => { - logger.warn('Abort request body parse failed; continuing with empty object', { - error: err instanceof Error ? err.message : String(err), - }) - return {} - }) - const streamId = typeof body.streamId === 'string' ? body.streamId : '' - let chatId = typeof body.chatId === 'string' ? body.chatId : '' - - if (!streamId) { - return NextResponse.json({ error: 'streamId is required' }, { status: 400 }) - } + const body = await request.json().catch((err) => { + logger.warn('Abort request body parse failed; continuing with empty object', { + error: err instanceof Error ? err.message : String(err), + }) + return {} + }) + const streamId = typeof body.streamId === 'string' ? body.streamId : '' + let chatId = typeof body.chatId === 'string' ? 
body.chatId : '' - if (!chatId) { - const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => { - logger.warn('getLatestRunForStream failed while resolving chatId for abort', { - streamId, - error: err instanceof Error ? err.message : String(err), + if (!streamId) { + rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.MissingStreamId) + return NextResponse.json({ error: 'streamId is required' }, { status: 400 }) + } + rootSpan.setAttributes({ + [TraceAttr.StreamId]: streamId, + [TraceAttr.UserId]: authenticatedUserId, }) - return null - }) - if (run?.chatId) { - chatId = run.chatId - } - } - try { - const headers: Record = { 'Content-Type': 'application/json' } - if (env.COPILOT_API_KEY) { - headers['x-api-key'] = env.COPILOT_API_KEY - } - const controller = new AbortController() - const timeout = setTimeout( - () => controller.abort('timeout:go_explicit_abort_fetch'), - GO_EXPLICIT_ABORT_TIMEOUT_MS - ) - const response = await fetch(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, { - method: 'POST', - headers, - signal: controller.signal, - body: JSON.stringify({ - messageId: streamId, - userId: authenticatedUserId, - ...(chatId ? { chatId } : {}), - }), - }).finally(() => clearTimeout(timeout)) - if (!response.ok) { - throw new Error(`Explicit abort marker request failed: ${response.status}`) - } - } catch (err) { - logger.warn('Explicit abort marker request failed; proceeding with local abort', { - streamId, - error: err instanceof Error ? err.message : String(err), - }) - } + if (!chatId) { + const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => { + logger.warn('getLatestRunForStream failed while resolving chatId for abort', { + streamId, + error: err instanceof Error ? 
err.message : String(err), + }) + return null + }) + if (run?.chatId) { + chatId = run.chatId + } + } + if (chatId) rootSpan.setAttribute(TraceAttr.ChatId, chatId) - const aborted = await abortActiveStream(streamId) - if (chatId) { - const settled = await waitForPendingChatStream(chatId, STREAM_ABORT_SETTLE_TIMEOUT_MS, streamId) - if (!settled) { - return NextResponse.json( - { error: 'Previous response is still shutting down', aborted, settled: false }, - { status: 409 } - ) - } - return NextResponse.json({ aborted, settled: true }) - } + // Local abort before Go — lets the lifecycle classifier see + // `signal.aborted` with an explicit-stop reason before Go's + // context-canceled error propagates back. Go's endpoint runs + // second for billing-ledger flush; Go's context is already + // cancelled by then. + const aborted = await abortActiveStream(streamId) + rootSpan.setAttribute(TraceAttr.CopilotAbortLocalAborted, aborted) + + let goAbortOk = false + try { + const headers: Record = { 'Content-Type': 'application/json' } + if (env.COPILOT_API_KEY) { + headers['x-api-key'] = env.COPILOT_API_KEY + } + const controller = new AbortController() + const timeout = setTimeout( + () => controller.abort('timeout:go_explicit_abort_fetch'), + GO_EXPLICIT_ABORT_TIMEOUT_MS + ) + const response = await fetchGo(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, { + method: 'POST', + headers, + signal: controller.signal, + body: JSON.stringify({ + messageId: streamId, + userId: authenticatedUserId, + ...(chatId ? { chatId } : {}), + }), + spanName: 'sim → go /api/streams/explicit-abort', + operation: 'explicit_abort', + attributes: { + [TraceAttr.StreamId]: streamId, + ...(chatId ? 
{ [TraceAttr.ChatId]: chatId } : {}), + }, + }).finally(() => clearTimeout(timeout)) + if (!response.ok) { + throw new Error(`Explicit abort marker request failed: ${response.status}`) + } + goAbortOk = true + } catch (err) { + logger.warn('Explicit abort marker request failed after local abort', { + streamId, + error: err instanceof Error ? err.message : String(err), + }) + } + rootSpan.setAttribute(TraceAttr.CopilotAbortGoMarkerOk, goAbortOk) - return NextResponse.json({ aborted }) + if (chatId) { + const settled = await withCopilotSpan( + TraceSpan.CopilotChatAbortWaitSettle, + { + [TraceAttr.ChatId]: chatId, + [TraceAttr.StreamId]: streamId, + [TraceAttr.SettleTimeoutMs]: STREAM_ABORT_SETTLE_TIMEOUT_MS, + }, + async (settleSpan) => { + const start = Date.now() + const ok = await waitForPendingChatStream( + chatId, + STREAM_ABORT_SETTLE_TIMEOUT_MS, + streamId + ) + settleSpan.setAttributes({ + [TraceAttr.SettleWaitMs]: Date.now() - start, + [TraceAttr.SettleCompleted]: ok, + }) + return ok + } + ) + if (!settled) { + rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.SettleTimeout) + return NextResponse.json( + { error: 'Previous response is still shutting down', aborted, settled: false }, + { status: 409 } + ) + } + rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.Settled) + return NextResponse.json({ aborted, settled: true }) + } + + rootSpan.setAttribute(TraceAttr.CopilotAbortOutcome, CopilotAbortOutcome.NoChatId) + return NextResponse.json({ aborted }) + } + ) } diff --git a/apps/sim/app/api/copilot/chat/stop/route.ts b/apps/sim/app/api/copilot/chat/stop/route.ts index 05e5935aa40..0505582e8de 100644 --- a/apps/sim/app/api/copilot/chat/stop/route.ts +++ b/apps/sim/app/api/copilot/chat/stop/route.ts @@ -6,6 +6,10 @@ import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { getSession } from '@/lib/auth' import { normalizeMessage, type PersistedMessage } from 
'@/lib/copilot/chat/persisted-message' +import { CopilotStopOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withIncomingGoSpan } from '@/lib/copilot/request/otel' import { taskPubSub } from '@/lib/copilot/tasks' import { generateId } from '@/lib/core/utils/uuid' @@ -54,95 +58,126 @@ const StopSchema = z.object({ streamId: z.string(), content: z.string(), contentBlocks: z.array(ContentBlockSchema).optional(), + // Optional for older clients; when present, flows into msg.requestId + // so the UI's copy-request-ID button survives a stopped turn. + requestId: z.string().optional(), }) -/** - * POST /api/copilot/chat/stop - * Persists partial assistant content when the user stops a stream mid-response. - * Clears conversationId so the server-side onComplete won't duplicate the message. - * The chat stream lock is intentionally left alone here; it is released only once - * the aborted server stream actually unwinds. - */ +// POST /api/copilot/chat/stop — persists partial assistant content +// when the user stops mid-stream. Lock release is handled by the +// aborted server stream unwinding, not this handler. 
export async function POST(req: NextRequest) { - try { - const session = await getSession() - if (!session?.user?.id) { - return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) - } + return withIncomingGoSpan( + req.headers, + TraceSpan.CopilotChatStopStream, + undefined, + async (span) => { + try { + const session = await getSession() + if (!session?.user?.id) { + span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.Unauthorized) + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) + } - const { chatId, streamId, content, contentBlocks } = StopSchema.parse(await req.json()) - const [row] = await db - .select({ - workspaceId: copilotChats.workspaceId, - messages: copilotChats.messages, - }) - .from(copilotChats) - .where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, session.user.id))) - .limit(1) + const { chatId, streamId, content, contentBlocks, requestId } = StopSchema.parse( + await req.json() + ) + span.setAttributes({ + [TraceAttr.ChatId]: chatId, + [TraceAttr.StreamId]: streamId, + [TraceAttr.UserId]: session.user.id, + [TraceAttr.CopilotStopContentLength]: content.length, + [TraceAttr.CopilotStopBlocksCount]: contentBlocks?.length ?? 0, + ...(requestId ? { [TraceAttr.RequestId]: requestId } : {}), + }) - if (!row) { - return NextResponse.json({ success: true }) - } + const [row] = await db + .select({ + workspaceId: copilotChats.workspaceId, + messages: copilotChats.messages, + }) + .from(copilotChats) + .where(and(eq(copilotChats.id, chatId), eq(copilotChats.userId, session.user.id))) + .limit(1) - const messages: Record[] = Array.isArray(row.messages) ? 
row.messages : [] - const userIdx = messages.findIndex((message) => message.id === streamId) - const alreadyHasResponse = - userIdx >= 0 && - userIdx + 1 < messages.length && - (messages[userIdx + 1] as Record)?.role === 'assistant' - const canAppendAssistant = - userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse + if (!row) { + span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.ChatNotFound) + return NextResponse.json({ success: true }) + } - const updateWhere = and( - eq(copilotChats.id, chatId), - eq(copilotChats.userId, session.user.id), - eq(copilotChats.conversationId, streamId) - ) + const messages: Record[] = Array.isArray(row.messages) ? row.messages : [] + const userIdx = messages.findIndex((message) => message.id === streamId) + const alreadyHasResponse = + userIdx >= 0 && + userIdx + 1 < messages.length && + (messages[userIdx + 1] as Record)?.role === 'assistant' + const canAppendAssistant = + userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse - const setClause: Record = { - conversationId: null, - updatedAt: new Date(), - } + const updateWhere = and( + eq(copilotChats.id, chatId), + eq(copilotChats.userId, session.user.id), + eq(copilotChats.conversationId, streamId) + ) - const hasContent = content.trim().length > 0 - const hasBlocks = Array.isArray(contentBlocks) && contentBlocks.length > 0 - const synthesizedStoppedBlocks = hasBlocks - ? contentBlocks - : hasContent - ? 
[{ type: 'text', channel: 'assistant', content }, { type: 'stopped' }] - : [{ type: 'stopped' }] - if (canAppendAssistant) { - const normalized = normalizeMessage({ - id: generateId(), - role: 'assistant', - content, - timestamp: new Date().toISOString(), - contentBlocks: synthesizedStoppedBlocks, - }) - const assistantMessage: PersistedMessage = normalized - setClause.messages = sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb` - } + const setClause: Record = { + conversationId: null, + updatedAt: new Date(), + } - const [updated] = await db - .update(copilotChats) - .set(setClause) - .where(updateWhere) - .returning({ workspaceId: copilotChats.workspaceId }) + const hasContent = content.trim().length > 0 + const hasBlocks = Array.isArray(contentBlocks) && contentBlocks.length > 0 + const synthesizedStoppedBlocks = hasBlocks + ? contentBlocks + : hasContent + ? [{ type: 'text', channel: 'assistant', content }, { type: 'stopped' }] + : [{ type: 'stopped' }] + if (canAppendAssistant) { + const normalized = normalizeMessage({ + id: generateId(), + role: 'assistant', + content, + timestamp: new Date().toISOString(), + contentBlocks: synthesizedStoppedBlocks, + // Persist so the UI copy-request-id button survives refetch. + ...(requestId ? 
{ requestId } : {}), + }) + const assistantMessage: PersistedMessage = normalized + setClause.messages = sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb` + } + span.setAttribute(TraceAttr.CopilotStopAppendedAssistant, canAppendAssistant) - if (updated?.workspaceId) { - taskPubSub?.publishStatusChanged({ - workspaceId: updated.workspaceId, - chatId, - type: 'completed', - }) - } + const [updated] = await db + .update(copilotChats) + .set(setClause) + .where(updateWhere) + .returning({ workspaceId: copilotChats.workspaceId }) + + if (updated?.workspaceId) { + taskPubSub?.publishStatusChanged({ + workspaceId: updated.workspaceId, + chatId, + type: 'completed', + }) + } - return NextResponse.json({ success: true }) - } catch (error) { - if (error instanceof z.ZodError) { - return NextResponse.json({ error: 'Invalid request' }, { status: 400 }) + span.setAttribute( + TraceAttr.CopilotStopOutcome, + updated ? CopilotStopOutcome.Persisted : CopilotStopOutcome.NoMatchingRow + ) + return NextResponse.json({ success: true }) + } catch (error) { + if (error instanceof z.ZodError) { + span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.ValidationError) + return NextResponse.json( + { error: 'Invalid request data', details: error.errors }, + { status: 400 } + ) + } + logger.error('Error stopping chat stream:', error) + span.setAttribute(TraceAttr.CopilotStopOutcome, CopilotStopOutcome.InternalError) + return NextResponse.json({ error: 'Internal server error' }, { status: 500 }) + } } - logger.error('Error stopping chat stream:', error) - return NextResponse.json({ error: 'Internal server error' }, { status: 500 }) - } + ) } diff --git a/apps/sim/app/api/copilot/chat/stream/route.test.ts b/apps/sim/app/api/copilot/chat/stream/route.test.ts index 3105f9216ce..7c99617e592 100644 --- a/apps/sim/app/api/copilot/chat/stream/route.test.ts +++ b/apps/sim/app/api/copilot/chat/stream/route.test.ts @@ -167,4 +167,42 @@ describe('copilot chat 
stream replay route', () => { expect(body).toContain('"code":"resume_run_unavailable"') expect(body).toContain(`"type":"${MothershipStreamV1EventType.complete}"`) }) + + it('uses the latest live request id for synthetic terminal replay events', async () => { + getLatestRunForStream + .mockResolvedValueOnce({ + status: 'active', + executionId: 'exec-1', + id: 'run-1', + }) + .mockResolvedValueOnce({ + status: 'cancelled', + executionId: 'exec-1', + id: 'run-1', + }) + readEvents + .mockResolvedValueOnce([ + { + stream: { streamId: 'stream-1', cursor: '1' }, + seq: 1, + trace: { requestId: 'req-live-123' }, + type: MothershipStreamV1EventType.text, + payload: { + channel: 'assistant', + text: 'hello', + }, + }, + ]) + .mockResolvedValueOnce([]) + + const response = await GET( + new NextRequest('http://localhost:3000/api/copilot/chat/stream?streamId=stream-1&after=0') + ) + + const chunks = await readAllChunks(response) + const terminalChunk = chunks[chunks.length - 1] ?? '' + expect(terminalChunk).toContain(`"type":"${MothershipStreamV1EventType.complete}"`) + expect(terminalChunk).toContain('"requestId":"req-live-123"') + expect(terminalChunk).toContain('"status":"cancelled"') + }) }) diff --git a/apps/sim/app/api/copilot/chat/stream/route.ts b/apps/sim/app/api/copilot/chat/stream/route.ts index 5028ecf7e5e..bb4544c2f24 100644 --- a/apps/sim/app/api/copilot/chat/stream/route.ts +++ b/apps/sim/app/api/copilot/chat/stream/route.ts @@ -1,3 +1,4 @@ +import { context as otelContext, trace } from '@opentelemetry/api' import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { getLatestRunForStream } from '@/lib/copilot/async-runs/repository' @@ -5,7 +6,15 @@ import { MothershipStreamV1CompletionStatus, MothershipStreamV1EventType, } from '@/lib/copilot/generated/mothership-stream-v1' +import { + CopilotResumeOutcome, + CopilotTransport, +} from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } 
from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { contextFromRequestHeaders } from '@/lib/copilot/request/go/propagation' import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http' +import { getCopilotTracer, markSpanForError } from '@/lib/copilot/request/otel' import { checkForReplayGap, createEvent, @@ -22,6 +31,25 @@ const logger = createLogger('CopilotChatStreamAPI') const POLL_INTERVAL_MS = 250 const MAX_STREAM_MS = 60 * 60 * 1000 +function extractCanonicalRequestId(value: unknown): string { + return typeof value === 'string' && value.length > 0 ? value : '' +} + +function extractRunRequestId(run: { requestContext?: unknown } | null | undefined): string { + if (!run || typeof run.requestContext !== 'object' || run.requestContext === null) { + return '' + } + const requestContext = run.requestContext as Record + return ( + extractCanonicalRequestId(requestContext.requestId) || + extractCanonicalRequestId(requestContext.simRequestId) + ) +} + +function extractEnvelopeRequestId(envelope: { trace?: { requestId?: unknown } }): string { + return extractCanonicalRequestId(envelope.trace?.requestId) +} + function isTerminalStatus( status: string | null | undefined ): status is MothershipStreamV1CompletionStatus { @@ -39,10 +67,12 @@ function buildResumeTerminalEnvelopes(options: { message?: string code: string reason?: string + requestId?: string }) { const baseSeq = Number(options.afterCursor || '0') const seq = Number.isFinite(baseSeq) ? baseSeq : 0 const envelopes: ReturnType[] = [] + const rid = options.requestId ?? 
'' if (options.status === MothershipStreamV1CompletionStatus.error) { envelopes.push( @@ -50,7 +80,7 @@ function buildResumeTerminalEnvelopes(options: { streamId: options.streamId, cursor: String(seq + 1), seq: seq + 1, - requestId: '', + requestId: rid, type: MothershipStreamV1EventType.error, payload: { message: options.message || 'Stream recovery failed before completion.', @@ -65,7 +95,7 @@ function buildResumeTerminalEnvelopes(options: { streamId: options.streamId, cursor: String(seq + envelopes.length + 1), seq: seq + envelopes.length + 1, - requestId: '', + requestId: rid, type: MothershipStreamV1EventType.complete, payload: { status: options.status, @@ -94,6 +124,77 @@ export async function GET(request: NextRequest) { return NextResponse.json({ error: 'streamId is required' }, { status: 400 }) } + // Root span for the whole resume/reconnect request. In stream mode the + // work happens inside `ReadableStream.start`, which the Node runtime + // invokes after this function returns and OUTSIDE the AsyncLocalStorage + // scope installed by `startActiveSpan`. We therefore start the span + // manually, capture its context, and re-enter that context inside the + // stream callback so every nested `withCopilotSpan` / `withDbSpan` call + // attaches to this root. + // + // `contextFromRequestHeaders` extracts the W3C `traceparent` the + // client echoed (set via `streamTraceparentRef` on Sim's chat POST + // response), so the resume span becomes a child of the original + // chat's `gen_ai.agent.execute` trace instead of a disconnected + // new root. On reconnects after page reload (client ref was wiped) + // the header is absent and extraction leaves the ambient context + // alone → the resume span becomes its own root. Same as pre- + // linking behavior; no regression. 
+ const incomingContext = contextFromRequestHeaders(request.headers) + const rootSpan = getCopilotTracer().startSpan( + TraceSpan.CopilotResumeRequest, + { + attributes: { + [TraceAttr.CopilotTransport]: batchMode ? CopilotTransport.Batch : CopilotTransport.Stream, + [TraceAttr.StreamId]: streamId, + [TraceAttr.UserId]: authenticatedUserId, + [TraceAttr.CopilotResumeAfterCursor]: afterCursor || '0', + }, + }, + incomingContext + ) + const rootContext = trace.setSpan(incomingContext, rootSpan) + + try { + return await otelContext.with(rootContext, () => + handleResumeRequestBody({ + request, + streamId, + afterCursor, + batchMode, + authenticatedUserId, + rootSpan, + rootContext, + }) + ) + } catch (err) { + rootSpan.setStatus({ + code: SpanStatusCode.ERROR, + message: err instanceof Error ? err.message : String(err), + }) + rootSpan.recordException(err instanceof Error ? err : new Error(String(err))) + rootSpan.end() + throw err + } +} + +async function handleResumeRequestBody({ + request, + streamId, + afterCursor, + batchMode, + authenticatedUserId, + rootSpan, + rootContext, +}: { + request: NextRequest + streamId: string + afterCursor: string + batchMode: boolean + authenticatedUserId: string + rootSpan: import('@opentelemetry/api').Span + rootContext: import('@opentelemetry/api').Context +}) { const run = await getLatestRunForStream(streamId, authenticatedUserId).catch((err) => { logger.warn('Failed to fetch latest run for stream', { streamId, @@ -109,8 +210,11 @@ export async function GET(request: NextRequest) { runStatus: run?.status, }) if (!run) { + rootSpan.setAttribute(TraceAttr.CopilotResumeOutcome, CopilotResumeOutcome.StreamNotFound) + rootSpan.end() return NextResponse.json({ error: 'Stream not found' }, { status: 404 }) } + rootSpan.setAttribute(TraceAttr.CopilotRunStatus, run.status) if (batchMode) { const afterSeq = afterCursor || '0' @@ -132,6 +236,12 @@ export async function GET(request: NextRequest) { previewSessionCount: 
previewSessions.length, runStatus: run.status, }) + rootSpan.setAttributes({ + [TraceAttr.CopilotResumeOutcome]: CopilotResumeOutcome.BatchDelivered, + [TraceAttr.CopilotResumeEventCount]: batchEvents.length, + [TraceAttr.CopilotResumePreviewSessionCount]: previewSessions.length, + }) + rootSpan.end() return NextResponse.json({ success: true, events: batchEvents, @@ -141,165 +251,203 @@ export async function GET(request: NextRequest) { } const startTime = Date.now() + let totalEventsFlushed = 0 + let pollIterations = 0 const stream = new ReadableStream({ async start(controller) { - let cursor = afterCursor || '0' - let controllerClosed = false - let sawTerminalEvent = false + // Re-enter the root OTel context so any `withCopilotSpan` call below + // (inside flushEvents/checkForReplayGap/etc.) parents under + // copilot.resume.request instead of becoming an orphan. + return otelContext.with(rootContext, () => startInner(controller)) + }, + }) - const closeController = () => { - if (controllerClosed) return - controllerClosed = true - try { - controller.close() - } catch { - // Controller already closed by runtime/client - } - } + async function startInner(controller: ReadableStreamDefaultController) { + let cursor = afterCursor || '0' + let controllerClosed = false + let sawTerminalEvent = false + let currentRequestId = extractRunRequestId(run) + // Stamp the logical request id + chat id on the resume root as soon + // as we resolve them from the run row, so TraceQL joins work on + // resume legs the same way they do on the original POST. 
+ if (currentRequestId) { + rootSpan.setAttribute(TraceAttr.RequestId, currentRequestId) + rootSpan.setAttribute(TraceAttr.SimRequestId, currentRequestId) + } + if (run?.chatId) { + rootSpan.setAttribute(TraceAttr.ChatId, run.chatId) + } - const enqueueEvent = (payload: unknown) => { - if (controllerClosed) return false - try { - controller.enqueue(encodeSSEEnvelope(payload)) - return true - } catch { - controllerClosed = true - return false - } + const closeController = () => { + if (controllerClosed) return + controllerClosed = true + try { + controller.close() + } catch { + // Controller already closed by runtime/client } + } - const abortListener = () => { + const enqueueEvent = (payload: unknown) => { + if (controllerClosed) return false + try { + controller.enqueue(encodeSSEEnvelope(payload)) + return true + } catch { controllerClosed = true + return false } - request.signal.addEventListener('abort', abortListener, { once: true }) - - const flushEvents = async () => { - const events = await readEvents(streamId, cursor) - if (events.length > 0) { - logger.info('[Resume] Flushing events', { - streamId, - afterCursor: cursor, - eventCount: events.length, - }) + } + + const abortListener = () => { + controllerClosed = true + } + request.signal.addEventListener('abort', abortListener, { once: true }) + + const flushEvents = async () => { + const events = await readEvents(streamId, cursor) + if (events.length > 0) { + totalEventsFlushed += events.length + logger.debug('[Resume] Flushing events', { + streamId, + afterCursor: cursor, + eventCount: events.length, + }) + } + for (const envelope of events) { + cursor = envelope.stream.cursor ?? String(envelope.seq) + currentRequestId = extractEnvelopeRequestId(envelope) || currentRequestId + if (envelope.type === MothershipStreamV1EventType.complete) { + sawTerminalEvent = true } - for (const envelope of events) { - cursor = envelope.stream.cursor ?? 
String(envelope.seq) - if (envelope.type === MothershipStreamV1EventType.complete) { - sawTerminalEvent = true - } - if (!enqueueEvent(envelope)) { - break - } + if (!enqueueEvent(envelope)) { + break } } + } - const emitTerminalIfMissing = ( - status: MothershipStreamV1CompletionStatus, - options?: { message?: string; code: string; reason?: string } - ) => { - if (controllerClosed || sawTerminalEvent) { - return + const emitTerminalIfMissing = ( + status: MothershipStreamV1CompletionStatus, + options?: { message?: string; code: string; reason?: string } + ) => { + if (controllerClosed || sawTerminalEvent) { + return + } + for (const envelope of buildResumeTerminalEnvelopes({ + streamId, + afterCursor: cursor, + status, + message: options?.message, + code: options?.code ?? 'resume_terminal', + reason: options?.reason, + requestId: currentRequestId, + })) { + cursor = envelope.stream.cursor ?? String(envelope.seq) + if (envelope.type === MothershipStreamV1EventType.complete) { + sawTerminalEvent = true } - for (const envelope of buildResumeTerminalEnvelopes({ - streamId, - afterCursor: cursor, - status, - message: options?.message, - code: options?.code ?? 'resume_terminal', - reason: options?.reason, - })) { - cursor = envelope.stream.cursor ?? 
String(envelope.seq) - if (envelope.type === MothershipStreamV1EventType.complete) { - sawTerminalEvent = true - } - if (!enqueueEvent(envelope)) { - break - } + if (!enqueueEvent(envelope)) { + break } } + } - try { - const gap = await checkForReplayGap(streamId, afterCursor) - if (gap) { - for (const envelope of gap.envelopes) { - enqueueEvent(envelope) - } - return + try { + const gap = await checkForReplayGap(streamId, afterCursor, currentRequestId) + if (gap) { + for (const envelope of gap.envelopes) { + enqueueEvent(envelope) } + return + } - await flushEvents() + await flushEvents() - while (!controllerClosed && Date.now() - startTime < MAX_STREAM_MS) { - const currentRun = await getLatestRunForStream(streamId, authenticatedUserId).catch( - (err) => { - logger.warn('Failed to poll latest run for stream', { - streamId, - error: err instanceof Error ? err.message : String(err), - }) - return null - } - ) - if (!currentRun) { - emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, { - message: 'The stream could not be recovered because its run metadata is unavailable.', - code: 'resume_run_unavailable', - reason: 'run_unavailable', + while (!controllerClosed && Date.now() - startTime < MAX_STREAM_MS) { + pollIterations += 1 + const currentRun = await getLatestRunForStream(streamId, authenticatedUserId).catch( + (err) => { + logger.warn('Failed to poll latest run for stream', { + streamId, + error: err instanceof Error ? 
err.message : String(err), }) - break + return null } + ) + if (!currentRun) { + emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, { + message: 'The stream could not be recovered because its run metadata is unavailable.', + code: 'resume_run_unavailable', + reason: 'run_unavailable', + }) + break + } - await flushEvents() - - if (controllerClosed) { - break - } - if (isTerminalStatus(currentRun.status)) { - emitTerminalIfMissing(currentRun.status, { - message: - currentRun.status === MothershipStreamV1CompletionStatus.error - ? typeof currentRun.error === 'string' - ? currentRun.error - : 'The recovered stream ended with an error.' - : undefined, - code: 'resume_terminal_status', - reason: 'terminal_status', - }) - break - } + currentRequestId = extractRunRequestId(currentRun) || currentRequestId - if (request.signal.aborted) { - controllerClosed = true - break - } + await flushEvents() - await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) + if (controllerClosed) { + break } - if (!controllerClosed && Date.now() - startTime >= MAX_STREAM_MS) { - emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, { - message: 'The stream recovery timed out before completion.', - code: 'resume_timeout', - reason: 'timeout', + if (isTerminalStatus(currentRun.status)) { + emitTerminalIfMissing(currentRun.status, { + message: + currentRun.status === MothershipStreamV1CompletionStatus.error + ? typeof currentRun.error === 'string' + ? currentRun.error + : 'The recovered stream ended with an error.' + : undefined, + code: 'resume_terminal_status', + reason: 'terminal_status', }) + break } - } catch (error) { - if (!controllerClosed && !request.signal.aborted) { - logger.warn('Stream replay failed', { - streamId, - error: error instanceof Error ? 
error.message : String(error), - }) - emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, { - message: 'The stream replay failed before completion.', - code: 'resume_internal', - reason: 'stream_replay_failed', - }) + + if (request.signal.aborted) { + controllerClosed = true + break } - } finally { - request.signal.removeEventListener('abort', abortListener) - closeController() + + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) } - }, - }) + if (!controllerClosed && Date.now() - startTime >= MAX_STREAM_MS) { + emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, { + message: 'The stream recovery timed out before completion.', + code: 'resume_timeout', + reason: 'timeout', + }) + } + } catch (error) { + if (!controllerClosed && !request.signal.aborted) { + logger.warn('Stream replay failed', { + streamId, + error: error instanceof Error ? error.message : String(error), + }) + emitTerminalIfMissing(MothershipStreamV1CompletionStatus.error, { + message: 'The stream replay failed before completion.', + code: 'resume_internal', + reason: 'stream_replay_failed', + }) + } + markSpanForError(rootSpan, error) + } finally { + request.signal.removeEventListener('abort', abortListener) + closeController() + rootSpan.setAttributes({ + [TraceAttr.CopilotResumeOutcome]: sawTerminalEvent + ? CopilotResumeOutcome.TerminalDelivered + : controllerClosed + ? 
CopilotResumeOutcome.ClientDisconnected + : CopilotResumeOutcome.EndedWithoutTerminal, + [TraceAttr.CopilotResumeEventCount]: totalEventsFlushed, + [TraceAttr.CopilotResumePollIterations]: pollIterations, + [TraceAttr.CopilotResumeDurationMs]: Date.now() - startTime, + }) + rootSpan.end() + } + } return new Response(stream, { headers: SSE_RESPONSE_HEADERS }) } diff --git a/apps/sim/app/api/copilot/confirm/route.ts b/apps/sim/app/api/copilot/confirm/route.ts index 83aea100f6b..aa3f3e90010 100644 --- a/apps/sim/app/api/copilot/confirm/route.ts +++ b/apps/sim/app/api/copilot/confirm/route.ts @@ -13,6 +13,9 @@ import { getRunSegment, upsertAsyncToolCall, } from '@/lib/copilot/async-runs/repository' +import { CopilotConfirmOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' import { publishToolConfirmation } from '@/lib/copilot/persistence/tool-confirm' import { authenticateCopilotRequestSessionOnly, @@ -22,6 +25,7 @@ import { createRequestTracker, createUnauthorizedResponse, } from '@/lib/copilot/request/http' +import { withIncomingGoSpan } from '@/lib/copilot/request/otel' const logger = createLogger('CopilotConfirmAPI') @@ -112,93 +116,112 @@ async function updateToolCallStatus( } } -/** - * POST /api/copilot/confirm - * Accept client tool completion or detach confirmations. - */ +// POST /api/copilot/confirm — delivery path for client-executed tool +// results. Correlate via `toolCallId` when the awaiting chat stream +// stalls. 
export async function POST(req: NextRequest) { const tracker = createRequestTracker() - try { - // Authenticate user using consolidated helper - const { userId: authenticatedUserId, isAuthenticated } = - await authenticateCopilotRequestSessionOnly() - - if (!isAuthenticated) { - return createUnauthorizedResponse() - } - - const body = await req.json() - const { toolCallId, status, message, data } = ConfirmationSchema.parse(body) - const existing = await getAsyncToolCall(toolCallId).catch((err) => { - logger.warn('Failed to fetch async tool call', { - toolCallId, - error: err instanceof Error ? err.message : String(err), - }) - return null - }) - - if (!existing) { - return createNotFoundResponse('Tool call not found') - } - - const run = await getRunSegment(existing.runId).catch((err) => { - logger.warn('Failed to fetch run segment', { - runId: existing.runId, - error: err instanceof Error ? err.message : String(err), - }) - return null - }) - if (!run) { - return createNotFoundResponse('Tool call run not found') - } - if (run.userId !== authenticatedUserId) { - return NextResponse.json({ error: 'Forbidden' }, { status: 403 }) - } - - // Update the durable tool call status and wake any waiters. 
- const updated = await updateToolCallStatus(existing, status, message, data) - - if (!updated) { - logger.error(`[${tracker.requestId}] Failed to update tool call status`, { - userId: authenticatedUserId, - toolCallId, - status, - internalStatus: status, - message, - }) - return createBadRequestResponse('Failed to update tool call status or tool call not found') + return withIncomingGoSpan( + req.headers, + TraceSpan.CopilotConfirmToolResult, + { [TraceAttr.RequestId]: tracker.requestId }, + async (span) => { + try { + const { userId: authenticatedUserId, isAuthenticated } = + await authenticateCopilotRequestSessionOnly() + + if (!isAuthenticated || !authenticatedUserId) { + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Unauthorized) + return createUnauthorizedResponse() + } + + const body = await req.json() + const { toolCallId, status, message, data } = ConfirmationSchema.parse(body) + span.setAttributes({ + [TraceAttr.ToolCallId]: toolCallId, + [TraceAttr.ToolConfirmationStatus]: status, + [TraceAttr.UserId]: authenticatedUserId, + }) + + const existing = await getAsyncToolCall(toolCallId).catch((err) => { + logger.warn('Failed to fetch async tool call', { + toolCallId, + error: err instanceof Error ? err.message : String(err), + }) + return null + }) + + if (!existing) { + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.ToolCallNotFound) + return createNotFoundResponse('Tool call not found') + } + if (existing.toolName) span.setAttribute(TraceAttr.ToolName, existing.toolName) + if (existing.runId) span.setAttribute(TraceAttr.RunId, existing.runId) + + const run = await getRunSegment(existing.runId).catch((err) => { + logger.warn('Failed to fetch run segment', { + runId: existing.runId, + error: err instanceof Error ? 
err.message : String(err), + }) + return null + }) + if (!run) { + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.RunNotFound) + return createNotFoundResponse('Tool call run not found') + } + if (run.userId !== authenticatedUserId) { + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Forbidden) + return NextResponse.json({ error: 'Forbidden' }, { status: 403 }) + } + + const updated = await updateToolCallStatus(existing, status, message, data) + + if (!updated) { + logger.error(`[${tracker.requestId}] Failed to update tool call status`, { + userId: authenticatedUserId, + toolCallId, + status, + internalStatus: status, + message, + }) + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.UpdateFailed) + // DB write failed — 500, not 400. 400 is a client-shape error. + return createInternalServerErrorResponse('Failed to update tool call status') + } + + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.Delivered) + return NextResponse.json({ + success: true, + message: message || `Tool call ${toolCallId} has been ${status.toLowerCase()}`, + toolCallId, + status, + }) + } catch (error) { + const duration = tracker.getDuration() + + if (error instanceof z.ZodError) { + logger.error(`[${tracker.requestId}] Request validation error:`, { + duration, + errors: error.errors, + }) + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.ValidationError) + return createBadRequestResponse( + `Invalid request data: ${error.errors.map((e) => e.message).join(', ')}` + ) + } + + logger.error(`[${tracker.requestId}] Unexpected error:`, { + duration, + error: error instanceof Error ? error.message : 'Unknown error', + stack: error instanceof Error ? error.stack : undefined, + }) + + span.setAttribute(TraceAttr.CopilotConfirmOutcome, CopilotConfirmOutcome.InternalError) + return createInternalServerErrorResponse( + error instanceof Error ? 
error.message : 'Internal server error' + ) + } } - - const duration = tracker.getDuration() - - return NextResponse.json({ - success: true, - message: message || `Tool call ${toolCallId} has been ${status.toLowerCase()}`, - toolCallId, - status, - }) - } catch (error) { - const duration = tracker.getDuration() - - if (error instanceof z.ZodError) { - logger.error(`[${tracker.requestId}] Request validation error:`, { - duration, - errors: error.errors, - }) - return createBadRequestResponse( - `Invalid request data: ${error.errors.map((e) => e.message).join(', ')}` - ) - } - - logger.error(`[${tracker.requestId}] Unexpected error:`, { - duration, - error: error instanceof Error ? error.message : 'Unknown error', - stack: error instanceof Error ? error.stack : undefined, - }) - - return createInternalServerErrorResponse( - error instanceof Error ? error.message : 'Internal server error' - ) - } + ) } diff --git a/apps/sim/app/api/copilot/models/route.ts b/apps/sim/app/api/copilot/models/route.ts index 7e23e38df69..8d5a61432a4 100644 --- a/apps/sim/app/api/copilot/models/route.ts +++ b/apps/sim/app/api/copilot/models/route.ts @@ -1,6 +1,7 @@ import { createLogger } from '@sim/logger' import { type NextRequest, NextResponse } from 'next/server' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { fetchGo } from '@/lib/copilot/request/go/fetch' import { authenticateCopilotRequestSessionOnly } from '@/lib/copilot/request/http' interface AvailableModel { @@ -43,10 +44,12 @@ export async function GET(_req: NextRequest) { } try { - const response = await fetch(`${SIM_AGENT_API_URL}/api/get-available-models`, { + const response = await fetchGo(`${SIM_AGENT_API_URL}/api/get-available-models`, { method: 'GET', headers, cache: 'no-store', + spanName: 'sim → go /api/get-available-models', + operation: 'get_available_models', }) const payload = await response.json().catch(() => ({})) diff --git a/apps/sim/app/api/copilot/stats/route.ts 
b/apps/sim/app/api/copilot/stats/route.ts index 75ed6d096b1..10d3520bde2 100644 --- a/apps/sim/app/api/copilot/stats/route.ts +++ b/apps/sim/app/api/copilot/stats/route.ts @@ -1,6 +1,7 @@ import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { fetchGo } from '@/lib/copilot/request/go/fetch' import { authenticateCopilotRequestSessionOnly, createBadRequestResponse, @@ -39,13 +40,15 @@ export async function POST(req: NextRequest) { diffAccepted, } - const agentRes = await fetch(`${SIM_AGENT_API_URL}/api/stats`, { + const agentRes = await fetchGo(`${SIM_AGENT_API_URL}/api/stats`, { method: 'POST', headers: { 'Content-Type': 'application/json', ...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}), }, body: JSON.stringify(payload), + spanName: 'sim → go /api/stats', + operation: 'stats_ingest', }) // Prefer not to block clients; still relay status diff --git a/apps/sim/app/api/mcp/copilot/route.ts b/apps/sim/app/api/mcp/copilot/route.ts index f2bc6a2754f..cf8c1d8e170 100644 --- a/apps/sim/app/api/mcp/copilot/route.ts +++ b/apps/sim/app/api/mcp/copilot/route.ts @@ -19,6 +19,7 @@ import { validateOAuthAccessToken } from '@/lib/auth/oauth-token' import { getHighestPrioritySubscription } from '@/lib/billing/core/subscription' import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context' import { ORCHESTRATION_TIMEOUT_MS, SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { createRequestId } from '@/lib/copilot/request/http' import { runHeadlessCopilotLifecycle } from '@/lib/copilot/request/lifecycle/headless' import { orchestrateSubagentStream } from '@/lib/copilot/request/subagent' import { ensureHandlersRegistered, executeTool } from '@/lib/copilot/tool-executor' @@ -59,7 +60,8 @@ async function authenticateCopilotApiKey(apiKey: string): Promise) || {} if (args.plan && !context.plan) { @@ -834,6 +848,7 @@ async function 
handleSubagentToolCall( userId, workflowId: args.workflowId as string | undefined, workspaceId: args.workspaceId as string | undefined, + simRequestId, abortSignal, } ) diff --git a/apps/sim/app/workspace/[workspaceId]/components/message-actions/message-actions.tsx b/apps/sim/app/workspace/[workspaceId]/components/message-actions/message-actions.tsx index 67c7dd92c8f..a86350a8881 100644 --- a/apps/sim/app/workspace/[workspaceId]/components/message-actions/message-actions.tsx +++ b/apps/sim/app/workspace/[workspaceId]/components/message-actions/message-actions.tsx @@ -138,35 +138,60 @@ export function MessageActions({ content, chatId, userQuery, requestId }: Messag } }, []) - if (!content) return null + const hasContent = Boolean(content) + const canSubmitFeedback = Boolean(chatId && userQuery) + if (!hasContent && !canSubmitFeedback) return null return ( <>
- - - + {hasContent && ( + + + + + + {copied ? 'Copied message' : 'Copy message'} + + + )} + {canSubmitFeedback && ( + <> + + + + + Good response + + + + + + Bad response + + + )}
diff --git a/apps/sim/app/workspace/[workspaceId]/home/hooks/use-chat.ts b/apps/sim/app/workspace/[workspaceId]/home/hooks/use-chat.ts index 8575e9a1b44..93ed060be3d 100644 --- a/apps/sim/app/workspace/[workspaceId]/home/hooks/use-chat.ts +++ b/apps/sim/app/workspace/[workspaceId]/home/hooks/use-chat.ts @@ -85,6 +85,7 @@ import { markRunToolManuallyStopped, reportManualRunToolStop, } from '@/lib/copilot/tools/client/run-tool-execution' +import { setCurrentChatTraceparent } from '@/lib/copilot/tools/client/trace-context' import { isWorkflowToolName } from '@/lib/copilot/tools/workflow-tools' import { generateId } from '@/lib/core/utils/uuid' import { getNextWorkflowColor } from '@/lib/workflows/colors' @@ -1271,6 +1272,14 @@ export function useChat( const activeTurnRef = useRef(null) const pendingUserMsgRef = useRef(null) const streamIdRef = useRef(undefined) + // W3C traceparent from the chat POST response; echoed on + // abort/stop/confirm/replay so side-channel calls join the same + // trace instead of becoming disconnected roots. + const streamTraceparentRef = useRef(undefined) + // The `request.id` from the active stream's trace events. Forwarded + // to /chat/stop so the persisted aborted message carries it (keeps + // the copy-request-ID button functional after refetch). 
+ const streamRequestIdRef = useRef(undefined) const locallyTerminalStreamIdRef = useRef(undefined) const lastCursorRef = useRef('0') const sendingRef = useRef(false) @@ -1309,6 +1318,9 @@ export function useChat( activeTurnRef.current = null pendingUserMsgRef.current = null streamIdRef.current = undefined + streamRequestIdRef.current = undefined + streamTraceparentRef.current = undefined + setCurrentChatTraceparent(undefined) lastCursorRef.current = '0' resetStreamingBuffers() }, [resetStreamingBuffers]) @@ -1808,8 +1820,10 @@ export function useChat( try { const pendingLines: string[] = [] - readLoop: while (true) { + while (true) { if (pendingLines.length === 0) { + // Don't read another chunk after `complete` has drained. + if (sawCompleteEvent) break const { done, value } = await reader.read() if (done) break if (isStale()) continue @@ -1849,6 +1863,7 @@ export function useChat( if (parsed.trace?.requestId && parsed.trace.requestId !== streamRequestId) { streamRequestId = parsed.trace.requestId + streamRequestIdRef.current = streamRequestId flush() } if (parsed.stream?.streamId) { @@ -2465,9 +2480,12 @@ export function useChat( } case MothershipStreamV1EventType.complete: { sawCompleteEvent = true - // `complete` is terminal for this stream, even if the transport takes a moment - // longer to close. - break readLoop + // `complete` is the end-of-turn marker; drain whatever + // else arrived in the same TCP chunk (trailing text, + // followups, run metadata) before stopping. Do NOT + // await another read — events after `complete` would + // be a server bug. + continue } } } @@ -2528,7 +2546,12 @@ export function useChat( ): Promise => { const response = await fetch( `/api/mothership/chat/stream?streamId=${encodeURIComponent(streamId)}&after=${encodeURIComponent(afterCursor)}&batch=true`, - { signal } + { + signal, + ...(streamTraceparentRef.current + ? 
{ headers: { traceparent: streamTraceparentRef.current } } + : {}), + } ) if (!response.ok) { throw new Error(`Stream resume batch failed: ${response.status}`) @@ -2599,7 +2622,12 @@ export function useChat( const sseRes = await fetch( `/api/mothership/chat/stream?streamId=${encodeURIComponent(streamId)}&after=${encodeURIComponent(latestCursor)}`, - { signal: activeAbort.signal } + { + signal: activeAbort.signal, + ...(streamTraceparentRef.current + ? { headers: { traceparent: streamTraceparentRef.current } } + : {}), + } ) if (!sseRes.ok || !sseRes.body) { throw new Error(RECONNECT_TAIL_ERROR) @@ -2840,12 +2868,18 @@ export function useChat( streamId?: string content?: string blocks?: ContentBlock[] + // `stopGeneration` must snapshot these BEFORE clearActiveTurn() + // nulls the refs, or the fetch sees undefined. + requestId?: string + traceparent?: string }) => { const chatId = overrides?.chatId ?? chatIdRef.current const streamId = overrides?.streamId ?? streamIdRef.current if (!chatId || !streamId) return const content = overrides?.content ?? streamingContentRef.current + const requestId = overrides?.requestId ?? streamRequestIdRef.current + const traceparent = overrides?.traceparent ?? streamTraceparentRef.current const sourceBlocks = overrides?.blocks ?? streamingBlocksRef.current const storedBlocks = sourceBlocks.map((block) => { @@ -2878,12 +2912,16 @@ export function useChat( try { const res = await fetch(stopPathRef.current, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + ...(traceparent ? { traceparent } : {}), + }, body: JSON.stringify({ chatId, streamId, content, ...(storedBlocks.length > 0 && { contentBlocks: storedBlocks }), + ...(requestId ? 
{ requestId } : {}), }), }) if (!res.ok) { @@ -2922,9 +2960,36 @@ export function useChat( const messagesRef = useRef(messages) messagesRef.current = messages + /** + * Notify downstream consumers that a turn has ended and, if a + * follow-up message is queued, kick the dispatcher. Safe to call + * from both the normal-completion path (`finalize`) and the + * abort/stop path (`stopGeneration`), which previously short- + * circuited without notifying — queued messages then sat until the + * user manually re-sent. Idempotent w.r.t. `onStreamEnd` (one call + * per terminal transition); the dispatcher itself de-dupes. + */ + const notifyTurnEnded = useCallback( + (options: { error: boolean; skipQueueDispatch?: boolean }) => { + const hasQueuedFollowUp = !options.error && messageQueueRef.current.length > 0 + if (!options.error) { + const cid = chatIdRef.current + if (cid && onStreamEndRef.current) { + onStreamEndRef.current(cid, messagesRef.current) + } + } + if (!options.error && !options.skipQueueDispatch && hasQueuedFollowUp) { + void enqueueQueueDispatchRef.current({ type: 'send_head' }) + } + return hasQueuedFollowUp + }, + [] + ) + const finalize = useCallback( (options?: { error?: boolean }) => { - const hasQueuedFollowUp = !options?.error && messageQueueRef.current.length > 0 + const isError = !!options?.error + const hasQueuedFollowUp = !isError && messageQueueRef.current.length > 0 reconcileTerminalPreviewSessions() locallyTerminalStreamIdRef.current = streamIdRef.current ?? activeTurnRef.current?.userMessageId ?? 
undefined @@ -2932,23 +2997,15 @@ export function useChat( setTransportIdle() abortControllerRef.current = null invalidateChatQueries({ includeDetail: !hasQueuedFollowUp }) - - if (!options?.error) { - const cid = chatIdRef.current - if (cid && onStreamEndRef.current) { - onStreamEndRef.current(cid, messagesRef.current) - } - } - - if (options?.error) { - return - } - - if (hasQueuedFollowUp) { - void enqueueQueueDispatchRef.current({ type: 'send_head' }) - } + notifyTurnEnded({ error: isError }) }, - [clearActiveTurn, invalidateChatQueries, reconcileTerminalPreviewSessions, setTransportIdle] + [ + clearActiveTurn, + invalidateChatQueries, + notifyTurnEnded, + reconcileTerminalPreviewSessions, + setTransportIdle, + ] ) finalizeRef.current = finalize @@ -3160,6 +3217,14 @@ export function useChat( signal: abortController.signal, }) + // Capture for propagation on side-channel calls + non-React + // tool-completion callbacks (via trace-context singleton). + const traceparent = response.headers.get('traceparent') + if (traceparent) { + streamTraceparentRef.current = traceparent + setCurrentChatTraceparent(traceparent) + } + if (!response.ok) { const errorData = await response.json().catch(() => ({})) if (response.status === 409) { @@ -3401,6 +3466,12 @@ export function useChat( ...(block.options ? { options: [...block.options] } : {}), ...(block.toolCall ? { toolCall: { ...block.toolCall } } : {}), })) + // Snapshot BEFORE clearActiveTurn() nulls the refs. Both + // persistPartialResponse and the abort/stop fetches run inside + // stopBarrier below, after several awaits — the refs are long + // gone by the time the fetches serialize their headers. + const stopRequestIdSnapshot = streamRequestIdRef.current + const stopTraceparentSnapshot = streamTraceparentRef.current locallyTerminalStreamIdRef.current = sid streamGenRef.current++ @@ -3460,7 +3531,12 @@ export function useChat( ? 
(async () => { const res = await fetch('/api/mothership/chat/abort', { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + ...(stopTraceparentSnapshot + ? { traceparent: stopTraceparentSnapshot } + : {}), + }, body: JSON.stringify({ streamId: sid, ...(resolvedChatId ? { chatId: resolvedChatId } : {}), @@ -3483,6 +3559,8 @@ export function useChat( streamId: sid, content: stopContentSnapshot, blocks: stopBlocksSnapshot, + requestId: stopRequestIdSnapshot, + traceparent: stopTraceparentSnapshot, }) } @@ -3496,6 +3574,8 @@ export function useChat( pendingStopPromiseRef.current = stopBarrier try { await stopBarrier + // Dispatch queued follow-ups after Stop resolves. + notifyTurnEnded({ error: false }) } catch (err) { setError(err instanceof Error ? err.message : 'Failed to stop the previous response') throw err @@ -3507,6 +3587,7 @@ export function useChat( }, [ cancelActiveWorkflowExecutions, invalidateChatQueries, + notifyTurnEnded, persistPartialResponse, queryClient, resetEphemeralPreviewState, diff --git a/apps/sim/instrumentation-node.ts b/apps/sim/instrumentation-node.ts index 0f6bd4352d7..54804e1955c 100644 --- a/apps/sim/instrumentation-node.ts +++ b/apps/sim/instrumentation-node.ts @@ -1,20 +1,32 @@ -/** - * Sim OpenTelemetry - Server-side Instrumentation - */ +// Sim OTel bootstrap. Filter by `mothership.origin` or span-name +// prefix (`sim-mothership:` / `go-mothership:`) to separate the two +// halves of a mothership trace in the OTLP backend. 
import type { Attributes, Context, Link, SpanKind } from '@opentelemetry/api' -import { DiagConsoleLogger, DiagLogLevel, diag } from '@opentelemetry/api' -import type { Sampler, SamplingResult } from '@opentelemetry/sdk-trace-base' +import { DiagConsoleLogger, DiagLogLevel, diag, TraceFlags, trace } from '@opentelemetry/api' +import type { + ReadableSpan, + Sampler, + SamplingResult, + Span, + SpanProcessor, +} from '@opentelemetry/sdk-trace-base' import { createLogger } from '@sim/logger' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' import { env } from './lib/core/config/env' diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.ERROR) const logger = createLogger('OTelInstrumentation') +const MOTHERSHIP_ORIGIN = 'sim-mothership' as const +const SPAN_NAME_PREFIX = `${MOTHERSHIP_ORIGIN}: ` + +const SERVICE_INSTANCE_SLUG = 'sim' as const + const DEFAULT_TELEMETRY_CONFIG = { endpoint: env.TELEMETRY_ENDPOINT || 'https://telemetry.simstudio.ai/v1/traces', - serviceName: 'sim-studio', + serviceName: 'mothership', serviceVersion: '0.1.0', serverSide: { enabled: true }, batchSettings: { @@ -25,29 +37,95 @@ const DEFAULT_TELEMETRY_CONFIG = { }, } -/** - * Span name prefixes we want to KEEP - */ -const ALLOWED_SPAN_PREFIXES = [ - 'platform.', // Our platform events - 'gen_ai.', // GenAI semantic convention spans - 'workflow.', // Workflow execution spans - 'block.', // Block execution spans - 'http.client.', // Our API block HTTP calls - 'function.', // Function block execution - 'router.', // Router block evaluation - 'condition.', // Condition block evaluation - 'loop.', // Loop block execution - 'parallel.', // Parallel block execution -] +// Allowlist of span-name prefixes exported from this process. +// Non-mothership code (workflow executor, block runtime, framework +// noise) is dropped. Broaden carefully — `http.` etc. would reopen +// the firehose. 
+const ALLOWED_SPAN_PREFIXES = ['gen_ai.', 'copilot.', 'sim →', 'sim.', 'tool.execute'] function isBusinessSpan(spanName: string): boolean { return ALLOWED_SPAN_PREFIXES.some((prefix) => spanName.startsWith(prefix)) } +// Parse `OTEL_EXPORTER_OTLP_HEADERS`: `key1=value1,key2=value2` +// (URL-encoded values, whitespace tolerated). +function parseOtlpHeadersEnv(raw: string): Record { + const out: Record = {} + if (!raw) return out + for (const part of raw.split(',')) { + const trimmed = part.trim() + if (!trimmed) continue + const eq = trimmed.indexOf('=') + if (eq <= 0) continue + const key = trimmed.slice(0, eq).trim() + const rawVal = trimmed.slice(eq + 1).trim() + let val = rawVal + try { + val = decodeURIComponent(rawVal) + } catch { + // value wasn't URL-encoded; keep as-is. + } + if (key) out[key] = val + } + return out +} + +// Append `/v1/traces` to the OTLP base URL unless already present. +// The HTTP exporter doesn't auto-suffix the signal path even though +// the spec says the env var is a base URL. +function normalizeOtlpTracesUrl(url: string): string { + if (!url) return url + try { + const u = new URL(url) + if (u.pathname.endsWith('/v1/traces')) return url + const base = url.replace(/\/$/, '') + return `${base}/v1/traces` + } catch { + return url + } +} + +// Sampling ratio from env (mirrors Go's `samplerFromEnv`); fallback +// is 100% everywhere. Retention caps cost, not sampling. +function resolveSamplingRatio(_isLocalEndpoint: boolean): number { + const raw = process.env.TELEMETRY_SAMPLING_RATIO || process.env.OTEL_TRACES_SAMPLER_ARG || '' + if (raw) { + const parsed = Number.parseFloat(raw) + if (Number.isFinite(parsed)) { + if (parsed <= 0) return 0 + if (parsed >= 1) return 1 + return parsed + } + } + return 1.0 +} + +// Tags allowed spans with `mothership.origin` and prepends +// `sim-mothership:` to the span name so backends can visually split +// the two halves even when service.name is shared. 
+class MothershipOriginSpanProcessor implements SpanProcessor { + onStart(span: Span): void { + const name = span.name + if (!isBusinessSpan(name)) { + return + } + span.setAttribute(TraceAttr.MothershipOrigin, MOTHERSHIP_ORIGIN) + if (!name.startsWith(SPAN_NAME_PREFIX)) { + span.updateName(`${SPAN_NAME_PREFIX}${name}`) + } + } + onEnd(_span: ReadableSpan): void {} + shutdown(): Promise { + return Promise.resolve() + } + forceFlush(): Promise { + return Promise.resolve() + } +} + async function initializeOpenTelemetry() { try { - if (env.NEXT_TELEMETRY_DISABLED === '1') { + if (env.NEXT_TELEMETRY_DISABLED === '1' || process.env.NEXT_TELEMETRY_DISABLED === '1') { logger.info('OpenTelemetry disabled via NEXT_TELEMETRY_DISABLED=1') return } @@ -59,11 +137,29 @@ async function initializeOpenTelemetry() { telemetryConfig = DEFAULT_TELEMETRY_CONFIG } + // Prefer the OTel spec env var, fall back to legacy TELEMETRY_ENDPOINT. + const resolvedEndpoint = + process.env.OTEL_EXPORTER_OTLP_ENDPOINT || + process.env.TELEMETRY_ENDPOINT || + env.TELEMETRY_ENDPOINT || + telemetryConfig.endpoint + telemetryConfig = { + ...telemetryConfig, + endpoint: resolvedEndpoint, + serviceName: 'mothership', + } + if (telemetryConfig.serverSide?.enabled === false) { logger.info('Server-side OpenTelemetry disabled in config') return } + logger.info('OpenTelemetry init', { + endpoint: telemetryConfig.endpoint, + serviceName: telemetryConfig.serviceName, + origin: MOTHERSHIP_ORIGIN, + }) + const { NodeSDK } = await import('@opentelemetry/sdk-node') const { defaultResource, resourceFromAttributes } = await import('@opentelemetry/resources') const { ATTR_SERVICE_NAME, ATTR_SERVICE_VERSION, ATTR_DEPLOYMENT_ENVIRONMENT } = await import( @@ -71,11 +167,14 @@ async function initializeOpenTelemetry() { ) const { OTLPTraceExporter } = await import('@opentelemetry/exporter-trace-otlp-http') const { BatchSpanProcessor } = await import('@opentelemetry/sdk-trace-node') - const { ParentBasedSampler, 
TraceIdRatioBasedSampler, SamplingDecision } = await import( + const { TraceIdRatioBasedSampler, SamplingDecision } = await import( '@opentelemetry/sdk-trace-base' ) - const createBusinessSpanSampler = (baseSampler: Sampler): Sampler => ({ + // Drops Next framework spans, inherits SAMPLED from business + // parents, and re-samples business roots fresh (don't delegate to + // ParentBased — its unsampled-parent path is AlwaysOff by default). + const createBusinessSpanSampler = (rootRatioSampler: Sampler): Sampler => ({ shouldSample( context: Context, traceId: string, @@ -88,25 +187,60 @@ async function initializeOpenTelemetry() { return { decision: SamplingDecision.NOT_RECORD } } + const parentSpanContext = trace.getSpanContext(context) + const parentIsSampled = + !!parentSpanContext && + (parentSpanContext.traceFlags & TraceFlags.SAMPLED) === TraceFlags.SAMPLED + + if (parentIsSampled) { + return { decision: SamplingDecision.RECORD_AND_SAMPLED } + } + if (isBusinessSpan(spanName)) { - return baseSampler.shouldSample(context, traceId, spanName, spanKind, attributes, links) + return rootRatioSampler.shouldSample( + context, + traceId, + spanName, + spanKind, + attributes, + links + ) } return { decision: SamplingDecision.NOT_RECORD } }, - toString(): string { - return `BusinessSpanSampler{baseSampler=${baseSampler.toString()}}` + return `BusinessSpanSampler{rootSampler=${rootRatioSampler.toString()}}` }, }) + const otlpHeaders = parseOtlpHeadersEnv(process.env.OTEL_EXPORTER_OTLP_HEADERS || '') + const exporterUrl = normalizeOtlpTracesUrl(telemetryConfig.endpoint) + const exporter = new OTLPTraceExporter({ - url: telemetryConfig.endpoint, - headers: {}, + url: exporterUrl, + headers: otlpHeaders, timeoutMillis: Math.min(telemetryConfig.batchSettings.exportTimeoutMillis, 10000), keepAlive: false, }) + // Surface export failures (BatchSpanProcessor swallows them otherwise). 
+ const origExport = exporter.export.bind(exporter) + exporter.export = (spans, resultCallback) => { + origExport(spans, (result) => { + if (result?.code !== 0) { + // eslint-disable-next-line no-console + console.error('[OTEL] exporter export failed', { + endpoint: telemetryConfig.endpoint, + resultCode: result?.code, + error: result?.error?.message, + spanCount: spans.length, + }) + } + resultCallback(result) + }) + } + const batchProcessor = new BatchSpanProcessor(exporter, { maxQueueSize: telemetryConfig.batchSettings.maxQueueSize, maxExportBatchSize: telemetryConfig.batchSettings.maxExportBatchSize, @@ -114,28 +248,48 @@ async function initializeOpenTelemetry() { exportTimeoutMillis: telemetryConfig.batchSettings.exportTimeoutMillis, }) + // Unique instance id per origin keeps Jaeger's clock-skew adjuster + // from grouping Sim+Go spans together (they'd see multi-second + // drift as intra-service and emit spurious warnings). + const serviceInstanceId = `${telemetryConfig.serviceName}-${SERVICE_INSTANCE_SLUG}` const resource = defaultResource().merge( resourceFromAttributes({ [ATTR_SERVICE_NAME]: telemetryConfig.serviceName, [ATTR_SERVICE_VERSION]: telemetryConfig.serviceVersion, - [ATTR_DEPLOYMENT_ENVIRONMENT]: env.NODE_ENV || 'development', - 'service.namespace': 'sim-ai-platform', + // OTEL_ → DEPLOYMENT_ENVIRONMENT → NODE_ENV; matches Go's + // `resourceEnvFromEnv()` so both halves tag the same value. 
+ [ATTR_DEPLOYMENT_ENVIRONMENT]: + process.env.OTEL_DEPLOYMENT_ENVIRONMENT || + process.env.DEPLOYMENT_ENVIRONMENT || + env.NODE_ENV || + 'development', + 'service.namespace': 'mothership', + 'service.instance.id': serviceInstanceId, + 'mothership.origin': MOTHERSHIP_ORIGIN, 'telemetry.sdk.name': 'opentelemetry', 'telemetry.sdk.language': 'nodejs', 'telemetry.sdk.version': '1.0.0', }) ) - const baseSampler = new ParentBasedSampler({ - root: new TraceIdRatioBasedSampler(0.1), + const isLocalEndpoint = /localhost|127\.0\.0\.1/i.test(telemetryConfig.endpoint) + const samplingRatio = resolveSamplingRatio(isLocalEndpoint) + const rootRatioSampler = new TraceIdRatioBasedSampler(samplingRatio) + const sampler = createBusinessSpanSampler(rootRatioSampler) + + logger.info('OpenTelemetry sampler configured', { + samplingRatio, + endpoint: telemetryConfig.endpoint, + origin: MOTHERSHIP_ORIGIN, }) - const sampler = createBusinessSpanSampler(baseSampler) + + // Origin-prefix must run before batch so the rename/attr is captured. 
+ const spanProcessors: SpanProcessor[] = [new MothershipOriginSpanProcessor(), batchProcessor] const sdk = new NodeSDK({ resource, - spanProcessor: batchProcessor, + spanProcessors, sampler, - traceExporter: exporter, }) sdk.start() @@ -152,7 +306,11 @@ async function initializeOpenTelemetry() { process.on('SIGTERM', shutdownOtel) process.on('SIGINT', shutdownOtel) - logger.info('OpenTelemetry instrumentation initialized with business span filtering') + logger.info('OpenTelemetry instrumentation initialized', { + serviceName: telemetryConfig.serviceName, + origin: MOTHERSHIP_ORIGIN, + samplingRatio, + }) } catch (error) { logger.error('Failed to initialize OpenTelemetry instrumentation', error) } diff --git a/apps/sim/lib/copilot/async-runs/repository.ts b/apps/sim/lib/copilot/async-runs/repository.ts index c18d27cbbec..ddbe18bb61a 100644 --- a/apps/sim/lib/copilot/async-runs/repository.ts +++ b/apps/sim/lib/copilot/async-runs/repository.ts @@ -1,3 +1,4 @@ +import { trace } from '@opentelemetry/api' import { db } from '@sim/db' import { type CopilotAsyncToolStatus, @@ -8,6 +9,9 @@ import { } from '@sim/db/schema' import { createLogger } from '@sim/logger' import { and, desc, eq, inArray, isNull } from 'drizzle-orm' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { markSpanForError } from '@/lib/copilot/request/otel' import { ASYNC_TOOL_STATUS, type AsyncCompletionData, @@ -16,6 +20,38 @@ import { } from './lifecycle' const logger = createLogger('CopilotAsyncRunsRepo') +// Resolve the tracer lazily per-call to avoid capturing the NoOp tracer +// before NodeSDK installs the global TracerProvider (Next.js 16/Turbopack +// can evaluate modules before instrumentation-node.ts finishes). +const getAsyncRunsTracer = () => trace.getTracer('sim-copilot-async-runs', '1.0.0') + +// Wrap an async DB op in a client-kind span with canonical `db.*` attrs. 
+// Cancellation is routed through `markSpanForError` so aborts record the +// exception event but don't paint spans red. +async function withDbSpan( + name: string, + op: string, + table: string, + attrs: Record, + fn: () => Promise +): Promise { + const span = getAsyncRunsTracer().startSpan(name, { + attributes: { + [TraceAttr.DbSystem]: 'postgresql', + [TraceAttr.DbOperation]: op, + [TraceAttr.DbSqlTable]: table, + ...Object.fromEntries(Object.entries(attrs).filter(([, v]) => v !== undefined)), + }, + }) + try { + return await fn() + } catch (error) { + markSpanForError(span, error) + throw error + } finally { + span.end() + } +} export interface CreateRunSegmentInput { id?: string @@ -34,26 +70,43 @@ export interface CreateRunSegmentInput { } export async function createRunSegment(input: CreateRunSegmentInput) { - const [run] = await db - .insert(copilotRuns) - .values({ - ...(input.id ? { id: input.id } : {}), - executionId: input.executionId, - parentRunId: input.parentRunId ?? null, - chatId: input.chatId, - userId: input.userId, - workflowId: input.workflowId ?? null, - workspaceId: input.workspaceId ?? null, - streamId: input.streamId, - agent: input.agent ?? null, - model: input.model ?? null, - provider: input.provider ?? null, - requestContext: input.requestContext ?? {}, - status: input.status ?? 'active', - }) - .returning() - - return run + return withDbSpan( + TraceSpan.CopilotAsyncRunsCreateRunSegment, + 'INSERT', + 'copilot_runs', + { + [TraceAttr.CopilotExecutionId]: input.executionId, + [TraceAttr.ChatId]: input.chatId, + [TraceAttr.StreamId]: input.streamId, + [TraceAttr.UserId]: input.userId, + [TraceAttr.CopilotRunParentId]: input.parentRunId ?? undefined, + [TraceAttr.CopilotRunAgent]: input.agent ?? undefined, + [TraceAttr.CopilotRunModel]: input.model ?? undefined, + [TraceAttr.CopilotRunProvider]: input.provider ?? undefined, + [TraceAttr.CopilotRunStatus]: input.status ?? 
'active', + }, + async () => { + const [run] = await db + .insert(copilotRuns) + .values({ + ...(input.id ? { id: input.id } : {}), + executionId: input.executionId, + parentRunId: input.parentRunId ?? null, + chatId: input.chatId, + userId: input.userId, + workflowId: input.workflowId ?? null, + workspaceId: input.workspaceId ?? null, + streamId: input.streamId, + agent: input.agent ?? null, + model: input.model ?? null, + provider: input.provider ?? null, + requestContext: input.requestContext ?? {}, + status: input.status ?? 'active', + }) + .returning() + return run + } + ) } export async function updateRunStatus( @@ -65,32 +118,53 @@ export async function updateRunStatus( requestContext?: Record } = {} ) { - const [run] = await db - .update(copilotRuns) - .set({ - status, - completedAt: updates.completedAt, - error: updates.error, - requestContext: updates.requestContext, - updatedAt: new Date(), - }) - .where(eq(copilotRuns.id, runId)) - .returning() - - return run ?? null + return withDbSpan( + TraceSpan.CopilotAsyncRunsUpdateRunStatus, + 'UPDATE', + 'copilot_runs', + { + [TraceAttr.RunId]: runId, + [TraceAttr.CopilotRunStatus]: status, + [TraceAttr.CopilotRunHasError]: !!updates.error, + [TraceAttr.CopilotRunHasCompletedAt]: !!updates.completedAt, + }, + async () => { + const [run] = await db + .update(copilotRuns) + .set({ + status, + completedAt: updates.completedAt, + error: updates.error, + requestContext: updates.requestContext, + updatedAt: new Date(), + }) + .where(eq(copilotRuns.id, runId)) + .returning() + return run ?? null + } + ) } export async function getLatestRunForExecution(executionId: string) { - const [run] = await db - .select() - .from(copilotRuns) - .where(eq(copilotRuns.executionId, executionId)) - .orderBy(desc(copilotRuns.startedAt)) - .limit(1) - - return run ?? 
null + return withDbSpan( + TraceSpan.CopilotAsyncRunsGetLatestForExecution, + 'SELECT', + 'copilot_runs', + { [TraceAttr.CopilotExecutionId]: executionId }, + async () => { + const [run] = await db + .select() + .from(copilotRuns) + .where(eq(copilotRuns.executionId, executionId)) + .orderBy(desc(copilotRuns.startedAt)) + .limit(1) + return run ?? null + } + ) } +// Un-instrumented: called from a 4 Hz resume poll; per-call spans +// swamped traces. Use Prom histograms if latency visibility is needed. export async function getLatestRunForStream(streamId: string, userId?: string) { const conditions = userId ? and(eq(copilotRuns.streamId, streamId), eq(copilotRuns.userId, userId)) @@ -101,13 +175,20 @@ export async function getLatestRunForStream(streamId: string, userId?: string) { .where(conditions) .orderBy(desc(copilotRuns.startedAt)) .limit(1) - return run ?? null } export async function getRunSegment(runId: string) { - const [run] = await db.select().from(copilotRuns).where(eq(copilotRuns.id, runId)).limit(1) - return run ?? null + return withDbSpan( + TraceSpan.CopilotAsyncRunsGetRunSegment, + 'SELECT', + 'copilot_runs', + { [TraceAttr.RunId]: runId }, + async () => { + const [run] = await db.select().from(copilotRuns).where(eq(copilotRuns.id, runId)).limit(1) + return run ?? 
null + } + ) } export async function createRunCheckpoint(input: { @@ -117,18 +198,29 @@ export async function createRunCheckpoint(input: { agentState: Record providerRequest: Record }) { - const [checkpoint] = await db - .insert(copilotRunCheckpoints) - .values({ - runId: input.runId, - pendingToolCallId: input.pendingToolCallId, - conversationSnapshot: input.conversationSnapshot, - agentState: input.agentState, - providerRequest: input.providerRequest, - }) - .returning() - - return checkpoint + return withDbSpan( + TraceSpan.CopilotAsyncRunsCreateRunCheckpoint, + 'INSERT', + 'copilot_run_checkpoints', + { + [TraceAttr.RunId]: input.runId, + [TraceAttr.CopilotCheckpointPendingToolCallId]: input.pendingToolCallId, + }, + async () => { + const [checkpoint] = await db + .insert(copilotRunCheckpoints) + .values({ + runId: input.runId, + pendingToolCallId: input.pendingToolCallId, + conversationSnapshot: input.conversationSnapshot, + agentState: input.agentState, + providerRequest: input.providerRequest, + }) + .returning() + + return checkpoint + } + ) } export async function upsertAsyncToolCall(input: { @@ -139,67 +231,87 @@ export async function upsertAsyncToolCall(input: { args?: Record status?: CopilotAsyncToolStatus }) { - const existing = await getAsyncToolCall(input.toolCallId) - const incomingStatus = input.status ?? 'pending' - if ( - existing && - (isTerminalAsyncStatus(existing.status) || isDeliveredAsyncStatus(existing.status)) && - !isTerminalAsyncStatus(incomingStatus) && - !isDeliveredAsyncStatus(incomingStatus) - ) { - logger.info('Ignoring async tool upsert that would downgrade terminal state', { - toolCallId: input.toolCallId, - existingStatus: existing.status, - incomingStatus, - }) - return existing - } - const effectiveRunId = input.runId ?? existing?.runId ?? null - if (!effectiveRunId) { - logger.warn('upsertAsyncToolCall missing runId and no existing row', { - toolCallId: input.toolCallId, - toolName: input.toolName, - status: input.status ?? 
'pending', - }) - return null - } - - const now = new Date() - const [row] = await db - .insert(copilotAsyncToolCalls) - .values({ - runId: effectiveRunId, - checkpointId: input.checkpointId ?? null, - toolCallId: input.toolCallId, - toolName: input.toolName, - args: input.args ?? {}, - status: incomingStatus, - updatedAt: now, - }) - .onConflictDoUpdate({ - target: copilotAsyncToolCalls.toolCallId, - set: { - runId: effectiveRunId, - checkpointId: input.checkpointId ?? null, - toolName: input.toolName, - args: input.args ?? {}, - status: incomingStatus, - updatedAt: now, - }, - }) - .returning() - - return row + return withDbSpan( + TraceSpan.CopilotAsyncRunsUpsertAsyncToolCall, + 'UPSERT', + 'copilot_async_tool_calls', + { + [TraceAttr.ToolCallId]: input.toolCallId, + [TraceAttr.ToolName]: input.toolName, + [TraceAttr.CopilotAsyncToolStatus]: input.status ?? 'pending', + [TraceAttr.RunId]: input.runId ?? undefined, + }, + async () => { + const existing = await getAsyncToolCall(input.toolCallId) + const incomingStatus = input.status ?? 'pending' + if ( + existing && + (isTerminalAsyncStatus(existing.status) || isDeliveredAsyncStatus(existing.status)) && + !isTerminalAsyncStatus(incomingStatus) && + !isDeliveredAsyncStatus(incomingStatus) + ) { + logger.info('Ignoring async tool upsert that would downgrade terminal state', { + toolCallId: input.toolCallId, + existingStatus: existing.status, + incomingStatus, + }) + return existing + } + const effectiveRunId = input.runId ?? existing?.runId ?? null + if (!effectiveRunId) { + logger.warn('upsertAsyncToolCall missing runId and no existing row', { + toolCallId: input.toolCallId, + toolName: input.toolName, + status: input.status ?? 'pending', + }) + return null + } + + const now = new Date() + const [row] = await db + .insert(copilotAsyncToolCalls) + .values({ + runId: effectiveRunId, + checkpointId: input.checkpointId ?? null, + toolCallId: input.toolCallId, + toolName: input.toolName, + args: input.args ?? 
{}, + status: incomingStatus, + updatedAt: now, + }) + .onConflictDoUpdate({ + target: copilotAsyncToolCalls.toolCallId, + set: { + runId: effectiveRunId, + checkpointId: input.checkpointId ?? null, + toolName: input.toolName, + args: input.args ?? {}, + status: incomingStatus, + updatedAt: now, + }, + }) + .returning() + + return row + } + ) } export async function getAsyncToolCall(toolCallId: string) { - const [row] = await db - .select() - .from(copilotAsyncToolCalls) - .where(eq(copilotAsyncToolCalls.toolCallId, toolCallId)) - .limit(1) - - return row ?? null + return withDbSpan( + TraceSpan.CopilotAsyncRunsGetAsyncToolCall, + 'SELECT', + 'copilot_async_tool_calls', + { [TraceAttr.ToolCallId]: toolCallId }, + async () => { + const [row] = await db + .select() + .from(copilotAsyncToolCalls) + .where(eq(copilotAsyncToolCalls.toolCallId, toolCallId)) + .limit(1) + return row ?? null + } + ) } export async function markAsyncToolStatus( @@ -213,28 +325,41 @@ export async function markAsyncToolStatus( completedAt?: Date | null } = {} ) { - const claimedAt = - updates.claimedAt !== undefined - ? updates.claimedAt - : status === 'running' && updates.claimedBy - ? new Date() - : undefined - - const [row] = await db - .update(copilotAsyncToolCalls) - .set({ - status, - claimedBy: updates.claimedBy, - claimedAt, - result: updates.result, - error: updates.error, - completedAt: updates.completedAt, - updatedAt: new Date(), - }) - .where(eq(copilotAsyncToolCalls.toolCallId, toolCallId)) - .returning() - - return row ?? null + return withDbSpan( + TraceSpan.CopilotAsyncRunsMarkAsyncToolStatus, + 'UPDATE', + 'copilot_async_tool_calls', + { + [TraceAttr.ToolCallId]: toolCallId, + [TraceAttr.CopilotAsyncToolStatus]: status, + [TraceAttr.CopilotAsyncToolHasError]: !!updates.error, + [TraceAttr.CopilotAsyncToolClaimedBy]: updates.claimedBy ?? undefined, + }, + async () => { + const claimedAt = + updates.claimedAt !== undefined + ? 
updates.claimedAt + : status === 'running' && updates.claimedBy + ? new Date() + : undefined + + const [row] = await db + .update(copilotAsyncToolCalls) + .set({ + status, + claimedBy: updates.claimedBy, + claimedAt, + result: updates.result, + error: updates.error, + completedAt: updates.completedAt, + updatedAt: new Date(), + }) + .where(eq(copilotAsyncToolCalls.toolCallId, toolCallId)) + .returning() + + return row ?? null + } + ) } export async function markAsyncToolRunning(toolCallId: string, claimedBy: string) { @@ -278,57 +403,91 @@ export async function markAsyncToolDelivered(toolCallId: string) { } export async function listAsyncToolCallsForRun(runId: string) { - return db - .select() - .from(copilotAsyncToolCalls) - .where(eq(copilotAsyncToolCalls.runId, runId)) - .orderBy(desc(copilotAsyncToolCalls.createdAt)) + return withDbSpan( + TraceSpan.CopilotAsyncRunsListForRun, + 'SELECT', + 'copilot_async_tool_calls', + { [TraceAttr.RunId]: runId }, + async () => + db + .select() + .from(copilotAsyncToolCalls) + .where(eq(copilotAsyncToolCalls.runId, runId)) + .orderBy(desc(copilotAsyncToolCalls.createdAt)) + ) } export async function getAsyncToolCalls(toolCallIds: string[]) { if (toolCallIds.length === 0) return [] - return db - .select() - .from(copilotAsyncToolCalls) - .where(inArray(copilotAsyncToolCalls.toolCallId, toolCallIds)) + return withDbSpan( + TraceSpan.CopilotAsyncRunsGetMany, + 'SELECT', + 'copilot_async_tool_calls', + { [TraceAttr.CopilotAsyncToolIdsCount]: toolCallIds.length }, + async () => + db + .select() + .from(copilotAsyncToolCalls) + .where(inArray(copilotAsyncToolCalls.toolCallId, toolCallIds)) + ) } export async function claimCompletedAsyncToolCall(toolCallId: string, workerId: string) { - const [row] = await db - .update(copilotAsyncToolCalls) - .set({ - claimedBy: workerId, - claimedAt: new Date(), - updatedAt: new Date(), - }) - .where( - and( - eq(copilotAsyncToolCalls.toolCallId, toolCallId), - 
inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']), - isNull(copilotAsyncToolCalls.claimedBy) - ) - ) - .returning() - - return row ?? null + return withDbSpan( + TraceSpan.CopilotAsyncRunsClaimCompleted, + 'UPDATE', + 'copilot_async_tool_calls', + { + [TraceAttr.ToolCallId]: toolCallId, + [TraceAttr.CopilotAsyncToolWorkerId]: workerId, + }, + async () => { + const [row] = await db + .update(copilotAsyncToolCalls) + .set({ + claimedBy: workerId, + claimedAt: new Date(), + updatedAt: new Date(), + }) + .where( + and( + eq(copilotAsyncToolCalls.toolCallId, toolCallId), + inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']), + isNull(copilotAsyncToolCalls.claimedBy) + ) + ) + .returning() + return row ?? null + } + ) } export async function releaseCompletedAsyncToolClaim(toolCallId: string, workerId: string) { - const [row] = await db - .update(copilotAsyncToolCalls) - .set({ - claimedBy: null, - claimedAt: null, - updatedAt: new Date(), - }) - .where( - and( - eq(copilotAsyncToolCalls.toolCallId, toolCallId), - inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']), - eq(copilotAsyncToolCalls.claimedBy, workerId) - ) - ) - .returning() - - return row ?? null + return withDbSpan( + TraceSpan.CopilotAsyncRunsReleaseClaim, + 'UPDATE', + 'copilot_async_tool_calls', + { + [TraceAttr.ToolCallId]: toolCallId, + [TraceAttr.CopilotAsyncToolWorkerId]: workerId, + }, + async () => { + const [row] = await db + .update(copilotAsyncToolCalls) + .set({ + claimedBy: null, + claimedAt: null, + updatedAt: new Date(), + }) + .where( + and( + eq(copilotAsyncToolCalls.toolCallId, toolCallId), + inArray(copilotAsyncToolCalls.status, ['completed', 'failed', 'cancelled']), + eq(copilotAsyncToolCalls.claimedBy, workerId) + ) + ) + .returning() + return row ?? 
null + } + ) } diff --git a/apps/sim/lib/copilot/chat/post.ts b/apps/sim/lib/copilot/chat/post.ts index 8581621d1f2..bc78deb8888 100644 --- a/apps/sim/lib/copilot/chat/post.ts +++ b/apps/sim/lib/copilot/chat/post.ts @@ -1,3 +1,4 @@ +import { type Context as OtelContext, context as otelContextApi } from '@opentelemetry/api' import { db } from '@sim/db' import { copilotChats } from '@sim/db/schema' import { createLogger } from '@sim/logger' @@ -19,11 +20,14 @@ import { finalizeAssistantTurn } from '@/lib/copilot/chat/terminal-state' import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context' import { COPILOT_REQUEST_MODES } from '@/lib/copilot/constants' import { - createBadRequestResponse, - createRequestTracker, - createUnauthorizedResponse, -} from '@/lib/copilot/request/http' + CopilotChatPersistOutcome, + CopilotTransport, +} from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { createBadRequestResponse, createUnauthorizedResponse } from '@/lib/copilot/request/http' import { createSSEStream, SSE_RESPONSE_HEADERS } from '@/lib/copilot/request/lifecycle/start' +import { startCopilotOtelRoot, withCopilotSpan } from '@/lib/copilot/request/otel' import { acquirePendingChatStream, getPendingChatStreamId, @@ -257,6 +261,15 @@ async function persistUserMessage(params: { contexts?: UnifiedChatRequest['contexts'] workspaceId?: string notifyWorkspaceStatus: boolean + /** + * Root context for the mothership request. When present the persist + * span is created explicitly under it, which avoids relying on + * AsyncLocalStorage propagation — some upstream awaits (Next.js + * framework frames, Turbopack-instrumented I/O) can swap the active + * store out from under us in dev, which would otherwise leave this + * span parented to the about-to-be-dropped Next.js HTTP span. 
+ */ + parentOtelContext?: OtelContext }): Promise { const { chatId, @@ -266,31 +279,60 @@ async function persistUserMessage(params: { contexts, workspaceId, notifyWorkspaceStatus, + parentOtelContext, } = params if (!chatId) return undefined - const userMsg = buildPersistedUserMessage({ - id: userMessageId, - content: message, - fileAttachments, - contexts, - }) + return withCopilotSpan( + TraceSpan.CopilotChatPersistUserMessage, + { + [TraceAttr.DbSystem]: 'postgresql', + [TraceAttr.DbSqlTable]: 'copilot_chats', + [TraceAttr.ChatId]: chatId, + [TraceAttr.ChatUserMessageId]: userMessageId, + [TraceAttr.ChatMessageBytes]: message.length, + [TraceAttr.ChatFileAttachmentCount]: fileAttachments?.length ?? 0, + [TraceAttr.ChatContextCount]: contexts?.length ?? 0, + ...(workspaceId ? { [TraceAttr.WorkspaceId]: workspaceId } : {}), + }, + async (span) => { + const userMsg = buildPersistedUserMessage({ + id: userMessageId, + content: message, + fileAttachments, + contexts, + }) - const [updated] = await db - .update(copilotChats) - .set({ - messages: sql`${copilotChats.messages} || ${JSON.stringify([userMsg])}::jsonb`, - conversationId: userMessageId, - updatedAt: new Date(), - }) - .where(eq(copilotChats.id, chatId)) - .returning({ messages: copilotChats.messages }) + const [updated] = await db + .update(copilotChats) + .set({ + messages: sql`${copilotChats.messages} || ${JSON.stringify([userMsg])}::jsonb`, + conversationId: userMessageId, + updatedAt: new Date(), + }) + .where(eq(copilotChats.id, chatId)) + .returning({ messages: copilotChats.messages }) + + const messagesAfter = Array.isArray(updated?.messages) ? updated.messages : undefined + span.setAttributes({ + [TraceAttr.ChatPersistOutcome]: updated + ? CopilotChatPersistOutcome.Appended + : CopilotChatPersistOutcome.ChatNotFound, + [TraceAttr.ChatMessagesAfter]: messagesAfter?.length ?? 
0, + }) - if (notifyWorkspaceStatus && updated && workspaceId) { - taskPubSub?.publishStatusChanged({ workspaceId, chatId, type: 'started' }) - } + if (notifyWorkspaceStatus && updated && workspaceId) { + taskPubSub?.publishStatusChanged({ + workspaceId, + chatId, + type: 'started', + }) + } - return Array.isArray(updated?.messages) ? updated.messages : undefined + return messagesAfter + }, + parentOtelContext + ) } async function buildInitialExecutionContext(params: { @@ -335,12 +377,42 @@ function buildOnComplete(params: { requestId: string workspaceId?: string notifyWorkspaceStatus: boolean + /** + * Root agent span for this request. When present, the final + * assistant message + invoked tool calls are recorded as + * `gen_ai.output.messages` on it before persistence runs. Keeps + * the Honeycomb Gen AI view complete across both the Sim root + * span and the Go-side `llm.stream` spans. + */ + otelRoot?: { + setOutputMessages: (output: { + assistantText?: string + toolCalls?: Array<{ id: string; name: string; arguments?: Record }> + }) => void + } }) { - const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus } = params + const { chatId, userMessageId, requestId, workspaceId, notifyWorkspaceStatus, otelRoot } = params return async (result: OrchestratorResult) => { + if (otelRoot && result.success) { + otelRoot.setOutputMessages({ + assistantText: result.content, + toolCalls: result.toolCalls?.map((tc) => ({ + id: tc.id, + name: tc.name, + arguments: tc.params, + })), + }) + } + if (!chatId) return + // On cancel, /chat/stop is the sole DB writer — it persists + // partial content AND clears conversationId in one UPDATE. If we + // finalize here first the filter misses and content vanishes. + // Real errors still finalize so the stream marker clears. 
+ if (result.cancelled) return + try { await finalizeAssistantTurn({ chatId, @@ -528,10 +600,23 @@ async function resolveBranch(params: { } export async function handleUnifiedChatPost(req: NextRequest) { - const tracker = createRequestTracker(false) let actualChatId: string | undefined let userMessageId = '' let chatStreamLockAcquired = false + // Started once we've parsed the body (need userMessageId to stamp as + // streamId). Every subsequent span (persistUserMessage, + // createRunSegment, the whole SSE stream, etc.) nests under this + // root via AsyncLocalStorage / explicit propagation, and the stream's + // terminal code path calls finish() when the request actually ends. + // Errors thrown from the handler before the stream starts are + // finished here in the catch below. + let otelRoot: ReturnType | undefined + // Canonical logical ID; assigned from otelRoot.requestId (the OTel + // trace ID) as soon as startCopilotOtelRoot runs. Empty only in the + // narrow pre-otelRoot window where errors don't correlate anyway. + let requestId = '' + const executionId = crypto.randomUUID() + const runId = crypto.randomUUID() try { const session = await getSession() @@ -541,210 +626,331 @@ export async function handleUnifiedChatPost(req: NextRequest) { const authenticatedUserId = session.user.id const body = ChatMessageSchema.parse(await req.json()) - const normalizedContexts = normalizeContexts(body.contexts) + const normalizedContexts = normalizeContexts(body.contexts) ?? 
[] userMessageId = body.userMessageId || crypto.randomUUID() - const branch = await resolveBranch({ - authenticatedUserId, - workflowId: body.workflowId, - workflowName: body.workflowName, - workspaceId: body.workspaceId, - model: body.model, - mode: body.mode, - provider: body.provider, + otelRoot = startCopilotOtelRoot({ + streamId: userMessageId, + executionId, + runId, + transport: CopilotTransport.Stream, + userMessagePreview: body.message, }) - if (branch instanceof NextResponse) { - return branch + if (otelRoot.requestId) { + requestId = otelRoot.requestId } - - let currentChat: ChatLoadResult['chat'] = null - let conversationHistory: unknown[] = [] - let chatIsNew = false - actualChatId = body.chatId - - if (body.chatId || body.createNewChat) { - const chatResult = await resolveOrCreateChat({ - chatId: body.chatId, - userId: authenticatedUserId, - ...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}), - workspaceId: branch.workspaceId, - model: branch.titleModel, - type: branch.kind === 'workflow' ? 'copilot' : 'mothership', - }) - currentChat = chatResult.chat - actualChatId = chatResult.chatId || body.chatId - chatIsNew = chatResult.isNew - conversationHistory = Array.isArray(chatResult.conversationHistory) - ? chatResult.conversationHistory - : [] - - if (body.chatId && !currentChat) { - return NextResponse.json({ error: 'Chat not found' }, { status: 404 }) + // `setInputMessages` is internally gated on + // OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT; safe to call. + otelRoot.setInputMessages({ userMessage: body.message }) + + // Wrap the rest of the handler so nested spans attach to the + // root via AsyncLocalStorage (otherwise they orphan into new traces). + const activeOtelRoot = otelRoot + return await otelContextApi.with(activeOtelRoot.context, async () => { + const branch = await withCopilotSpan( + TraceSpan.CopilotChatResolveBranch, + { + [TraceAttr.WorkflowId]: body.workflowId ?? 
'', + [TraceAttr.WorkspaceId]: body.workspaceId ?? '', + }, + () => + resolveBranch({ + authenticatedUserId, + workflowId: body.workflowId, + workflowName: body.workflowName, + workspaceId: body.workspaceId, + model: body.model, + mode: body.mode, + provider: body.provider, + }), + activeOtelRoot.context + ) + if (branch instanceof NextResponse) { + return branch } - } - if (chatIsNew && actualChatId && body.resourceAttachments?.length) { - await persistChatResources( - actualChatId, - body.resourceAttachments.map((r) => ({ - type: r.type, - id: r.id, - title: r.title ?? GENERIC_RESOURCE_TITLE[r.type], - })) - ) - } + let currentChat: ChatLoadResult['chat'] = null + let conversationHistory: unknown[] = [] + let chatIsNew = false + actualChatId = body.chatId - if (actualChatId) { - chatStreamLockAcquired = await acquirePendingChatStream(actualChatId, userMessageId) - if (!chatStreamLockAcquired) { - const activeStreamId = await getPendingChatStreamId(actualChatId) - return NextResponse.json( + if (body.chatId || body.createNewChat) { + const chatResult = await withCopilotSpan( + TraceSpan.CopilotChatResolveOrCreateChat, { - error: 'A response is already in progress for this chat.', - ...(activeStreamId ? { activeStreamId } : {}), + [TraceAttr.ChatPreexisting]: !!body.chatId, + [TraceAttr.CopilotChatIsNew]: !!body.createNewChat, }, - { status: 409 } + () => + resolveOrCreateChat({ + chatId: body.chatId, + userId: authenticatedUserId, + ...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}), + workspaceId: branch.workspaceId, + model: branch.titleModel, + type: branch.kind === 'workflow' ? 'copilot' : 'mothership', + }), + activeOtelRoot.context ) + currentChat = chatResult.chat + actualChatId = chatResult.chatId || body.chatId + chatIsNew = chatResult.isNew + conversationHistory = Array.isArray(chatResult.conversationHistory) + ? 
chatResult.conversationHistory + : [] + + if (body.chatId && !currentChat) { + return NextResponse.json({ error: 'Chat not found' }, { status: 404 }) + } } - } - const workspaceId = branch.workspaceId - const userPermissionPromise = workspaceId - ? getUserEntityPermissions(authenticatedUserId, 'workspace', workspaceId).catch((error) => { - logger.warn('Failed to load user permissions', { - error: error instanceof Error ? error.message : String(error), - workspaceId, - }) - return null - }) - : Promise.resolve(null) - const workspaceContextPromise = - branch.kind === 'workspace' - ? generateWorkspaceContext(branch.workspaceId, authenticatedUserId) - : Promise.resolve(undefined) - const agentContextsPromise = resolveAgentContexts({ - contexts: normalizedContexts, - resourceAttachments: body.resourceAttachments, - userId: authenticatedUserId, - message: body.message, - workspaceId, - chatId: actualChatId, - requestId: tracker.requestId, - }) - const persistedMessagesPromise = persistUserMessage({ - chatId: actualChatId, - userMessageId, - message: body.message, - fileAttachments: body.fileAttachments, - contexts: normalizedContexts, - workspaceId, - notifyWorkspaceStatus: branch.notifyWorkspaceStatus, - }) - const executionContextPromise = branch.buildExecutionContext({ - userId: authenticatedUserId, - chatId: actualChatId, - userTimezone: body.userTimezone, - messageId: userMessageId, - }) + if (chatIsNew && actualChatId && body.resourceAttachments?.length) { + await persistChatResources( + actualChatId, + body.resourceAttachments.map((r) => ({ + type: r.type, + id: r.id, + title: r.title ?? 
GENERIC_RESOURCE_TITLE[r.type], + })) + ) + } - const [agentContexts, userPermission, workspaceContext, persistedMessages, executionContext] = - await Promise.all([ - agentContextsPromise, - userPermissionPromise, - workspaceContextPromise, - persistedMessagesPromise, - executionContextPromise, - ]) - - if (persistedMessages) { - conversationHistory = persistedMessages.filter((message) => { - const record = message as Record - return record.id !== userMessageId + let pendingStreamWaitMs = 0 + if (actualChatId) { + const lockStart = Date.now() + chatStreamLockAcquired = await acquirePendingChatStream(actualChatId, userMessageId) + pendingStreamWaitMs = Date.now() - lockStart + if (!chatStreamLockAcquired) { + const activeStreamId = await getPendingChatStreamId(actualChatId) + return NextResponse.json( + { + error: 'A response is already in progress for this chat.', + ...(activeStreamId ? { activeStreamId } : {}), + }, + { status: 409 } + ) + } + } + + // Stamp request-shape metadata on the root `gen_ai.agent.execute` + // span now that `branch`, attachment counts, and the pending-stream + // wait are all known. This turns dashboard slicing by + // `copilot.surface` / `copilot.mode` / `copilot.interrupted_prior_stream` + // into a simple TraceQL filter. + activeOtelRoot.setRequestShape({ + branchKind: branch.kind, + mode: body.mode, + model: body.model, + provider: body.provider, + createNewChat: body.createNewChat, + prefetch: body.prefetch, + fileAttachmentsCount: body.fileAttachments?.length ?? 0, + resourceAttachmentsCount: body.resourceAttachments?.length ?? 0, + contextsCount: normalizedContexts.length, + commandsCount: body.commands?.length ?? 0, + pendingStreamWaitMs, }) - } - const requestPayload = - branch.kind === 'workflow' - ? await branch.buildPayload({ - message: body.message, - userId: authenticatedUserId, - userMessageId, - chatId: actualChatId, - contexts: agentContexts, - fileAttachments: body.fileAttachments, - userPermission: userPermission ?? 
undefined, - userTimezone: body.userTimezone, - workflowId: branch.workflowId, - workflowName: branch.workflowName, - workspaceId: branch.workspaceId, - mode: branch.mode, - provider: branch.provider, - commands: body.commands, - prefetch: body.prefetch, - implicitFeedback: body.implicitFeedback, + const workspaceId = branch.workspaceId + const userPermissionPromise = workspaceId + ? getUserEntityPermissions(authenticatedUserId, 'workspace', workspaceId).catch((error) => { + logger.warn('Failed to load user permissions', { + error: error instanceof Error ? error.message : String(error), + workspaceId, + }) + return null }) - : await branch.buildPayload({ + : Promise.resolve(null) + // Wrap the pre-LLM prep work in spans so the trace waterfall shows + // where time is going between "request received" and "llm.stream + // opens". Previously these ran bare under the root and inflated the + // apparent "gap" before the model call. Each promise is its own + // span; they run concurrently under Promise.all below. + const workspaceContextPromise = + branch.kind === 'workspace' + ? withCopilotSpan( + TraceSpan.CopilotChatBuildWorkspaceContext, + { [TraceAttr.WorkspaceId]: branch.workspaceId }, + () => generateWorkspaceContext(branch.workspaceId, authenticatedUserId), + activeOtelRoot.context + ) + : Promise.resolve(undefined) + const agentContextsPromise = withCopilotSpan( + TraceSpan.CopilotChatResolveAgentContexts, + { + [TraceAttr.CopilotContextsCount]: normalizedContexts.length, + [TraceAttr.CopilotResourceAttachmentsCount]: body.resourceAttachments?.length ?? 
0, + }, + () => + resolveAgentContexts({ + contexts: normalizedContexts, + resourceAttachments: body.resourceAttachments, + userId: authenticatedUserId, message: body.message, + workspaceId, + chatId: actualChatId, + requestId, + }), + activeOtelRoot.context + ) + const persistedMessagesPromise = persistUserMessage({ + chatId: actualChatId, + userMessageId, + message: body.message, + fileAttachments: body.fileAttachments, + contexts: normalizedContexts, + workspaceId, + notifyWorkspaceStatus: branch.notifyWorkspaceStatus, + parentOtelContext: activeOtelRoot.context, + }) + const executionContextPromise = withCopilotSpan( + TraceSpan.CopilotChatBuildExecutionContext, + { [TraceAttr.CopilotBranchKind]: branch.kind }, + () => + branch.buildExecutionContext({ userId: authenticatedUserId, - userMessageId, chatId: actualChatId, - contexts: agentContexts, - fileAttachments: body.fileAttachments, - userPermission: userPermission ?? undefined, userTimezone: body.userTimezone, - workspaceContext, - }) + messageId: userMessageId, + }), + activeOtelRoot.context + ) - const executionId = crypto.randomUUID() - const runId = crypto.randomUUID() + const [agentContexts, userPermission, workspaceContext, persistedMessages, executionContext] = + await Promise.all([ + agentContextsPromise, + userPermissionPromise, + workspaceContextPromise, + persistedMessagesPromise, + executionContextPromise, + ]) + + if (persistedMessages) { + conversationHistory = persistedMessages.filter((message) => { + const record = message as Record + return record.id !== userMessageId + }) + } - const stream = createSSEStream({ - requestPayload, - userId: authenticatedUserId, - streamId: userMessageId, - executionId, - runId, - chatId: actualChatId, - currentChat, - isNewChat: conversationHistory.length === 0, - message: body.message, - titleModel: branch.titleModel, - ...(branch.titleProvider ? 
{ titleProvider: branch.titleProvider } : {}), - requestId: tracker.requestId, - workspaceId, - orchestrateOptions: { + // buildPayload is the last synchronous step before the outbound + // Sim → Go HTTP call. It runs per-tool schema generation (subscription + // lookup + registry iteration, cached 30s) and file upload tracking + // per attachment. Wrapping it so we can see how much of the + // "before llm.stream" gap lives here vs elsewhere. + const requestPayload = await withCopilotSpan( + TraceSpan.CopilotChatBuildPayload, + { + [TraceAttr.CopilotBranchKind]: branch.kind, + [TraceAttr.CopilotFileAttachmentsCount]: body.fileAttachments?.length ?? 0, + [TraceAttr.CopilotContextsCount]: normalizedContexts.length, + }, + () => + branch.kind === 'workflow' + ? branch.buildPayload({ + message: body.message, + userId: authenticatedUserId, + userMessageId, + chatId: actualChatId, + contexts: agentContexts, + fileAttachments: body.fileAttachments, + userPermission: userPermission ?? undefined, + userTimezone: body.userTimezone, + workflowId: branch.workflowId, + workflowName: branch.workflowName, + workspaceId: branch.workspaceId, + mode: branch.mode, + provider: branch.provider, + commands: body.commands, + prefetch: body.prefetch, + implicitFeedback: body.implicitFeedback, + }) + : branch.buildPayload({ + message: body.message, + userId: authenticatedUserId, + userMessageId, + chatId: actualChatId, + contexts: agentContexts, + fileAttachments: body.fileAttachments, + userPermission: userPermission ?? undefined, + userTimezone: body.userTimezone, + workspaceContext, + }), + activeOtelRoot.context + ) + + if (actualChatId) { + activeOtelRoot.span.setAttribute(TraceAttr.ChatId, actualChatId) + } + if (workspaceId) { + activeOtelRoot.span.setAttribute(TraceAttr.WorkspaceId, workspaceId) + } + + const stream = createSSEStream({ + requestPayload, userId: authenticatedUserId, - ...(branch.kind === 'workflow' ? 
{ workflowId: branch.workflowId } : {}), - ...(branch.kind === 'workspace' ? { workspaceId: branch.workspaceId } : {}), - chatId: actualChatId, + streamId: userMessageId, executionId, runId, - goRoute: branch.goRoute, - autoExecuteTools: true, - interactive: true, - executionContext, - onComplete: buildOnComplete({ - chatId: actualChatId, - userMessageId, - requestId: tracker.requestId, - workspaceId, - notifyWorkspaceStatus: branch.notifyWorkspaceStatus, - }), - onError: buildOnError({ + chatId: actualChatId, + currentChat, + isNewChat: conversationHistory.length === 0, + message: body.message, + titleModel: branch.titleModel, + ...(branch.titleProvider ? { titleProvider: branch.titleProvider } : {}), + requestId, + workspaceId, + otelRoot: activeOtelRoot, + orchestrateOptions: { + userId: authenticatedUserId, + ...(branch.kind === 'workflow' ? { workflowId: branch.workflowId } : {}), + ...(branch.kind === 'workspace' ? { workspaceId: branch.workspaceId } : {}), chatId: actualChatId, - userMessageId, - requestId: tracker.requestId, - workspaceId, - notifyWorkspaceStatus: branch.notifyWorkspaceStatus, - }), - }, - }) + executionId, + runId, + goRoute: branch.goRoute, + autoExecuteTools: true, + interactive: true, + executionContext, + onComplete: buildOnComplete({ + chatId: actualChatId, + userMessageId, + requestId, + workspaceId, + notifyWorkspaceStatus: branch.notifyWorkspaceStatus, + otelRoot, + }), + onError: buildOnError({ + chatId: actualChatId, + userMessageId, + requestId, + workspaceId, + notifyWorkspaceStatus: branch.notifyWorkspaceStatus, + }), + }, + }) - return new Response(stream, { headers: SSE_RESPONSE_HEADERS }) + // Expose the root gen_ai.agent.execute span's trace identity to + // the browser so subsequent HTTP calls (stop, abort, confirm, + // SSE reconnect) can echo it back as `traceparent` — making + // all side-channel work on this request appear as child spans + // of this same trace in Tempo instead of disconnected roots. 
+ // W3C traceparent format: `00---`. + const rootCtx = activeOtelRoot.span.spanContext() + const rootTraceparent = `00-${rootCtx.traceId}-${rootCtx.spanId}-${ + (rootCtx.traceFlags & 0x1) === 0x1 ? '01' : '00' + }` + return new Response(stream, { + headers: { + ...SSE_RESPONSE_HEADERS, + traceparent: rootTraceparent, + }, + }) + }) // end otelContextApi.with } catch (error) { if (chatStreamLockAcquired && actualChatId && userMessageId) { await releasePendingChatStream(actualChatId, userMessageId) } + otelRoot?.finish('error', error) if (error instanceof z.ZodError) { return NextResponse.json( @@ -753,13 +959,15 @@ export async function handleUnifiedChatPost(req: NextRequest) { ) } - logger.error(`[${tracker.requestId}] Error handling unified chat request`, { + logger.error(`[${requestId}] Error handling unified chat request`, { error: error instanceof Error ? error.message : 'Unknown error', stack: error instanceof Error ? error.stack : undefined, }) return NextResponse.json( - { error: error instanceof Error ? error.message : 'Internal server error' }, + { + error: error instanceof Error ? 
error.message : 'Internal server error', + }, { status: 500 } ) } diff --git a/apps/sim/lib/copilot/chat/terminal-state.ts b/apps/sim/lib/copilot/chat/terminal-state.ts index f09c1c861bb..f0f43cb6bb0 100644 --- a/apps/sim/lib/copilot/chat/terminal-state.ts +++ b/apps/sim/lib/copilot/chat/terminal-state.ts @@ -2,6 +2,10 @@ import { db } from '@sim/db' import { copilotChats } from '@sim/db/schema' import { and, eq, sql } from 'drizzle-orm' import type { PersistedMessage } from '@/lib/copilot/chat/persisted-message' +import { CopilotChatFinalizeOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withCopilotSpan } from '@/lib/copilot/request/otel' interface FinalizeAssistantTurnParams { chatId: string @@ -19,39 +23,65 @@ export async function finalizeAssistantTurn({ userMessageId, assistantMessage, }: FinalizeAssistantTurnParams): Promise { - const [row] = await db - .select({ messages: copilotChats.messages }) - .from(copilotChats) - .where(eq(copilotChats.id, chatId)) - .limit(1) + return withCopilotSpan( + TraceSpan.CopilotChatFinalizeAssistantTurn, + { + [TraceAttr.DbSystem]: 'postgresql', + [TraceAttr.DbSqlTable]: 'copilot_chats', + [TraceAttr.ChatId]: chatId, + [TraceAttr.ChatUserMessageId]: userMessageId, + [TraceAttr.ChatHasAssistantMessage]: !!assistantMessage, + }, + async (span) => { + const [row] = await db + .select({ messages: copilotChats.messages }) + .from(copilotChats) + .where(eq(copilotChats.id, chatId)) + .limit(1) - const messages: Record[] = Array.isArray(row?.messages) ? 
row.messages : [] - const userIdx = messages.findIndex((message) => message.id === userMessageId) - const alreadyHasResponse = - userIdx >= 0 && - userIdx + 1 < messages.length && - (messages[userIdx + 1] as Record)?.role === 'assistant' - const canAppendAssistant = userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse - const updateWhere = and( - eq(copilotChats.id, chatId), - eq(copilotChats.conversationId, userMessageId) - ) + const messages: Record[] = Array.isArray(row?.messages) ? row.messages : [] + span.setAttribute(TraceAttr.ChatExistingMessageCount, messages.length) + const userIdx = messages.findIndex((message) => message.id === userMessageId) + const alreadyHasResponse = + userIdx >= 0 && + userIdx + 1 < messages.length && + (messages[userIdx + 1] as Record)?.role === 'assistant' + const canAppendAssistant = + userIdx >= 0 && userIdx === messages.length - 1 && !alreadyHasResponse + const updateWhere = and( + eq(copilotChats.id, chatId), + eq(copilotChats.conversationId, userMessageId) + ) - const baseUpdate = { - conversationId: null, - updatedAt: new Date(), - } + const baseUpdate = { + conversationId: null, + updatedAt: new Date(), + } - if (assistantMessage && canAppendAssistant) { - await db - .update(copilotChats) - .set({ - ...baseUpdate, - messages: sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`, - }) - .where(updateWhere) - return - } + if (assistantMessage && canAppendAssistant) { + await db + .update(copilotChats) + .set({ + ...baseUpdate, + messages: sql`${copilotChats.messages} || ${JSON.stringify([assistantMessage])}::jsonb`, + }) + .where(updateWhere) + span.setAttribute( + TraceAttr.ChatFinalizeOutcome, + CopilotChatFinalizeOutcome.AppendedAssistant + ) + return + } - await db.update(copilotChats).set(baseUpdate).where(updateWhere) + await db.update(copilotChats).set(baseUpdate).where(updateWhere) + span.setAttribute( + TraceAttr.ChatFinalizeOutcome, + assistantMessage + ? 
alreadyHasResponse + ? 'assistant_already_persisted' + : 'stale_user_message' + : 'cleared_stream_marker_only' + ) + } + ) } diff --git a/apps/sim/lib/copilot/constants.ts b/apps/sim/lib/copilot/constants.ts index 1718cfc1d9d..475c659f4e5 100644 --- a/apps/sim/lib/copilot/constants.ts +++ b/apps/sim/lib/copilot/constants.ts @@ -34,9 +34,6 @@ export const STREAM_STORAGE_KEY = 'copilot_active_stream' /** POST — send a chat message through the unified mothership chat surface. */ export const MOTHERSHIP_CHAT_API_PATH = '/api/mothership/chat' -/** Backwards-compatible alias while remaining callers migrate. */ -export const COPILOT_CHAT_API_PATH = MOTHERSHIP_CHAT_API_PATH - /** POST — confirm or reject a tool call. */ export const COPILOT_CONFIRM_API_PATH = '/api/copilot/confirm' diff --git a/apps/sim/lib/copilot/generated/mothership-stream-v1-schema.ts b/apps/sim/lib/copilot/generated/mothership-stream-v1-schema.ts index 6394988d439..1c670b37b54 100644 --- a/apps/sim/lib/copilot/generated/mothership-stream-v1-schema.ts +++ b/apps/sim/lib/copilot/generated/mothership-stream-v1-schema.ts @@ -1316,6 +1316,11 @@ export const MOTHERSHIP_STREAM_V1_SCHEMA: JsonSchema = { MothershipStreamV1Trace: { additionalProperties: false, properties: { + goTraceId: { + description: + 'OTel trace ID from the first Go ingress. May differ from requestId when Sim assigns the canonical request identity.', + type: 'string', + }, requestId: { type: 'string', }, diff --git a/apps/sim/lib/copilot/generated/mothership-stream-v1.ts b/apps/sim/lib/copilot/generated/mothership-stream-v1.ts index 24841264e6c..ef7f2e065fb 100644 --- a/apps/sim/lib/copilot/generated/mothership-stream-v1.ts +++ b/apps/sim/lib/copilot/generated/mothership-stream-v1.ts @@ -66,6 +66,10 @@ export interface MothershipStreamV1StreamRef { streamId: string } export interface MothershipStreamV1Trace { + /** + * OTel trace ID from the first Go ingress. May differ from requestId when Sim assigns the canonical request identity. 
+ */ + goTraceId?: string requestId: string spanId?: string } diff --git a/apps/sim/lib/copilot/generated/request-trace-v1.ts b/apps/sim/lib/copilot/generated/request-trace-v1.ts index f8d2bd06e01..31a60bb5159 100644 --- a/apps/sim/lib/copilot/generated/request-trace-v1.ts +++ b/apps/sim/lib/copilot/generated/request-trace-v1.ts @@ -34,6 +34,7 @@ export interface RequestTraceV1SimReport { startMs: number streamId?: string usage?: RequestTraceV1UsageSummary + userMessage?: string } /** * This interface was referenced by `RequestTraceV1SimReport`'s JSON-Schema @@ -112,6 +113,7 @@ export interface RequestTraceV1SimReport1 { startMs: number streamId?: string usage?: RequestTraceV1UsageSummary + userMessage?: string } export const RequestTraceV1Outcome = { diff --git a/apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts b/apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts new file mode 100644 index 00000000000..0172fa14d83 --- /dev/null +++ b/apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts @@ -0,0 +1,359 @@ +// AUTO-GENERATED FILE. DO NOT EDIT. +// +// Source: copilot/copilot/contracts/trace-attribute-values-v1.schema.json +// Regenerate with: bun run trace-attribute-values-contract:generate +// +// Canonical closed-set value vocabularies for mothership OTel +// attributes. Call sites should reference e.g. +// `CopilotRequestCancelReason.ExplicitStop` rather than the raw +// string literal, so typos become compile errors and the Go contract +// remains the single source of truth. 
+ +export const AbortBackend = { + InProcess: 'in_process', + Redis: 'redis', +} as const + +export type AbortBackendKey = keyof typeof AbortBackend +export type AbortBackendValue = (typeof AbortBackend)[AbortBackendKey] + +export const AbortRedisResult = { + Error: 'error', + Ok: 'ok', + Slow: 'slow', +} as const + +export type AbortRedisResultKey = keyof typeof AbortRedisResult +export type AbortRedisResultValue = (typeof AbortRedisResult)[AbortRedisResultKey] + +export const AuthKeyMatch = { + Enterprise: 'enterprise', + None: 'none', + User: 'user', +} as const + +export type AuthKeyMatchKey = keyof typeof AuthKeyMatch +export type AuthKeyMatchValue = (typeof AuthKeyMatch)[AuthKeyMatchKey] + +export const BillingAnalyticsOutcome = { + Duplicate: 'duplicate', + RetriesExhausted: 'retries_exhausted', + Success: 'success', + Unknown: 'unknown', +} as const + +export type BillingAnalyticsOutcomeKey = keyof typeof BillingAnalyticsOutcome +export type BillingAnalyticsOutcomeValue = + (typeof BillingAnalyticsOutcome)[BillingAnalyticsOutcomeKey] + +export const BillingFlushOutcome = { + CheckpointAlreadyClaimed: 'checkpoint_already_claimed', + CheckpointLoadFailed: 'checkpoint_load_failed', + Flushed: 'flushed', + NoCheckpoint: 'no_checkpoint', + NoSnapshot: 'no_snapshot', + SkippedUnconfigured: 'skipped_unconfigured', +} as const + +export type BillingFlushOutcomeKey = keyof typeof BillingFlushOutcome +export type BillingFlushOutcomeValue = (typeof BillingFlushOutcome)[BillingFlushOutcomeKey] + +export const BillingRouteOutcome = { + AuthFailed: 'auth_failed', + Billed: 'billed', + BillingDisabled: 'billing_disabled', + DuplicateIdempotencyKey: 'duplicate_idempotency_key', + InternalError: 'internal_error', + InvalidBody: 'invalid_body', +} as const + +export type BillingRouteOutcomeKey = keyof typeof BillingRouteOutcome +export type BillingRouteOutcomeValue = (typeof BillingRouteOutcome)[BillingRouteOutcomeKey] + +export const CopilotAbortOutcome = { + BadRequest: 
'bad_request', + FallbackPersistFailed: 'fallback_persist_failed', + MissingMessageId: 'missing_message_id', + MissingStreamId: 'missing_stream_id', + NoChatId: 'no_chat_id', + Ok: 'ok', + SettleTimeout: 'settle_timeout', + Settled: 'settled', + Unauthorized: 'unauthorized', +} as const + +export type CopilotAbortOutcomeKey = keyof typeof CopilotAbortOutcome +export type CopilotAbortOutcomeValue = (typeof CopilotAbortOutcome)[CopilotAbortOutcomeKey] + +export const CopilotBranchKind = { + Workflow: 'workflow', + Workspace: 'workspace', +} as const + +export type CopilotBranchKindKey = keyof typeof CopilotBranchKind +export type CopilotBranchKindValue = (typeof CopilotBranchKind)[CopilotBranchKindKey] + +export const CopilotChatFinalizeOutcome = { + AppendedAssistant: 'appended_assistant', + AssistantAlreadyPersisted: 'assistant_already_persisted', + ClearedStreamMarkerOnly: 'cleared_stream_marker_only', + StaleUserMessage: 'stale_user_message', +} as const + +export type CopilotChatFinalizeOutcomeKey = keyof typeof CopilotChatFinalizeOutcome +export type CopilotChatFinalizeOutcomeValue = + (typeof CopilotChatFinalizeOutcome)[CopilotChatFinalizeOutcomeKey] + +export const CopilotChatPersistOutcome = { + Appended: 'appended', + ChatNotFound: 'chat_not_found', +} as const + +export type CopilotChatPersistOutcomeKey = keyof typeof CopilotChatPersistOutcome +export type CopilotChatPersistOutcomeValue = + (typeof CopilotChatPersistOutcome)[CopilotChatPersistOutcomeKey] + +export const CopilotConfirmOutcome = { + Delivered: 'delivered', + Forbidden: 'forbidden', + InternalError: 'internal_error', + RunNotFound: 'run_not_found', + ToolCallNotFound: 'tool_call_not_found', + Unauthorized: 'unauthorized', + UpdateFailed: 'update_failed', + ValidationError: 'validation_error', +} as const + +export type CopilotConfirmOutcomeKey = keyof typeof CopilotConfirmOutcome +export type CopilotConfirmOutcomeValue = (typeof CopilotConfirmOutcome)[CopilotConfirmOutcomeKey] + +export const 
CopilotFinalizeOutcome = { + Aborted: 'aborted', + Error: 'error', + Success: 'success', +} as const + +export type CopilotFinalizeOutcomeKey = keyof typeof CopilotFinalizeOutcome +export type CopilotFinalizeOutcomeValue = (typeof CopilotFinalizeOutcome)[CopilotFinalizeOutcomeKey] + +export const CopilotLeg = { + SimToGo: 'sim_to_go', +} as const + +export type CopilotLegKey = keyof typeof CopilotLeg +export type CopilotLegValue = (typeof CopilotLeg)[CopilotLegKey] + +export const CopilotOutputFileOutcome = { + Failed: 'failed', + Uploaded: 'uploaded', +} as const + +export type CopilotOutputFileOutcomeKey = keyof typeof CopilotOutputFileOutcome +export type CopilotOutputFileOutcomeValue = + (typeof CopilotOutputFileOutcome)[CopilotOutputFileOutcomeKey] + +export const CopilotRecoveryOutcome = { + GapDetected: 'gap_detected', + InRange: 'in_range', +} as const + +export type CopilotRecoveryOutcomeKey = keyof typeof CopilotRecoveryOutcome +export type CopilotRecoveryOutcomeValue = (typeof CopilotRecoveryOutcome)[CopilotRecoveryOutcomeKey] + +export const CopilotRequestCancelReason = { + ClientDisconnect: 'client_disconnect', + ExplicitStop: 'explicit_stop', + Timeout: 'timeout', + Unknown: 'unknown', +} as const + +export type CopilotRequestCancelReasonKey = keyof typeof CopilotRequestCancelReason +export type CopilotRequestCancelReasonValue = + (typeof CopilotRequestCancelReason)[CopilotRequestCancelReasonKey] + +export const CopilotResourcesOp = { + Delete: 'delete', + None: 'none', + Upsert: 'upsert', +} as const + +export type CopilotResourcesOpKey = keyof typeof CopilotResourcesOp +export type CopilotResourcesOpValue = (typeof CopilotResourcesOp)[CopilotResourcesOpKey] + +export const CopilotResumeOutcome = { + BatchDelivered: 'batch_delivered', + ClientDisconnected: 'client_disconnected', + EndedWithoutTerminal: 'ended_without_terminal', + StreamNotFound: 'stream_not_found', + TerminalDelivered: 'terminal_delivered', +} as const + +export type 
CopilotResumeOutcomeKey = keyof typeof CopilotResumeOutcome +export type CopilotResumeOutcomeValue = (typeof CopilotResumeOutcome)[CopilotResumeOutcomeKey] + +export const CopilotStopOutcome = { + ChatNotFound: 'chat_not_found', + InternalError: 'internal_error', + NoMatchingRow: 'no_matching_row', + Persisted: 'persisted', + Unauthorized: 'unauthorized', + ValidationError: 'validation_error', +} as const + +export type CopilotStopOutcomeKey = keyof typeof CopilotStopOutcome +export type CopilotStopOutcomeValue = (typeof CopilotStopOutcome)[CopilotStopOutcomeKey] + +export const CopilotSurface = { + Copilot: 'copilot', + Mothership: 'mothership', +} as const + +export type CopilotSurfaceKey = keyof typeof CopilotSurface +export type CopilotSurfaceValue = (typeof CopilotSurface)[CopilotSurfaceKey] + +export const CopilotTableOutcome = { + EmptyContent: 'empty_content', + EmptyRows: 'empty_rows', + Failed: 'failed', + Imported: 'imported', + InvalidJsonShape: 'invalid_json_shape', + InvalidShape: 'invalid_shape', + RowLimitExceeded: 'row_limit_exceeded', + TableNotFound: 'table_not_found', + Wrote: 'wrote', +} as const + +export type CopilotTableOutcomeKey = keyof typeof CopilotTableOutcome +export type CopilotTableOutcomeValue = (typeof CopilotTableOutcome)[CopilotTableOutcomeKey] + +export const CopilotTableSourceFormat = { + Csv: 'csv', + Json: 'json', +} as const + +export type CopilotTableSourceFormatKey = keyof typeof CopilotTableSourceFormat +export type CopilotTableSourceFormatValue = + (typeof CopilotTableSourceFormat)[CopilotTableSourceFormatKey] + +export const CopilotTransport = { + Batch: 'batch', + Headless: 'headless', + Stream: 'stream', +} as const + +export type CopilotTransportKey = keyof typeof CopilotTransport +export type CopilotTransportValue = (typeof CopilotTransport)[CopilotTransportKey] + +export const CopilotValidateOutcome = { + InternalAuthFailed: 'internal_auth_failed', + InternalError: 'internal_error', + InvalidBody: 'invalid_body', + 
Ok: 'ok', + UsageExceeded: 'usage_exceeded', + UserNotFound: 'user_not_found', +} as const + +export type CopilotValidateOutcomeKey = keyof typeof CopilotValidateOutcome +export type CopilotValidateOutcomeValue = (typeof CopilotValidateOutcome)[CopilotValidateOutcomeKey] + +export const CopilotVfsOutcome = { + PassthroughFitsBudget: 'passthrough_fits_budget', + PassthroughNoMetadata: 'passthrough_no_metadata', + PassthroughNoSharp: 'passthrough_no_sharp', + RejectedNoMetadata: 'rejected_no_metadata', + RejectedNoSharp: 'rejected_no_sharp', + RejectedTooLargeAfterResize: 'rejected_too_large_after_resize', + Resized: 'resized', +} as const + +export type CopilotVfsOutcomeKey = keyof typeof CopilotVfsOutcome +export type CopilotVfsOutcomeValue = (typeof CopilotVfsOutcome)[CopilotVfsOutcomeKey] + +export const CopilotVfsReadOutcome = { + BinaryPlaceholder: 'binary_placeholder', + DocumentParsed: 'document_parsed', + DocumentTooLarge: 'document_too_large', + ImagePrepared: 'image_prepared', + ImageTooLarge: 'image_too_large', + ParseFailed: 'parse_failed', + ReadFailed: 'read_failed', + TextRead: 'text_read', + TextTooLarge: 'text_too_large', +} as const + +export type CopilotVfsReadOutcomeKey = keyof typeof CopilotVfsReadOutcome +export type CopilotVfsReadOutcomeValue = (typeof CopilotVfsReadOutcome)[CopilotVfsReadOutcomeKey] + +export const CopilotVfsReadPath = { + Binary: 'binary', + Image: 'image', + ParseableDocument: 'parseable_document', + Text: 'text', +} as const + +export type CopilotVfsReadPathKey = keyof typeof CopilotVfsReadPath +export type CopilotVfsReadPathValue = (typeof CopilotVfsReadPath)[CopilotVfsReadPathKey] + +export const LlmErrorStage = { + BuildRequest: 'build_request', + Decode: 'decode', + HttpBuild: 'http_build', + HttpStatus: 'http_status', + Invoke: 'invoke', + MarshalRequest: 'marshal_request', + StreamClose: 'stream_close', +} as const + +export type LlmErrorStageKey = keyof typeof LlmErrorStage +export type LlmErrorStageValue = (typeof 
LlmErrorStage)[LlmErrorStageKey] + +export const RateLimitOutcome = { + Allowed: 'allowed', + IncrError: 'incr_error', + Limited: 'limited', +} as const + +export type RateLimitOutcomeKey = keyof typeof RateLimitOutcome +export type RateLimitOutcomeValue = (typeof RateLimitOutcome)[RateLimitOutcomeKey] + +export const ToolAsyncWaiterResolution = { + ContextCancelled: 'context_cancelled', + Poll: 'poll', + Pubsub: 'pubsub', + StoredAfterClose: 'stored_after_close', + StoredBeforeSubscribe: 'stored_before_subscribe', + StoredPostSubscribe: 'stored_post_subscribe', + SubscriptionClosed: 'subscription_closed', + Unknown: 'unknown', +} as const + +export type ToolAsyncWaiterResolutionKey = keyof typeof ToolAsyncWaiterResolution +export type ToolAsyncWaiterResolutionValue = + (typeof ToolAsyncWaiterResolution)[ToolAsyncWaiterResolutionKey] + +export const ToolErrorKind = { + Dispatch: 'dispatch', + NotFound: 'not_found', +} as const + +export type ToolErrorKindKey = keyof typeof ToolErrorKind +export type ToolErrorKindValue = (typeof ToolErrorKind)[ToolErrorKindKey] + +export const ToolExecutor = { + Client: 'client', + Go: 'go', + Sim: 'sim', +} as const + +export type ToolExecutorKey = keyof typeof ToolExecutor +export type ToolExecutorValue = (typeof ToolExecutor)[ToolExecutorKey] + +export const ToolStoreStatus = { + Cancelled: 'cancelled', + Completed: 'completed', + Failed: 'failed', + Pending: 'pending', +} as const + +export type ToolStoreStatusKey = keyof typeof ToolStoreStatus +export type ToolStoreStatusValue = (typeof ToolStoreStatus)[ToolStoreStatusKey] diff --git a/apps/sim/lib/copilot/generated/trace-attributes-v1.ts b/apps/sim/lib/copilot/generated/trace-attributes-v1.ts new file mode 100644 index 00000000000..a64c897a001 --- /dev/null +++ b/apps/sim/lib/copilot/generated/trace-attributes-v1.ts @@ -0,0 +1,988 @@ +// AUTO-GENERATED FILE. DO NOT EDIT. 
+// +// Source: copilot/copilot/contracts/trace-attributes-v1.schema.json +// Regenerate with: bun run trace-attributes-contract:generate +// +// Canonical custom mothership OTel span attribute keys. Call sites +// should reference `TraceAttr.` (e.g. +// `TraceAttr.ChatId`, `TraceAttr.ToolCallId`) rather than raw +// string literals, so the Go-side contract is the single source of +// truth and typos become compile errors. +// +// For OTel semantic-convention keys (`http.*`, `db.*`, +// `gen_ai.*`, `net.*`, `messaging.*`, `service.*`, +// `deployment.environment`), import from +// `@opentelemetry/semantic-conventions` directly — those are owned +// by the upstream OTel spec, not by this contract. + +export const TraceAttr = { + AbortBackend: 'abort.backend', + AbortFound: 'abort.found', + AbortRedisResult: 'abort.redis_result', + AnalyticsAborted: 'analytics.aborted', + AnalyticsBilledTotalCost: 'analytics.billed_total_cost', + AnalyticsCacheReadTokens: 'analytics.cache_read_tokens', + AnalyticsCacheWriteTokens: 'analytics.cache_write_tokens', + AnalyticsCustomerType: 'analytics.customer_type', + AnalyticsDurationMs: 'analytics.duration_ms', + AnalyticsError: 'analytics.error', + AnalyticsInputTokens: 'analytics.input_tokens', + AnalyticsModel: 'analytics.model', + AnalyticsOutputTokens: 'analytics.output_tokens', + AnalyticsProvider: 'analytics.provider', + AnalyticsSource: 'analytics.source', + AnalyticsToolCallCount: 'analytics.tool_call_count', + ApiKeyId: 'api_key.id', + ApiKeyName: 'api_key.name', + AuthIncomingInternal: 'auth.incoming_internal', + AuthKeyMatch: 'auth.key.match', + AuthKeyPreview: 'auth.key.preview', + AuthKeySource: 'auth.key.source', + AuthKeyType: 'auth.key.type', + AuthProvider: 'auth.provider', + AuthValidateStatusCode: 'auth.validate.status_code', + AwsRegion: 'aws.region', + BedrockErrorCode: 'bedrock.error_code', + BedrockModelId: 'bedrock.model_id', + BedrockRequestBodyBytesRetry: 'bedrock.request.body_bytes_retry', + 
BillingAttempts: 'billing.attempts', + BillingChangeType: 'billing.change_type', + BillingCostInputUsd: 'billing.cost.input_usd', + BillingCostOutputUsd: 'billing.cost.output_usd', + BillingCostTotalUsd: 'billing.cost.total_usd', + BillingCostUsd: 'billing.cost_usd', + BillingCustomerType: 'billing.customer_type', + BillingDuplicate: 'billing.duplicate', + BillingDurationMs: 'billing.duration_ms', + BillingHasIdempotencyKey: 'billing.has_idempotency_key', + BillingIdempotencyKey: 'billing.idempotency_key', + BillingInterval: 'billing.interval', + BillingIsMcp: 'billing.is_mcp', + BillingLlmCost: 'billing.llm_cost', + BillingNewPlan: 'billing.new_plan', + BillingOutcome: 'billing.outcome', + BillingPlan: 'billing.plan', + BillingPreviousPlan: 'billing.previous_plan', + BillingServiceCharges: 'billing.service_charges', + BillingSource: 'billing.source', + BillingTotalCost: 'billing.total_cost', + BillingUsageCurrent: 'billing.usage.current', + BillingUsageExceeded: 'billing.usage.exceeded', + BillingUsageLimit: 'billing.usage.limit', + BlockId: 'block.id', + BlockName: 'block.name', + BlockType: 'block.type', + ChatActiveMessagesBytes: 'chat.active_messages_bytes', + ChatActiveMessagesCount: 'chat.active_messages_count', + ChatAppendBytes: 'chat.append_bytes', + ChatAppendCount: 'chat.append_count', + ChatArtifactKeys: 'chat.artifact_keys', + ChatArtifactsBytes: 'chat.artifacts_bytes', + ChatAuthType: 'chat.auth_type', + ChatContextCount: 'chat.context_count', + ChatContextUsage: 'chat.context_usage', + ChatContinuationMessagesBefore: 'chat.continuation.messages_before', + ChatContinuationToolResultBytes: 'chat.continuation.tool_result_bytes', + ChatContinuationToolResultFailure: 'chat.continuation.tool_result_failure', + ChatContinuationToolResultSuccess: 'chat.continuation.tool_result_success', + ChatContinuationToolResults: 'chat.continuation.tool_results', + ChatContinuationTotalToolCalls: 'chat.continuation.total_tool_calls', + ChatExistingMessageCount: 
'chat.existing_message_count', + ChatFileAttachmentCount: 'chat.file_attachment_count', + ChatFinalizeOutcome: 'chat.finalize.outcome', + ChatFound: 'chat.found', + ChatHasAssistantMessage: 'chat.has_assistant_message', + ChatHasOutputConfigs: 'chat.has_output_configs', + ChatId: 'chat.id', + ChatMessageBytes: 'chat.message_bytes', + ChatMessagesAfter: 'chat.messages_after', + ChatMessagesBytes: 'chat.messages_bytes', + ChatMessagesCount: 'chat.messages_count', + ChatPersistOutcome: 'chat.persist.outcome', + ChatPreexisting: 'chat.preexisting', + ChatRollbackIndex: 'chat.rollback_index', + ChatTokensUsed: 'chat.tokens_used', + ChatType: 'chat.type', + ChatUserMessageId: 'chat.user_message_id', + CheckpointAge: 'checkpoint.age', + CheckpointAttemptsBytes: 'checkpoint.attempts_bytes', + CheckpointBytesAssistantToolUse: 'checkpoint.bytes.assistant_tool_use', + CheckpointBytesCurrentMessages: 'checkpoint.bytes.current_messages', + CheckpointBytesImmediateResults: 'checkpoint.bytes.immediate_results', + CheckpointBytesPendingToolCalls: 'checkpoint.bytes.pending_tool_calls', + CheckpointBytesProviderRequest: 'checkpoint.bytes.provider_request', + CheckpointBytesRequestContext: 'checkpoint.bytes.request_context', + CheckpointBytesToolUsage: 'checkpoint.bytes.tool_usage', + CheckpointCachedCredentialsBytes: 'checkpoint.cached_credentials_bytes', + CheckpointClaimed: 'checkpoint.claimed', + CheckpointClaimedNow: 'checkpoint.claimed_now', + CheckpointCompletedBytes: 'checkpoint.completed_bytes', + CheckpointCompletedSteps: 'checkpoint.completed_steps', + CheckpointCurrentMessages: 'checkpoint.current_messages', + CheckpointDecisionsBytes: 'checkpoint.decisions_bytes', + CheckpointFound: 'checkpoint.found', + CheckpointFrames: 'checkpoint.frames', + CheckpointId: 'checkpoint.id', + CheckpointImmediateResults: 'checkpoint.immediate_results', + CheckpointMessageId: 'checkpoint.message_id', + CheckpointPendingBytes: 'checkpoint.pending_bytes', + CheckpointPendingSteps: 
'checkpoint.pending_steps', + CheckpointPendingToolCount: 'checkpoint.pending_tool_count', + CheckpointRows: 'checkpoint.rows', + CheckpointTaskId: 'checkpoint.task_id', + CheckpointTotalToolCalls: 'checkpoint.total_tool_calls', + CheckpointWorkflowSnapshotBytes: 'checkpoint.workflow_snapshot_bytes', + ClientVersion: 'client.version', + ConditionId: 'condition.id', + ConditionName: 'condition.name', + ConditionResult: 'condition.result', + ContextReduceBudgetChars: 'context.reduce.budget_chars', + ContextReduceCaller: 'context.reduce.caller', + ContextReduceDidReduce: 'context.reduce.did_reduce', + ContextReduceInputChars: 'context.reduce.input_chars', + ContextReduceInputMessages: 'context.reduce.input_messages', + ContextReduceOutcome: 'context.reduce.outcome', + ContextReduceOutputChars: 'context.reduce.output_chars', + ContextReduceOutputMessages: 'context.reduce.output_messages', + ContextReduced: 'context.reduced', + ContextSummarizeInputChars: 'context.summarize.input_chars', + ContextSummarizeOutputChars: 'context.summarize.output_chars', + CopilotAbortControllerFired: 'copilot.abort.controller_fired', + CopilotAbortGoMarkerOk: 'copilot.abort.go_marker_ok', + CopilotAbortLocalAborted: 'copilot.abort.local_aborted', + CopilotAbortMarkerWritten: 'copilot.abort.marker_written', + CopilotAbortOutcome: 'copilot.abort.outcome', + CopilotAbortUnknownReason: 'copilot.abort.unknown_reason', + CopilotAsyncToolClaimedBy: 'copilot.async_tool.claimed_by', + CopilotAsyncToolHasError: 'copilot.async_tool.has_error', + CopilotAsyncToolIdsCount: 'copilot.async_tool.ids_count', + CopilotAsyncToolStatus: 'copilot.async_tool.status', + CopilotAsyncToolWorkerId: 'copilot.async_tool.worker_id', + CopilotBranchKind: 'copilot.branch.kind', + CopilotChatIsNew: 'copilot.chat.is_new', + CopilotCheckpointPendingToolCallId: 'copilot.checkpoint.pending_tool_call_id', + CopilotCommandsCount: 'copilot.commands.count', + CopilotConfirmOutcome: 'copilot.confirm.outcome', + 
CopilotContextsCount: 'copilot.contexts.count', + CopilotExecutionId: 'copilot.execution.id', + CopilotFileAttachmentsCount: 'copilot.file_attachments.count', + CopilotFinalizeOutcome: 'copilot.finalize.outcome', + CopilotInterruptedPriorStream: 'copilot.interrupted_prior_stream', + CopilotLeg: 'copilot.leg', + CopilotMode: 'copilot.mode', + CopilotOperation: 'copilot.operation', + CopilotOutputFileBytes: 'copilot.output_file.bytes', + CopilotOutputFileFormat: 'copilot.output_file.format', + CopilotOutputFileId: 'copilot.output_file.id', + CopilotOutputFileName: 'copilot.output_file.name', + CopilotOutputFileOutcome: 'copilot.output_file.outcome', + CopilotPendingStreamWaitMs: 'copilot.pending_stream.wait_ms', + CopilotPrefetch: 'copilot.prefetch', + CopilotPublisherClientDisconnected: 'copilot.publisher.client_disconnected', + CopilotPublisherSawComplete: 'copilot.publisher.saw_complete', + CopilotRecoveryLatestSeq: 'copilot.recovery.latest_seq', + CopilotRecoveryOldestSeq: 'copilot.recovery.oldest_seq', + CopilotRecoveryOutcome: 'copilot.recovery.outcome', + CopilotRecoveryRequestedAfterSeq: 'copilot.recovery.requested_after_seq', + CopilotRequestCancelReason: 'copilot.request.cancel_reason', + CopilotRequestOutcome: 'copilot.request.outcome', + CopilotResourceAttachmentsCount: 'copilot.resource_attachments.count', + CopilotResourcesAborted: 'copilot.resources.aborted', + CopilotResourcesOp: 'copilot.resources.op', + CopilotResourcesRemovedCount: 'copilot.resources.removed_count', + CopilotResourcesUpsertedCount: 'copilot.resources.upserted_count', + CopilotResultContentBlocks: 'copilot.result.content_blocks', + CopilotResultContentLength: 'copilot.result.content_length', + CopilotResultToolCalls: 'copilot.result.tool_calls', + CopilotResumeAfterCursor: 'copilot.resume.after_cursor', + CopilotResumeDurationMs: 'copilot.resume.duration_ms', + CopilotResumeEventCount: 'copilot.resume.event_count', + CopilotResumeOutcome: 'copilot.resume.outcome', + 
CopilotResumePollIterations: 'copilot.resume.poll_iterations', + CopilotResumePreviewSessionCount: 'copilot.resume.preview_session_count', + CopilotRoute: 'copilot.route', + CopilotRunAgent: 'copilot.run.agent', + CopilotRunHasCompletedAt: 'copilot.run.has_completed_at', + CopilotRunHasError: 'copilot.run.has_error', + CopilotRunModel: 'copilot.run.model', + CopilotRunParentId: 'copilot.run.parent_id', + CopilotRunProvider: 'copilot.run.provider', + CopilotRunStatus: 'copilot.run.status', + CopilotStopAppendedAssistant: 'copilot.stop.appended_assistant', + CopilotStopBlocksCount: 'copilot.stop.blocks_count', + CopilotStopContentLength: 'copilot.stop.content_length', + CopilotStopOutcome: 'copilot.stop.outcome', + CopilotStream: 'copilot.stream', + CopilotSurface: 'copilot.surface', + CopilotTableId: 'copilot.table.id', + CopilotTableOutcome: 'copilot.table.outcome', + CopilotTableRowCount: 'copilot.table.row_count', + CopilotTableSourceContentBytes: 'copilot.table.source.content_bytes', + CopilotTableSourceFormat: 'copilot.table.source.format', + CopilotTableSourcePath: 'copilot.table.source.path', + CopilotTraceSpanCount: 'copilot.trace.span_count', + CopilotTransport: 'copilot.transport', + CopilotUserMessagePreview: 'copilot.user.message_preview', + CopilotValidateOutcome: 'copilot.validate.outcome', + CopilotVfsFileExtension: 'copilot.vfs.file.extension', + CopilotVfsFileMediaType: 'copilot.vfs.file.media_type', + CopilotVfsFileName: 'copilot.vfs.file.name', + CopilotVfsFileSizeBytes: 'copilot.vfs.file.size_bytes', + CopilotVfsHasAlpha: 'copilot.vfs.has_alpha', + CopilotVfsInputBytes: 'copilot.vfs.input.bytes', + CopilotVfsInputHeight: 'copilot.vfs.input.height', + CopilotVfsInputMediaTypeClaimed: 'copilot.vfs.input.media_type_claimed', + CopilotVfsInputMediaTypeDetected: 'copilot.vfs.input.media_type_detected', + CopilotVfsInputWidth: 'copilot.vfs.input.width', + CopilotVfsMetadataFailed: 'copilot.vfs.metadata.failed', + CopilotVfsOutcome: 
'copilot.vfs.outcome', + CopilotVfsOutputBytes: 'copilot.vfs.output.bytes', + CopilotVfsOutputMediaType: 'copilot.vfs.output.media_type', + CopilotVfsReadImageResized: 'copilot.vfs.read.image.resized', + CopilotVfsReadOutcome: 'copilot.vfs.read.outcome', + CopilotVfsReadOutputBytes: 'copilot.vfs.read.output.bytes', + CopilotVfsReadOutputLines: 'copilot.vfs.read.output.lines', + CopilotVfsReadOutputMediaType: 'copilot.vfs.read.output.media_type', + CopilotVfsReadPath: 'copilot.vfs.read.path', + CopilotVfsResizeAttempts: 'copilot.vfs.resize.attempts', + CopilotVfsResizeChosenDimension: 'copilot.vfs.resize.chosen_dimension', + CopilotVfsResizeChosenQuality: 'copilot.vfs.resize.chosen_quality', + CopilotVfsResizeDimension: 'copilot.vfs.resize.dimension', + CopilotVfsResizeFitsBudget: 'copilot.vfs.resize.fits_budget', + CopilotVfsResizeOutputBytes: 'copilot.vfs.resize.output_bytes', + CopilotVfsResizeQuality: 'copilot.vfs.resize.quality', + CopilotVfsResized: 'copilot.vfs.resized', + CopilotVfsSharpLoadFailed: 'copilot.vfs.sharp.load_failed', + CostDefaultCost: 'cost.default_cost', + CredentialSetId: 'credential_set.id', + CredentialSetName: 'credential_set.name', + DbOperation: 'db.operation', + DbSqlTable: 'db.sql.table', + DbSystem: 'db.system', + DeploymentEnvironment: 'deployment.environment', + DeploymentVersion: 'deployment.version', + DocumentFileSize: 'document.file_size', + DocumentMimeType: 'document.mime_type', + DocumentsCount: 'documents.count', + DocumentsUploadType: 'documents.upload_type', + Error: 'error', + ErrorCode: 'error.code', + ErrorInternal: 'error.internal', + ErrorMessage: 'error.message', + ErrorType: 'error.type', + EventName: 'event.name', + EventTimestamp: 'event.timestamp', + ExecutionBlocksExecuted: 'execution.blocks_executed', + ExecutionDurationMs: 'execution.duration_ms', + ExecutionErrorMessage: 'execution.error_message', + ExecutionHasErrors: 'execution.has_errors', + ExecutionStatus: 'execution.status', + ExecutionTotalCost: 
'execution.total_cost', + ExecutionTrigger: 'execution.trigger', + FunctionExecutionTimeMs: 'function.execution_time_ms', + FunctionId: 'function.id', + FunctionName: 'function.name', + GenAiAgentId: 'gen_ai.agent.id', + GenAiAgentName: 'gen_ai.agent.name', + GenAiCostInput: 'gen_ai.cost.input', + GenAiCostOutput: 'gen_ai.cost.output', + GenAiCostTotal: 'gen_ai.cost.total', + GenAiInputMessages: 'gen_ai.input.messages', + GenAiOperationName: 'gen_ai.operation.name', + GenAiOutputMessages: 'gen_ai.output.messages', + GenAiRequestAssistantMessages: 'gen_ai.request.assistant_messages', + GenAiRequestContentBlocks: 'gen_ai.request.content_blocks', + GenAiRequestHasCacheControl: 'gen_ai.request.has_cache_control', + GenAiRequestImageBlocks: 'gen_ai.request.image_blocks', + GenAiRequestImageDataBytes: 'gen_ai.request.image_data_bytes', + GenAiRequestMaxMessageBlocks: 'gen_ai.request.max_message_blocks', + GenAiRequestMessagesCount: 'gen_ai.request.messages.count', + GenAiRequestModel: 'gen_ai.request.model', + GenAiRequestSystemChars: 'gen_ai.request.system_chars', + GenAiRequestTextBlocks: 'gen_ai.request.text_blocks', + GenAiRequestToolResultBlocks: 'gen_ai.request.tool_result_blocks', + GenAiRequestToolUseBlocks: 'gen_ai.request.tool_use_blocks', + GenAiRequestToolsCount: 'gen_ai.request.tools.count', + GenAiRequestUserMessages: 'gen_ai.request.user_messages', + GenAiSystem: 'gen_ai.system', + GenAiToolName: 'gen_ai.tool.name', + GenAiUsageCacheCreationTokens: 'gen_ai.usage.cache_creation_tokens', + GenAiUsageCacheReadTokens: 'gen_ai.usage.cache_read_tokens', + GenAiUsageInputTokens: 'gen_ai.usage.input_tokens', + GenAiUsageOutputTokens: 'gen_ai.usage.output_tokens', + GenAiUsageTotalTokens: 'gen_ai.usage.total_tokens', + GenAiWorkflowId: 'gen_ai.workflow.id', + GenAiWorkflowName: 'gen_ai.workflow.name', + HostedKeyEnvVar: 'hosted_key.env_var', + HttpHost: 'http.host', + HttpMethod: 'http.method', + HttpPath: 'http.path', + HttpRemoteAddr: 'http.remote_addr', + 
HttpRequestContentLength: 'http.request.content_length', + HttpResponseBodyBytes: 'http.response.body_bytes', + HttpResponseContentLength: 'http.response.content_length', + HttpResponseHeadersMs: 'http.response.headers_ms', + HttpResponseTotalMs: 'http.response.total_ms', + HttpRoute: 'http.route', + HttpServerDurationMs: 'http.server.duration_ms', + HttpStatusCode: 'http.status_code', + HttpTarget: 'http.target', + HttpUrl: 'http.url', + HttpUserAgent: 'http.user_agent', + InvitationRole: 'invitation.role', + KnowledgeBaseId: 'knowledge_base.id', + KnowledgeBaseName: 'knowledge_base.name', + LlmErrorStage: 'llm.error_stage', + LlmRequestBodyBytes: 'llm.request.body_bytes', + LlmStreamBytes: 'llm.stream.bytes', + LlmStreamChunks: 'llm.stream.chunks', + LlmStreamFirstChunkBytes: 'llm.stream.first_chunk_bytes', + LlmStreamFirstChunkMs: 'llm.stream.first_chunk_ms', + LlmStreamOpenMs: 'llm.stream.open_ms', + LlmStreamTotalMs: 'llm.stream.total_ms', + LockAcquired: 'lock.acquired', + LockBackend: 'lock.backend', + LockTimedOut: 'lock.timed_out', + LockTimeoutMs: 'lock.timeout_ms', + LoopId: 'loop.id', + LoopIterations: 'loop.iterations', + LoopName: 'loop.name', + McpExecutionStatus: 'mcp.execution_status', + McpServerId: 'mcp.server_id', + McpServerName: 'mcp.server_name', + McpToolName: 'mcp.tool_name', + McpTransport: 'mcp.transport', + MemberRole: 'member.role', + MemoryContentBytes: 'memory.content_bytes', + MemoryFound: 'memory.found', + MemoryPath: 'memory.path', + MemoryRowCount: 'memory.row_count', + MessageId: 'message.id', + MessagingDestinationName: 'messaging.destination.name', + MessagingSystem: 'messaging.system', + ModelDurationMs: 'model.duration_ms', + ModelId: 'model.id', + ModelName: 'model.name', + MothershipOrigin: 'mothership.origin', + NetPeerName: 'net.peer.name', + OauthProvider: 'oauth.provider', + ParallelBranches: 'parallel.branches', + ParallelId: 'parallel.id', + ParallelName: 'parallel.name', + PrefsToolCount: 'prefs.tool_count', + 
ProcessingChunkSize: 'processing.chunk_size', + ProcessingRecipe: 'processing.recipe', + ProviderId: 'provider.id', + RateLimitAttempt: 'rate_limit.attempt', + RateLimitCount: 'rate_limit.count', + RateLimitDelayMs: 'rate_limit.delay_ms', + RateLimitLimit: 'rate_limit.limit', + RateLimitMaxRetries: 'rate_limit.max_retries', + RateLimitOutcome: 'rate_limit.outcome', + RateLimitRetryAfterMs: 'rate_limit.retry_after_ms', + RequestGoTraceId: 'request.go_trace_id', + RequestId: 'request.id', + RequiredVersion: 'required.version', + ResumeRequestBodyBytes: 'resume.request.body_bytes', + ResumeResultsCount: 'resume.results.count', + ResumeResultsDataBytes: 'resume.results.data_bytes', + ResumeResultsFailureCount: 'resume.results.failure_count', + ResumeResultsSuccessCount: 'resume.results.success_count', + RouterBackendName: 'router.backend_name', + RouterBedrockEnabled: 'router.bedrock_enabled', + RouterBedrockSupportedModel: 'router.bedrock_supported_model', + RouterId: 'router.id', + RouterName: 'router.name', + RouterSelectedBackend: 'router.selected_backend', + RouterSelectedPath: 'router.selected_path', + RunId: 'run.id', + SearchResultsCount: 'search.results_count', + ServiceInstanceId: 'service.instance.id', + ServiceName: 'service.name', + ServiceNamespace: 'service.namespace', + ServiceVersion: 'service.version', + SettleCompleted: 'settle.completed', + SettleTimeoutMs: 'settle.timeout_ms', + SettleWaitMs: 'settle.wait_ms', + SimOperation: 'sim.operation', + SimRequestId: 'sim.request_id', + SpanDurationMs: 'span.duration_ms', + SpanStatus: 'span.status', + SpanType: 'span.type', + StreamId: 'stream.id', + SubagentId: 'subagent.id', + SubagentOutcomeContentBytes: 'subagent.outcome.content_bytes', + SubagentOutcomeError: 'subagent.outcome.error', + SubagentOutcomeStructuredType: 'subagent.outcome.structured_type', + SubagentOutcomeSuccess: 'subagent.outcome.success', + SubagentOutcomeToolCallCount: 'subagent.outcome.tool_call_count', + TaskAge: 'task.age', + 
TaskDecisionCount: 'task.decision_count', + TaskErrorCount: 'task.error_count', + TaskFound: 'task.found', + TaskId: 'task.id', + TaskListLimit: 'task.list_limit', + TaskRows: 'task.rows', + TaskStatus: 'task.status', + TaskStepCount: 'task.step_count', + TelemetrySdkLanguage: 'telemetry.sdk.language', + TelemetrySdkName: 'telemetry.sdk.name', + TelemetrySdkVersion: 'telemetry.sdk.version', + TemplateId: 'template.id', + TemplateName: 'template.name', + ThrottleReason: 'throttle.reason', + ToolArgsBytes: 'tool.args.bytes', + ToolArgsCount: 'tool.args.count', + ToolArgsPreview: 'tool.args.preview', + ToolAsyncWaiterPollCount: 'tool.async_waiter.poll_count', + ToolAsyncWaiterPubsubDeliveries: 'tool.async_waiter.pubsub_deliveries', + ToolAsyncWaiterResolution: 'tool.async_waiter.resolution', + ToolCallId: 'tool.call_id', + ToolClientExecutable: 'tool.client_executable', + ToolCompletionReceived: 'tool.completion.received', + ToolConfirmationStatus: 'tool.confirmation.status', + ToolDurationMs: 'tool.duration_ms', + ToolErrorKind: 'tool.error_kind', + ToolExecutor: 'tool.executor', + ToolExternalService: 'tool.external.service', + ToolId: 'tool.id', + ToolName: 'tool.name', + ToolOutcome: 'tool.outcome', + ToolOutcomeMessage: 'tool.outcome.message', + ToolParentSpan: 'tool.parent_span', + ToolPayloadBytes: 'tool.payload.bytes', + ToolResultArtifact: 'tool.result.artifact', + ToolResultBytes: 'tool.result.bytes', + ToolResultSuccess: 'tool.result.success', + ToolScheduled: 'tool.scheduled', + ToolStatus: 'tool.status', + ToolStatusCode: 'tool.status_code', + ToolStoreStatus: 'tool.store_status', + ToolSync: 'tool.sync', + ToolTimeoutMs: 'tool.timeout_ms', + TraceAborted: 'trace.aborted', + TraceBilledTotalCost: 'trace.billed_total_cost', + TraceCacheReadTokens: 'trace.cache_read_tokens', + TraceCacheWriteTokens: 'trace.cache_write_tokens', + TraceDurationMs: 'trace.duration_ms', + TraceError: 'trace.error', + TraceGoId: 'trace.go_id', + TraceInputTokens: 
'trace.input_tokens', + TraceModel: 'trace.model', + TraceOutcome: 'trace.outcome', + TraceOutputTokens: 'trace.output_tokens', + TraceProvider: 'trace.provider', + TraceRawTotalCost: 'trace.raw_total_cost', + TraceSpanCount: 'trace.span_count', + TraceToolCallCount: 'trace.tool_call_count', + UserAuthMethod: 'user.auth_method', + UserAuthProvider: 'user.auth_provider', + UserId: 'user.id', + WebhookId: 'webhook.id', + WebhookProvider: 'webhook.provider', + WebhookTriggerSuccess: 'webhook.trigger_success', + WorkflowBlockTypes: 'workflow.block_types', + WorkflowBlocksCount: 'workflow.blocks_count', + WorkflowCreatedId: 'workflow.created_id', + WorkflowDurationMs: 'workflow.duration_ms', + WorkflowEdgesCount: 'workflow.edges_count', + WorkflowHasFolder: 'workflow.has_folder', + WorkflowHasWorkspace: 'workflow.has_workspace', + WorkflowId: 'workflow.id', + WorkflowLoopsCount: 'workflow.loops_count', + WorkflowName: 'workflow.name', + WorkflowNewId: 'workflow.new_id', + WorkflowParallelsCount: 'workflow.parallels_count', + WorkflowSourceId: 'workflow.source_id', + WorkflowTrigger: 'workflow.trigger', + WorkspaceId: 'workspace.id', + WorkspaceName: 'workspace.name', +} as const + +export type TraceAttrKey = keyof typeof TraceAttr +export type TraceAttrValue = (typeof TraceAttr)[TraceAttrKey] + +/** Readonly sorted list of every canonical custom attribute key. 
*/ +export const TraceAttrValues: readonly TraceAttrValue[] = [ + 'abort.backend', + 'abort.found', + 'abort.redis_result', + 'analytics.aborted', + 'analytics.billed_total_cost', + 'analytics.cache_read_tokens', + 'analytics.cache_write_tokens', + 'analytics.customer_type', + 'analytics.duration_ms', + 'analytics.error', + 'analytics.input_tokens', + 'analytics.model', + 'analytics.output_tokens', + 'analytics.provider', + 'analytics.source', + 'analytics.tool_call_count', + 'api_key.id', + 'api_key.name', + 'auth.incoming_internal', + 'auth.key.match', + 'auth.key.preview', + 'auth.key.source', + 'auth.key.type', + 'auth.provider', + 'auth.validate.status_code', + 'aws.region', + 'bedrock.error_code', + 'bedrock.model_id', + 'bedrock.request.body_bytes_retry', + 'billing.attempts', + 'billing.change_type', + 'billing.cost.input_usd', + 'billing.cost.output_usd', + 'billing.cost.total_usd', + 'billing.cost_usd', + 'billing.customer_type', + 'billing.duplicate', + 'billing.duration_ms', + 'billing.has_idempotency_key', + 'billing.idempotency_key', + 'billing.interval', + 'billing.is_mcp', + 'billing.llm_cost', + 'billing.new_plan', + 'billing.outcome', + 'billing.plan', + 'billing.previous_plan', + 'billing.service_charges', + 'billing.source', + 'billing.total_cost', + 'billing.usage.current', + 'billing.usage.exceeded', + 'billing.usage.limit', + 'block.id', + 'block.name', + 'block.type', + 'chat.active_messages_bytes', + 'chat.active_messages_count', + 'chat.append_bytes', + 'chat.append_count', + 'chat.artifact_keys', + 'chat.artifacts_bytes', + 'chat.auth_type', + 'chat.context_count', + 'chat.context_usage', + 'chat.continuation.messages_before', + 'chat.continuation.tool_result_bytes', + 'chat.continuation.tool_result_failure', + 'chat.continuation.tool_result_success', + 'chat.continuation.tool_results', + 'chat.continuation.total_tool_calls', + 'chat.existing_message_count', + 'chat.file_attachment_count', + 'chat.finalize.outcome', + 'chat.found', + 
'chat.has_assistant_message', + 'chat.has_output_configs', + 'chat.id', + 'chat.message_bytes', + 'chat.messages_after', + 'chat.messages_bytes', + 'chat.messages_count', + 'chat.persist.outcome', + 'chat.preexisting', + 'chat.rollback_index', + 'chat.tokens_used', + 'chat.type', + 'chat.user_message_id', + 'checkpoint.age', + 'checkpoint.attempts_bytes', + 'checkpoint.bytes.assistant_tool_use', + 'checkpoint.bytes.current_messages', + 'checkpoint.bytes.immediate_results', + 'checkpoint.bytes.pending_tool_calls', + 'checkpoint.bytes.provider_request', + 'checkpoint.bytes.request_context', + 'checkpoint.bytes.tool_usage', + 'checkpoint.cached_credentials_bytes', + 'checkpoint.claimed', + 'checkpoint.claimed_now', + 'checkpoint.completed_bytes', + 'checkpoint.completed_steps', + 'checkpoint.current_messages', + 'checkpoint.decisions_bytes', + 'checkpoint.found', + 'checkpoint.frames', + 'checkpoint.id', + 'checkpoint.immediate_results', + 'checkpoint.message_id', + 'checkpoint.pending_bytes', + 'checkpoint.pending_steps', + 'checkpoint.pending_tool_count', + 'checkpoint.rows', + 'checkpoint.task_id', + 'checkpoint.total_tool_calls', + 'checkpoint.workflow_snapshot_bytes', + 'client.version', + 'condition.id', + 'condition.name', + 'condition.result', + 'context.reduce.budget_chars', + 'context.reduce.caller', + 'context.reduce.did_reduce', + 'context.reduce.input_chars', + 'context.reduce.input_messages', + 'context.reduce.outcome', + 'context.reduce.output_chars', + 'context.reduce.output_messages', + 'context.reduced', + 'context.summarize.input_chars', + 'context.summarize.output_chars', + 'copilot.abort.controller_fired', + 'copilot.abort.go_marker_ok', + 'copilot.abort.local_aborted', + 'copilot.abort.marker_written', + 'copilot.abort.outcome', + 'copilot.abort.unknown_reason', + 'copilot.async_tool.claimed_by', + 'copilot.async_tool.has_error', + 'copilot.async_tool.ids_count', + 'copilot.async_tool.status', + 'copilot.async_tool.worker_id', + 
'copilot.branch.kind', + 'copilot.chat.is_new', + 'copilot.checkpoint.pending_tool_call_id', + 'copilot.commands.count', + 'copilot.confirm.outcome', + 'copilot.contexts.count', + 'copilot.execution.id', + 'copilot.file_attachments.count', + 'copilot.finalize.outcome', + 'copilot.interrupted_prior_stream', + 'copilot.leg', + 'copilot.mode', + 'copilot.operation', + 'copilot.output_file.bytes', + 'copilot.output_file.format', + 'copilot.output_file.id', + 'copilot.output_file.name', + 'copilot.output_file.outcome', + 'copilot.pending_stream.wait_ms', + 'copilot.prefetch', + 'copilot.publisher.client_disconnected', + 'copilot.publisher.saw_complete', + 'copilot.recovery.latest_seq', + 'copilot.recovery.oldest_seq', + 'copilot.recovery.outcome', + 'copilot.recovery.requested_after_seq', + 'copilot.request.cancel_reason', + 'copilot.request.outcome', + 'copilot.resource_attachments.count', + 'copilot.resources.aborted', + 'copilot.resources.op', + 'copilot.resources.removed_count', + 'copilot.resources.upserted_count', + 'copilot.result.content_blocks', + 'copilot.result.content_length', + 'copilot.result.tool_calls', + 'copilot.resume.after_cursor', + 'copilot.resume.duration_ms', + 'copilot.resume.event_count', + 'copilot.resume.outcome', + 'copilot.resume.poll_iterations', + 'copilot.resume.preview_session_count', + 'copilot.route', + 'copilot.run.agent', + 'copilot.run.has_completed_at', + 'copilot.run.has_error', + 'copilot.run.model', + 'copilot.run.parent_id', + 'copilot.run.provider', + 'copilot.run.status', + 'copilot.stop.appended_assistant', + 'copilot.stop.blocks_count', + 'copilot.stop.content_length', + 'copilot.stop.outcome', + 'copilot.stream', + 'copilot.surface', + 'copilot.table.id', + 'copilot.table.outcome', + 'copilot.table.row_count', + 'copilot.table.source.content_bytes', + 'copilot.table.source.format', + 'copilot.table.source.path', + 'copilot.trace.span_count', + 'copilot.transport', + 'copilot.user.message_preview', + 
'copilot.validate.outcome', + 'copilot.vfs.file.extension', + 'copilot.vfs.file.media_type', + 'copilot.vfs.file.name', + 'copilot.vfs.file.size_bytes', + 'copilot.vfs.has_alpha', + 'copilot.vfs.input.bytes', + 'copilot.vfs.input.height', + 'copilot.vfs.input.media_type_claimed', + 'copilot.vfs.input.media_type_detected', + 'copilot.vfs.input.width', + 'copilot.vfs.metadata.failed', + 'copilot.vfs.outcome', + 'copilot.vfs.output.bytes', + 'copilot.vfs.output.media_type', + 'copilot.vfs.read.image.resized', + 'copilot.vfs.read.outcome', + 'copilot.vfs.read.output.bytes', + 'copilot.vfs.read.output.lines', + 'copilot.vfs.read.output.media_type', + 'copilot.vfs.read.path', + 'copilot.vfs.resize.attempts', + 'copilot.vfs.resize.chosen_dimension', + 'copilot.vfs.resize.chosen_quality', + 'copilot.vfs.resize.dimension', + 'copilot.vfs.resize.fits_budget', + 'copilot.vfs.resize.output_bytes', + 'copilot.vfs.resize.quality', + 'copilot.vfs.resized', + 'copilot.vfs.sharp.load_failed', + 'cost.default_cost', + 'credential_set.id', + 'credential_set.name', + 'db.operation', + 'db.sql.table', + 'db.system', + 'deployment.environment', + 'deployment.version', + 'document.file_size', + 'document.mime_type', + 'documents.count', + 'documents.upload_type', + 'error', + 'error.code', + 'error.internal', + 'error.message', + 'error.type', + 'event.name', + 'event.timestamp', + 'execution.blocks_executed', + 'execution.duration_ms', + 'execution.error_message', + 'execution.has_errors', + 'execution.status', + 'execution.total_cost', + 'execution.trigger', + 'function.execution_time_ms', + 'function.id', + 'function.name', + 'gen_ai.agent.id', + 'gen_ai.agent.name', + 'gen_ai.cost.input', + 'gen_ai.cost.output', + 'gen_ai.cost.total', + 'gen_ai.input.messages', + 'gen_ai.operation.name', + 'gen_ai.output.messages', + 'gen_ai.request.assistant_messages', + 'gen_ai.request.content_blocks', + 'gen_ai.request.has_cache_control', + 'gen_ai.request.image_blocks', + 
'gen_ai.request.image_data_bytes', + 'gen_ai.request.max_message_blocks', + 'gen_ai.request.messages.count', + 'gen_ai.request.model', + 'gen_ai.request.system_chars', + 'gen_ai.request.text_blocks', + 'gen_ai.request.tool_result_blocks', + 'gen_ai.request.tool_use_blocks', + 'gen_ai.request.tools.count', + 'gen_ai.request.user_messages', + 'gen_ai.system', + 'gen_ai.tool.name', + 'gen_ai.usage.cache_creation_tokens', + 'gen_ai.usage.cache_read_tokens', + 'gen_ai.usage.input_tokens', + 'gen_ai.usage.output_tokens', + 'gen_ai.usage.total_tokens', + 'gen_ai.workflow.id', + 'gen_ai.workflow.name', + 'hosted_key.env_var', + 'http.host', + 'http.method', + 'http.path', + 'http.remote_addr', + 'http.request.content_length', + 'http.response.body_bytes', + 'http.response.content_length', + 'http.response.headers_ms', + 'http.response.total_ms', + 'http.route', + 'http.server.duration_ms', + 'http.status_code', + 'http.target', + 'http.url', + 'http.user_agent', + 'invitation.role', + 'knowledge_base.id', + 'knowledge_base.name', + 'llm.error_stage', + 'llm.request.body_bytes', + 'llm.stream.bytes', + 'llm.stream.chunks', + 'llm.stream.first_chunk_bytes', + 'llm.stream.first_chunk_ms', + 'llm.stream.open_ms', + 'llm.stream.total_ms', + 'lock.acquired', + 'lock.backend', + 'lock.timed_out', + 'lock.timeout_ms', + 'loop.id', + 'loop.iterations', + 'loop.name', + 'mcp.execution_status', + 'mcp.server_id', + 'mcp.server_name', + 'mcp.tool_name', + 'mcp.transport', + 'member.role', + 'memory.content_bytes', + 'memory.found', + 'memory.path', + 'memory.row_count', + 'message.id', + 'messaging.destination.name', + 'messaging.system', + 'model.duration_ms', + 'model.id', + 'model.name', + 'mothership.origin', + 'net.peer.name', + 'oauth.provider', + 'parallel.branches', + 'parallel.id', + 'parallel.name', + 'prefs.tool_count', + 'processing.chunk_size', + 'processing.recipe', + 'provider.id', + 'rate_limit.attempt', + 'rate_limit.count', + 'rate_limit.delay_ms', + 
'rate_limit.limit', + 'rate_limit.max_retries', + 'rate_limit.outcome', + 'rate_limit.retry_after_ms', + 'request.go_trace_id', + 'request.id', + 'required.version', + 'resume.request.body_bytes', + 'resume.results.count', + 'resume.results.data_bytes', + 'resume.results.failure_count', + 'resume.results.success_count', + 'router.backend_name', + 'router.bedrock_enabled', + 'router.bedrock_supported_model', + 'router.id', + 'router.name', + 'router.selected_backend', + 'router.selected_path', + 'run.id', + 'search.results_count', + 'service.instance.id', + 'service.name', + 'service.namespace', + 'service.version', + 'settle.completed', + 'settle.timeout_ms', + 'settle.wait_ms', + 'sim.operation', + 'sim.request_id', + 'span.duration_ms', + 'span.status', + 'span.type', + 'stream.id', + 'subagent.id', + 'subagent.outcome.content_bytes', + 'subagent.outcome.error', + 'subagent.outcome.structured_type', + 'subagent.outcome.success', + 'subagent.outcome.tool_call_count', + 'task.age', + 'task.decision_count', + 'task.error_count', + 'task.found', + 'task.id', + 'task.list_limit', + 'task.rows', + 'task.status', + 'task.step_count', + 'telemetry.sdk.language', + 'telemetry.sdk.name', + 'telemetry.sdk.version', + 'template.id', + 'template.name', + 'throttle.reason', + 'tool.args.bytes', + 'tool.args.count', + 'tool.args.preview', + 'tool.async_waiter.poll_count', + 'tool.async_waiter.pubsub_deliveries', + 'tool.async_waiter.resolution', + 'tool.call_id', + 'tool.client_executable', + 'tool.completion.received', + 'tool.confirmation.status', + 'tool.duration_ms', + 'tool.error_kind', + 'tool.executor', + 'tool.external.service', + 'tool.id', + 'tool.name', + 'tool.outcome', + 'tool.outcome.message', + 'tool.parent_span', + 'tool.payload.bytes', + 'tool.result.artifact', + 'tool.result.bytes', + 'tool.result.success', + 'tool.scheduled', + 'tool.status', + 'tool.status_code', + 'tool.store_status', + 'tool.sync', + 'tool.timeout_ms', + 'trace.aborted', + 
'trace.billed_total_cost', + 'trace.cache_read_tokens', + 'trace.cache_write_tokens', + 'trace.duration_ms', + 'trace.error', + 'trace.go_id', + 'trace.input_tokens', + 'trace.model', + 'trace.outcome', + 'trace.output_tokens', + 'trace.provider', + 'trace.raw_total_cost', + 'trace.span_count', + 'trace.tool_call_count', + 'user.auth_method', + 'user.auth_provider', + 'user.id', + 'webhook.id', + 'webhook.provider', + 'webhook.trigger_success', + 'workflow.block_types', + 'workflow.blocks_count', + 'workflow.created_id', + 'workflow.duration_ms', + 'workflow.edges_count', + 'workflow.has_folder', + 'workflow.has_workspace', + 'workflow.id', + 'workflow.loops_count', + 'workflow.name', + 'workflow.new_id', + 'workflow.parallels_count', + 'workflow.source_id', + 'workflow.trigger', + 'workspace.id', + 'workspace.name', +] as const diff --git a/apps/sim/lib/copilot/generated/trace-events-v1.ts b/apps/sim/lib/copilot/generated/trace-events-v1.ts new file mode 100644 index 00000000000..056ccbe2946 --- /dev/null +++ b/apps/sim/lib/copilot/generated/trace-events-v1.ts @@ -0,0 +1,44 @@ +// AUTO-GENERATED FILE. DO NOT EDIT. +// +// Source: copilot/copilot/contracts/trace-events-v1.schema.json +// Regenerate with: bun run trace-events-contract:generate +// +// Canonical mothership OTel span event names. Call sites should +// reference `TraceEvent.` (e.g. +// `TraceEvent.RequestCancelled`) rather than raw string literals, +// so the Go-side contract is the single source of truth and typos +// become compile errors. 
+ +export const TraceEvent = { + BedrockInvokeRetryWithoutImages: 'bedrock.invoke.retry_without_images', + CopilotOutputFileError: 'copilot.output_file.error', + CopilotTableError: 'copilot.table.error', + CopilotVfsParseFailed: 'copilot.vfs.parse_failed', + CopilotVfsResizeAttempt: 'copilot.vfs.resize_attempt', + CopilotVfsResizeAttemptFailed: 'copilot.vfs.resize_attempt_failed', + LlmInvokeSent: 'llm.invoke.sent', + LlmStreamFirstChunk: 'llm.stream.first_chunk', + LlmStreamOpened: 'llm.stream.opened', + PgNotifyFailed: 'pg_notify_failed', + RedisSubscribed: 'redis.subscribed', + RequestCancelled: 'request.cancelled', +} as const + +export type TraceEventKey = keyof typeof TraceEvent +export type TraceEventValue = (typeof TraceEvent)[TraceEventKey] + +/** Readonly sorted list of every canonical event name. */ +export const TraceEventValues: readonly TraceEventValue[] = [ + 'bedrock.invoke.retry_without_images', + 'copilot.output_file.error', + 'copilot.table.error', + 'copilot.vfs.parse_failed', + 'copilot.vfs.resize_attempt', + 'copilot.vfs.resize_attempt_failed', + 'llm.invoke.sent', + 'llm.stream.first_chunk', + 'llm.stream.opened', + 'pg_notify_failed', + 'redis.subscribed', + 'request.cancelled', +] as const diff --git a/apps/sim/lib/copilot/generated/trace-spans-v1.ts b/apps/sim/lib/copilot/generated/trace-spans-v1.ts new file mode 100644 index 00000000000..1c97f0dd009 --- /dev/null +++ b/apps/sim/lib/copilot/generated/trace-spans-v1.ts @@ -0,0 +1,151 @@ +// AUTO-GENERATED FILE. DO NOT EDIT. +// +// Source: copilot/copilot/contracts/trace-spans-v1.schema.json +// Regenerate with: bun run trace-spans-contract:generate +// +// Canonical mothership OTel span names. Call sites should reference +// `TraceSpan.` (e.g. `TraceSpan.CopilotVfsReadFile`) +// rather than raw string literals, so the Go-side contract is the +// single source of truth and typos become compile errors. 
+ +export const TraceSpan = { + AnthropicCountTokens: 'anthropic.count_tokens', + AsyncToolStoreSet: 'async_tool_store.set', + AuthRateLimitRecord: 'auth.rate_limit.record', + AuthValidateKey: 'auth.validate_key', + ChatContinueWithToolResults: 'chat.continue_with_tool_results', + ChatExplicitAbortConsume: 'chat.explicit_abort.consume', + ChatExplicitAbortFlushPausedBilling: 'chat.explicit_abort.flush_paused_billing', + ChatExplicitAbortHandle: 'chat.explicit_abort.handle', + ChatExplicitAbortMark: 'chat.explicit_abort.mark', + ChatExplicitAbortPeek: 'chat.explicit_abort.peek', + ChatGateAcquire: 'chat.gate.acquire', + ChatPersistAfterDone: 'chat.persist_after_done', + ChatSetup: 'chat.setup', + ContextReduce: 'context.reduce', + ContextSummarizeChunk: 'context.summarize_chunk', + CopilotAnalyticsFlush: 'copilot.analytics.flush', + CopilotAnalyticsSaveRequest: 'copilot.analytics.save_request', + CopilotAnalyticsUpdateBilling: 'copilot.analytics.update_billing', + CopilotAsyncRunsClaimCompleted: 'copilot.async_runs.claim_completed', + CopilotAsyncRunsCreateRunCheckpoint: 'copilot.async_runs.create_run_checkpoint', + CopilotAsyncRunsCreateRunSegment: 'copilot.async_runs.create_run_segment', + CopilotAsyncRunsGetAsyncToolCall: 'copilot.async_runs.get_async_tool_call', + CopilotAsyncRunsGetLatestForExecution: 'copilot.async_runs.get_latest_for_execution', + CopilotAsyncRunsGetLatestForStream: 'copilot.async_runs.get_latest_for_stream', + CopilotAsyncRunsGetMany: 'copilot.async_runs.get_many', + CopilotAsyncRunsGetRunSegment: 'copilot.async_runs.get_run_segment', + CopilotAsyncRunsListForRun: 'copilot.async_runs.list_for_run', + CopilotAsyncRunsMarkAsyncToolStatus: 'copilot.async_runs.mark_async_tool_status', + CopilotAsyncRunsReleaseClaim: 'copilot.async_runs.release_claim', + CopilotAsyncRunsUpdateRunStatus: 'copilot.async_runs.update_run_status', + CopilotAsyncRunsUpsertAsyncToolCall: 'copilot.async_runs.upsert_async_tool_call', + CopilotAuthValidateApiKey: 
'copilot.auth.validate_api_key', + CopilotBillingUpdateCost: 'copilot.billing.update_cost', + CopilotChatAbortActiveStream: 'copilot.chat.abort_active_stream', + CopilotChatAbortStream: 'copilot.chat.abort_stream', + CopilotChatAbortWaitSettle: 'copilot.chat.abort_wait_settle', + CopilotChatAcquirePendingStreamLock: 'copilot.chat.acquire_pending_stream_lock', + CopilotChatBuildExecutionContext: 'copilot.chat.build_execution_context', + CopilotChatBuildPayload: 'copilot.chat.build_payload', + CopilotChatBuildWorkspaceContext: 'copilot.chat.build_workspace_context', + CopilotChatFinalizeAssistantTurn: 'copilot.chat.finalize_assistant_turn', + CopilotChatPersistUserMessage: 'copilot.chat.persist_user_message', + CopilotChatResolveAgentContexts: 'copilot.chat.resolve_agent_contexts', + CopilotChatResolveBranch: 'copilot.chat.resolve_branch', + CopilotChatResolveOrCreateChat: 'copilot.chat.resolve_or_create_chat', + CopilotChatStopStream: 'copilot.chat.stop_stream', + CopilotConfirmToolResult: 'copilot.confirm.tool_result', + CopilotFinalizeStream: 'copilot.finalize_stream', + CopilotRecoveryCheckReplayGap: 'copilot.recovery.check_replay_gap', + CopilotResumeRequest: 'copilot.resume.request', + CopilotSubagentExecute: 'copilot.subagent.execute', + CopilotToolWaitForClientResult: 'copilot.tool.wait_for_client_result', + CopilotToolsHandleResourceSideEffects: 'copilot.tools.handle_resource_side_effects', + CopilotToolsWriteCsvToTable: 'copilot.tools.write_csv_to_table', + CopilotToolsWriteOutputFile: 'copilot.tools.write_output_file', + CopilotToolsWriteOutputTable: 'copilot.tools.write_output_table', + CopilotVfsPrepareImage: 'copilot.vfs.prepare_image', + CopilotVfsReadFile: 'copilot.vfs.read_file', + GenAiAgentExecute: 'gen_ai.agent.execute', + LlmStream: 'llm.stream', + ProviderRouterCountTokens: 'provider.router.count_tokens', + ProviderRouterRoute: 'provider.router.route', + SimUpdateCost: 'sim.update_cost', + SimValidateApiKey: 'sim.validate_api_key', + 
ToolAsyncWaiterWait: 'tool.async_waiter.wait', + ToolExecute: 'tool.execute', +} as const + +export type TraceSpanKey = keyof typeof TraceSpan +export type TraceSpanValue = (typeof TraceSpan)[TraceSpanKey] + +/** Readonly sorted list of every canonical span name. */ +export const TraceSpanValues: readonly TraceSpanValue[] = [ + 'anthropic.count_tokens', + 'async_tool_store.set', + 'auth.rate_limit.record', + 'auth.validate_key', + 'chat.continue_with_tool_results', + 'chat.explicit_abort.consume', + 'chat.explicit_abort.flush_paused_billing', + 'chat.explicit_abort.handle', + 'chat.explicit_abort.mark', + 'chat.explicit_abort.peek', + 'chat.gate.acquire', + 'chat.persist_after_done', + 'chat.setup', + 'context.reduce', + 'context.summarize_chunk', + 'copilot.analytics.flush', + 'copilot.analytics.save_request', + 'copilot.analytics.update_billing', + 'copilot.async_runs.claim_completed', + 'copilot.async_runs.create_run_checkpoint', + 'copilot.async_runs.create_run_segment', + 'copilot.async_runs.get_async_tool_call', + 'copilot.async_runs.get_latest_for_execution', + 'copilot.async_runs.get_latest_for_stream', + 'copilot.async_runs.get_many', + 'copilot.async_runs.get_run_segment', + 'copilot.async_runs.list_for_run', + 'copilot.async_runs.mark_async_tool_status', + 'copilot.async_runs.release_claim', + 'copilot.async_runs.update_run_status', + 'copilot.async_runs.upsert_async_tool_call', + 'copilot.auth.validate_api_key', + 'copilot.billing.update_cost', + 'copilot.chat.abort_active_stream', + 'copilot.chat.abort_stream', + 'copilot.chat.abort_wait_settle', + 'copilot.chat.acquire_pending_stream_lock', + 'copilot.chat.build_execution_context', + 'copilot.chat.build_payload', + 'copilot.chat.build_workspace_context', + 'copilot.chat.finalize_assistant_turn', + 'copilot.chat.persist_user_message', + 'copilot.chat.resolve_agent_contexts', + 'copilot.chat.resolve_branch', + 'copilot.chat.resolve_or_create_chat', + 'copilot.chat.stop_stream', + 
'copilot.confirm.tool_result', + 'copilot.finalize_stream', + 'copilot.recovery.check_replay_gap', + 'copilot.resume.request', + 'copilot.subagent.execute', + 'copilot.tool.wait_for_client_result', + 'copilot.tools.handle_resource_side_effects', + 'copilot.tools.write_csv_to_table', + 'copilot.tools.write_output_file', + 'copilot.tools.write_output_table', + 'copilot.vfs.prepare_image', + 'copilot.vfs.read_file', + 'gen_ai.agent.execute', + 'llm.stream', + 'provider.router.count_tokens', + 'provider.router.route', + 'sim.update_cost', + 'sim.validate_api_key', + 'tool.async_waiter.wait', + 'tool.execute', +] as const diff --git a/apps/sim/lib/copilot/request/go/fetch.test.ts b/apps/sim/lib/copilot/request/go/fetch.test.ts new file mode 100644 index 00000000000..9607a995d8e --- /dev/null +++ b/apps/sim/lib/copilot/request/go/fetch.test.ts @@ -0,0 +1,79 @@ +import { trace } from '@opentelemetry/api' +import { + BasicTracerProvider, + InMemorySpanExporter, + SimpleSpanProcessor, +} from '@opentelemetry/sdk-trace-base' +import { beforeEach, describe, expect, it, vi } from 'vitest' +import { fetchGo } from '@/lib/copilot/request/go/fetch' + +describe('fetchGo', () => { + const exporter = new InMemorySpanExporter() + const provider = new BasicTracerProvider({ + spanProcessors: [new SimpleSpanProcessor(exporter)], + }) + + beforeEach(() => { + exporter.reset() + trace.setGlobalTracerProvider(provider) + vi.restoreAllMocks() + }) + + it('emits a client span with http.* attrs and injects traceparent', async () => { + const fetchMock = vi.fn().mockImplementation(async (_url: string, init: RequestInit) => { + const headers = init.headers as Record + expect(headers.traceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/) + return new Response('ok', { + status: 200, + headers: { 'content-length': '2' }, + }) + }) + vi.stubGlobal('fetch', fetchMock) + + const res = await fetchGo('https://backend.example.com/api/copilot', { + method: 'POST', + body: 'payload', + 
operation: 'stream', + attributes: { 'copilot.leg': 'sim_to_go' }, + }) + expect(res.status).toBe(200) + + const spans = exporter.getFinishedSpans() + expect(spans).toHaveLength(1) + const attrs = spans[0].attributes + expect(spans[0].name).toBe('sim → go /api/copilot') + expect(attrs['http.method']).toBe('POST') + expect(attrs['http.url']).toBe('https://backend.example.com/api/copilot') + expect(attrs['http.target']).toBe('/api/copilot') + expect(attrs['http.status_code']).toBe(200) + expect(attrs['copilot.operation']).toBe('stream') + expect(attrs['copilot.leg']).toBe('sim_to_go') + expect(typeof attrs['http.response.headers_ms']).toBe('number') + }) + + it('marks span as error on non-2xx response', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue(new Response('nope', { status: 500 }))) + + const res = await fetchGo('https://backend.example.com/api/tools/resume', { + method: 'POST', + }) + expect(res.status).toBe(500) + + const spans = exporter.getFinishedSpans() + expect(spans).toHaveLength(1) + expect(spans[0].status.code).toBe(2) + }) + + it('records exceptions when fetch throws', async () => { + vi.stubGlobal('fetch', vi.fn().mockRejectedValue(new Error('network boom'))) + + await expect( + fetchGo('https://backend.example.com/api/traces', { method: 'POST' }) + ).rejects.toThrow('network boom') + + const spans = exporter.getFinishedSpans() + expect(spans).toHaveLength(1) + expect(spans[0].status.code).toBe(2) + expect(spans[0].events.some((e) => e.name === 'exception')).toBe(true) + }) +}) diff --git a/apps/sim/lib/copilot/request/go/fetch.ts b/apps/sim/lib/copilot/request/go/fetch.ts new file mode 100644 index 00000000000..3eb5deda044 --- /dev/null +++ b/apps/sim/lib/copilot/request/go/fetch.ts @@ -0,0 +1,107 @@ +import { type Context, context, SpanStatusCode, trace } from '@opentelemetry/api' +import { CopilotLeg } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from 
'@/lib/copilot/generated/trace-attributes-v1' +import { traceHeaders } from '@/lib/copilot/request/go/propagation' +import { markSpanForError } from '@/lib/copilot/request/otel' + +// Lazy tracer resolution: module-level `trace.getTracer()` can be evaluated +// before `instrumentation-node.ts` installs the TracerProvider under +// Next.js 16 + Turbopack dev, freezing a NoOp tracer and silently dropping +// every outbound Sim → Go span. Resolving per-call avoids the race. +const getTracer = () => trace.getTracer('sim-copilot-http', '1.0.0') + +interface OutboundFetchOptions extends RequestInit { + otelContext?: Context + spanName?: string + operation?: string + attributes?: Record +} + +/** + * Perform an outbound Sim → Go fetch wrapped in an OTel child span so each + * call shows up as a distinct segment in Jaeger, and propagates the W3C + * traceparent so the Go-side span joins the same trace. + * + * The span captures generic attributes (method, status, duration, response + * size, error code) so any future latency investigation — not just images or + * Bedrock — has uniform metadata to work with. + */ +export async function fetchGo(url: string, options: OutboundFetchOptions = {}): Promise { + const { + otelContext, + spanName, + operation, + attributes, + headers: providedHeaders, + ...init + } = options + + const parsed = safeParseUrl(url) + const pathname = parsed?.pathname ?? url + const method = (init.method ?? 'GET').toUpperCase() + const parentContext = otelContext ?? context.active() + + const span = getTracer().startSpan( + spanName ?? `sim → go ${pathname}`, + { + attributes: { + [TraceAttr.HttpMethod]: method, + [TraceAttr.HttpUrl]: url, + [TraceAttr.HttpTarget]: pathname, + [TraceAttr.NetPeerName]: parsed?.host ?? '', + [TraceAttr.CopilotLeg]: CopilotLeg.SimToGo, + ...(operation ? { [TraceAttr.CopilotOperation]: operation } : {}), + ...(attributes ?? 
{}), + }, + }, + parentContext + ) + + const activeContext = trace.setSpan(parentContext, span) + const propagatedHeaders = traceHeaders({}, activeContext) + const mergedHeaders = { + ...(providedHeaders as Record | undefined), + ...propagatedHeaders, + } + + const start = performance.now() + try { + const response = await context.with(activeContext, () => + fetch(url, { + ...init, + method, + headers: mergedHeaders, + }) + ) + const elapsedMs = performance.now() - start + const contentLength = Number(response.headers.get('content-length') ?? 0) + span.setAttribute(TraceAttr.HttpStatusCode, response.status) + span.setAttribute(TraceAttr.HttpResponseHeadersMs, Math.round(elapsedMs)) + if (contentLength > 0) { + span.setAttribute(TraceAttr.HttpResponseContentLength, contentLength) + } + if (response.status >= 400) { + span.setStatus({ + code: SpanStatusCode.ERROR, + message: `HTTP ${response.status}`, + }) + } else { + span.setStatus({ code: SpanStatusCode.OK }) + } + return response + } catch (error) { + span.setAttribute(TraceAttr.HttpResponseHeadersMs, Math.round(performance.now() - start)) + markSpanForError(span, error) + throw error + } finally { + span.end() + } +} + +function safeParseUrl(url: string): URL | null { + try { + return new URL(url) + } catch { + return null + } +} diff --git a/apps/sim/lib/copilot/request/go/propagation.ts b/apps/sim/lib/copilot/request/go/propagation.ts new file mode 100644 index 00000000000..51ec28566cc --- /dev/null +++ b/apps/sim/lib/copilot/request/go/propagation.ts @@ -0,0 +1,57 @@ +import { type Context, context } from '@opentelemetry/api' +import { W3CTraceContextPropagator } from '@opentelemetry/core' + +const propagator = new W3CTraceContextPropagator() +const headerSetter = { + set(carrier: Record, key: string, value: string) { + carrier[key] = value + }, +} + +const headerGetter = { + keys(carrier: Headers): string[] { + const out: string[] = [] + carrier.forEach((_, key) => { + out.push(key) + }) + return out + }, + 
get(carrier: Headers, key: string): string | undefined { + return carrier.get(key) ?? undefined + }, +} + +/** + * Injects W3C trace context (traceparent, tracestate) into outbound HTTP + * headers so Go-side spans join the same OTel trace tree as the calling + * Sim span. + * + * Usage: spread the result into your fetch headers: + * fetch(url, { headers: { ...myHeaders, ...traceHeaders() } }) + */ +export function traceHeaders( + carrier?: Record, + otelContext?: Context +): Record { + const headers: Record = carrier ?? {} + propagator.inject(otelContext ?? context.active(), headers, headerSetter) + return headers +} + +/** + * Extracts W3C trace context from incoming request headers (traceparent / + * tracestate) and returns an OTel Context seeded with the upstream span. + * + * Use this at the top of inbound Sim route handlers that Go calls into + * (e.g. /api/billing/update-cost, /api/copilot/api-keys/validate) so the + * Sim-side span becomes a proper child of the Go-side client span in the + * same trace — closing the round trip in Jaeger. + * + * When no traceparent is present (e.g. calls from a browser or a client + * that hasn't been instrumented), this returns `context.active()` + * unchanged, and any span started under it becomes a new root — the same + * behavior as before this helper existed. 
+ */ +export function contextFromRequestHeaders(headers: Headers): Context { + return propagator.extract(context.active(), headers, headerGetter) +} diff --git a/apps/sim/lib/copilot/request/go/stream.test.ts b/apps/sim/lib/copilot/request/go/stream.test.ts index 64349636b51..f9f80384c8d 100644 --- a/apps/sim/lib/copilot/request/go/stream.test.ts +++ b/apps/sim/lib/copilot/request/go/stream.test.ts @@ -17,7 +17,7 @@ import { runStreamLoop, } from '@/lib/copilot/request/go/stream' import { createEvent } from '@/lib/copilot/request/session' -import { TraceCollector } from '@/lib/copilot/request/trace' +import { RequestTraceV1Outcome, TraceCollector } from '@/lib/copilot/request/trace' import type { ExecutionContext, StreamingContext } from '@/lib/copilot/request/types' function createSseResponse(events: unknown[]): Response { @@ -281,4 +281,55 @@ describe('copilot go stream helpers', () => { context.errors.some((message) => message.includes('Failed to parse SSE event JSON')) ).toBe(true) }) + + it('records a split canonical request id and go trace id from the stream envelope', async () => { + vi.mocked(fetch).mockResolvedValueOnce( + createSseResponse([ + { + v: 1, + type: MothershipStreamV1EventType.text, + seq: 1, + ts: '2026-01-01T00:00:00.000Z', + stream: { streamId: 'stream-1', cursor: '1' }, + trace: { + requestId: 'sim-request-1', + goTraceId: 'go-trace-1', + }, + payload: { + channel: 'assistant', + text: 'hello', + }, + }, + createEvent({ + streamId: 'stream-1', + cursor: '2', + seq: 2, + requestId: 'sim-request-1', + type: MothershipStreamV1EventType.complete, + payload: { + status: MothershipStreamV1CompletionStatus.complete, + }, + }), + ]) + ) + + const context = createStreamingContext() + context.requestId = 'sim-request-1' + const execContext: ExecutionContext = { + userId: 'user-1', + workflowId: 'workflow-1', + } + + await runStreamLoop('https://example.com/mothership/stream', {}, context, execContext, { + timeout: 1000, + }) + + 
expect(context.requestId).toBe('sim-request-1') + expect( + context.trace.build({ + outcome: RequestTraceV1Outcome.success, + simRequestId: 'sim-request-1', + }).goTraceId + ).toBe('go-trace-1') + }) }) diff --git a/apps/sim/lib/copilot/request/go/stream.ts b/apps/sim/lib/copilot/request/go/stream.ts index 32d1c47b15b..2326808b35f 100644 --- a/apps/sim/lib/copilot/request/go/stream.ts +++ b/apps/sim/lib/copilot/request/go/stream.ts @@ -1,6 +1,9 @@ +import type { Context } from '@opentelemetry/api' import { createLogger } from '@sim/logger' import { ORCHESTRATION_TIMEOUT_MS } from '@/lib/copilot/constants' import { MothershipStreamV1SpanLifecycleEvent } from '@/lib/copilot/generated/mothership-stream-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { fetchGo } from '@/lib/copilot/request/go/fetch' import { buildPreviewContentUpdate, createFilePreviewAdapterState, @@ -91,6 +94,7 @@ export interface StreamLoopOptions extends OrchestratorOptions { * Called when the Go backend's trace ID (go_trace_id) is first received via SSE. */ onGoTraceId?: (goTraceId: string) => void + otelContext?: Context } /** @@ -110,15 +114,31 @@ export async function runStreamLoop( const { timeout = ORCHESTRATION_TIMEOUT_MS, abortSignal } = options const filePreviewAdapterState = createFilePreviewAdapterState() - const fetchSpan = context.trace.startSpan( - `HTTP Request → ${new URL(fetchUrl).pathname}`, - 'sim.http.fetch', - { url: fetchUrl } - ) - const response = await fetch(fetchUrl, { + const pathname = new URL(fetchUrl).pathname + const requestBodyBytes = estimateBodyBytes(fetchOptions.body) + const fetchSpan = context.trace.startSpan(`HTTP Request → ${pathname}`, 'sim.http.fetch', { + url: fetchUrl, + method: fetchOptions.method ?? 
'GET', + requestBodyBytes, + }) + const fetchStart = performance.now() + const response = await fetchGo(fetchUrl, { ...fetchOptions, signal: abortSignal, + otelContext: options.otelContext, + spanName: `sim → go ${pathname}`, + operation: 'stream', + attributes: { + [TraceAttr.CopilotStream]: true, + ...(requestBodyBytes ? { [TraceAttr.HttpRequestContentLength]: requestBodyBytes } : {}), + }, }) + const headersElapsedMs = Math.round(performance.now() - fetchStart) + fetchSpan.attributes = { + ...(fetchSpan.attributes ?? {}), + status: response.status, + headersMs: headersElapsedMs, + } if (!response.ok) { context.trace.endSpan(fetchSpan, 'error') @@ -140,17 +160,32 @@ export async function runStreamLoop( } context.trace.endSpan(fetchSpan) + + const bodySpan = context.trace.startSpan(`SSE Body → ${pathname}`, 'sim.http.stream_body', { + url: fetchUrl, + method: fetchOptions.method ?? 'GET', + }) + const bodyStart = performance.now() + let firstEventMs: number | undefined + let eventsReceived = 0 + let endedOn = 'terminal' + const reader = response.body.getReader() const decoder = new TextDecoder() const timeoutId = setTimeout(() => { context.errors.push('Request timed out') context.streamComplete = true + endedOn = 'timeout' reader.cancel().catch(() => {}) }, timeout) try { await processSSEStream(reader, decoder, abortSignal, async (raw) => { + if (eventsReceived === 0) { + firstEventMs = Math.round(performance.now() - bodyStart) + } + eventsReceived += 1 if (abortSignal?.aborted) { context.wasAborted = true return true @@ -174,12 +209,9 @@ export async function runStreamLoop( const envelope = parsedEvent.event const streamEvent = eventToStreamEvent(envelope) if (envelope.trace?.requestId) { - const prev = context.requestId - context.requestId = envelope.trace.requestId - context.trace.setGoTraceId(envelope.trace.requestId) - if (envelope.trace.requestId !== prev) { - options.onGoTraceId?.(envelope.trace.requestId) - } + const goTraceId = envelope.trace.goTraceId || 
envelope.trace.requestId + context.trace.setGoTraceId(goTraceId) + options.onGoTraceId?.(goTraceId) } if (shouldSkipToolCallEvent(streamEvent) || shouldSkipToolResultEvent(streamEvent)) { @@ -287,18 +319,59 @@ export async function runStreamLoop( requestId: context.requestId, messageId: context.messageId, }) + endedOn = 'closed_no_terminal' throw new CopilotBackendError(message, { status: 503 }) } } catch (error) { if (error instanceof FatalSseEventError && !context.errors.includes(error.message)) { context.errors.push(error.message) } + if (endedOn === 'terminal') { + endedOn = + error instanceof CopilotBackendError + ? 'backend_error' + : error instanceof BillingLimitError + ? 'billing_limit' + : 'error' + } throw error } finally { if (abortSignal?.aborted) { context.wasAborted = true await reader.cancel().catch(() => {}) + if (endedOn === 'terminal') { + endedOn = 'aborted' + } } clearTimeout(timeoutId) + + const bodyDurationMs = Math.round(performance.now() - bodyStart) + bodySpan.attributes = { + ...(bodySpan.attributes ?? {}), + eventsReceived, + firstEventMs, + endedOn, + durationMs: bodyDurationMs, + } + context.trace.endSpan( + bodySpan, + endedOn === 'terminal' ? 'ok' : endedOn === 'aborted' ? 
'cancelled' : 'error' + ) + } +} + +function estimateBodyBytes(body: BodyInit | null | undefined): number { + if (!body) { + return 0 + } + if (typeof body === 'string') { + return body.length + } + if (body instanceof ArrayBuffer) { + return body.byteLength + } + if (ArrayBuffer.isView(body)) { + return body.byteLength } + return 0 } diff --git a/apps/sim/lib/copilot/request/handlers/span.ts b/apps/sim/lib/copilot/request/handlers/span.ts index e684b232582..978e6ec0780 100644 --- a/apps/sim/lib/copilot/request/handlers/span.ts +++ b/apps/sim/lib/copilot/request/handlers/span.ts @@ -1,3 +1,62 @@ +import { + MothershipStreamV1SpanLifecycleEvent, + MothershipStreamV1SpanPayloadKind, +} from '@/lib/copilot/generated/mothership-stream-v1' import type { StreamHandler } from './types' -export const handleSpanEvent: StreamHandler = () => {} +/** + * Mirror Go-emitted span lifecycle events onto the Sim-side TraceCollector. + * + * Go publishes `span` events for subagent lifecycles and structured-result + * payloads. For subagents, the start/end pair is also used for UI routing + * elsewhere; here we additionally record a named span on the trace collector + * so the final RequestTraceV1 report shows the full nested structure without + * requiring the reader to inspect the raw envelope stream. + */ +export const handleSpanEvent: StreamHandler = (event, context) => { + if (event.type !== 'span') { + return + } + + const payload = event.payload as { + kind?: string + event?: string + agent?: string + data?: unknown + } + const kind = payload?.kind ?? '' + const evt = payload?.event ?? '' + + if (kind === MothershipStreamV1SpanPayloadKind.subagent) { + const scopeAgent = + typeof payload.agent === 'string' && payload.agent ? 
payload.agent : 'subagent' + if (evt === MothershipStreamV1SpanLifecycleEvent.start) { + const span = context.trace.startSpan(`subagent:${scopeAgent}`, 'go.subagent', { + agent: scopeAgent, + parentToolCallId: event.scope?.parentToolCallId, + }) + context.subAgentTraceSpans ??= new Map() + context.subAgentTraceSpans.set(`${scopeAgent}:${event.scope?.parentToolCallId || ''}`, span) + } else if (evt === MothershipStreamV1SpanLifecycleEvent.end) { + const key = `${scopeAgent}:${event.scope?.parentToolCallId || ''}` + const span = context.subAgentTraceSpans?.get(key) + if (span) { + context.trace.endSpan(span, 'ok') + context.subAgentTraceSpans?.delete(key) + } + } + return + } + + if ( + kind === MothershipStreamV1SpanPayloadKind.structured_result || + kind === MothershipStreamV1SpanPayloadKind.subagent_result + ) { + const span = context.trace.startSpan(`${kind}:${payload.agent ?? 'main'}`, `go.${kind}`, { + agent: payload.agent, + hasData: payload.data !== undefined, + }) + context.trace.endSpan(span, 'ok') + return + } +} diff --git a/apps/sim/lib/copilot/request/handlers/tool.ts b/apps/sim/lib/copilot/request/handlers/tool.ts index 17da4ecbccf..23d3b7bed14 100644 --- a/apps/sim/lib/copilot/request/handlers/tool.ts +++ b/apps/sim/lib/copilot/request/handlers/tool.ts @@ -7,6 +7,9 @@ import { MothershipStreamV1ToolOutcome, type MothershipStreamV1ToolResultPayload, } from '@/lib/copilot/generated/mothership-stream-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withCopilotSpan } from '@/lib/copilot/request/otel' import { isToolArgsDeltaStreamEvent, isToolCallStreamEvent, @@ -362,35 +365,56 @@ async function dispatchToolExecution( } } else { toolCall.status = 'executing' - const pendingPromise = (async () => { - await upsertAsyncToolCall({ - runId: context.runId, - toolCallId, - toolName, - args, - status: MothershipStreamV1AsyncToolRecordStatus.running, - 
}).catch((err) => { - logger.warn(`Failed to persist async tool row for client-executable ${scopeLabel}tool`, { + // Span covers the entire "wait for browser/client to execute this + // tool and report back" window — typically the single largest + // non-LLM latency contributor for mothership requests that use + // client-side tools. Before this, the wait was uninstrumented and + // only visible as a gap in the waterfall. + const pendingPromise = withCopilotSpan( + TraceSpan.CopilotToolWaitForClientResult, + { + [TraceAttr.ToolName]: toolName, + [TraceAttr.ToolCallId]: toolCallId, + [TraceAttr.ToolTimeoutMs]: options.timeout || STREAM_TIMEOUT_MS, + ...(context.runId ? { [TraceAttr.RunId]: context.runId } : {}), + }, + async (span) => { + await upsertAsyncToolCall({ + runId: context.runId, toolCallId, toolName, - error: err instanceof Error ? err.message : String(err), + args, + status: MothershipStreamV1AsyncToolRecordStatus.running, + }).catch((err) => { + logger.warn( + `Failed to persist async tool row for client-executable ${scopeLabel}tool`, + { + toolCallId, + toolName, + error: err instanceof Error ? err.message : String(err), + } + ) }) - }) - const completion = await waitForToolCompletion( - toolCallId, - options.timeout || STREAM_TIMEOUT_MS, - options.abortSignal - ) - handleClientCompletion(toolCall, toolCallId, completion) - await emitSyntheticToolResult(toolCallId, toolCall.name, completion, options) - return ( - completion ?? 
{ - status: MothershipStreamV1ToolOutcome.error, - message: 'Tool completion missing', - data: { error: 'Tool completion missing' }, + const completion = await waitForToolCompletion( + toolCallId, + options.timeout || STREAM_TIMEOUT_MS, + options.abortSignal + ) + span.setAttribute(TraceAttr.ToolCompletionReceived, completion !== undefined) + if (completion) { + span.setAttribute(TraceAttr.ToolOutcome, completion.status) } - ) - })().catch((err) => { + handleClientCompletion(toolCall, toolCallId, completion) + await emitSyntheticToolResult(toolCallId, toolCall.name, completion, options) + return ( + completion ?? { + status: MothershipStreamV1ToolOutcome.error, + message: 'Tool completion missing', + data: { error: 'Tool completion missing' }, + } + ) + } + ).catch((err) => { logger.error(`Client-executable ${scopeLabel}tool wait failed`, { toolCallId, toolName, diff --git a/apps/sim/lib/copilot/request/http.ts b/apps/sim/lib/copilot/request/http.ts index 34eb28cf76a..902f0da662a 100644 --- a/apps/sim/lib/copilot/request/http.ts +++ b/apps/sim/lib/copilot/request/http.ts @@ -38,7 +38,7 @@ export function createRequestId(): string { return generateId() } -export function createShortRequestId(): string { +function createShortRequestId(): string { return generateRequestId() } diff --git a/apps/sim/lib/copilot/request/lifecycle/finalize.ts b/apps/sim/lib/copilot/request/lifecycle/finalize.ts index 676fa7bd176..fa087a4b7d6 100644 --- a/apps/sim/lib/copilot/request/lifecycle/finalize.ts +++ b/apps/sim/lib/copilot/request/lifecycle/finalize.ts @@ -1,33 +1,74 @@ +import { SpanStatusCode, trace } from '@opentelemetry/api' import { createLogger } from '@sim/logger' import { updateRunStatus } from '@/lib/copilot/async-runs/repository' import { MothershipStreamV1CompletionStatus, MothershipStreamV1EventType, } from '@/lib/copilot/generated/mothership-stream-v1' +import { + type RequestTraceV1Outcome, + RequestTraceV1Outcome as RequestTraceV1OutcomeConst, +} from 
'@/lib/copilot/generated/request-trace-v1' +import { CopilotFinalizeOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' import type { StreamWriter } from '@/lib/copilot/request/session' import type { OrchestratorResult } from '@/lib/copilot/request/types' const logger = createLogger('CopilotStreamFinalize') +const getTracer = () => trace.getTracer('sim-copilot-finalize', '1.0.0') -/** - * Single finalization path for stream results. - * Handles abort / error / success and publishes the terminal event. - * Replaces duplicated blocks in the old chat-streaming.ts. - */ +// Single finalization path. `outcome` is the caller's resolved verdict +// so we don't have to re-derive cancel vs error from raw signals. export async function finalizeStream( result: OrchestratorResult, publisher: StreamWriter, runId: string, - aborted: boolean, + outcome: RequestTraceV1Outcome, requestId: string ): Promise { - if (aborted) { - return handleAborted(result, publisher, runId, requestId) - } - if (!result.success) { - return handleError(result, publisher, runId, requestId) + const spanOutcome = + outcome === RequestTraceV1OutcomeConst.cancelled + ? CopilotFinalizeOutcome.Aborted + : outcome === RequestTraceV1OutcomeConst.success + ? CopilotFinalizeOutcome.Success + : CopilotFinalizeOutcome.Error + const span = getTracer().startSpan(TraceSpan.CopilotFinalizeStream, { + attributes: { + [TraceAttr.CopilotFinalizeOutcome]: spanOutcome, + [TraceAttr.RunId]: runId, + [TraceAttr.RequestId]: requestId, + [TraceAttr.CopilotResultToolCalls]: result.toolCalls?.length ?? 0, + [TraceAttr.CopilotResultContentBlocks]: result.contentBlocks?.length ?? 0, + [TraceAttr.CopilotResultContentLength]: result.content?.length ?? 
0, + [TraceAttr.CopilotPublisherSawComplete]: publisher.sawComplete, + [TraceAttr.CopilotPublisherClientDisconnected]: publisher.clientDisconnected, + }, + }) + try { + if (outcome === RequestTraceV1OutcomeConst.cancelled) { + await handleAborted(result, publisher, runId, requestId) + } else if (outcome === RequestTraceV1OutcomeConst.error) { + span.setStatus({ + code: SpanStatusCode.ERROR, + message: result.error || 'orchestration failed', + }) + await handleError(result, publisher, runId, requestId) + } else { + await handleSuccess(publisher, runId, requestId) + } + // Successful + cancelled paths fall through as status-unset → set + // OK so dashboards don't show "incomplete" for normal terminals. + if (outcome !== RequestTraceV1OutcomeConst.error) { + span.setStatus({ code: SpanStatusCode.OK }) + } + } catch (error) { + span.recordException(error instanceof Error ? error : new Error(String(error))) + span.setStatus({ code: SpanStatusCode.ERROR, message: 'finalize threw' }) + throw error + } finally { + span.end() } - return handleSuccess(publisher, runId, requestId) } async function handleAborted( diff --git a/apps/sim/lib/copilot/request/lifecycle/headless.test.ts b/apps/sim/lib/copilot/request/lifecycle/headless.test.ts index 10b8f656689..7af0bfd58d8 100644 --- a/apps/sim/lib/copilot/request/lifecycle/headless.test.ts +++ b/apps/sim/lib/copilot/request/lifecycle/headless.test.ts @@ -2,6 +2,9 @@ * @vitest-environment node */ +import { propagation, trace } from '@opentelemetry/api' +import { W3CTraceContextPropagator } from '@opentelemetry/core' +import { BasicTracerProvider } from '@opentelemetry/sdk-trace-base' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1' import type { OrchestratorResult } from '@/lib/copilot/request/types' @@ -29,6 +32,8 @@ function createLifecycleResult(overrides?: Partial): Orchest describe('runHeadlessCopilotLifecycle', () => { 
beforeEach(() => { + trace.setGlobalTracerProvider(new BasicTracerProvider()) + propagation.setGlobalPropagator(new W3CTraceContextPropagator()) vi.stubGlobal( 'fetch', vi.fn().mockResolvedValue( @@ -155,6 +160,40 @@ describe('runHeadlessCopilotLifecycle', () => { expect(body.simRequestId).toBe('workflow-request-id') }) + it('passes an OTel context to the lifecycle and trace report', async () => { + let lifecycleTraceparent = '' + runCopilotLifecycle.mockImplementationOnce(async (_payload, options) => { + const { traceHeaders } = await import('@/lib/copilot/request/go/propagation') + lifecycleTraceparent = traceHeaders({}, options.otelContext).traceparent ?? '' + return createLifecycleResult() + }) + + await runHeadlessCopilotLifecycle( + { + message: 'hello', + messageId: 'req-otel', + }, + { + userId: 'user-1', + chatId: 'chat-1', + workflowId: 'workflow-1', + goRoute: '/api/mothership/execute', + interactive: false, + } + ) + + expect(lifecycleTraceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/) + const [, init] = vi.mocked(fetch).mock.calls[0] as [string, RequestInit] + const headers = init.headers as Record + // The outbound trace report now runs inside its own OTel child span, so + // traceparent has the same trace-id as the lifecycle but a different + // span-id. Both must stay on the same trace. 
+ const lifecycleTraceId = lifecycleTraceparent.split('-')[1] + expect(headers.traceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/) + expect(headers.traceparent.split('-')[1]).toBe(lifecycleTraceId) + expect(headers.traceparent.split('-')[2]).not.toBe(lifecycleTraceparent.split('-')[2]) + }) + it('reports an error trace when the lifecycle throws', async () => { runCopilotLifecycle.mockRejectedValueOnce(new Error('kaboom')) diff --git a/apps/sim/lib/copilot/request/lifecycle/headless.ts b/apps/sim/lib/copilot/request/lifecycle/headless.ts index d3e3be12aa4..57a8e27852c 100644 --- a/apps/sim/lib/copilot/request/lifecycle/headless.ts +++ b/apps/sim/lib/copilot/request/lifecycle/headless.ts @@ -4,8 +4,10 @@ import { RequestTraceV1Outcome, RequestTraceV1SpanStatus, } from '@/lib/copilot/generated/request-trace-v1' +import { CopilotTransport } from '@/lib/copilot/generated/trace-attribute-values-v1' import type { CopilotLifecycleOptions } from '@/lib/copilot/request/lifecycle/run' import { runCopilotLifecycle } from '@/lib/copilot/request/lifecycle/run' +import { withCopilotOtelContext } from '@/lib/copilot/request/otel' import { reportTrace, TraceCollector } from '@/lib/copilot/request/trace' import type { OrchestratorResult } from '@/lib/copilot/request/types' import { generateId } from '@/lib/core/utils/uuid' @@ -33,51 +35,72 @@ export async function runHeadlessCopilotLifecycle( let result: OrchestratorResult | undefined let outcome: RequestTraceOutcome = RequestTraceV1Outcome.error - try { - result = await runCopilotLifecycle(requestPayload, { - ...options, - trace, - simRequestId, - }) - outcome = options.abortSignal?.aborted - ? RequestTraceV1Outcome.cancelled - : result.success - ? RequestTraceV1Outcome.success - : RequestTraceV1Outcome.error - return result - } catch (error) { - outcome = options.abortSignal?.aborted - ? 
RequestTraceV1Outcome.cancelled - : RequestTraceV1Outcome.error - throw error - } finally { - trace.endSpan( - requestSpan, - outcome === RequestTraceV1Outcome.success - ? RequestTraceV1SpanStatus.ok - : outcome === RequestTraceV1Outcome.cancelled - ? RequestTraceV1SpanStatus.cancelled - : RequestTraceV1SpanStatus.error - ) - - try { - await reportTrace( - trace.build({ - outcome, + return withCopilotOtelContext( + { + requestId: simRequestId, + route: options.goRoute, + chatId: options.chatId, + workflowId: options.workflowId, + executionId: options.executionId, + runId: options.runId, + transport: CopilotTransport.Headless, + }, + async (otelContext) => { + try { + result = await runCopilotLifecycle(requestPayload, { + ...options, + trace, simRequestId, - chatId: result?.chatId ?? options.chatId, - runId: options.runId, - executionId: options.executionId, - usage: result?.usage, - cost: result?.cost, + otelContext, }) - ) - } catch (error) { - logger.warn('Failed to report headless trace', { - simRequestId, - chatId: result?.chatId ?? options.chatId, - error: error instanceof Error ? error.message : String(error), - }) + outcome = options.abortSignal?.aborted + ? RequestTraceV1Outcome.cancelled + : result.success + ? RequestTraceV1Outcome.success + : RequestTraceV1Outcome.error + return result + } catch (error) { + outcome = options.abortSignal?.aborted + ? RequestTraceV1Outcome.cancelled + : RequestTraceV1Outcome.error + throw error + } finally { + trace.endSpan( + requestSpan, + outcome === RequestTraceV1Outcome.success + ? RequestTraceV1SpanStatus.ok + : outcome === RequestTraceV1Outcome.cancelled + ? RequestTraceV1SpanStatus.cancelled + : RequestTraceV1SpanStatus.error + ) + + try { + // Best-effort extraction of the prompt from the untyped + // headless payload. Keeps parity with the streaming path + // where `message` is destructured directly. + const userMessage = + typeof requestPayload.message === 'string' ? 
requestPayload.message : undefined + await reportTrace( + trace.build({ + outcome, + simRequestId, + chatId: result?.chatId ?? options.chatId, + runId: options.runId, + executionId: options.executionId, + userMessage, + usage: result?.usage, + cost: result?.cost, + }), + otelContext + ) + } catch (error) { + logger.warn('Failed to report headless trace', { + simRequestId, + chatId: result?.chatId ?? options.chatId, + error: error instanceof Error ? error.message : String(error), + }) + } + } } - } + ) } diff --git a/apps/sim/lib/copilot/request/lifecycle/run.ts b/apps/sim/lib/copilot/request/lifecycle/run.ts index 1156d3b5928..a664ec3eb0d 100644 --- a/apps/sim/lib/copilot/request/lifecycle/run.ts +++ b/apps/sim/lib/copilot/request/lifecycle/run.ts @@ -1,3 +1,4 @@ +import type { Context } from '@opentelemetry/api' import { createLogger } from '@sim/logger' import { createRunSegment, updateRunStatus } from '@/lib/copilot/async-runs/repository' import { SIM_AGENT_API_URL, SIM_AGENT_VERSION } from '@/lib/copilot/constants' @@ -49,6 +50,7 @@ export interface CopilotLifecycleOptions extends OrchestratorOptions { goRoute?: string trace?: TraceCollector simRequestId?: string + otelContext?: Context onGoTraceId?: (goTraceId: string) => void executionContext?: ExecutionContext } @@ -111,6 +113,7 @@ export async function runCopilotLifecycle( const context = createStreamingContext({ chatId, + requestId: lifecycleOptions.simRequestId, executionId: resolvedExecutionId, runId: resolvedRunId, messageId: payloadMsgId, @@ -122,6 +125,15 @@ export async function runCopilotLifecycle( const result: OrchestratorResult = { success: context.errors.length === 0 && !context.wasAborted, + // `cancelled` is an explicit discriminator so callers can tell + // "user hit Stop" (don't clear the chat row; /chat/stop owns it) + // from "backend errored" (do clear the row so the chat isn't + // stuck with a non-null `conversationId`). 
An error that also + // happens to fire the abort signal still counts as an error + // path, but practically that doesn't happen in the success + // branch here — if there are errors we never reach a + // wasAborted-without-errors state. + cancelled: context.wasAborted && context.errors.length === 0, content: context.accumulatedContent, contentBlocks: context.contentBlocks, toolCalls: buildToolCallSummaries(context), @@ -136,9 +148,23 @@ export async function runCopilotLifecycle( } catch (error) { const err = error instanceof Error ? error : new Error('Copilot orchestration failed') logger.error('Copilot orchestration failed', { error: err.message }) - await lifecycleOptions.onError?.(err) + // If the abort signal fired, this throw is a consequence of the + // cancel (publisher.publish fails once the client disconnects, a + // downstream Go read throws on ctx cancel, etc.) — NOT a real + // backend error. Don't invoke `onError`, because on the cancel + // path `/api/copilot/chat/stop` is the single DB writer and + // `onError` would race with it via `finalizeAssistantTurn`, + // clearing `conversationId` before stop's UPDATE can match (see + // `buildOnComplete` in chat/post.ts for the full rationale). + // Return `cancelled: true` so upstream classification stays + // consistent with the success-path cancel result. + const wasCancelled = lifecycleOptions.abortSignal?.aborted ?? false + if (!wasCancelled) { + await lifecycleOptions.onError?.(err) + } return { success: false, + cancelled: wasCancelled, content: '', contentBlocks: [], toolCalls: [], @@ -224,7 +250,6 @@ async function runCheckpointLoop( 'Content-Type': 'application/json', ...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}), 'X-Client-Version': SIM_AGENT_VERSION, - ...(options.simRequestId ? 
{ 'X-Sim-Request-ID': options.simRequestId } : {}), }, body: JSON.stringify(payload), }, diff --git a/apps/sim/lib/copilot/request/lifecycle/start.test.ts b/apps/sim/lib/copilot/request/lifecycle/start.test.ts index 9c5ee7adacf..5477fc9994b 100644 --- a/apps/sim/lib/copilot/request/lifecycle/start.test.ts +++ b/apps/sim/lib/copilot/request/lifecycle/start.test.ts @@ -2,7 +2,10 @@ * @vitest-environment node */ -import { beforeEach, describe, expect, it, vi } from 'vitest' +import { propagation, trace } from '@opentelemetry/api' +import { W3CTraceContextPropagator } from '@opentelemetry/core' +import { BasicTracerProvider } from '@opentelemetry/sdk-trace-base' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { MothershipStreamV1EventType } from '@/lib/copilot/generated/mothership-stream-v1' const { @@ -115,6 +118,19 @@ async function drainStream(stream: ReadableStream) { describe('createSSEStream terminal error handling', () => { beforeEach(() => { vi.clearAllMocks() + trace.setGlobalTracerProvider(new BasicTracerProvider()) + propagation.setGlobalPropagator(new W3CTraceContextPropagator()) + vi.stubGlobal( + 'fetch', + vi.fn().mockResolvedValue( + new Response(JSON.stringify({ title: 'Test title' }), { + status: 200, + headers: { + 'Content-Type': 'application/json', + }, + }) + ) + ) resetBuffer.mockResolvedValue(undefined) clearFilePreviewSessions.mockResolvedValue(undefined) scheduleBufferCleanup.mockResolvedValue(undefined) @@ -131,6 +147,10 @@ describe('createSSEStream terminal error handling', () => { updateRunStatus.mockResolvedValue(null) }) + afterEach(() => { + vi.unstubAllGlobals() + }) + it('writes a terminal error event before close when orchestration returns success=false', async () => { runCopilotLifecycle.mockResolvedValue({ success: false, @@ -190,4 +210,39 @@ describe('createSSEStream terminal error handling', () => { ) expect(scheduleBufferCleanup).toHaveBeenCalledWith('stream-1') }) + + it('passes an OTel context 
into the streaming lifecycle', async () => { + let lifecycleTraceparent = '' + runCopilotLifecycle.mockImplementation(async (_payload, options) => { + const { traceHeaders } = await import('@/lib/copilot/request/go/propagation') + lifecycleTraceparent = traceHeaders({}, options.otelContext).traceparent ?? '' + return { + success: true, + content: 'OK', + contentBlocks: [], + toolCalls: [], + } + }) + + const stream = createSSEStream({ + requestPayload: { message: 'hello' }, + userId: 'user-1', + streamId: 'stream-1', + executionId: 'exec-1', + runId: 'run-1', + currentChat: null, + isNewChat: false, + message: 'hello', + titleModel: 'gpt-5.4', + requestId: 'req-otel', + orchestrateOptions: { + goRoute: '/api/mothership', + workflowId: 'workflow-1', + }, + }) + + await drainStream(stream) + + expect(lifecycleTraceparent).toMatch(/^00-[0-9a-f]{32}-[0-9a-f]{16}-0[0-9a-f]$/) + }) }) diff --git a/apps/sim/lib/copilot/request/lifecycle/start.ts b/apps/sim/lib/copilot/request/lifecycle/start.ts index caf6fb4df87..0d2273a1e8a 100644 --- a/apps/sim/lib/copilot/request/lifecycle/start.ts +++ b/apps/sim/lib/copilot/request/lifecycle/start.ts @@ -1,3 +1,4 @@ +import { type Context, context as otelContextApi } from '@opentelemetry/api' import { db } from '@sim/db' import { copilotChats } from '@sim/db/schema' import { createLogger } from '@sim/logger' @@ -8,13 +9,25 @@ import { MothershipStreamV1EventType, MothershipStreamV1SessionKind, } from '@/lib/copilot/generated/mothership-stream-v1' -import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1' +import { + RequestTraceV1Outcome, + RequestTraceV1SpanStatus, +} from '@/lib/copilot/generated/request-trace-v1' +import { + CopilotRequestCancelReason, + type CopilotRequestCancelReasonValue, + CopilotTransport, +} from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceEvent } from 
'@/lib/copilot/generated/trace-events-v1' import { finalizeStream } from '@/lib/copilot/request/lifecycle/finalize' import type { CopilotLifecycleOptions } from '@/lib/copilot/request/lifecycle/run' import { runCopilotLifecycle } from '@/lib/copilot/request/lifecycle/run' +import { type CopilotLifecycleOutcome, startCopilotOtelRoot } from '@/lib/copilot/request/otel' import { cleanupAbortMarker, clearFilePreviewSessions, + isExplicitStopReason, registerActiveStream, releasePendingChatStream, resetBuffer, @@ -52,6 +65,11 @@ export interface StreamingOrchestrationParams { requestId: string workspaceId?: string orchestrateOptions: Omit + /** + * Pre-started root; child spans bind to it and `finish()` fires on + * termination. Omit to let the stream start its own root (headless). + */ + otelRoot?: ReturnType } export function createSSEStream(params: StreamingOrchestrationParams): ReadableStream { @@ -70,163 +88,303 @@ export function createSSEStream(params: StreamingOrchestrationParams): ReadableS requestId, workspaceId, orchestrateOptions, + otelRoot, } = params + // Reuse caller's root if provided; otherwise start our own. + const activeOtelRoot = + otelRoot ?? + startCopilotOtelRoot({ + requestId, + route: orchestrateOptions.goRoute, + chatId, + workflowId: orchestrateOptions.workflowId, + executionId, + runId, + streamId, + transport: CopilotTransport.Stream, + }) + const abortController = new AbortController() registerActiveStream(streamId, abortController) const publisher = new StreamWriter({ streamId, chatId, requestId }) + // Classify cancel: signal.reason (explicit-stop set) wins, then + // clientDisconnected, else Unknown (latent contract bug — log it). 
+ const recordCancelled = (errorMessage?: string): CopilotRequestCancelReasonValue => { + const rawReason = abortController.signal.reason + let cancelReason: CopilotRequestCancelReasonValue + if (isExplicitStopReason(rawReason)) { + cancelReason = CopilotRequestCancelReason.ExplicitStop + } else if (publisher.clientDisconnected) { + cancelReason = CopilotRequestCancelReason.ClientDisconnect + } else { + cancelReason = CopilotRequestCancelReason.Unknown + const serializedReason = + rawReason === undefined + ? 'undefined' + : rawReason instanceof Error + ? `${rawReason.name}: ${rawReason.message}` + : typeof rawReason === 'string' + ? rawReason + : (() => { + try { + return JSON.stringify(rawReason) + } catch { + return String(rawReason) + } + })() + // Contract violation: add the new reason to AbortReason / + // isExplicitStopReason or extend the classifier. + logger.error(`[${requestId}] Stream cancelled with unknown abort reason`, { + streamId, + chatId, + reason: serializedReason, + }) + activeOtelRoot.span.setAttribute(TraceAttr.CopilotAbortUnknownReason, serializedReason) + } + activeOtelRoot.span.setAttribute(TraceAttr.CopilotRequestCancelReason, cancelReason) + activeOtelRoot.span.addEvent(TraceEvent.RequestCancelled, { + [TraceAttr.CopilotRequestCancelReason]: cancelReason, + ...(errorMessage ? { [TraceAttr.ErrorMessage]: errorMessage } : {}), + }) + return cancelReason + } + const collector = new TraceCollector() return new ReadableStream({ async start(controller) { publisher.attach(controller) - const requestSpan = collector.startSpan('Mothership Request', 'request', { - streamId, - chatId, - runId, - }) - let outcome: 'success' | 'error' | 'cancelled' = 'error' - let lifecycleResult: - | { - usage?: { prompt: number; completion: number } - cost?: { input: number; output: number; total: number } - } - | undefined + // Re-enter the root OTel context — ALS doesn't survive the + // Next handler → ReadableStream.start boundary. 
+ await otelContextApi.with(activeOtelRoot.context, async () => { + const otelContext = activeOtelRoot.context + let rootOutcome: CopilotLifecycleOutcome = RequestTraceV1Outcome.error + let rootError: unknown + try { + const requestSpan = collector.startSpan('Mothership Request', 'request', { + streamId, + chatId, + runId, + }) + let outcome: CopilotLifecycleOutcome = RequestTraceV1Outcome.error + let lifecycleResult: + | { + usage?: { prompt: number; completion: number } + cost?: { input: number; output: number; total: number } + } + | undefined - await Promise.all([resetBuffer(streamId), clearFilePreviewSessions(streamId)]) + await Promise.all([resetBuffer(streamId), clearFilePreviewSessions(streamId)]) - if (chatId) { - createRunSegment({ - id: runId, - executionId, - chatId, - userId, - workflowId: (requestPayload.workflowId as string | undefined) || null, - workspaceId, - streamId, - model: (requestPayload.model as string | undefined) || null, - provider: (requestPayload.provider as string | undefined) || null, - requestContext: { requestId }, - }).catch((error) => { - logger.warn(`[${requestId}] Failed to create copilot run segment`, { - error: error instanceof Error ? error.message : String(error), + if (chatId) { + createRunSegment({ + id: runId, + executionId, + chatId, + userId, + workflowId: (requestPayload.workflowId as string | undefined) || null, + workspaceId, + streamId, + model: (requestPayload.model as string | undefined) || null, + provider: (requestPayload.provider as string | undefined) || null, + requestContext: { requestId }, + }).catch((error) => { + logger.warn(`[${requestId}] Failed to create copilot run segment`, { + error: error instanceof Error ? 
error.message : String(error), + }) + }) + } + + const abortPoller = startAbortPoller(streamId, abortController, { + requestId, }) - }) - } + publisher.startKeepalive() - const abortPoller = startAbortPoller(streamId, abortController, { requestId }) - publisher.startKeepalive() + if (chatId) { + publisher.publish({ + type: MothershipStreamV1EventType.session, + payload: { + kind: MothershipStreamV1SessionKind.chat, + chatId, + }, + }) + } - if (chatId) { - publisher.publish({ - type: MothershipStreamV1EventType.session, - payload: { - kind: MothershipStreamV1SessionKind.chat, + fireTitleGeneration({ chatId, - }, - }) - } + currentChat, + isNewChat, + message, + titleModel, + titleProvider, + workspaceId, + requestId, + publisher, + otelContext, + }) - fireTitleGeneration({ - chatId, - currentChat, - isNewChat, - message, - titleModel, - titleProvider, - workspaceId, - requestId, - publisher, - }) + try { + const result = await runCopilotLifecycle(requestPayload, { + ...orchestrateOptions, + executionId, + runId, + trace: collector, + simRequestId: requestId, + otelContext, + abortSignal: abortController.signal, + onEvent: async (event) => { + await publisher.publish(event) + }, + }) - try { - const result = await runCopilotLifecycle(requestPayload, { - ...orchestrateOptions, - executionId, - runId, - trace: collector, - simRequestId: requestId, - abortSignal: abortController.signal, - onEvent: async (event) => { - await publisher.publish(event) - }, - }) + lifecycleResult = result + // Outcome classification (priority order): + // 1. `result.success` → success. The orchestrator + // reporting "finished cleanly" wins over any later + // signal change. Matters for the narrow race where + // the user clicks Stop a beat after the stream + // completed. + // 2. `signal.aborted` (from `abortActiveStream` or the + // Redis-marker poller) OR `clientDisconnected` with + // a non-success result → cancelled. 
`recordCancelled` + // further refines into explicit_stop / client_disconnect + // / unknown via `signal.reason`. + // 3. Otherwise → error. + outcome = result.success + ? RequestTraceV1Outcome.success + : abortController.signal.aborted || publisher.clientDisconnected + ? RequestTraceV1Outcome.cancelled + : RequestTraceV1Outcome.error + if (outcome === RequestTraceV1Outcome.cancelled) { + recordCancelled() + } + // Pass the resolved outcome — not `signal.aborted` — so + // `finalizeStream` classifies the same way we did above. + // A client-disconnect-without-controller-abort still needs + // to hit `handleAborted` (not `handleError`) so the chat + // row gets `cancelled` terminal state instead of `error`. + await finalizeStream(result, publisher, runId, outcome, requestId) + } catch (error) { + // Error-path classification: if the abort signal fired or + // the client disconnected, treat the thrown error as a + // cancel (same rationale as the try-path above). + const wasCancelled = abortController.signal.aborted || publisher.clientDisconnected + outcome = wasCancelled ? RequestTraceV1Outcome.cancelled : RequestTraceV1Outcome.error + if (outcome === RequestTraceV1Outcome.cancelled) { + recordCancelled(error instanceof Error ? error.message : String(error)) + } + if (publisher.clientDisconnected) { + logger.info(`[${requestId}] Stream errored after client disconnect`, { + error: error instanceof Error ? error.message : 'Stream error', + }) + } + // Demote to warn when the throw came from a user-initiated + // cancel — it isn't an "unexpected" failure then, and the + // error-level log pollutes alerting on normal Stop presses. + const logFn = + outcome === RequestTraceV1Outcome.cancelled ? logger.warn : logger.error + logFn.call( + logger, + `[${requestId}] Orchestration ended with ${outcome}:`, + error + ) - lifecycleResult = result - outcome = abortController.signal.aborted - ? RequestTraceV1Outcome.cancelled - : result.success - ? 
RequestTraceV1Outcome.success - : RequestTraceV1Outcome.error - await finalizeStream(result, publisher, runId, abortController.signal.aborted, requestId) - } catch (error) { - outcome = abortController.signal.aborted - ? RequestTraceV1Outcome.cancelled - : RequestTraceV1Outcome.error - if (publisher.clientDisconnected) { - logger.info(`[${requestId}] Stream errored after client disconnect`, { - error: error instanceof Error ? error.message : 'Stream error', - }) - } - logger.error(`[${requestId}] Unexpected orchestration error:`, error) + const syntheticResult = { + success: false as const, + content: '', + contentBlocks: [], + toolCalls: [], + error: 'An unexpected error occurred while processing the response.', + } + await finalizeStream(syntheticResult, publisher, runId, outcome, requestId) + } finally { + collector.endSpan( + requestSpan, + outcome === RequestTraceV1Outcome.success + ? RequestTraceV1SpanStatus.ok + : outcome === RequestTraceV1Outcome.cancelled + ? RequestTraceV1SpanStatus.cancelled + : RequestTraceV1SpanStatus.error + ) - const syntheticResult = { - success: false as const, - content: '', - contentBlocks: [], - toolCalls: [], - error: 'An unexpected error occurred while processing the response.', - } - await finalizeStream( - syntheticResult, - publisher, - runId, - abortController.signal.aborted, - requestId - ) - } finally { - collector.endSpan( - requestSpan, - outcome === RequestTraceV1Outcome.success - ? 'ok' - : outcome === RequestTraceV1Outcome.cancelled - ? 'cancelled' - : 'error' - ) + clearInterval(abortPoller) + try { + await publisher.close() + } catch (error) { + logger.warn(`[${requestId}] Failed to flush stream persistence during close`, { + error: error instanceof Error ? 
error.message : String(error), + }) + } + unregisterActiveStream(streamId) + if (chatId) { + await releasePendingChatStream(chatId, streamId) + } + await scheduleBufferCleanup(streamId) + await scheduleFilePreviewSessionCleanup(streamId) + await cleanupAbortMarker(streamId) - clearInterval(abortPoller) - try { - await publisher.close() + const trace = collector.build({ + outcome, + simRequestId: requestId, + streamId, + chatId, + runId, + executionId, + // Pass the raw user prompt through so the Go-side trace + // ingest can stamp it onto the `request_traces.message` + // column at insert time. Avoids relying on the late + // `UpdateAnalytics` UPDATE (which silently misses many + // rows). + userMessage: message, + usage: lifecycleResult?.usage, + cost: lifecycleResult?.cost, + }) + reportTrace(trace, otelContext).catch((err) => { + logger.warn(`[${requestId}] Failed to report trace`, { + error: err instanceof Error ? err.message : String(err), + }) + }) + rootOutcome = outcome + if (lifecycleResult?.usage) { + activeOtelRoot.span.setAttributes({ + [TraceAttr.GenAiUsageInputTokens]: lifecycleResult.usage.prompt ?? 0, + [TraceAttr.GenAiUsageOutputTokens]: lifecycleResult.usage.completion ?? 0, + }) + } + if (lifecycleResult?.cost) { + activeOtelRoot.span.setAttributes({ + [TraceAttr.BillingCostInputUsd]: lifecycleResult.cost.input ?? 0, + [TraceAttr.BillingCostOutputUsd]: lifecycleResult.cost.output ?? 0, + [TraceAttr.BillingCostTotalUsd]: lifecycleResult.cost.total ?? 0, + }) + } + } } catch (error) { - logger.warn(`[${requestId}] Failed to flush stream persistence during close`, { - error: error instanceof Error ? 
error.message : String(error), - }) - } - unregisterActiveStream(streamId) - if (chatId) { - await releasePendingChatStream(chatId, streamId) + rootOutcome = RequestTraceV1Outcome.error + rootError = error + throw error + } finally { + // `finish` is idempotent, so it's safe whether the POST + // handler started the root (and may also call finish on an + // error path before the stream ran) or we did. + activeOtelRoot.finish(rootOutcome, rootError) } - await scheduleBufferCleanup(streamId) - await scheduleFilePreviewSessionCleanup(streamId) - await cleanupAbortMarker(streamId) - - const trace = collector.build({ - outcome: outcome as 'success' | 'error' | 'cancelled', - simRequestId: requestId, - streamId, - chatId, - runId, - executionId, - usage: lifecycleResult?.usage, - cost: lifecycleResult?.cost, - }) - reportTrace(trace).catch(() => {}) - } + }) }, cancel() { + // The browser's SSE reader closed. Flip `clientDisconnected` so + // in-flight `publisher.publish` calls silently no-op (prevents + // enqueueing on a closed controller). + // + // Browser disconnect is NOT an abort — firing the controller + // here retroactively reclassifies in-flight successful streams + // as aborted and skips assistant persistence. Let the + // orchestrator drain naturally; publish no-ops post-disconnect. + // Explicit Stop still fires the controller via /chat/abort. 
publisher.markDisconnected() }, }) @@ -246,6 +404,7 @@ function fireTitleGeneration(params: { workspaceId?: string requestId: string publisher: StreamWriter + otelContext?: Context }): void { const { chatId, @@ -257,10 +416,16 @@ function fireTitleGeneration(params: { workspaceId, requestId, publisher, + otelContext, } = params if (!chatId || currentChat?.title || !isNewChat) return - requestChatTitle({ message, model: titleModel, provider: titleProvider }) + requestChatTitle({ + message, + model: titleModel, + provider: titleProvider, + otelContext, + }) .then(async (title) => { if (!title) return await db.update(copilotChats).set({ title }).where(eq(copilotChats.id, chatId)) @@ -269,7 +434,11 @@ function fireTitleGeneration(params: { payload: { kind: MothershipStreamV1SessionKind.title, title }, }) if (workspaceId) { - taskPubSub?.publishStatusChanged({ workspaceId, chatId, type: 'renamed' }) + taskPubSub?.publishStatusChanged({ + workspaceId, + chatId, + type: 'renamed', + }) } }) .catch((error) => { @@ -285,20 +454,35 @@ export async function requestChatTitle(params: { message: string model: string provider?: string + otelContext?: Context }): Promise { - const { message, model, provider } = params + const { message, model, provider, otelContext } = params if (!message || !model) return null - const headers: Record = { 'Content-Type': 'application/json' } + const headers: Record = { + 'Content-Type': 'application/json', + } if (env.COPILOT_API_KEY) { headers['x-api-key'] = env.COPILOT_API_KEY } try { - const response = await fetch(`${SIM_AGENT_API_URL}/api/generate-chat-title`, { + const { fetchGo } = await import('@/lib/copilot/request/go/fetch') + const response = await fetchGo(`${SIM_AGENT_API_URL}/api/generate-chat-title`, { method: 'POST', headers, - body: JSON.stringify({ message, model, ...(provider ? { provider } : {}) }), + body: JSON.stringify({ + message, + model, + ...(provider ? 
{ provider } : {}), + }), + otelContext, + spanName: 'sim → go /api/generate-chat-title', + operation: 'generate_chat_title', + attributes: { + [TraceAttr.GenAiRequestModel]: model, + ...(provider ? { [TraceAttr.GenAiSystem]: provider } : {}), + }, }) const payload = await response.json().catch(() => ({})) diff --git a/apps/sim/lib/copilot/request/otel.ts b/apps/sim/lib/copilot/request/otel.ts new file mode 100644 index 00000000000..ec11e4ce9e2 --- /dev/null +++ b/apps/sim/lib/copilot/request/otel.ts @@ -0,0 +1,527 @@ +import { randomBytes } from 'crypto' +import { + type Context, + context, + ROOT_CONTEXT, + type Span, + type SpanContext, + SpanKind, + SpanStatusCode, + TraceFlags, + trace, +} from '@opentelemetry/api' +import { RequestTraceV1Outcome } from '@/lib/copilot/generated/request-trace-v1' +import { + CopilotBranchKind, + CopilotSurface, + CopilotTransport, +} from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { contextFromRequestHeaders } from '@/lib/copilot/request/go/propagation' + +// OTel GenAI content-capture env var (spec: +// https://opentelemetry.io/docs/specs/semconv/gen-ai/). Mirrored on +// the Go side so a single var controls both halves. +const GENAI_CAPTURE_ENV = 'OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT' + +// OTLP backends commonly reject attrs over 64 KiB; cap proactively. +const GENAI_MESSAGE_ATTR_MAX_BYTES = 60 * 1024 + +function isGenAIMessageCaptureEnabled(): boolean { + const raw = (process.env[GENAI_CAPTURE_ENV] || '').toLowerCase().trim() + return raw === 'true' || raw === '1' || raw === 'yes' +} + +// True if `err` is an AbortSignal-fired cancellation (any runtime +// flavor). Callers suppress ERROR status on cancel paths. 
+export function isCancellationError(err: unknown): boolean { + if (err == null) return false + if (typeof err === 'object') { + const e = err as { name?: unknown; code?: unknown; message?: unknown } + if (e.name === 'AbortError') return true + if (e.code === 'ABORT_ERR') return true + // Some wrappers stringify into the message but lose the name. + if (typeof e.message === 'string' && /aborted|AbortError/i.test(e.message)) { + return true + } + } + return false +} + +// Record exception + set ERROR only for real failures (cancels stay unset). +export function markSpanForError(span: Span, error: unknown): void { + const asError = error instanceof Error ? error : new Error(String(error)) + span.recordException(asError) + if (!isCancellationError(error)) { + span.setStatus({ + code: SpanStatusCode.ERROR, + message: error instanceof Error ? error.message : String(error), + }) + } +} + +// OTel GenAI message shape (kept minimal). Mirror changes on the Go side. +interface GenAIAgentPart { + type: 'text' | 'tool_call' | 'tool_call_response' + content?: string + id?: string + name?: string + arguments?: Record + response?: string +} + +interface GenAIAgentMessage { + role: 'system' | 'user' | 'assistant' | 'tool' + parts: GenAIAgentPart[] +} + +function marshalAgentMessages(messages: GenAIAgentMessage[]): string | undefined { + if (messages.length === 0) return undefined + const json = JSON.stringify(messages) + if (json.length <= GENAI_MESSAGE_ATTR_MAX_BYTES) return json + // Simple tail-preserving truncation: drop from the front until we + // fit. Matches the Go side's behavior. The last message is + // usually the most diagnostic for span-level outcome. 
+ let remaining = messages.slice() + while (remaining.length > 1) { + remaining = remaining.slice(1) + const candidate = JSON.stringify(remaining) + if (candidate.length <= GENAI_MESSAGE_ATTR_MAX_BYTES) return candidate + } + // Single message still over cap — truncate the text part in place + // with a marker so the partial content is still readable. + const only = remaining[0] + for (const part of only.parts) { + if (part.type === 'text' && part.content) { + const headroom = GENAI_MESSAGE_ATTR_MAX_BYTES - 1024 + if (part.content.length > headroom) { + part.content = `${part.content.slice(0, headroom)}\n\n[truncated: capture cap ${GENAI_MESSAGE_ATTR_MAX_BYTES} bytes]` + } + } + } + const final = JSON.stringify([only]) + return final.length <= GENAI_MESSAGE_ATTR_MAX_BYTES ? final : undefined +} + +interface CopilotAgentInputMessages { + userMessage?: string + systemPrompt?: string +} + +interface CopilotAgentOutputMessages { + assistantText?: string + toolCalls?: Array<{ + id: string + name: string + arguments?: Record + }> +} + +function setAgentInputMessages(span: Span, input: CopilotAgentInputMessages): void { + if (!isGenAIMessageCaptureEnabled()) return + const messages: GenAIAgentMessage[] = [] + if (input.systemPrompt) { + messages.push({ + role: 'system', + parts: [{ type: 'text', content: input.systemPrompt }], + }) + } + if (input.userMessage) { + messages.push({ + role: 'user', + parts: [{ type: 'text', content: input.userMessage }], + }) + } + const serialized = marshalAgentMessages(messages) + if (serialized) { + span.setAttribute(TraceAttr.GenAiInputMessages, serialized) + } +} + +function setAgentOutputMessages(span: Span, output: CopilotAgentOutputMessages): void { + if (!isGenAIMessageCaptureEnabled()) return + const parts: GenAIAgentPart[] = [] + if (output.assistantText) { + parts.push({ type: 'text', content: output.assistantText }) + } + for (const tc of output.toolCalls ?? 
[]) { + parts.push({ + type: 'tool_call', + id: tc.id, + name: tc.name, + ...(tc.arguments ? { arguments: tc.arguments } : {}), + }) + } + if (parts.length === 0) return + const serialized = marshalAgentMessages([{ role: 'assistant', parts }]) + if (serialized) { + span.setAttribute(TraceAttr.GenAiOutputMessages, serialized) + } +} + +export type CopilotLifecycleOutcome = + (typeof RequestTraceV1Outcome)[keyof typeof RequestTraceV1Outcome] + +// Lazy tracer — Next 16/Turbopack can evaluate modules before NodeSDK +// installs the real TracerProvider; resolving per call avoids a +// cached NoOpTracer silently disabling OTel. +export function getCopilotTracer() { + return trace.getTracer('sim-ai-platform', '1.0.0') +} + +function getTracer() { + return getCopilotTracer() +} + +// Wrap an inbound handler that Go called into so its span parents +// under the Go-side trace (via `traceparent`). +export async function withIncomingGoSpan( + headers: Headers, + spanName: string, + attributes: Record | undefined, + fn: (span: Span) => Promise +): Promise { + const parentContext = contextFromRequestHeaders(headers) + const tracer = getTracer() + return tracer.startActiveSpan( + spanName, + { kind: SpanKind.SERVER, attributes }, + parentContext, + async (span) => { + try { + const result = await fn(span) + span.setStatus({ code: SpanStatusCode.OK }) + return result + } catch (error) { + markSpanForError(span, error) + throw error + } finally { + span.end() + } + } + ) +} + +// Wrap a copilot-lifecycle op in an OTel span. Pass `parentContext` +// explicitly when AsyncLocalStorage-tracked context can be dropped +// across multiple awaits (otherwise the child falls back to a framework +// span that the sampler drops). 
+export async function withCopilotSpan( + spanName: string, + attributes: Record | undefined, + fn: (span: Span) => Promise, + parentContext?: Context +): Promise { + const tracer = getTracer() + const runBody = async (span: Span) => { + try { + const result = await fn(span) + span.setStatus({ code: SpanStatusCode.OK }) + return result + } catch (error) { + markSpanForError(span, error) + throw error + } finally { + span.end() + } + } + if (parentContext) { + return tracer.startActiveSpan(spanName, { attributes }, parentContext, runBody) + } + return tracer.startActiveSpan(spanName, { attributes }, runBody) +} + +// External OTel `tool.execute` span for Sim-side tool work (the Go +// side's `tool.execute` is just the enqueue, stays ~0ms). +export async function withCopilotToolSpan( + input: { + toolName: string + toolCallId: string + runId?: string + chatId?: string + argsBytes?: number + argsPreview?: string + }, + fn: (span: Span) => Promise +): Promise { + const tracer = getTracer() + return tracer.startActiveSpan( + `tool.execute ${input.toolName}`, + { + attributes: { + [TraceAttr.ToolName]: input.toolName, + [TraceAttr.ToolCallId]: input.toolCallId, + [TraceAttr.ToolExecutor]: 'sim', + ...(input.runId ? { [TraceAttr.RunId]: input.runId } : {}), + ...(input.chatId ? { [TraceAttr.ChatId]: input.chatId } : {}), + ...(typeof input.argsBytes === 'number' + ? { [TraceAttr.ToolArgsBytes]: input.argsBytes } + : {}), + // argsPreview can leak pasted credentials in tool args; gate + // behind the GenAI content-capture env var. + ...(input.argsPreview && isGenAIMessageCaptureEnabled() + ? 
{ [TraceAttr.ToolArgsPreview]: input.argsPreview } + : {}), + }, + }, + async (span) => { + try { + const result = await fn(span) + span.setStatus({ code: SpanStatusCode.OK }) + return result + } catch (error) { + markSpanForError(span, error) + throw error + } finally { + span.end() + } + } + ) +} + +function isValidSpanContext(spanContext: SpanContext): boolean { + return ( + /^[0-9a-f]{32}$/.test(spanContext.traceId) && + spanContext.traceId !== '00000000000000000000000000000000' && + /^[0-9a-f]{16}$/.test(spanContext.spanId) && + spanContext.spanId !== '0000000000000000' + ) +} + +function createFallbackSpanContext(): SpanContext { + return { + traceId: randomBytes(16).toString('hex'), + spanId: randomBytes(8).toString('hex'), + traceFlags: TraceFlags.SAMPLED, + } +} + +interface CopilotOtelScope { + // Leave unset on the chat POST — startCopilotOtelRoot will derive + // from the root span's OTel trace ID (same value Grafana uses). + // Set explicitly on paths that need a non-trace-derived ID (headless, + // resume taking an ID from persisted state). + requestId?: string + route?: string + chatId?: string + workflowId?: string + executionId?: string + runId?: string + streamId?: string + transport: 'headless' | 'stream' + userMessagePreview?: string +} + +// Dashboard-column width; long enough for triage disambiguation. +const USER_MESSAGE_PREVIEW_MAX_CHARS = 500 +function buildAgentSpanAttributes( + scope: CopilotOtelScope & { requestId: string } +): Record { + // Gated behind the same env var as full GenAI message capture — a + // 500-char preview is still user prompt content. + const preview = isGenAIMessageCaptureEnabled() + ? truncateUserMessagePreview(scope.userMessagePreview) + : undefined + return { + [TraceAttr.GenAiAgentName]: 'mothership', + [TraceAttr.GenAiAgentId]: + scope.transport === CopilotTransport.Stream ? 'mothership-stream' : 'mothership-headless', + [TraceAttr.GenAiOperationName]: + scope.transport === CopilotTransport.Stream ? 
'chat' : 'invoke_agent', + [TraceAttr.RequestId]: scope.requestId, + [TraceAttr.SimRequestId]: scope.requestId, + [TraceAttr.CopilotRoute]: scope.route ?? '', + [TraceAttr.CopilotTransport]: scope.transport, + ...(scope.chatId ? { [TraceAttr.ChatId]: scope.chatId } : {}), + ...(scope.workflowId ? { [TraceAttr.WorkflowId]: scope.workflowId } : {}), + ...(scope.executionId ? { [TraceAttr.CopilotExecutionId]: scope.executionId } : {}), + ...(scope.runId ? { [TraceAttr.RunId]: scope.runId } : {}), + ...(scope.streamId ? { [TraceAttr.StreamId]: scope.streamId } : {}), + ...(preview ? { [TraceAttr.CopilotUserMessagePreview]: preview } : {}), + } +} + +function truncateUserMessagePreview(raw: unknown): string | undefined { + if (typeof raw !== 'string') return undefined + const collapsed = raw.replace(/\s+/g, ' ').trim() + if (!collapsed) return undefined + if (collapsed.length <= USER_MESSAGE_PREVIEW_MAX_CHARS) return collapsed + return `${collapsed.slice(0, USER_MESSAGE_PREVIEW_MAX_CHARS - 1)}…` +} + +// Request-shape metadata known only after branch resolution. Stamped +// on the root span for dashboard filtering. 
+interface CopilotOtelRequestShape { + branchKind?: 'workflow' | 'workspace' + mode?: string + model?: string + provider?: string + createNewChat?: boolean + prefetch?: boolean + fileAttachmentsCount?: number + resourceAttachmentsCount?: number + contextsCount?: number + commandsCount?: number + pendingStreamWaitMs?: number + interruptedPriorStream?: boolean +} + +interface CopilotOtelRoot { + span: Span + context: Context + finish: (outcome?: CopilotLifecycleOutcome, error?: unknown) => void + setInputMessages: (input: CopilotAgentInputMessages) => void + setOutputMessages: (output: CopilotAgentOutputMessages) => void + setRequestShape: (shape: CopilotOtelRequestShape) => void +} + +export function startCopilotOtelRoot( + scope: CopilotOtelScope +): CopilotOtelRoot & { requestId: string } { + // TRUE root — don't inherit from Next's HTTP handler span (the + // sampler drops those; we'd orphan the whole mothership tree). + const parentContext = ROOT_CONTEXT + // Start with a placeholder `requestId`, then overwrite using the + // span's actual trace ID so the UI copy-button value pastes + // directly into Grafana. + const span = getTracer().startSpan( + TraceSpan.GenAiAgentExecute, + { attributes: buildAgentSpanAttributes({ ...scope, requestId: '' }) }, + parentContext + ) + const carrierSpan = isValidSpanContext(span.spanContext()) + ? span + : trace.wrapSpanContext(createFallbackSpanContext()) + const spanContext = carrierSpan.spanContext() + const requestId = + scope.requestId ?? + (spanContext.traceId && spanContext.traceId.length === 32 ? spanContext.traceId : '') + span.setAttribute(TraceAttr.RequestId, requestId) + span.setAttribute(TraceAttr.SimRequestId, requestId) + const rootContext = trace.setSpan(parentContext, carrierSpan) + + let finished = false + const finish: CopilotOtelRoot['finish'] = (outcome, error) => { + if (finished) return + finished = true + const resolvedOutcome = outcome ?? 
RequestTraceV1Outcome.success + span.setAttribute(TraceAttr.CopilotRequestOutcome, resolvedOutcome) + if (error) { + markSpanForError(span, error) + if (isCancellationError(error)) { + span.setStatus({ code: SpanStatusCode.OK }) + } + } else if ( + resolvedOutcome === RequestTraceV1Outcome.success || + resolvedOutcome === RequestTraceV1Outcome.cancelled + ) { + // Cancelled = OK so dashboards keying off span status don't + // treat Stop as a failure. Detail lives on cancel_reason. + span.setStatus({ code: SpanStatusCode.OK }) + } + span.end() + } + + return { + span, + context: rootContext, + requestId, + finish, + setInputMessages: (input) => setAgentInputMessages(span, input), + setOutputMessages: (output) => setAgentOutputMessages(span, output), + setRequestShape: (shape) => applyRequestShape(span, shape), + } +} + +// Pending-stream-lock wait above this = inferred send-to-interrupt. +const INTERRUPT_WAIT_MS_THRESHOLD = 50 + +function applyRequestShape(span: Span, shape: CopilotOtelRequestShape): void { + if (shape.branchKind) { + span.setAttribute(TraceAttr.CopilotBranchKind, shape.branchKind) + span.setAttribute( + TraceAttr.CopilotSurface, + shape.branchKind === CopilotBranchKind.Workflow + ? 
CopilotSurface.Copilot + : CopilotSurface.Mothership + ) + } + if (shape.mode) span.setAttribute(TraceAttr.CopilotMode, shape.mode) + if (shape.model) span.setAttribute(TraceAttr.GenAiRequestModel, shape.model) + if (shape.provider) span.setAttribute(TraceAttr.GenAiSystem, shape.provider) + if (typeof shape.createNewChat === 'boolean') { + span.setAttribute(TraceAttr.CopilotChatIsNew, shape.createNewChat) + } + if (typeof shape.prefetch === 'boolean') { + span.setAttribute(TraceAttr.CopilotPrefetch, shape.prefetch) + } + if (typeof shape.fileAttachmentsCount === 'number') { + span.setAttribute(TraceAttr.CopilotFileAttachmentsCount, shape.fileAttachmentsCount) + } + if (typeof shape.resourceAttachmentsCount === 'number') { + span.setAttribute(TraceAttr.CopilotResourceAttachmentsCount, shape.resourceAttachmentsCount) + } + if (typeof shape.contextsCount === 'number') { + span.setAttribute(TraceAttr.CopilotContextsCount, shape.contextsCount) + } + if (typeof shape.commandsCount === 'number') { + span.setAttribute(TraceAttr.CopilotCommandsCount, shape.commandsCount) + } + if (typeof shape.pendingStreamWaitMs === 'number') { + span.setAttribute(TraceAttr.CopilotPendingStreamWaitMs, shape.pendingStreamWaitMs) + const interrupted = + typeof shape.interruptedPriorStream === 'boolean' + ? shape.interruptedPriorStream + : shape.pendingStreamWaitMs > INTERRUPT_WAIT_MS_THRESHOLD + span.setAttribute(TraceAttr.CopilotInterruptedPriorStream, interrupted) + } else if (typeof shape.interruptedPriorStream === 'boolean') { + span.setAttribute(TraceAttr.CopilotInterruptedPriorStream, shape.interruptedPriorStream) + } +} + +export async function withCopilotOtelContext( + scope: CopilotOtelScope, + fn: (otelContext: Context) => Promise +): Promise { + const parentContext = context.active() + // Same trace-id-derives-requestId dance as startCopilotOtelRoot — see + // that function for the rationale. Stamp a placeholder, read the real + // trace ID off the span, then overwrite. 
+ const span = getTracer().startSpan( + TraceSpan.GenAiAgentExecute, + { attributes: buildAgentSpanAttributes({ ...scope, requestId: scope.requestId ?? '' }) }, + parentContext + ) + const carrierSpan = isValidSpanContext(span.spanContext()) + ? span + : trace.wrapSpanContext(createFallbackSpanContext()) + const spanContext = carrierSpan.spanContext() + const resolvedRequestId = + scope.requestId ?? + (spanContext.traceId && spanContext.traceId.length === 32 ? spanContext.traceId : '') + if (resolvedRequestId) { + span.setAttribute(TraceAttr.RequestId, resolvedRequestId) + span.setAttribute(TraceAttr.SimRequestId, resolvedRequestId) + } + const otelContext = trace.setSpan(parentContext, carrierSpan) + let terminalStatusSet = false + + try { + const result = await context.with(otelContext, () => fn(otelContext)) + span.setStatus({ code: SpanStatusCode.OK }) + terminalStatusSet = true + return result + } catch (error) { + markSpanForError(span, error) + terminalStatusSet = true + throw error + } finally { + if (!terminalStatusSet) { + // Extremely defensive: should be unreachable, but avoids leaking + // an unset span status if some future refactor breaks both arms. 
+ span.setStatus({ code: SpanStatusCode.OK }) + } + span.end() + } +} diff --git a/apps/sim/lib/copilot/request/session/abort.ts b/apps/sim/lib/copilot/request/session/abort.ts index 3502f4a69f8..d55640b9dcd 100644 --- a/apps/sim/lib/copilot/request/session/abort.ts +++ b/apps/sim/lib/copilot/request/session/abort.ts @@ -1,4 +1,8 @@ import { createLogger } from '@sim/logger' +import { AbortBackend } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withCopilotSpan } from '@/lib/copilot/request/otel' import { acquireLock, getRedisClient, releaseLock } from '@/lib/core/config/redis' import { clearAbortMarker, hasAbortMarker, writeAbortMarker } from './buffer' @@ -120,72 +124,155 @@ export async function acquirePendingChatStream( streamId: string, timeoutMs = 5_000 ): Promise { - const redis = getRedisClient() - if (redis) { - const deadline = Date.now() + timeoutMs - for (;;) { - try { - const acquired = await acquireLock( - getChatStreamLockKey(chatId), - streamId, - CHAT_STREAM_LOCK_TTL_SECONDS - ) - if (acquired) { - registerPendingChatStream(chatId, streamId) - return true - } - if (!pendingChatStreams.has(chatId)) { - const ownerStreamId = await redis.get(getChatStreamLockKey(chatId)) - if (ownerStreamId) { - const settled = await waitForPendingChatStream(chatId, 0, ownerStreamId) - if (settled) { - continue + // Span records wall time spent waiting for the per-chat stream lock. + // Typical case: sub-10ms uncontested acquire. Worst case: up to + // `timeoutMs` spent polling while a prior stream finishes. Previously + // this time looked like "unexplained gap before llm.stream". 
+ return withCopilotSpan( + TraceSpan.CopilotChatAcquirePendingStreamLock, + { + [TraceAttr.ChatId]: chatId, + [TraceAttr.StreamId]: streamId, + [TraceAttr.LockTimeoutMs]: timeoutMs, + }, + async (span) => { + const redis = getRedisClient() + span.setAttribute(TraceAttr.LockBackend, redis ? AbortBackend.Redis : AbortBackend.InProcess) + if (redis) { + const deadline = Date.now() + timeoutMs + for (;;) { + try { + const acquired = await acquireLock( + getChatStreamLockKey(chatId), + streamId, + CHAT_STREAM_LOCK_TTL_SECONDS + ) + if (acquired) { + registerPendingChatStream(chatId, streamId) + span.setAttribute(TraceAttr.LockAcquired, true) + return true } + if (!pendingChatStreams.has(chatId)) { + const ownerStreamId = await redis.get(getChatStreamLockKey(chatId)) + if (ownerStreamId) { + const settled = await waitForPendingChatStream(chatId, 0, ownerStreamId) + if (settled) { + continue + } + } + } + } catch (error) { + logger.warn('Failed to acquire chat stream lock', { + chatId, + streamId, + error: error instanceof Error ? error.message : String(error), + }) } - } - } catch (error) { - logger.warn('Failed to acquire chat stream lock', { - chatId, - streamId, - error: error instanceof Error ? 
error.message : String(error), - }) - } - if (Date.now() >= deadline) { - return false + if (Date.now() >= deadline) { + span.setAttribute(TraceAttr.LockAcquired, false) + span.setAttribute(TraceAttr.LockTimedOut, true) + return false + } + await new Promise((resolve) => setTimeout(resolve, 200)) + } } - await new Promise((resolve) => setTimeout(resolve, 200)) - } - } - for (;;) { - const existing = pendingChatStreams.get(chatId) - if (!existing) { - registerPendingChatStream(chatId, streamId) - return true - } + for (;;) { + const existing = pendingChatStreams.get(chatId) + if (!existing) { + registerPendingChatStream(chatId, streamId) + span.setAttribute(TraceAttr.LockAcquired, true) + return true + } - const settled = await Promise.race([ - existing.promise.then(() => true), - new Promise((resolve) => setTimeout(() => resolve(false), timeoutMs)), - ]) - if (!settled) { - return false + const settled = await Promise.race([ + existing.promise.then(() => true), + new Promise((resolve) => setTimeout(() => resolve(false), timeoutMs)), + ]) + if (!settled) { + span.setAttribute(TraceAttr.LockAcquired, false) + span.setAttribute(TraceAttr.LockTimedOut, true) + return false + } + } } - } + ) } /** * Returns `true` if it aborted an in-process controller, * `false` if it only wrote the marker (no local controller found). + * + * Spanned because the two operations inside can stall independently + * — Redis latency on `writeAbortMarker` was previously invisible, and + * the "no local controller" branch (happens when the stream handler + * is on a different Sim box than the one receiving /chat/abort) is + * a subtle but important outcome to distinguish from "aborted a live + * controller" in dashboards. 
*/ export async function abortActiveStream(streamId: string): Promise { - await writeAbortMarker(streamId) - const controller = activeStreams.get(streamId) - if (!controller) return false - controller.abort('user_stop:abortActiveStream') - activeStreams.delete(streamId) - return true + return withCopilotSpan( + TraceSpan.CopilotChatAbortActiveStream, + { [TraceAttr.StreamId]: streamId }, + async (span) => { + await writeAbortMarker(streamId) + span.setAttribute(TraceAttr.CopilotAbortMarkerWritten, true) + const controller = activeStreams.get(streamId) + if (!controller) { + span.setAttribute(TraceAttr.CopilotAbortControllerFired, false) + return false + } + controller.abort(AbortReason.UserStop) + activeStreams.delete(streamId) + span.setAttribute(TraceAttr.CopilotAbortControllerFired, true) + return true + } + ) +} + +/** + * Reason strings passed to `AbortController.abort(reason)` for every + * Sim-originated cancel path. Exported so the lifecycle finalizer can + * look at `signal.reason` and distinguish EXPLICIT stops (user hit the + * Stop button) from client disconnects (tab closed, network dropped) + * without guessing. + * + * Why this matters: when the user clicks Stop, we fire + * `abortController.abort(AbortReason.UserStop)` from + * `abortActiveStream()`. That causes Sim's SSE writer to close, which + * in turn makes the BROWSER's SSE reader see the stream end — which + * fires the browser-side fetch AbortController and propagates back to + * Sim as `publisher.markDisconnected()`. So on an explicit Stop you + * observe BOTH "explicit reason" AND "client disconnected" — the + * discriminator is the reason string, not the client flag. + * + * For any NEW abort path, add its reason here and in the + * `isExplicitStopReason` helper so classification stays correct. + */ +export const AbortReason = { + /** Same-process stop: browser→Sim→abortActiveStream. 
*/ + UserStop: 'user_stop:abortActiveStream', + /** + * Cross-process stop: the Sim node that holds the SSE didn't + * receive the Stop HTTP call, but it polled the Redis abort marker + * that the node that DID receive it wrote, and aborts on the poll. + */ + RedisPoller: 'redis_abort_marker:poller', + /** Internal timeout on the outbound explicit-abort fetch to Go. */ + ExplicitAbortFetchTimeout: 'timeout:go_explicit_abort_fetch', +} as const + +export type AbortReasonValue = (typeof AbortReason)[keyof typeof AbortReason] + +/** + * True iff `reason` indicates the user explicitly triggered the abort + * (as opposed to an implicit client disconnect or server timeout). + * Treated as a small closed vocabulary — any string not in + * `AbortReason` is presumed non-explicit. + */ +export function isExplicitStopReason(reason: unknown): boolean { + return reason === AbortReason.UserStop || reason === AbortReason.RedisPoller } const pollingStreams = new Set() @@ -206,7 +293,7 @@ export function startAbortPoller( try { const shouldAbort = await hasAbortMarker(streamId) if (shouldAbort && !abortController.signal.aborted) { - abortController.abort('redis_abort_marker:poller') + abortController.abort(AbortReason.RedisPoller) await clearAbortMarker(streamId) } } catch (error) { diff --git a/apps/sim/lib/copilot/request/session/contract.test.ts b/apps/sim/lib/copilot/request/session/contract.test.ts index e9ac58707c6..3d50fdd64eb 100644 --- a/apps/sim/lib/copilot/request/session/contract.test.ts +++ b/apps/sim/lib/copilot/request/session/contract.test.ts @@ -27,6 +27,10 @@ describe('stream session contract parser', () => { it('accepts contract text events', () => { const event = { ...BASE_ENVELOPE, + trace: { + ...BASE_ENVELOPE.trace, + goTraceId: 'go-trace-1', + }, type: 'text' as const, payload: { channel: 'assistant' as const, @@ -97,7 +101,11 @@ describe('stream session contract parser', () => { const event = { ...BASE_ENVELOPE, type: 'span' as const, - payload: { kind: 
'subagent' as const, event: 'start' as const, agent: 'file' }, + payload: { + kind: 'subagent' as const, + event: 'start' as const, + agent: 'file', + }, } expect(isContractStreamEventEnvelope(event)).toBe(true) diff --git a/apps/sim/lib/copilot/request/session/contract.ts b/apps/sim/lib/copilot/request/session/contract.ts index ff45dbd9151..7953a11c956 100644 --- a/apps/sim/lib/copilot/request/session/contract.ts +++ b/apps/sim/lib/copilot/request/session/contract.ts @@ -171,7 +171,12 @@ function isStreamRef(value: unknown): value is MothershipStreamV1StreamRef { } function isTrace(value: unknown): value is MothershipStreamV1Trace { - return isRecord(value) && typeof value.requestId === 'string' && isOptionalString(value.spanId) + return ( + isRecord(value) && + typeof value.requestId === 'string' && + isOptionalString(value.goTraceId) && + isOptionalString(value.spanId) + ) } function isStreamScope(value: unknown): value is MothershipStreamV1StreamScope { @@ -317,9 +322,12 @@ function isContractEnvelope(value: unknown): value is MothershipStreamV1EventEnv // Synthetic file-preview envelope validators // --------------------------------------------------------------------------- -function isSyntheticEnvelopeBase( - value: unknown -): value is Omit & { payload?: unknown } { +function isSyntheticEnvelopeBase(value: unknown): value is Omit< + SyntheticFilePreviewEventEnvelope, + 'payload' +> & { + payload?: unknown +} { return ( isRecord(value) && value.v === 1 && diff --git a/apps/sim/lib/copilot/request/session/explicit-abort.ts b/apps/sim/lib/copilot/request/session/explicit-abort.ts index 7aad5465544..b27d6ab6504 100644 --- a/apps/sim/lib/copilot/request/session/explicit-abort.ts +++ b/apps/sim/lib/copilot/request/session/explicit-abort.ts @@ -1,4 +1,8 @@ +import type { Context } from '@opentelemetry/api' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { fetchGo } from 
'@/lib/copilot/request/go/fetch' +import { AbortReason } from '@/lib/copilot/request/session/abort' import { env } from '@/lib/core/config/env' export const DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS = 3000 @@ -8,19 +12,31 @@ export async function requestExplicitStreamAbort(params: { userId: string chatId?: string timeoutMs?: number + otelContext?: Context }): Promise { - const { streamId, userId, chatId, timeoutMs = DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS } = params + const { + streamId, + userId, + chatId, + timeoutMs = DEFAULT_EXPLICIT_ABORT_TIMEOUT_MS, + otelContext, + } = params - const headers: Record = { 'Content-Type': 'application/json' } + const headers: Record = { + 'Content-Type': 'application/json', + } if (env.COPILOT_API_KEY) { headers['x-api-key'] = env.COPILOT_API_KEY } const controller = new AbortController() - const timeout = setTimeout(() => controller.abort('timeout:go_explicit_abort_fetch'), timeoutMs) + const timeout = setTimeout( + () => controller.abort(AbortReason.ExplicitAbortFetchTimeout), + timeoutMs + ) try { - const response = await fetch(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, { + const response = await fetchGo(`${SIM_AGENT_API_URL}/api/streams/explicit-abort`, { method: 'POST', headers, signal: controller.signal, @@ -29,6 +45,13 @@ export async function requestExplicitStreamAbort(params: { userId, ...(chatId ? { chatId } : {}), }), + otelContext, + spanName: 'sim → go /api/streams/explicit-abort', + operation: 'explicit_abort', + attributes: { + [TraceAttr.StreamId]: streamId, + ...(chatId ? 
{ [TraceAttr.ChatId]: chatId } : {}), + }, }) if (!response.ok) { diff --git a/apps/sim/lib/copilot/request/session/index.ts b/apps/sim/lib/copilot/request/session/index.ts index 70466d11b7a..a09a194c788 100644 --- a/apps/sim/lib/copilot/request/session/index.ts +++ b/apps/sim/lib/copilot/request/session/index.ts @@ -1,8 +1,11 @@ export { + AbortReason, + type AbortReasonValue, abortActiveStream, acquirePendingChatStream, cleanupAbortMarker, getPendingChatStreamId, + isExplicitStopReason, registerActiveStream, releasePendingChatStream, startAbortPoller, diff --git a/apps/sim/lib/copilot/request/session/recovery.test.ts b/apps/sim/lib/copilot/request/session/recovery.test.ts new file mode 100644 index 00000000000..a06b42548b0 --- /dev/null +++ b/apps/sim/lib/copilot/request/session/recovery.test.ts @@ -0,0 +1,38 @@ +/** + * @vitest-environment node + */ + +import { describe, expect, it, vi } from 'vitest' + +const { getLatestSeq, getOldestSeq, readEvents } = vi.hoisted(() => ({ + getLatestSeq: vi.fn(), + getOldestSeq: vi.fn(), + readEvents: vi.fn(), +})) + +vi.mock('./buffer', () => ({ + getLatestSeq, + getOldestSeq, + readEvents, +})) + +import { checkForReplayGap } from './recovery' + +describe('checkForReplayGap', () => { + it('uses the latest buffered request id when run metadata is missing it', async () => { + getOldestSeq.mockResolvedValue(10) + getLatestSeq.mockResolvedValue(12) + readEvents.mockResolvedValue([ + { + trace: { requestId: 'req-live-123' }, + }, + ]) + + const result = await checkForReplayGap('stream-1', '1') + + expect(readEvents).toHaveBeenCalledWith('stream-1', '11') + expect(result?.gapDetected).toBe(true) + expect(result?.envelopes[0].trace.requestId).toBe('req-live-123') + expect(result?.envelopes[1].trace.requestId).toBe('req-live-123') + }) +}) diff --git a/apps/sim/lib/copilot/request/session/recovery.ts b/apps/sim/lib/copilot/request/session/recovery.ts index 74612b59211..8bad72847a0 100644 --- 
a/apps/sim/lib/copilot/request/session/recovery.ts +++ b/apps/sim/lib/copilot/request/session/recovery.ts @@ -3,7 +3,11 @@ import { MothershipStreamV1CompletionStatus, MothershipStreamV1EventType, } from '@/lib/copilot/generated/mothership-stream-v1' -import { getLatestSeq, getOldestSeq } from './buffer' +import { CopilotRecoveryOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withCopilotSpan } from '@/lib/copilot/request/otel' +import { getLatestSeq, getOldestSeq, readEvents } from './buffer' import { createEvent } from './event' const logger = createLogger('SessionRecovery') @@ -15,62 +19,105 @@ export interface ReplayGapResult { export async function checkForReplayGap( streamId: string, - afterCursor: string + afterCursor: string, + requestId?: string ): Promise { const requestedAfterSeq = Number(afterCursor || '0') if (requestedAfterSeq <= 0) { + // Fast path: no cursor → nothing to check. Skip the span to avoid + // emitting zero-work spans on every stream connect. return null } - const oldestSeq = await getOldestSeq(streamId) - const latestSeq = await getLatestSeq(streamId) + return withCopilotSpan( + TraceSpan.CopilotRecoveryCheckReplayGap, + { + [TraceAttr.StreamId]: streamId, + [TraceAttr.CopilotRecoveryRequestedAfterSeq]: requestedAfterSeq, + ...(requestId ? { [TraceAttr.RequestId]: requestId } : {}), + }, + async (span) => { + const oldestSeq = await getOldestSeq(streamId) + const latestSeq = await getLatestSeq(streamId) + span.setAttributes({ + [TraceAttr.CopilotRecoveryOldestSeq]: oldestSeq ?? -1, + [TraceAttr.CopilotRecoveryLatestSeq]: latestSeq ?? 
-1, + }) - if ( - latestSeq !== null && - latestSeq > 0 && - oldestSeq !== null && - requestedAfterSeq < oldestSeq - 1 - ) { - logger.warn('Replay gap detected: requested cursor is below oldest available event', { - streamId, - requestedAfterSeq, - oldestAvailableSeq: oldestSeq, - latestSeq, - }) - - const gapEnvelope = createEvent({ - streamId, - cursor: String(latestSeq + 1), - seq: latestSeq + 1, - requestId: '', - type: MothershipStreamV1EventType.error, - payload: { - message: 'Replay history is no longer available. Some events may have been lost.', - code: 'replay_gap', - data: { - oldestAvailableSeq: oldestSeq, + if ( + latestSeq !== null && + latestSeq > 0 && + oldestSeq !== null && + requestedAfterSeq < oldestSeq - 1 + ) { + const resolvedRequestId = await resolveReplayGapRequestId(streamId, latestSeq, requestId) + logger.warn('Replay gap detected: requested cursor is below oldest available event', { + streamId, requestedAfterSeq, - }, - }, - }) + oldestAvailableSeq: oldestSeq, + latestSeq, + }) + span.setAttribute(TraceAttr.CopilotRecoveryOutcome, CopilotRecoveryOutcome.GapDetected) - const terminalEnvelope = createEvent({ - streamId, - cursor: String(latestSeq + 2), - seq: latestSeq + 2, - requestId: '', - type: MothershipStreamV1EventType.complete, - payload: { - status: MothershipStreamV1CompletionStatus.error, - reason: 'replay_gap', - }, - }) + const gapEnvelope = createEvent({ + streamId, + cursor: String(latestSeq + 1), + seq: latestSeq + 1, + requestId: resolvedRequestId, + type: MothershipStreamV1EventType.error, + payload: { + message: 'Replay history is no longer available. 
Some events may have been lost.', + code: 'replay_gap', + data: { + oldestAvailableSeq: oldestSeq, + requestedAfterSeq, + }, + }, + }) - return { - gapDetected: true, - envelopes: [gapEnvelope, terminalEnvelope], + const terminalEnvelope = createEvent({ + streamId, + cursor: String(latestSeq + 2), + seq: latestSeq + 2, + requestId: resolvedRequestId, + type: MothershipStreamV1EventType.complete, + payload: { + status: MothershipStreamV1CompletionStatus.error, + reason: 'replay_gap', + }, + }) + + return { + gapDetected: true, + envelopes: [gapEnvelope, terminalEnvelope], + } + } + + span.setAttribute(TraceAttr.CopilotRecoveryOutcome, CopilotRecoveryOutcome.InRange) + return null } + ) +} + +async function resolveReplayGapRequestId( + streamId: string, + latestSeq: number, + requestId?: string +): Promise { + if (typeof requestId === 'string' && requestId.length > 0) { + return requestId } - return null + try { + const latestEvents = await readEvents(streamId, String(Math.max(latestSeq - 1, 0))) + const latestRequestId = latestEvents[0]?.trace?.requestId + return typeof latestRequestId === 'string' ? latestRequestId : '' + } catch (error) { + logger.warn('Failed to resolve request ID for replay gap', { + streamId, + latestSeq, + error: error instanceof Error ? 
error.message : String(error), + }) + return '' + } } diff --git a/apps/sim/lib/copilot/request/subagent.ts b/apps/sim/lib/copilot/request/subagent.ts index d9403094698..ff1440a3ee2 100644 --- a/apps/sim/lib/copilot/request/subagent.ts +++ b/apps/sim/lib/copilot/request/subagent.ts @@ -1,13 +1,16 @@ import { createLogger } from '@sim/logger' import { generateWorkspaceContext } from '@/lib/copilot/chat/workspace-context' -import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' +import { SIM_AGENT_API_URL, SIM_AGENT_VERSION } from '@/lib/copilot/constants' import { MothershipStreamV1EventType, MothershipStreamV1SpanPayloadKind, } from '@/lib/copilot/generated/mothership-stream-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' import { createStreamingContext } from '@/lib/copilot/request/context/request-context' import { buildToolCallSummaries } from '@/lib/copilot/request/context/result' import { runStreamLoop } from '@/lib/copilot/request/go/stream' +import { withCopilotSpan } from '@/lib/copilot/request/otel' import type { ExecutionContext, OrchestratorOptions, @@ -29,6 +32,7 @@ export interface SubagentOrchestratorOptions extends Omit void | Promise } @@ -50,6 +54,38 @@ export async function orchestrateSubagentStream( agentId: string, requestPayload: Record, options: SubagentOrchestratorOptions +): Promise { + return withCopilotSpan( + TraceSpan.CopilotSubagentExecute, + { + [TraceAttr.SubagentId]: agentId, + [TraceAttr.UserId]: options.userId, + ...(options.simRequestId ? { [TraceAttr.SimRequestId]: options.simRequestId } : {}), + ...(options.workflowId ? { [TraceAttr.WorkflowId]: options.workflowId } : {}), + ...(options.workspaceId ? 
{ [TraceAttr.WorkspaceId]: options.workspaceId } : {}), + }, + async (otelSpan) => { + const result = await orchestrateSubagentStreamInner(agentId, requestPayload, options) + otelSpan.setAttributes({ + [TraceAttr.SubagentOutcomeSuccess]: result.success, + [TraceAttr.SubagentOutcomeToolCallCount]: result.toolCalls.length, + [TraceAttr.SubagentOutcomeContentBytes]: result.content?.length ?? 0, + ...(result.structuredResult?.type + ? { [TraceAttr.SubagentOutcomeStructuredType]: result.structuredResult.type } + : {}), + ...(result.error + ? { [TraceAttr.SubagentOutcomeError]: String(result.error).slice(0, 500) } + : {}), + }) + return result + } + ) +} + +async function orchestrateSubagentStreamInner( + agentId: string, + requestPayload: Record, + options: SubagentOrchestratorOptions ): Promise { const { userId, workflowId, workspaceId, userPermission } = options const chatId = @@ -86,6 +122,7 @@ export async function orchestrateSubagentStream( const msgId = requestPayload?.messageId const context = createStreamingContext({ chatId, + requestId: options.simRequestId, messageId: typeof msgId === 'string' ? msgId : generateId(), }) @@ -99,6 +136,7 @@ export async function orchestrateSubagentStream( headers: { 'Content-Type': 'application/json', ...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}), + 'X-Client-Version': SIM_AGENT_VERSION, }, body: JSON.stringify({ ...requestPayload, @@ -148,7 +186,10 @@ export async function orchestrateSubagentStream( return result } catch (error) { const err = error instanceof Error ? 
error : new Error('Subagent orchestration failed') - logger.error('Subagent orchestration failed', { error: err.message, agentId }) + logger.error('Subagent orchestration failed', { + error: err.message, + agentId, + }) await options.onError?.(err) return { success: false, diff --git a/apps/sim/lib/copilot/request/tools/executor.ts b/apps/sim/lib/copilot/request/tools/executor.ts index bc1cb26cde9..47d08fb7b4e 100644 --- a/apps/sim/lib/copilot/request/tools/executor.ts +++ b/apps/sim/lib/copilot/request/tools/executor.ts @@ -17,7 +17,9 @@ import { MothershipStreamV1ToolPhase, } from '@/lib/copilot/generated/mothership-stream-v1' import { CreateWorkflow } from '@/lib/copilot/generated/tool-catalog-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' import { publishToolConfirmation } from '@/lib/copilot/persistence/tool-confirm' +import { withCopilotToolSpan } from '@/lib/copilot/request/otel' import { markToolResultSeen } from '@/lib/copilot/request/sse-utils' import { getToolCallStateOutput, @@ -53,6 +55,81 @@ function hasOutputValue(result: { output?: unknown } | undefined): result is { o return result !== undefined && Object.hasOwn(result, 'output') } +interface ToolResultSpanSummary { + resultSuccess: boolean + outputBytes: number + outputKind: string + errorMessage?: string + imageCount?: number + imageBytes?: number + attachmentMediaType?: string +} + +function summarizeToolResultForSpan(result: { + success: boolean + output?: unknown + error?: string +}): ToolResultSpanSummary { + const summary: ToolResultSpanSummary = { + resultSuccess: Boolean(result.success), + outputBytes: 0, + outputKind: 'none', + } + if (!result.success && result.error) { + summary.errorMessage = String(result.error).slice(0, 500) + } + if (!hasOutputValue(result)) { + return summary + } + const output = (result as { output: unknown }).output + if (typeof output === 'string') { + summary.outputKind = 'string' + summary.outputBytes = output.length + } else if 
(output && typeof output === 'object') { + summary.outputKind = Array.isArray(output) ? 'array' : 'object' + try { + summary.outputBytes = JSON.stringify(output).length + } catch { + summary.outputBytes = 0 + } + const attachment = extractAttachmentShape(output) + if (attachment) { + summary.imageCount = attachment.imageCount + summary.imageBytes = attachment.imageBytes + if (attachment.mediaType) { + summary.attachmentMediaType = attachment.mediaType + } + } + } else if (output !== undefined && output !== null) { + summary.outputKind = typeof output + summary.outputBytes = String(output).length + } + return summary +} + +function extractAttachmentShape( + output: unknown +): { imageCount: number; imageBytes: number; mediaType?: string } | null { + if (!isRecord(output)) return null + const candidate = (output as Record).attachment + if (!isRecord(candidate)) return null + const source = (candidate as Record).source + if (!isRecord(source)) return null + const type = + typeof (candidate as Record).type === 'string' + ? ((candidate as Record).type as string) + : '' + if (type !== 'image') return null + const mediaType = + typeof source.media_type === 'string' ? (source.media_type as string) : undefined + const data = typeof source.data === 'string' ? (source.data as string) : '' + return { + imageCount: 1, + imageBytes: data.length, + mediaType, + } +} + function buildCompletionSignal(input: { status: AsyncCompletionSignal['status'] message?: string @@ -163,6 +240,44 @@ export async function executeToolAndReport( message: 'Tool call not found', }) + const argsPayload = toolCall.params + ? 
(() => { + try { + return JSON.stringify(toolCall.params) + } catch { + return undefined + } + })() + : undefined + return withCopilotToolSpan( + { + toolName: toolCall.name, + toolCallId: toolCall.id, + runId: context.runId, + chatId: execContext.chatId, + argsBytes: argsPayload?.length, + argsPreview: argsPayload?.slice(0, 200), + }, + async (otelSpan) => { + const completion = await executeToolAndReportInner(toolCall, context, execContext, options) + otelSpan.setAttribute(TraceAttr.ToolOutcome, completion.status) + if (completion.message) { + otelSpan.setAttribute( + TraceAttr.ToolOutcomeMessage, + String(completion.message).slice(0, 500) + ) + } + return completion + } + ) +} + +async function executeToolAndReportInner( + toolCall: ToolCallState, + context: StreamingContext, + execContext: ExecutionContext, + options?: OrchestratorOptions +): Promise { if (toolCall.status === 'executing') { return buildCompletionSignal({ status: MothershipStreamV1AsyncToolRecordStatus.running, @@ -376,6 +491,11 @@ export async function executeToolAndReport( endToolSpan('cancelled', { cancelReason: 'abort_during_post_processing_csv' }) return cancelledCompletion('Request aborted during tool post-processing') } + toolSpan.attributes = { + ...toolSpan.attributes, + ...summarizeToolResultForSpan(result), + } + setTerminalToolCallState(toolCall, { status: result.success ? 
MothershipStreamV1ToolOutcome.success diff --git a/apps/sim/lib/copilot/request/tools/files.ts b/apps/sim/lib/copilot/request/tools/files.ts index 64bb4bf6a1d..80c4371cd5f 100644 --- a/apps/sim/lib/copilot/request/tools/files.ts +++ b/apps/sim/lib/copilot/request/tools/files.ts @@ -1,5 +1,10 @@ import { createLogger } from '@sim/logger' import { FunctionExecute, UserTable } from '@/lib/copilot/generated/tool-catalog-v1' +import { CopilotOutputFileOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withCopilotSpan } from '@/lib/copilot/request/otel' import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/request/types' import { uploadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' @@ -146,55 +151,80 @@ export async function maybeWriteOutputToFile( const explicitFormat = (params?.outputFormat as string | undefined) ?? 
(args?.outputFormat as string | undefined) - try { - const fileName = normalizeOutputWorkspaceFileName(outputPath) - const format = resolveOutputFormat(fileName, explicitFormat) - if (context.abortSignal?.aborted) { - throw new Error('Request aborted before tool mutation could be applied') - } - const content = serializeOutputForFile(result.output, format) - const contentType = FORMAT_TO_CONTENT_TYPE[format] - - const buffer = Buffer.from(content, 'utf-8') - if (context.abortSignal?.aborted) { - throw new Error('Request aborted before tool mutation could be applied') - } - const uploaded = await uploadWorkspaceFile( - context.workspaceId, - context.userId, - buffer, - fileName, - contentType - ) - - logger.info('Tool output written to file', { - toolName, - fileName, - size: buffer.length, - fileId: uploaded.id, - }) - - return { - success: true, - output: { - message: `Output written to files/${fileName} (${buffer.length} bytes)`, - fileId: uploaded.id, - fileName, - size: buffer.length, - downloadUrl: uploaded.url, - }, - resources: [{ type: 'file', id: uploaded.id, title: fileName }], - } - } catch (err) { - const message = err instanceof Error ? err.message : String(err) - logger.warn('Failed to write tool output to file', { - toolName, - outputPath, - error: message, - }) - return { - success: false, - error: `Failed to write output file: ${message}`, + // Only span the actual write path (where we upload to storage). Fast + // no-op returns above don't need a span — they'd just pad the trace + // with empty work. 
+ return withCopilotSpan( + TraceSpan.CopilotToolsWriteOutputFile, + { + [TraceAttr.ToolName]: toolName, + [TraceAttr.WorkspaceId]: context.workspaceId, + }, + async (span) => { + try { + const fileName = normalizeOutputWorkspaceFileName(outputPath) + const format = resolveOutputFormat(fileName, explicitFormat) + span.setAttributes({ + [TraceAttr.CopilotOutputFileName]: fileName, + [TraceAttr.CopilotOutputFileFormat]: format, + }) + if (context.abortSignal?.aborted) { + throw new Error('Request aborted before tool mutation could be applied') + } + const content = serializeOutputForFile(result.output, format) + const contentType = FORMAT_TO_CONTENT_TYPE[format] + + const buffer = Buffer.from(content, 'utf-8') + span.setAttribute(TraceAttr.CopilotOutputFileBytes, buffer.length) + if (context.abortSignal?.aborted) { + throw new Error('Request aborted before tool mutation could be applied') + } + const uploaded = await uploadWorkspaceFile( + context.workspaceId!, + context.userId!, + buffer, + fileName, + contentType + ) + span.setAttributes({ + [TraceAttr.CopilotOutputFileId]: uploaded.id, + [TraceAttr.CopilotOutputFileOutcome]: CopilotOutputFileOutcome.Uploaded, + }) + + logger.info('Tool output written to file', { + toolName, + fileName, + size: buffer.length, + fileId: uploaded.id, + }) + + return { + success: true, + output: { + message: `Output written to files/${fileName} (${buffer.length} bytes)`, + fileId: uploaded.id, + fileName, + size: buffer.length, + downloadUrl: uploaded.url, + }, + resources: [{ type: 'file', id: uploaded.id, title: fileName }], + } + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err) + logger.warn('Failed to write tool output to file', { + toolName, + outputPath, + error: message, + }) + span.setAttribute(TraceAttr.CopilotOutputFileOutcome, CopilotOutputFileOutcome.Failed) + span.addEvent(TraceEvent.CopilotOutputFileError, { + [TraceAttr.ErrorMessage]: message.slice(0, 500), + }) + return { + success: false, + error: `Failed to write output file: ${message}`, + } + } } - } + ) } diff --git a/apps/sim/lib/copilot/request/tools/resources.ts b/apps/sim/lib/copilot/request/tools/resources.ts index b14f0caf79e..15fd19e6552 100644 --- a/apps/sim/lib/copilot/request/tools/resources.ts +++ b/apps/sim/lib/copilot/request/tools/resources.ts @@ -3,6 +3,9 @@ import { MothershipStreamV1EventType, MothershipStreamV1ResourceOp, } from '@/lib/copilot/generated/mothership-stream-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withCopilotSpan } from '@/lib/copilot/request/otel' import type { StreamEvent, ToolCallResult } from '@/lib/copilot/request/types' import { extractDeletedResourcesFromToolResult, @@ -29,63 +32,102 @@ export async function handleResourceSideEffects( onEvent: ((event: StreamEvent) => void | Promise) | undefined, isAborted: () => boolean ): Promise { - let isDeleteOp = false + // Cheap early exit so we don't emit a span for tools that can never + // produce resources (most of them). The span only shows up for tools + // that might actually do resource work. 
+ if ( + !hasDeleteCapability(toolName) && + !isResourceToolName(toolName) && + !(result.resources && result.resources.length > 0) + ) { + return + } - if (hasDeleteCapability(toolName)) { - const deleted = extractDeletedResourcesFromToolResult(toolName, params, result.output) - if (deleted.length > 0) { - isDeleteOp = true - removeChatResources(chatId, deleted).catch((err) => { - logger.warn('Failed to remove chat resources after deletion', { - chatId, - error: err instanceof Error ? err.message : String(err), - }) - }) + return withCopilotSpan( + TraceSpan.CopilotToolsHandleResourceSideEffects, + { + [TraceAttr.ToolName]: toolName, + [TraceAttr.ChatId]: chatId, + }, + async (span) => { + let isDeleteOp = false + let removedCount = 0 + let upsertedCount = 0 + + if (hasDeleteCapability(toolName)) { + const deleted = extractDeletedResourcesFromToolResult(toolName, params, result.output) + if (deleted.length > 0) { + isDeleteOp = true + removedCount = deleted.length + // Detached from the span lifecycle — the span ends before the + // DB call completes. That is intentional; we want the span to + // reflect the synchronous decision + event emission, not the + // best-effort persistence. + removeChatResources(chatId, deleted).catch((err) => { + logger.warn('Failed to remove chat resources after deletion', { + chatId, + error: err instanceof Error ? 
err.message : String(err), + }) + }) - for (const resource of deleted) { - if (isAborted()) break - await onEvent?.({ - type: MothershipStreamV1EventType.resource, - payload: { - op: MothershipStreamV1ResourceOp.remove, - resource: { type: resource.type, id: resource.id, title: resource.title }, - }, - }) + for (const resource of deleted) { + if (isAborted()) break + await onEvent?.({ + type: MothershipStreamV1EventType.resource, + payload: { + op: MothershipStreamV1ResourceOp.remove, + resource: { type: resource.type, id: resource.id, title: resource.title }, + }, + }) + } + } } - } - } - if (!isDeleteOp && !isAborted()) { - const resources = - result.resources && result.resources.length > 0 - ? result.resources - : isResourceToolName(toolName) - ? extractResourcesFromToolResult(toolName, params, result.output) - : [] + if (!isDeleteOp && !isAborted()) { + const resources = + result.resources && result.resources.length > 0 + ? result.resources + : isResourceToolName(toolName) + ? extractResourcesFromToolResult(toolName, params, result.output) + : [] - if (resources.length > 0) { - logger.info('[file-stream-server] Emitting resource upsert events', { - toolName, - chatId, - resources: resources.map((r) => ({ type: r.type, id: r.id, title: r.title })), - }) - persistChatResources(chatId, resources).catch((err) => { - logger.warn('Failed to persist chat resources', { - chatId, - error: err instanceof Error ? err.message : String(err), - }) - }) + if (resources.length > 0) { + upsertedCount = resources.length + logger.info('[file-stream-server] Emitting resource upsert events', { + toolName, + chatId, + resources: resources.map((r) => ({ type: r.type, id: r.id, title: r.title })), + }) + persistChatResources(chatId, resources).catch((err) => { + logger.warn('Failed to persist chat resources', { + chatId, + error: err instanceof Error ? 
err.message : String(err), + }) + }) - for (const resource of resources) { - if (isAborted()) break - await onEvent?.({ - type: MothershipStreamV1EventType.resource, - payload: { - op: MothershipStreamV1ResourceOp.upsert, - resource: { type: resource.type, id: resource.id, title: resource.title }, - }, - }) + for (const resource of resources) { + if (isAborted()) break + await onEvent?.({ + type: MothershipStreamV1EventType.resource, + payload: { + op: MothershipStreamV1ResourceOp.upsert, + resource: { type: resource.type, id: resource.id, title: resource.title }, + }, + }) + } + } } + + span.setAttributes({ + [TraceAttr.CopilotResourcesOp]: isDeleteOp + ? 'delete' + : upsertedCount > 0 + ? 'upsert' + : 'none', + [TraceAttr.CopilotResourcesRemovedCount]: removedCount, + [TraceAttr.CopilotResourcesUpsertedCount]: upsertedCount, + [TraceAttr.CopilotResourcesAborted]: isAborted(), + }) } - } + ) } diff --git a/apps/sim/lib/copilot/request/tools/tables.ts b/apps/sim/lib/copilot/request/tools/tables.ts index 89e0a5c19f0..2949a66a9eb 100644 --- a/apps/sim/lib/copilot/request/tools/tables.ts +++ b/apps/sim/lib/copilot/request/tools/tables.ts @@ -4,6 +4,11 @@ import { createLogger } from '@sim/logger' import { parse as csvParse } from 'csv-parse/sync' import { eq } from 'drizzle-orm' import { FunctionExecute, Read as ReadTool } from '@/lib/copilot/generated/tool-catalog-v1' +import { CopilotTableOutcome } from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { withCopilotSpan } from '@/lib/copilot/request/otel' import type { ExecutionContext, ToolCallResult } from '@/lib/copilot/request/types' import { getTableById } from '@/lib/table/service' @@ -25,105 +30,126 @@ export async function maybeWriteOutputToTable( const outputTable = params?.outputTable as 
string | undefined if (!outputTable) return result - try { - const table = await getTableById(outputTable) - if (!table) { - return { - success: false, - error: `Table "${outputTable}" not found`, - } - } + return withCopilotSpan( + TraceSpan.CopilotToolsWriteOutputTable, + { + [TraceAttr.ToolName]: toolName, + [TraceAttr.CopilotTableId]: outputTable, + [TraceAttr.WorkspaceId]: context.workspaceId, + }, + async (span) => { + try { + const table = await getTableById(outputTable) + if (!table) { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.TableNotFound) + return { + success: false, + error: `Table "${outputTable}" not found`, + } + } - const rawOutput = result.output - let rows: Array> + const rawOutput = result.output + let rows: Array> - if (rawOutput && typeof rawOutput === 'object' && 'result' in rawOutput) { - const inner = (rawOutput as Record).result - if (Array.isArray(inner)) { - rows = inner - } else { - return { - success: false, - error: 'outputTable requires the code to return an array of objects', + if (rawOutput && typeof rawOutput === 'object' && 'result' in rawOutput) { + const inner = (rawOutput as Record).result + if (Array.isArray(inner)) { + rows = inner + } else { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidShape) + return { + success: false, + error: 'outputTable requires the code to return an array of objects', + } + } + } else if (Array.isArray(rawOutput)) { + rows = rawOutput + } else { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidShape) + return { + success: false, + error: 'outputTable requires the code to return an array of objects', + } } - } - } else if (Array.isArray(rawOutput)) { - rows = rawOutput - } else { - return { - success: false, - error: 'outputTable requires the code to return an array of objects', - } - } - if (rows.length > MAX_OUTPUT_TABLE_ROWS) { - return { - success: false, - error: `outputTable row limit exceeded: got 
${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`, - } - } + span.setAttribute(TraceAttr.CopilotTableRowCount, rows.length) - if (rows.length === 0) { - return { - success: false, - error: 'outputTable requires at least one row — code returned an empty array', - } - } + if (rows.length > MAX_OUTPUT_TABLE_ROWS) { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.RowLimitExceeded) + return { + success: false, + error: `outputTable row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`, + } + } - if (context.abortSignal?.aborted) { - throw new Error('Request aborted before tool mutation could be applied') - } - await db.transaction(async (tx) => { - if (context.abortSignal?.aborted) { - throw new Error('Request aborted before tool mutation could be applied') - } - await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable)) + if (rows.length === 0) { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyRows) + return { + success: false, + error: 'outputTable requires at least one row — code returned an empty array', + } + } - const now = new Date() - for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) { if (context.abortSignal?.aborted) { throw new Error('Request aborted before tool mutation could be applied') } - const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE) - const values = chunk.map((rowData, j) => ({ - id: `row_${crypto.randomUUID().replace(/-/g, '')}`, + await db.transaction(async (tx) => { + if (context.abortSignal?.aborted) { + throw new Error('Request aborted before tool mutation could be applied') + } + await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable)) + + const now = new Date() + for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) { + if (context.abortSignal?.aborted) { + throw new Error('Request aborted before tool mutation could be applied') + } + const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE) + const values = chunk.map((rowData, j) => ({ 
+ id: `row_${crypto.randomUUID().replace(/-/g, '')}`, + tableId: outputTable, + workspaceId: context.workspaceId!, + data: rowData, + position: i + j, + createdAt: now, + updatedAt: now, + createdBy: context.userId, + })) + await tx.insert(userTableRows).values(values) + } + }) + + logger.info('Tool output written to table', { + toolName, tableId: outputTable, - workspaceId: context.workspaceId!, - data: rowData, - position: i + j, - createdAt: now, - updatedAt: now, - createdBy: context.userId, - })) - await tx.insert(userTableRows).values(values) + rowCount: rows.length, + }) + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Wrote) + return { + success: true, + output: { + message: `Wrote ${rows.length} rows to table ${outputTable}`, + tableId: outputTable, + rowCount: rows.length, + }, + } + } catch (err) { + logger.warn('Failed to write tool output to table', { + toolName, + outputTable, + error: err instanceof Error ? err.message : String(err), + }) + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Failed) + span.addEvent(TraceEvent.CopilotTableError, { + [TraceAttr.ErrorMessage]: (err instanceof Error ? err.message : String(err)).slice(0, 500), + }) + return { + success: false, + error: `Failed to write to table: ${err instanceof Error ? err.message : String(err)}`, + } } - }) - - logger.info('Tool output written to table', { - toolName, - tableId: outputTable, - rowCount: rows.length, - }) - - return { - success: true, - output: { - message: `Wrote ${rows.length} rows to table ${outputTable}`, - tableId: outputTable, - rowCount: rows.length, - }, - } - } catch (err) { - logger.warn('Failed to write tool output to table', { - toolName, - outputTable, - error: err instanceof Error ? err.message : String(err), - }) - return { - success: false, - error: `Failed to write to table: ${err instanceof Error ? 
err.message : String(err)}`, } - } + ) } export async function maybeWriteReadCsvToTable( @@ -139,110 +165,136 @@ export async function maybeWriteReadCsvToTable( const outputTable = params?.outputTable as string | undefined if (!outputTable) return result - try { - const table = await getTableById(outputTable) - if (!table) { - return { success: false, error: `Table "${outputTable}" not found` } - } + return withCopilotSpan( + TraceSpan.CopilotToolsWriteCsvToTable, + { + [TraceAttr.ToolName]: toolName, + [TraceAttr.CopilotTableId]: outputTable, + [TraceAttr.WorkspaceId]: context.workspaceId, + }, + async (span) => { + try { + const table = await getTableById(outputTable) + if (!table) { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.TableNotFound) + return { success: false, error: `Table "${outputTable}" not found` } + } - const output = result.output as Record - const content = (output.content as string) || '' - if (!content.trim()) { - return { success: false, error: 'File has no content to import into table' } - } + const output = result.output as Record + const content = (output.content as string) || '' + if (!content.trim()) { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyContent) + return { success: false, error: 'File has no content to import into table' } + } - const filePath = (params?.path as string) || '' - const ext = filePath.split('.').pop()?.toLowerCase() + const filePath = (params?.path as string) || '' + const ext = filePath.split('.').pop()?.toLowerCase() + span.setAttributes({ + [TraceAttr.CopilotTableSourcePath]: filePath, + [TraceAttr.CopilotTableSourceFormat]: ext === 'json' ? 
'json' : 'csv', + [TraceAttr.CopilotTableSourceContentBytes]: content.length, + }) - let rows: Record[] + let rows: Record[] - if (ext === 'json') { - const parsed = JSON.parse(content) - if (!Array.isArray(parsed)) { - return { - success: false, - error: 'JSON file must contain an array of objects for table import', + if (ext === 'json') { + const parsed = JSON.parse(content) + if (!Array.isArray(parsed)) { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.InvalidJsonShape) + return { + success: false, + error: 'JSON file must contain an array of objects for table import', + } + } + rows = parsed + } else { + rows = csvParse(content, { + columns: true, + skip_empty_lines: true, + trim: true, + relax_column_count: true, + relax_quotes: true, + skip_records_with_error: true, + cast: false, + }) as Record[] } - } - rows = parsed - } else { - rows = csvParse(content, { - columns: true, - skip_empty_lines: true, - trim: true, - relax_column_count: true, - relax_quotes: true, - skip_records_with_error: true, - cast: false, - }) as Record[] - } - if (rows.length === 0) { - return { success: false, error: 'File has no data rows to import' } - } + span.setAttribute(TraceAttr.CopilotTableRowCount, rows.length) - if (rows.length > MAX_OUTPUT_TABLE_ROWS) { - return { - success: false, - error: `Row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`, - } - } + if (rows.length === 0) { + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.EmptyRows) + return { success: false, error: 'File has no data rows to import' } + } - if (context.abortSignal?.aborted) { - throw new Error('Request aborted before tool mutation could be applied') - } - await db.transaction(async (tx) => { - if (context.abortSignal?.aborted) { - throw new Error('Request aborted before tool mutation could be applied') - } - await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable)) + if (rows.length > MAX_OUTPUT_TABLE_ROWS) { + 
span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.RowLimitExceeded) + return { + success: false, + error: `Row limit exceeded: got ${rows.length}, max is ${MAX_OUTPUT_TABLE_ROWS}`, + } + } - const now = new Date() - for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) { if (context.abortSignal?.aborted) { throw new Error('Request aborted before tool mutation could be applied') } - const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE) - const values = chunk.map((rowData, j) => ({ - id: `row_${crypto.randomUUID().replace(/-/g, '')}`, + await db.transaction(async (tx) => { + if (context.abortSignal?.aborted) { + throw new Error('Request aborted before tool mutation could be applied') + } + await tx.delete(userTableRows).where(eq(userTableRows.tableId, outputTable)) + + const now = new Date() + for (let i = 0; i < rows.length; i += BATCH_CHUNK_SIZE) { + if (context.abortSignal?.aborted) { + throw new Error('Request aborted before tool mutation could be applied') + } + const chunk = rows.slice(i, i + BATCH_CHUNK_SIZE) + const values = chunk.map((rowData, j) => ({ + id: `row_${crypto.randomUUID().replace(/-/g, '')}`, + tableId: outputTable, + workspaceId: context.workspaceId!, + data: rowData, + position: i + j, + createdAt: now, + updatedAt: now, + createdBy: context.userId, + })) + await tx.insert(userTableRows).values(values) + } + }) + + logger.info('Read output written to table', { + toolName, tableId: outputTable, - workspaceId: context.workspaceId!, - data: rowData, - position: i + j, - createdAt: now, - updatedAt: now, - createdBy: context.userId, - })) - await tx.insert(userTableRows).values(values) + tableName: table.name, + rowCount: rows.length, + filePath, + }) + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Imported) + return { + success: true, + output: { + message: `Imported ${rows.length} rows from "${filePath}" into table "${table.name}"`, + tableId: outputTable, + tableName: table.name, + rowCount: rows.length, 
+ }, + } + } catch (err) { + logger.warn('Failed to write read output to table', { + toolName, + outputTable, + error: err instanceof Error ? err.message : String(err), + }) + span.setAttribute(TraceAttr.CopilotTableOutcome, CopilotTableOutcome.Failed) + span.addEvent(TraceEvent.CopilotTableError, { + [TraceAttr.ErrorMessage]: (err instanceof Error ? err.message : String(err)).slice(0, 500), + }) + return { + success: false, + error: `Failed to import into table: ${err instanceof Error ? err.message : String(err)}`, + } } - }) - - logger.info('Read output written to table', { - toolName, - tableId: outputTable, - tableName: table.name, - rowCount: rows.length, - filePath, - }) - - return { - success: true, - output: { - message: `Imported ${rows.length} rows from "${filePath}" into table "${table.name}"`, - tableId: outputTable, - tableName: table.name, - rowCount: rows.length, - }, - } - } catch (err) { - logger.warn('Failed to write read output to table', { - toolName, - outputTable, - error: err instanceof Error ? err.message : String(err), - }) - return { - success: false, - error: `Failed to import into table: ${err instanceof Error ? 
err.message : String(err)}`, } - } + ) } diff --git a/apps/sim/lib/copilot/request/trace.ts b/apps/sim/lib/copilot/request/trace.ts index 8f74f743762..cb399959d7d 100644 --- a/apps/sim/lib/copilot/request/trace.ts +++ b/apps/sim/lib/copilot/request/trace.ts @@ -1,3 +1,4 @@ +import type { Context } from '@opentelemetry/api' import { createLogger } from '@sim/logger' import { SIM_AGENT_API_URL } from '@/lib/copilot/constants' import { @@ -9,6 +10,7 @@ import { RequestTraceV1SpanStatus, type RequestTraceV1UsageSummary, } from '@/lib/copilot/generated/request-trace-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' import { env } from '@/lib/core/config/env' const logger = createLogger('RequestTrace') @@ -71,6 +73,13 @@ export class TraceCollector { chatId?: string runId?: string executionId?: string + // Original user prompt, surfaced on the `request_traces.message` + // column at row-insert time so it's queryable from the DB without + // going through Tempo. Sim already has this at chat-POST time; it's + // threaded through here to the trace report so the row is complete + // the moment it's first written instead of waiting on the late + // analytics UPDATE. + userMessage?: string usage?: { prompt: number; completion: number } cost?: { input: number; output: number; total: number } }): RequestTraceV1SimReport { @@ -96,6 +105,7 @@ export class TraceCollector { chatId: params.chatId, runId: params.runId, executionId: params.executionId, + ...(params.userMessage ? 
{ userMessage: params.userMessage } : {}), startMs: this.startMs, endMs, durationMs: endMs - this.startMs, @@ -107,14 +117,27 @@ export class TraceCollector { } } -export async function reportTrace(trace: RequestTraceV1SimReport): Promise { - const response = await fetch(`${SIM_AGENT_API_URL}/api/traces`, { +export async function reportTrace( + trace: RequestTraceV1SimReport, + otelContext?: Context +): Promise { + const { fetchGo } = await import('@/lib/copilot/request/go/fetch') + const body = JSON.stringify(trace) + const response = await fetchGo(`${SIM_AGENT_API_URL}/api/traces`, { method: 'POST', headers: { 'Content-Type': 'application/json', ...(env.COPILOT_API_KEY ? { 'x-api-key': env.COPILOT_API_KEY } : {}), }, - body: JSON.stringify(trace), + body, + otelContext, + spanName: 'sim → go /api/traces', + operation: 'report_trace', + attributes: { + [TraceAttr.RequestId]: trace.simRequestId ?? '', + [TraceAttr.HttpRequestContentLength]: body.length, + [TraceAttr.CopilotTraceSpanCount]: trace.spans?.length ?? 
0, + }, }) if (!response.ok) { diff --git a/apps/sim/lib/copilot/request/types.ts b/apps/sim/lib/copilot/request/types.ts index 87416c5e4f4..fd296cd52ca 100644 --- a/apps/sim/lib/copilot/request/types.ts +++ b/apps/sim/lib/copilot/request/types.ts @@ -1,5 +1,6 @@ import type { AsyncCompletionSignal } from '@/lib/copilot/async-runs/lifecycle' import { MothershipStreamV1ToolOutcome } from '@/lib/copilot/generated/mothership-stream-v1' +import type { RequestTraceV1Span } from '@/lib/copilot/generated/request-trace-v1' import type { StreamEvent } from '@/lib/copilot/request/session' import type { TraceCollector } from '@/lib/copilot/request/trace' import type { ToolExecutionContext, ToolExecutionResult } from '@/lib/copilot/tool-executor/types' @@ -99,6 +100,7 @@ export interface StreamingContext { edit?: Record } | null trace: TraceCollector + subAgentTraceSpans?: Map } export interface FileAttachment { @@ -138,6 +140,19 @@ export interface OrchestratorOptions { export interface OrchestratorResult { success: boolean + /** + * True iff the non-success outcome was a user-initiated cancel + * (abort signal fired or client disconnected). Lets callers treat + * cancels differently from actual errors — notably, `buildOnComplete` + * must NOT finalize the chat row on cancel, because the browser's + * `/api/copilot/chat/stop` POST owns writing the partial assistant + * content and clearing `conversationId` in one UPDATE. Finalizing + * here would race and clear `conversationId` first, making the stop + * UPDATE match zero rows and the partial content vanish on refetch. + * + * Always false when `success=true`. 
+ */ + cancelled?: boolean content: string contentBlocks: ContentBlock[] toolCalls: ToolCallSummary[] diff --git a/apps/sim/lib/copilot/tools/client/run-tool-execution.ts b/apps/sim/lib/copilot/tools/client/run-tool-execution.ts index 860dc7f0184..ae1a36b5f1e 100644 --- a/apps/sim/lib/copilot/tools/client/run-tool-execution.ts +++ b/apps/sim/lib/copilot/tools/client/run-tool-execution.ts @@ -7,6 +7,7 @@ import { RunFromBlock, RunWorkflowUntilBlock, } from '@/lib/copilot/generated/tool-catalog-v1' +import { traceparentHeader } from '@/lib/copilot/tools/client/trace-context' import { generateId } from '@/lib/core/utils/uuid' import { executeWorkflowWithFullLogging } from '@/app/workspace/[workspaceId]/w/[workflowId]/utils/workflow-execution-utils' import { useExecutionStore } from '@/stores/execution/store' @@ -465,7 +466,7 @@ async function reportCompletion( }) const res = await fetch(COPILOT_CONFIRM_API_PATH, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { 'Content-Type': 'application/json', ...traceparentHeader() }, body, }) const LARGE_PAYLOAD_THRESHOLD = 10 * 1024 * 1024 @@ -479,7 +480,7 @@ async function reportCompletion( }) const retryRes = await fetch(COPILOT_CONFIRM_API_PATH, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { 'Content-Type': 'application/json', ...traceparentHeader() }, body: JSON.stringify({ toolCallId, status, diff --git a/apps/sim/lib/copilot/tools/client/trace-context.ts b/apps/sim/lib/copilot/tools/client/trace-context.ts new file mode 100644 index 00000000000..30c6eecdba6 --- /dev/null +++ b/apps/sim/lib/copilot/tools/client/trace-context.ts @@ -0,0 +1,17 @@ +// Browser-side W3C traceparent holder for the active copilot chat. +// Module-level singleton because client tool callbacks fire from deep +// inside runtime code that can't thread a React ref. The browser only +// has one active chat at a time (gated by the stop-barrier), so a +// singleton is safe. 
+ +let currentTraceparent: string | undefined + +export function setCurrentChatTraceparent(value: string | undefined): void { + currentTraceparent = value +} + +// `fetch` header spread: `headers: { ...traceparentHeader(), ... }`. +export function traceparentHeader(): Record { + const tp = currentTraceparent + return tp ? { traceparent: tp } : {} +} diff --git a/apps/sim/lib/copilot/tools/handlers/vfs.test.ts b/apps/sim/lib/copilot/tools/handlers/vfs.test.ts index 7ec15c7d087..8cc02e65751 100644 --- a/apps/sim/lib/copilot/tools/handlers/vfs.test.ts +++ b/apps/sim/lib/copilot/tools/handlers/vfs.test.ts @@ -56,13 +56,14 @@ describe('vfs handlers oversize policy', () => { expect(result.error).toContain('context window') }) - it('fails oversized read results with grep guidance', async () => { + it('fails oversized read results from VFS with grep guidance', async () => { const vfs = makeVfs() + vfs.readFileContent.mockResolvedValue(null) vfs.read.mockReturnValue({ content: OVERSIZED_INLINE_CONTENT, totalLines: 1 }) getOrMaterializeVFS.mockResolvedValue(vfs) const result = await executeVfsRead( - { path: 'files/big.txt' }, + { path: 'workflows/My Workflow/state.json' }, { userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' } ) @@ -72,9 +73,8 @@ describe('vfs handlers oversize policy', () => { expect(result.error).toContain('context window') }) - it('fails file-backed oversized read placeholders with grep guidance', async () => { + it('fails file-backed oversized read placeholders with original message', async () => { const vfs = makeVfs() - vfs.read.mockReturnValue(null) vfs.readFileContent.mockResolvedValue({ content: '[File too large to display inline: big.txt (6000000 bytes, limit 5242880)]', totalLines: 1, @@ -87,8 +87,46 @@ describe('vfs handlers oversize policy', () => { ) expect(result.success).toBe(false) - expect(result.error).toContain('Use grep') - expect(result.error).toContain('offset/limit') - expect(result.error).toContain('context window') + 
expect(result.error).toContain('File too large to display inline') + expect(result.error).toContain('big.txt') + }) + + it('passes through image reads with attachment even when oversized', async () => { + const vfs = makeVfs() + const largeBase64 = 'A'.repeat(TOOL_RESULT_MAX_INLINE_CHARS + 1) + vfs.readFileContent.mockResolvedValue({ + content: 'Image: chess.png (500.0KB, image/png)', + totalLines: 1, + attachment: { + type: 'image', + source: { type: 'base64', media_type: 'image/png', data: largeBase64 }, + }, + }) + getOrMaterializeVFS.mockResolvedValue(vfs) + + const result = await executeVfsRead( + { path: 'files/chess.png' }, + { userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' } + ) + + expect(result.success).toBe(true) + expect((result.output as { attachment?: { type: string } })?.attachment?.type).toBe('image') + }) + + it('fails oversized image placeholder when image exceeds size limit', async () => { + const vfs = makeVfs() + vfs.readFileContent.mockResolvedValue({ + content: '[Image too large: huge.png (10.0MB, limit 5MB)]', + totalLines: 1, + }) + getOrMaterializeVFS.mockResolvedValue(vfs) + + const result = await executeVfsRead( + { path: 'files/huge.png' }, + { userId: 'user-1', workflowId: 'wf-1', workspaceId: 'ws-1' } + ) + + expect(result.success).toBe(false) + expect(result.error).toContain('too large') }) }) diff --git a/apps/sim/lib/copilot/tools/handlers/vfs.ts b/apps/sim/lib/copilot/tools/handlers/vfs.ts index 67430f61fd2..a3a1245edba 100644 --- a/apps/sim/lib/copilot/tools/handlers/vfs.ts +++ b/apps/sim/lib/copilot/tools/handlers/vfs.ts @@ -160,21 +160,30 @@ export async function executeVfsRead( const filename = path.slice('uploads/'.length) const uploadResult = await readChatUpload(filename, context.chatId) if (uploadResult) { + const isImage = hasImageAttachment(uploadResult) if ( - !hasImageAttachment(uploadResult) && + !isImage && (isOversizedReadPlaceholder(uploadResult.content) || serializedResultSize(uploadResult) > 
TOOL_RESULT_MAX_INLINE_CHARS) ) { + logger.warn('Upload read result too large', { + path, + hasAttachment: isImage, + contentLength: uploadResult.content.length, + serializedSize: serializedResultSize(uploadResult), + }) return { success: false, - error: - 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.', + error: isOversizedReadPlaceholder(uploadResult.content) + ? uploadResult.content + : 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. Avoid catch-all greps or full-file reads because they waste context window.', } } const windowedUpload = applyWindow(uploadResult) logger.debug('vfs_read resolved chat upload', { path, totalLines: uploadResult.totalLines, + hasAttachment: isImage, offset, limit, }) @@ -187,34 +196,47 @@ export async function executeVfsRead( } const vfs = await getOrMaterializeVFS(workspaceId, context.userId) - const result = vfs.read(path, offset, limit) - if (!result) { - const fileContent = await vfs.readFileContent(path) - if (fileContent) { - if ( - !hasImageAttachment(fileContent) && - (isOversizedReadPlaceholder(fileContent.content) || - serializedResultSize(fileContent) > TOOL_RESULT_MAX_INLINE_CHARS) - ) { - return { - success: false, - error: - 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. 
Avoid catch-all greps or full-file reads because they waste context window.', - } - } - const windowedFileContent = applyWindow(fileContent) - logger.debug('vfs_read resolved workspace file', { + + // For workspace file paths (files/ or recently-deleted/files/), try readFileContent + // first so images, PDFs, and documents get proper attachment/parsing handling rather + // than being served as raw VFS metadata text. + const fileContent = await vfs.readFileContent(path) + if (fileContent) { + const isImage = hasImageAttachment(fileContent) + if ( + !isImage && + (isOversizedReadPlaceholder(fileContent.content) || + serializedResultSize(fileContent) > TOOL_RESULT_MAX_INLINE_CHARS) + ) { + logger.warn('File read result too large', { path, - totalLines: fileContent.totalLines, - offset, - limit, + hasAttachment: isImage, + contentLength: fileContent.content.length, + serializedSize: serializedResultSize(fileContent), }) return { - success: true, - output: windowedFileContent, + success: false, + error: isOversizedReadPlaceholder(fileContent.content) + ? fileContent.content + : 'Read result too large to return inline. Use grep with a more specific pattern or narrower path to locate the relevant section, then retry read with offset/limit. 
Avoid catch-all greps or full-file reads because they waste context window.', } } + const windowedFileContent = applyWindow(fileContent) + logger.debug('vfs_read resolved workspace file', { + path, + totalLines: fileContent.totalLines, + hasAttachment: isImage, + offset, + limit, + }) + return { + success: true, + output: windowedFileContent, + } + } + const result = vfs.read(path, offset, limit) + if (!result) { const suggestions = vfs.suggestSimilar(path) logger.warn('vfs_read file not found', { path, suggestions }) const hint = diff --git a/apps/sim/lib/copilot/vfs/file-reader.test.ts b/apps/sim/lib/copilot/vfs/file-reader.test.ts new file mode 100644 index 00000000000..1e202d77d5f --- /dev/null +++ b/apps/sim/lib/copilot/vfs/file-reader.test.ts @@ -0,0 +1,94 @@ +/** + * @vitest-environment node + */ + +import { randomFillSync } from 'node:crypto' +import { loggerMock } from '@sim/testing' +import { describe, expect, it, vi } from 'vitest' + +const { downloadWorkspaceFile } = vi.hoisted(() => ({ + downloadWorkspaceFile: vi.fn(), +})) + +vi.mock('@sim/logger', () => loggerMock) +vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({ + downloadWorkspaceFile, +})) + +import { readFileRecord } from '@/lib/copilot/vfs/file-reader' + +const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024 + +async function makeNoisePng(width: number, height: number): Promise { + const sharp = (await import('sharp')).default + const raw = Buffer.alloc(width * height * 3) + randomFillSync(raw) + return sharp(raw, { raw: { width, height, channels: 3 } }) + .png() + .toBuffer() +} + +describe('readFileRecord', () => { + it('returns small images as attachments without resize note', async () => { + const sharp = (await import('sharp')).default + const smallPng = await sharp({ + create: { + width: 200, + height: 200, + channels: 3, + background: { r: 255, g: 0, b: 0 }, + }, + }) + .png() + .toBuffer() + + downloadWorkspaceFile.mockResolvedValue(smallPng) + + const result = await 
readFileRecord({ + id: 'wf_small', + workspaceId: 'ws_1', + name: 'small.png', + key: 'uploads/small.png', + path: '/api/files/serve/uploads%2Fsmall.png?context=mothership', + size: smallPng.length, + type: 'image/png', + uploadedBy: 'user_1', + uploadedAt: new Date(), + deletedAt: null, + storageContext: 'mothership', + }) + + expect(result?.attachment?.type).toBe('image') + expect(result?.attachment?.source.media_type).toBe('image/png') + expect(result?.content).not.toContain('resized for vision') + expect(Buffer.from(result?.attachment?.source.data ?? '', 'base64')).toEqual(smallPng) + }) + + it('downscales oversized images into attachments that fit the read limit', async () => { + const largePng = await makeNoisePng(1800, 1800) + expect(largePng.length).toBeGreaterThan(MAX_IMAGE_READ_BYTES) + + downloadWorkspaceFile.mockResolvedValue(largePng) + + const result = await readFileRecord({ + id: 'wf_large', + workspaceId: 'ws_1', + name: 'chesspng.png', + key: 'uploads/chesspng.png', + path: '/api/files/serve/uploads%2Fchesspng.png?context=mothership', + size: largePng.length, + type: 'image/png', + uploadedBy: 'user_1', + uploadedAt: new Date(), + deletedAt: null, + storageContext: 'mothership', + }) + + expect(result?.attachment?.type).toBe('image') + expect(result?.content).toContain('resized for vision') + + const decoded = Buffer.from(result?.attachment?.source.data ?? 
'', 'base64') + expect(decoded.length).toBeLessThanOrEqual(MAX_IMAGE_READ_BYTES) + expect(result?.attachment?.source.media_type).toMatch(/^image\/(jpeg|webp|png)$/) + }) +}) diff --git a/apps/sim/lib/copilot/vfs/file-reader.ts b/apps/sim/lib/copilot/vfs/file-reader.ts index 00f2e2dc55e..2e6bc59d459 100644 --- a/apps/sim/lib/copilot/vfs/file-reader.ts +++ b/apps/sim/lib/copilot/vfs/file-reader.ts @@ -1,12 +1,38 @@ +import { type Span, trace } from '@opentelemetry/api' import { createLogger } from '@sim/logger' +import { + CopilotVfsOutcome, + CopilotVfsReadOutcome, + CopilotVfsReadPath, +} from '@/lib/copilot/generated/trace-attribute-values-v1' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' +import { TraceEvent } from '@/lib/copilot/generated/trace-events-v1' +import { TraceSpan } from '@/lib/copilot/generated/trace-spans-v1' +import { markSpanForError } from '@/lib/copilot/request/otel' import type { WorkspaceFileRecord } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { downloadWorkspaceFile } from '@/lib/uploads/contexts/workspace/workspace-file-manager' import { isImageFileType } from '@/lib/uploads/utils/file-utils' +// Lazy tracer (same pattern as lib/copilot/request/otel.ts). +function getVfsTracer() { + return trace.getTracer('sim-copilot-vfs', '1.0.0') +} + +function recordSpanError(span: Span, err: unknown) { + markSpanForError(span, err) +} + const logger = createLogger('FileReader') const MAX_TEXT_READ_BYTES = 5 * 1024 * 1024 // 5 MB const MAX_IMAGE_READ_BYTES = 5 * 1024 * 1024 // 5 MB +// Parseable-document byte cap. Large office/PDF files can still +// produce huge extracted text; reject up front to avoid wasting a +// download + parse only to blow past the tool-result budget. 
+const MAX_PARSEABLE_READ_BYTES = 5 * 1024 * 1024 // 5 MB +const MAX_IMAGE_DIMENSION = 1568 +const IMAGE_RESIZE_DIMENSIONS = [1568, 1280, 1024, 768] +const IMAGE_QUALITY_STEPS = [85, 70, 55, 40] const TEXT_TYPES = new Set([ 'text/plain', @@ -41,6 +67,194 @@ function detectImageMime(buf: Buffer, claimed: string): string { return claimed } +interface PreparedVisionImage { + buffer: Buffer + mediaType: string + resized: boolean +} + +/** + * Prepare an image for vision models: detect media type, optionally + * resize/compress with sharp, and return the prepared buffer. + * + * Wrapped in a `copilot.vfs.prepare_image` span so the external trace + * shows exactly when an image read blocked the request on CPU-heavy + * encode attempts. Attributes record input dimensions, whether a resize + * was needed, how many encode attempts it took, and the final + * dimension/quality chosen. + */ +async function prepareImageForVision( + buffer: Buffer, + claimedType: string +): Promise { + return getVfsTracer().startActiveSpan( + TraceSpan.CopilotVfsPrepareImage, + { + attributes: { + [TraceAttr.CopilotVfsInputBytes]: buffer.length, + [TraceAttr.CopilotVfsInputMediaTypeClaimed]: claimedType, + }, + }, + async (span) => { + try { + const mediaType = detectImageMime(buffer, claimedType) + span.setAttribute(TraceAttr.CopilotVfsInputMediaTypeDetected, mediaType) + + let sharpModule: typeof import('sharp').default + try { + sharpModule = (await import('sharp')).default + } catch (err) { + logger.warn('Failed to load sharp for image preparation', { + mediaType, + error: err instanceof Error ? err.message : String(err), + }) + span.setAttribute(TraceAttr.CopilotVfsSharpLoadFailed, true) + const fitsWithoutSharp = buffer.length <= MAX_IMAGE_READ_BYTES + span.setAttribute( + TraceAttr.CopilotVfsOutcome, + fitsWithoutSharp ? 'passthrough_no_sharp' : 'rejected_no_sharp' + ) + return fitsWithoutSharp ? 
{ buffer, mediaType, resized: false } : null + } + + let metadata: Awaited['metadata']>> + try { + metadata = await sharpModule(buffer, { limitInputPixels: false }).metadata() + } catch (err) { + logger.warn('Failed to read image metadata for VFS read', { + mediaType, + error: err instanceof Error ? err.message : String(err), + }) + span.setAttribute(TraceAttr.CopilotVfsMetadataFailed, true) + const fitsWithoutSharp = buffer.length <= MAX_IMAGE_READ_BYTES + span.setAttribute( + TraceAttr.CopilotVfsOutcome, + fitsWithoutSharp ? 'passthrough_no_metadata' : 'rejected_no_metadata' + ) + return fitsWithoutSharp ? { buffer, mediaType, resized: false } : null + } + + const width = metadata.width ?? 0 + const height = metadata.height ?? 0 + span.setAttributes({ + [TraceAttr.CopilotVfsInputWidth]: width, + [TraceAttr.CopilotVfsInputHeight]: height, + }) + + const needsResize = + buffer.length > MAX_IMAGE_READ_BYTES || + width > MAX_IMAGE_DIMENSION || + height > MAX_IMAGE_DIMENSION + if (!needsResize) { + span.setAttributes({ + [TraceAttr.CopilotVfsResized]: false, + [TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.PassthroughFitsBudget, + [TraceAttr.CopilotVfsOutputBytes]: buffer.length, + [TraceAttr.CopilotVfsOutputMediaType]: mediaType, + }) + return { buffer, mediaType, resized: false } + } + + const hasAlpha = Boolean( + metadata.hasAlpha || + mediaType === 'image/png' || + mediaType === 'image/webp' || + mediaType === 'image/gif' + ) + span.setAttribute(TraceAttr.CopilotVfsHasAlpha, hasAlpha) + + let attempts = 0 + for (const dimension of IMAGE_RESIZE_DIMENSIONS) { + for (const quality of IMAGE_QUALITY_STEPS) { + attempts += 1 + try { + const pipeline = sharpModule(buffer, { limitInputPixels: false }).rotate().resize({ + width: dimension, + height: dimension, + fit: 'inside', + withoutEnlargement: true, + }) + + const transformed = hasAlpha + ? 
{ + buffer: await pipeline + .webp({ quality, alphaQuality: quality, effort: 4 }) + .toBuffer(), + mediaType: 'image/webp', + } + : { + buffer: await pipeline + .jpeg({ quality, mozjpeg: true, chromaSubsampling: '4:4:4' }) + .toBuffer(), + mediaType: 'image/jpeg', + } + + span.addEvent(TraceEvent.CopilotVfsResizeAttempt, { + [TraceAttr.CopilotVfsResizeDimension]: dimension, + [TraceAttr.CopilotVfsResizeQuality]: quality, + [TraceAttr.CopilotVfsResizeOutputBytes]: transformed.buffer.length, + [TraceAttr.CopilotVfsResizeFitsBudget]: transformed.buffer.length <= MAX_IMAGE_READ_BYTES, + }) + + if (transformed.buffer.length <= MAX_IMAGE_READ_BYTES) { + logger.info('Resized image for VFS read', { + originalBytes: buffer.length, + outputBytes: transformed.buffer.length, + originalWidth: width || undefined, + originalHeight: height || undefined, + maxDimension: dimension, + quality, + originalMediaType: mediaType, + outputMediaType: transformed.mediaType, + }) + span.setAttributes({ + [TraceAttr.CopilotVfsResized]: true, + [TraceAttr.CopilotVfsResizeAttempts]: attempts, + [TraceAttr.CopilotVfsResizeChosenDimension]: dimension, + [TraceAttr.CopilotVfsResizeChosenQuality]: quality, + [TraceAttr.CopilotVfsOutputBytes]: transformed.buffer.length, + [TraceAttr.CopilotVfsOutputMediaType]: transformed.mediaType, + [TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.Resized, + }) + return { + buffer: transformed.buffer, + mediaType: transformed.mediaType, + resized: true, + } + } + } catch (err) { + logger.warn('Failed image resize attempt for VFS read', { + mediaType, + dimension, + quality, + error: err instanceof Error ? err.message : String(err), + }) + span.addEvent(TraceEvent.CopilotVfsResizeAttemptFailed, { + [TraceAttr.CopilotVfsResizeDimension]: dimension, + [TraceAttr.CopilotVfsResizeQuality]: quality, + [TraceAttr.ErrorMessage]: + err instanceof Error ? 
err.message : String(err).slice(0, 500), + }) + } + } + } + + span.setAttributes({ + [TraceAttr.CopilotVfsResized]: false, + [TraceAttr.CopilotVfsResizeAttempts]: attempts, + [TraceAttr.CopilotVfsOutcome]: CopilotVfsOutcome.RejectedTooLargeAfterResize, + }) + return null + } catch (err) { + recordSpanError(span, err) + throw err + } finally { + span.end() + } + } + ) +} + export interface FileReadResult { content: string totalLines: number @@ -58,75 +272,141 @@ export interface FileReadResult { * Read and return the content of a workspace file record. * Handles images (base64 attachment), parseable documents (PDF, DOCX, etc.), * binary files, and plain text with size guards. + * + * Wrapped in `copilot.vfs.read_file` so the parent mothership trace shows + * per-file read latency, the path taken (image / text / parseable / + * binary), and any size rejection. The `prepareImageForVision` span + * nests underneath for the image-resize path. */ export async function readFileRecord(record: WorkspaceFileRecord): Promise { - try { - if (isImageFileType(record.type)) { - if (record.size > MAX_IMAGE_READ_BYTES) { - return { - content: `[Image too large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB)]`, - totalLines: 1, + return getVfsTracer().startActiveSpan( + TraceSpan.CopilotVfsReadFile, + { + attributes: { + [TraceAttr.CopilotVfsFileName]: record.name, + [TraceAttr.CopilotVfsFileMediaType]: record.type, + [TraceAttr.CopilotVfsFileSizeBytes]: record.size, + [TraceAttr.CopilotVfsFileExtension]: getExtension(record.name), + }, + }, + async (span) => { + try { + if (isImageFileType(record.type)) { + span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.Image) + const originalBuffer = await downloadWorkspaceFile(record) + const prepared = await prepareImageForVision(originalBuffer, record.type) + if (!prepared) { + span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ImageTooLarge) + return { + content: `[Image too 
large: ${record.name} (${(record.size / 1024 / 1024).toFixed(1)}MB, limit 5MB after resize/compression)]`, + totalLines: 1, + } + } + const sizeKb = (prepared.buffer.length / 1024).toFixed(1) + const resizeNote = prepared.resized ? ', resized for vision' : '' + span.setAttributes({ + [TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.ImagePrepared, + [TraceAttr.CopilotVfsReadOutputBytes]: prepared.buffer.length, + [TraceAttr.CopilotVfsReadOutputMediaType]: prepared.mediaType, + [TraceAttr.CopilotVfsReadImageResized]: prepared.resized, + }) + return { + content: `Image: ${record.name} (${sizeKb}KB, ${prepared.mediaType}${resizeNote})`, + totalLines: 1, + attachment: { + type: 'image', + source: { + type: 'base64', + media_type: prepared.mediaType, + data: prepared.buffer.toString('base64'), + }, + }, + } } - } - const buffer = await downloadWorkspaceFile(record) - const mime = detectImageMime(buffer, record.type) - return { - content: `Image: ${record.name} (${(record.size / 1024).toFixed(1)}KB, ${mime})`, - totalLines: 1, - attachment: { - type: 'image', - source: { - type: 'base64', - media_type: mime, - data: buffer.toString('base64'), - }, - }, - } - } - if (isReadableType(record.type)) { - if (record.size > MAX_TEXT_READ_BYTES) { - return { - content: `[File too large to display inline: ${record.name} (${record.size} bytes, limit ${MAX_TEXT_READ_BYTES})]`, - totalLines: 1, + if (isReadableType(record.type)) { + span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.Text) + if (record.size > MAX_TEXT_READ_BYTES) { + span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.TextTooLarge) + return { + content: `[File too large to display inline: ${record.name} (${record.size} bytes, limit ${MAX_TEXT_READ_BYTES})]`, + totalLines: 1, + } + } + + const buffer = await downloadWorkspaceFile(record) + const content = buffer.toString('utf-8') + const lines = content.split('\n').length + span.setAttributes({ + 
[TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.TextRead, + [TraceAttr.CopilotVfsReadOutputBytes]: buffer.length, + [TraceAttr.CopilotVfsReadOutputLines]: lines, + }) + return { content, totalLines: lines } } - } - const buffer = await downloadWorkspaceFile(record) - const content = buffer.toString('utf-8') - return { content, totalLines: content.split('\n').length } - } + const ext = getExtension(record.name) + if (PARSEABLE_EXTENSIONS.has(ext)) { + span.setAttribute(TraceAttr.CopilotVfsReadPath, CopilotVfsReadPath.ParseableDocument) + if (record.size > MAX_PARSEABLE_READ_BYTES) { + span.setAttribute( + TraceAttr.CopilotVfsReadOutcome, + CopilotVfsReadOutcome.DocumentTooLarge + ) + return { + content: `[Document too large to parse inline: ${record.name} (${record.size} bytes, limit ${MAX_PARSEABLE_READ_BYTES})]`, + totalLines: 1, + } + } + const buffer = await downloadWorkspaceFile(record) + try { + const { parseBuffer } = await import('@/lib/file-parsers') + const result = await parseBuffer(buffer, ext) + const content = result.content || '' + const lines = content.split('\n').length + span.setAttributes({ + [TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.DocumentParsed, + [TraceAttr.CopilotVfsReadOutputBytes]: content.length, + [TraceAttr.CopilotVfsReadOutputLines]: lines, + }) + return { content, totalLines: lines } + } catch (parseErr) { + logger.warn('Failed to parse document', { + fileName: record.name, + ext, + error: parseErr instanceof Error ? parseErr.message : String(parseErr), + }) + span.addEvent(TraceEvent.CopilotVfsParseFailed, { + [TraceAttr.ErrorMessage]: + parseErr instanceof Error ? 
parseErr.message : String(parseErr).slice(0, 500), + }) + span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ParseFailed) + return { + content: `[Could not parse ${record.name} (${record.type}, ${record.size} bytes)]`, + totalLines: 1, + } + } + } - const ext = getExtension(record.name) - if (PARSEABLE_EXTENSIONS.has(ext)) { - const buffer = await downloadWorkspaceFile(record) - try { - const { parseBuffer } = await import('@/lib/file-parsers') - const result = await parseBuffer(buffer, ext) - const content = result.content || '' - return { content, totalLines: content.split('\n').length } - } catch (parseErr) { - logger.warn('Failed to parse document', { - fileName: record.name, - ext, - error: parseErr instanceof Error ? parseErr.message : String(parseErr), + span.setAttributes({ + [TraceAttr.CopilotVfsReadPath]: CopilotVfsReadPath.Binary, + [TraceAttr.CopilotVfsReadOutcome]: CopilotVfsReadOutcome.BinaryPlaceholder, }) return { - content: `[Could not parse ${record.name} (${record.type}, ${record.size} bytes)]`, + content: `[Binary file: ${record.name} (${record.type}, ${record.size} bytes). Cannot display as text.]`, totalLines: 1, } + } catch (err) { + logger.warn('Failed to read workspace file', { + fileName: record.name, + error: err instanceof Error ? err.message : String(err), + }) + recordSpanError(span, err) + span.setAttribute(TraceAttr.CopilotVfsReadOutcome, CopilotVfsReadOutcome.ReadFailed) + return null + } finally { + span.end() } } - - return { - content: `[Binary file: ${record.name} (${record.type}, ${record.size} bytes). Cannot display as text.]`, - totalLines: 1, - } - } catch (err) { - logger.warn('Failed to read workspace file', { - fileName: record.name, - error: err instanceof Error ? 
err.message : String(err), - }) - return null - } + ) } diff --git a/apps/sim/lib/core/telemetry.ts b/apps/sim/lib/core/telemetry.ts index 5bb0c6227b7..d73cd0d81f1 100644 --- a/apps/sim/lib/core/telemetry.ts +++ b/apps/sim/lib/core/telemetry.ts @@ -18,6 +18,7 @@ import { context, type Span, SpanStatusCode, trace } from '@opentelemetry/api' import { createLogger } from '@sim/logger' +import { TraceAttr } from '@/lib/copilot/generated/trace-attributes-v1' import type { TraceSpan } from '@/lib/logs/types' /** @@ -278,8 +279,8 @@ export function createOTelSpanFromTraceSpan(traceSpan: TraceSpan, parentSpan?: S { attributes: { [GenAIAttributes.TOOL_NAME]: toolCall.name, - 'tool.status': toolCall.status, - 'tool.duration_ms': toolCall.duration || 0, + [TraceAttr.ToolStatus]: toolCall.status, + [TraceAttr.ToolDurationMs]: toolCall.duration || 0, }, startTime: new Date(toolCall.startTime), }, @@ -341,8 +342,8 @@ export function createOTelSpansForWorkflowExecution(params: { [GenAIAttributes.WORKFLOW_ID]: params.workflowId, [GenAIAttributes.WORKFLOW_NAME]: params.workflowName || params.workflowId, [GenAIAttributes.WORKFLOW_EXECUTION_ID]: params.executionId, - 'workflow.trigger': params.trigger, - 'workflow.duration_ms': params.totalDurationMs, + [TraceAttr.WorkflowTrigger]: params.trigger, + [TraceAttr.WorkflowDurationMs]: params.totalDurationMs, }, startTime: new Date(params.startTime), }, @@ -403,9 +404,9 @@ export async function traceBlockExecution( blockMapping.spanName, { attributes: { - 'block.type': blockType, - 'block.id': blockId, - 'block.name': blockName, + [TraceAttr.BlockType]: blockType, + [TraceAttr.BlockId]: blockId, + [TraceAttr.BlockName]: blockName, }, }, async (span) => { @@ -439,8 +440,8 @@ export function trackPlatformEvent( const span = tracer.startSpan(eventName, { attributes: { ...attributes, - 'event.name': eventName, - 'event.timestamp': Date.now(), + [TraceAttr.EventName]: eventName, + [TraceAttr.EventTimestamp]: Date.now(), }, }) span.setStatus({ 
code: SpanStatusCode.OK }) diff --git a/apps/sim/providers/models.ts b/apps/sim/providers/models.ts index 61b073e9dcc..49cf756f43a 100644 --- a/apps/sim/providers/models.ts +++ b/apps/sim/providers/models.ts @@ -519,6 +519,26 @@ export const PROVIDER_DEFINITIONS: Record = { toolUsageControl: true, }, models: [ + { + id: 'claude-opus-4-7', + pricing: { + input: 5.0, + cachedInput: 0.5, + output: 25.0, + updatedAt: '2026-04-16', + }, + capabilities: { + temperature: { min: 0, max: 1 }, + nativeStructuredOutputs: true, + maxOutputTokens: 128000, + thinking: { + levels: ['low', 'medium', 'high', 'max'], + default: 'high', + }, + }, + contextWindow: 1000000, + releaseDate: '2026-04-16', + }, { id: 'claude-opus-4-6', pricing: { diff --git a/package.json b/package.json index d78396fbb5c..54ec52dc1be 100644 --- a/package.json +++ b/package.json @@ -27,8 +27,16 @@ "mship-tools:check": "bun run scripts/sync-tool-catalog.ts --check", "trace-contracts:generate": "bun run scripts/sync-request-trace-contract.ts", "trace-contracts:check": "bun run scripts/sync-request-trace-contract.ts --check", - "mship:generate": "bun run mship-contracts:generate && bun run mship-tools:generate && bun run trace-contracts:generate", - "mship:check": "bun run mship-contracts:check && bun run mship-tools:check && bun run trace-contracts:check", + "trace-spans-contract:generate": "bun run scripts/sync-trace-spans-contract.ts", + "trace-spans-contract:check": "bun run scripts/sync-trace-spans-contract.ts --check", + "trace-attributes-contract:generate": "bun run scripts/sync-trace-attributes-contract.ts", + "trace-attributes-contract:check": "bun run scripts/sync-trace-attributes-contract.ts --check", + "trace-attribute-values-contract:generate": "bun run scripts/sync-trace-attribute-values-contract.ts", + "trace-attribute-values-contract:check": "bun run scripts/sync-trace-attribute-values-contract.ts --check", + "trace-events-contract:generate": "bun run scripts/sync-trace-events-contract.ts", + 
"trace-events-contract:check": "bun run scripts/sync-trace-events-contract.ts --check", + "mship:generate": "bun run mship-contracts:generate && bun run mship-tools:generate && bun run trace-contracts:generate && bun run trace-spans-contract:generate && bun run trace-attributes-contract:generate && bun run trace-attribute-values-contract:generate && bun run trace-events-contract:generate", + "mship:check": "bun run mship-contracts:check && bun run mship-tools:check && bun run trace-contracts:check && bun run trace-spans-contract:check && bun run trace-attributes-contract:check && bun run trace-attribute-values-contract:check && bun run trace-events-contract:check", "prepare": "bun husky", "type-check": "turbo run type-check", "release": "bun run scripts/create-single-release.ts" diff --git a/scripts/sync-trace-attribute-values-contract.ts b/scripts/sync-trace-attribute-values-contract.ts new file mode 100644 index 00000000000..917c26c1764 --- /dev/null +++ b/scripts/sync-trace-attribute-values-contract.ts @@ -0,0 +1,155 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +/** + * Generate `apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts` + * from the Go-side `contracts/trace-attribute-values-v1.schema.json` + * contract. + * + * Unlike span-names / attribute-keys / event-names (each of which is a + * single enum), this contract carries MULTIPLE enums — one per span + * attribute whose value set is closed. The schema's `$defs` holds one + * definition per enum (e.g. `CopilotRequestCancelReason`, + * `CopilotAbortOutcome`, …). For each $def we emit a TS `as const` + * object named after the Go type, so call sites read as: + * + * span.setAttribute( + * TraceAttr.CopilotRequestCancelReason, + * CopilotRequestCancelReason.ExplicitStop, + * ) + * + * Skipped $defs: anything that doesn't have a string-only `enum` + * array. 
That filters out wrapper structs the reflector adds + * incidentally (e.g. `TraceAttributeValuesV1AllDefs`). + */ +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)) +const ROOT = resolve(SCRIPT_DIR, '..') +const DEFAULT_CONTRACT_PATH = resolve( + ROOT, + '../copilot/copilot/contracts/trace-attribute-values-v1.schema.json', +) +const OUTPUT_PATH = resolve( + ROOT, + 'apps/sim/lib/copilot/generated/trace-attribute-values-v1.ts', +) + +interface ExtractedEnum { + /** The Go type name — becomes the TS const + type name. */ + name: string + /** The value strings, sorted for diff stability. */ + values: string[] +} + +function extractEnums(schema: Record): ExtractedEnum[] { + const defs = (schema.$defs ?? {}) as Record + const out: ExtractedEnum[] = [] + for (const [name, def] of Object.entries(defs)) { + if (!def || typeof def !== 'object') continue + const enumValues = (def as Record).enum + if (!Array.isArray(enumValues)) continue + if (!enumValues.every((v) => typeof v === 'string')) continue + out.push({ name, values: (enumValues as string[]).slice().sort() }) + } + out.sort((a, b) => a.name.localeCompare(b.name)) + return out +} + +/** + * PascalCase identifier for a wire enum value. Mirrors the algorithm + * used by the span-names + attribute-keys scripts, so + * `explicit_stop` -> `ExplicitStop`, matching what a reader would + * guess from Go's exported constants. 
+ */ +function toValueIdent(value: string): string { + const parts = value.split(/[^A-Za-z0-9]+/).filter(Boolean) + if (parts.length === 0) { + throw new Error(`Cannot derive identifier for enum value: ${value}`) + } + const ident = parts + .map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase()) + .join('') + if (/^[0-9]/.test(ident)) { + throw new Error( + `Derived identifier "${ident}" for value "${value}" starts with a digit`, + ) + } + return ident +} + +function renderEnum(e: ExtractedEnum): string { + const seen = new Map() + const lines = e.values.map((v) => { + const ident = toValueIdent(v) + const prev = seen.get(ident) + if (prev && prev !== v) { + throw new Error( + `Enum ${e.name}: identifier collision — "${prev}" and "${v}" both map to "${ident}"`, + ) + } + seen.set(ident, v) + return ` ${ident}: ${JSON.stringify(v)},` + }) + + return `export const ${e.name} = { +${lines.join('\n')} +} as const; + +export type ${e.name}Key = keyof typeof ${e.name}; +export type ${e.name}Value = (typeof ${e.name})[${e.name}Key];` +} + +function render(enums: ExtractedEnum[]): string { + const body = enums.map(renderEnum).join('\n\n') + return `// AUTO-GENERATED FILE. DO NOT EDIT. +// +// Source: copilot/copilot/contracts/trace-attribute-values-v1.schema.json +// Regenerate with: bun run trace-attribute-values-contract:generate +// +// Canonical closed-set value vocabularies for mothership OTel +// attributes. Call sites should reference e.g. +// \`CopilotRequestCancelReason.ExplicitStop\` rather than the raw +// string literal, so typos become compile errors and the Go contract +// remains the single source of truth. + +${body} +` +} + +async function main() { + const checkOnly = process.argv.includes('--check') + const inputArg = process.argv.find((a) => a.startsWith('--input=')) + const inputPath = inputArg + ? 
resolve(ROOT, inputArg.slice('--input='.length)) + : DEFAULT_CONTRACT_PATH + + const raw = await readFile(inputPath, 'utf8') + const schema = JSON.parse(raw) + const enums = extractEnums(schema) + if (enums.length === 0) { + throw new Error( + 'No enum $defs found in trace-attribute-values-v1.schema.json — did you add the Go type to TraceAttributeValuesV1AllDefs?', + ) + } + const rendered = render(enums) + + if (checkOnly) { + const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null) + if (existing !== rendered) { + throw new Error( + 'Generated trace attribute values contract is stale. Run: bun run trace-attribute-values-contract:generate', + ) + } + console.log('Trace attribute values contract is up to date.') + return + } + + await mkdir(dirname(OUTPUT_PATH), { recursive: true }) + await writeFile(OUTPUT_PATH, rendered, 'utf8') + console.log(`Generated trace attribute values types -> ${OUTPUT_PATH}`) +} + +main().catch((err) => { + console.error(err) + process.exit(1) +}) diff --git a/scripts/sync-trace-attributes-contract.ts b/scripts/sync-trace-attributes-contract.ts new file mode 100644 index 00000000000..3f693781cd3 --- /dev/null +++ b/scripts/sync-trace-attributes-contract.ts @@ -0,0 +1,168 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +/** + * Generate `apps/sim/lib/copilot/generated/trace-attributes-v1.ts` + * from the Go-side `contracts/trace-attributes-v1.schema.json` + * contract. + * + * The contract is a single-enum JSON Schema listing every CUSTOM + * (non-OTel-semconv) span attribute key used in mothership. We emit: + * - A `TraceAttr` const object keyed by PascalCase identifier whose + * values are the exact wire strings, so call sites look like + * `span.setAttribute(TraceAttr.ChatId, …)` instead of the raw + * `span.setAttribute('chat.id', …)`. 
+ * - A `TraceAttrKey` union and a `TraceAttrValue` union type so + * helpers that take an attribute key are well-typed. + * - A sorted `TraceAttrValues` readonly array for tests/enumeration. + * + * This is the attribute-key twin of `sync-trace-spans-contract.ts` + * (span names). The two files share the enum-extraction + identifier + * PascalCase + collision-detection pattern so a reader who understands + * one understands both. + * + * For OTel semantic-convention keys (e.g. `http.request.method`, + * `db.system`, `gen_ai.system`, `messaging.*`, `net.*`, + * `service.name`, `deployment.environment`), import from + * `@opentelemetry/semantic-conventions` directly — they live in the + * upstream package, not in this contract. + */ +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)) +const ROOT = resolve(SCRIPT_DIR, '..') +const DEFAULT_CONTRACT_PATH = resolve( + ROOT, + '../copilot/copilot/contracts/trace-attributes-v1.schema.json', +) +const OUTPUT_PATH = resolve( + ROOT, + 'apps/sim/lib/copilot/generated/trace-attributes-v1.ts', +) + +function extractAttrKeys(schema: Record): string[] { + const defs = (schema.$defs ?? {}) as Record + const nameDef = defs.TraceAttributesV1Name + if ( + !nameDef || + typeof nameDef !== 'object' || + !Array.isArray((nameDef as Record).enum) + ) { + throw new Error( + 'trace-attributes-v1.schema.json is missing $defs.TraceAttributesV1Name.enum', + ) + } + const enumValues = (nameDef as Record).enum as unknown[] + if (!enumValues.every((v) => typeof v === 'string')) { + throw new Error('TraceAttributesV1Name enum must be string-only') + } + return (enumValues as string[]).slice().sort() +} + +/** + * Convert a wire attribute key like `copilot.vfs.input.media_type_claimed` + * into an identifier-safe PascalCase key like + * `CopilotVfsInputMediaTypeClaimed`. + * + * Same algorithm as the span-name sync script so readers can learn one + * and reuse it. 
+ */ +function toIdentifier(name: string): string { + const parts = name.split(/[^A-Za-z0-9]+/).filter(Boolean) + if (parts.length === 0) { + throw new Error(`Cannot derive identifier for attribute key: ${name}`) + } + const ident = parts + .map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase()) + .join('') + if (/^[0-9]/.test(ident)) { + throw new Error( + `Derived identifier "${ident}" for attribute "${name}" starts with a digit`, + ) + } + return ident +} + +function render(attrKeys: string[]): string { + const pairs = attrKeys.map((name) => ({ name, ident: toIdentifier(name) })) + + // Identifier collisions silently override earlier keys and break + // type safety — fail loudly instead. + const seen = new Map() + for (const p of pairs) { + const prev = seen.get(p.ident) + if (prev && prev !== p.name) { + throw new Error( + `Identifier collision: "${prev}" and "${p.name}" both map to "${p.ident}"`, + ) + } + seen.set(p.ident, p.name) + } + + const constLines = pairs + .map((p) => ` ${p.ident}: ${JSON.stringify(p.name)},`) + .join('\n') + const arrayEntries = attrKeys.map((n) => ` ${JSON.stringify(n)},`).join('\n') + + return `// AUTO-GENERATED FILE. DO NOT EDIT. +// +// Source: copilot/copilot/contracts/trace-attributes-v1.schema.json +// Regenerate with: bun run trace-attributes-contract:generate +// +// Canonical custom mothership OTel span attribute keys. Call sites +// should reference \`TraceAttr.\` (e.g. +// \`TraceAttr.ChatId\`, \`TraceAttr.ToolCallId\`) rather than raw +// string literals, so the Go-side contract is the single source of +// truth and typos become compile errors. +// +// For OTel semantic-convention keys (\`http.*\`, \`db.*\`, +// \`gen_ai.*\`, \`net.*\`, \`messaging.*\`, \`service.*\`, +// \`deployment.environment\`), import from +// \`@opentelemetry/semantic-conventions\` directly — those are owned +// by the upstream OTel spec, not by this contract. 
+ +export const TraceAttr = { +${constLines} +} as const; + +export type TraceAttrKey = keyof typeof TraceAttr; +export type TraceAttrValue = (typeof TraceAttr)[TraceAttrKey]; + +/** Readonly sorted list of every canonical custom attribute key. */ +export const TraceAttrValues: readonly TraceAttrValue[] = [ +${arrayEntries} +] as const; +` +} + +async function main() { + const checkOnly = process.argv.includes('--check') + const inputArg = process.argv.find((a) => a.startsWith('--input=')) + const inputPath = inputArg + ? resolve(ROOT, inputArg.slice('--input='.length)) + : DEFAULT_CONTRACT_PATH + + const raw = await readFile(inputPath, 'utf8') + const schema = JSON.parse(raw) + const attrKeys = extractAttrKeys(schema) + const rendered = render(attrKeys) + + if (checkOnly) { + const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null) + if (existing !== rendered) { + throw new Error( + 'Generated trace attributes contract is stale. Run: bun run trace-attributes-contract:generate', + ) + } + console.log('Trace attributes contract is up to date.') + return + } + + await mkdir(dirname(OUTPUT_PATH), { recursive: true }) + await writeFile(OUTPUT_PATH, rendered, 'utf8') + console.log(`Generated trace attributes types -> ${OUTPUT_PATH}`) +} + +main().catch((err) => { + console.error(err) + process.exit(1) +}) diff --git a/scripts/sync-trace-events-contract.ts b/scripts/sync-trace-events-contract.ts new file mode 100644 index 00000000000..7e858f4e2a6 --- /dev/null +++ b/scripts/sync-trace-events-contract.ts @@ -0,0 +1,137 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +/** + * Generate `apps/sim/lib/copilot/generated/trace-events-v1.ts` from + * the Go-side `contracts/trace-events-v1.schema.json` contract. 
+ * + * Mirrors the span-names + attribute-keys sync scripts exactly — the + * only difference is the $defs key (`TraceEventsV1Name`), the output + * path, and the generated const name (`TraceEvent`). Keeping the + * scripts structurally identical means a reader who understands one + * understands all three, and drift between them gets caught + * immediately in code review. + */ +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)) +const ROOT = resolve(SCRIPT_DIR, '..') +const DEFAULT_CONTRACT_PATH = resolve( + ROOT, + '../copilot/copilot/contracts/trace-events-v1.schema.json', +) +const OUTPUT_PATH = resolve( + ROOT, + 'apps/sim/lib/copilot/generated/trace-events-v1.ts', +) + +function extractEventNames(schema: Record): string[] { + const defs = (schema.$defs ?? {}) as Record + const nameDef = defs.TraceEventsV1Name + if ( + !nameDef || + typeof nameDef !== 'object' || + !Array.isArray((nameDef as Record).enum) + ) { + throw new Error( + 'trace-events-v1.schema.json is missing $defs.TraceEventsV1Name.enum', + ) + } + const enumValues = (nameDef as Record).enum as unknown[] + if (!enumValues.every((v) => typeof v === 'string')) { + throw new Error('TraceEventsV1Name enum must be string-only') + } + return (enumValues as string[]).slice().sort() +} + +function toIdentifier(name: string): string { + const parts = name.split(/[^A-Za-z0-9]+/).filter(Boolean) + if (parts.length === 0) { + throw new Error(`Cannot derive identifier for event name: ${name}`) + } + const ident = parts + .map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase()) + .join('') + if (/^[0-9]/.test(ident)) { + throw new Error( + `Derived identifier "${ident}" for event "${name}" starts with a digit`, + ) + } + return ident +} + +function render(eventNames: string[]): string { + const pairs = eventNames.map((name) => ({ name, ident: toIdentifier(name) })) + + const seen = new Map() + for (const p of pairs) { + const prev = seen.get(p.ident) + if (prev && prev !== p.name) { + throw 
new Error( + `Identifier collision: "${prev}" and "${p.name}" both map to "${p.ident}"`, + ) + } + seen.set(p.ident, p.name) + } + + const constLines = pairs + .map((p) => ` ${p.ident}: ${JSON.stringify(p.name)},`) + .join('\n') + const arrayEntries = eventNames.map((n) => ` ${JSON.stringify(n)},`).join('\n') + + return `// AUTO-GENERATED FILE. DO NOT EDIT. +// +// Source: copilot/copilot/contracts/trace-events-v1.schema.json +// Regenerate with: bun run trace-events-contract:generate +// +// Canonical mothership OTel span event names. Call sites should +// reference \`TraceEvent.\` (e.g. +// \`TraceEvent.RequestCancelled\`) rather than raw string literals, +// so the Go-side contract is the single source of truth and typos +// become compile errors. + +export const TraceEvent = { +${constLines} +} as const; + +export type TraceEventKey = keyof typeof TraceEvent; +export type TraceEventValue = (typeof TraceEvent)[TraceEventKey]; + +/** Readonly sorted list of every canonical event name. */ +export const TraceEventValues: readonly TraceEventValue[] = [ +${arrayEntries} +] as const; +` +} + +async function main() { + const checkOnly = process.argv.includes('--check') + const inputArg = process.argv.find((a) => a.startsWith('--input=')) + const inputPath = inputArg + ? resolve(ROOT, inputArg.slice('--input='.length)) + : DEFAULT_CONTRACT_PATH + + const raw = await readFile(inputPath, 'utf8') + const schema = JSON.parse(raw) + const eventNames = extractEventNames(schema) + const rendered = render(eventNames) + + if (checkOnly) { + const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null) + if (existing !== rendered) { + throw new Error( + 'Generated trace events contract is stale. 
Run: bun run trace-events-contract:generate', + ) + } + console.log('Trace events contract is up to date.') + return + } + + await mkdir(dirname(OUTPUT_PATH), { recursive: true }) + await writeFile(OUTPUT_PATH, rendered, 'utf8') + console.log(`Generated trace events types -> ${OUTPUT_PATH}`) +} + +main().catch((err) => { + console.error(err) + process.exit(1) +}) diff --git a/scripts/sync-trace-spans-contract.ts b/scripts/sync-trace-spans-contract.ts new file mode 100644 index 00000000000..b3495753f6c --- /dev/null +++ b/scripts/sync-trace-spans-contract.ts @@ -0,0 +1,155 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { dirname, resolve } from 'node:path' +import { fileURLToPath } from 'node:url' + +/** + * Generate `apps/sim/lib/copilot/generated/trace-spans-v1.ts` from the + * Go-side `contracts/trace-spans-v1.schema.json` contract. + * + * The contract is a single-enum JSON Schema. We emit: + * - A `TraceSpansV1Name` const object (key-as-value) for ergonomic + * access: `TraceSpansV1Name['copilot.vfs.read_file']`. + * - A `TraceSpansV1NameValue` union type. + * - A sorted `TraceSpansV1Names` readonly array (useful for tests that + * verify coverage, and for tooling that wants to enumerate names). + * + * We deliberately do NOT pass through `json-schema-to-typescript` — + * it would generate a noisy `TraceSpansV1` object type for the wrapper + * that drives reflection; the wrapper type has no runtime use on the Sim + * side and would obscure the actual enum. + */ +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)) +const ROOT = resolve(SCRIPT_DIR, '..') +const DEFAULT_CONTRACT_PATH = resolve( + ROOT, + '../copilot/copilot/contracts/trace-spans-v1.schema.json', +) +const OUTPUT_PATH = resolve( + ROOT, + 'apps/sim/lib/copilot/generated/trace-spans-v1.ts', +) + +function extractSpanNames(schema: Record): string[] { + const defs = (schema.$defs ?? 
{}) as Record<string, unknown>
+  const nameDef = defs.TraceSpansV1Name
+  if (
+    !nameDef ||
+    typeof nameDef !== 'object' ||
+    !Array.isArray((nameDef as Record<string, unknown>).enum)
+  ) {
+    throw new Error(
+      'trace-spans-v1.schema.json is missing $defs.TraceSpansV1Name.enum',
+    )
+  }
+  const enumValues = (nameDef as Record<string, unknown>).enum as unknown[]
+  if (!enumValues.every((v) => typeof v === 'string')) {
+    throw new Error('TraceSpansV1Name enum must be string-only')
+  }
+  return (enumValues as string[]).slice().sort()
+}
+
+/**
+ * Convert a wire name like "copilot.recovery.check_replay_gap" into an
+ * identifier-safe PascalCase key like "CopilotRecoveryCheckReplayGap",
+ * so call sites read as `TraceSpan.CopilotRecoveryCheckReplayGap`
+ * instead of `TraceSpan["copilot.recovery.check_replay_gap"]`.
+ *
+ * Splits on `.`, `_`, and non-alphanumeric characters; capitalizes each
+ * part; collapses. Strict mapping (not a best-effort heuristic), so the
+ * same input always produces the same identifier.
+ */
+function toIdentifier(name: string): string {
+  const parts = name.split(/[^A-Za-z0-9]+/).filter(Boolean)
+  if (parts.length === 0) {
+    throw new Error(`Cannot derive identifier for span name: ${name}`)
+  }
+  const ident = parts
+    .map((p) => p.charAt(0).toUpperCase() + p.slice(1).toLowerCase())
+    .join('')
+  // Safety: identifiers may not start with a digit.
+  if (/^[0-9]/.test(ident)) {
+    throw new Error(
+      `Derived identifier "${ident}" for span "${name}" starts with a digit`,
+    )
+  }
+  return ident
+}
+
+function render(spanNames: string[]): string {
+  const pairs = spanNames.map((name) => ({ name, ident: toIdentifier(name) }))
+
+  // Guard against collisions: if two wire names ever collapse to the
+  // same PascalCase identifier, we want a clear build failure, not a
+  // silent override.
+  const seen = new Map<string, string>()
+  for (const p of pairs) {
+    const prev = seen.get(p.ident)
+    if (prev && prev !== p.name) {
+      throw new Error(
+        `Identifier collision: "${prev}" and "${p.name}" both map to "${p.ident}"`,
+      )
+    }
+    seen.set(p.ident, p.name)
+  }
+
+  const constLines = pairs
+    .map((p) => `  ${p.ident}: ${JSON.stringify(p.name)},`)
+    .join('\n')
+  const arrayEntries = spanNames.map((n) => `  ${JSON.stringify(n)},`).join('\n')
+
+  return `// AUTO-GENERATED FILE. DO NOT EDIT.
+//
+// Source: copilot/copilot/contracts/trace-spans-v1.schema.json
+// Regenerate with: bun run trace-spans-contract:generate
+//
+// Canonical mothership OTel span names. Call sites should reference
+// \`TraceSpan.<PascalCaseName>\` (e.g. \`TraceSpan.CopilotVfsReadFile\`)
+// rather than raw string literals, so the Go-side contract is the
+// single source of truth and typos become compile errors.
+
+export const TraceSpan = {
+${constLines}
+} as const;
+
+export type TraceSpanKey = keyof typeof TraceSpan;
+export type TraceSpanValue = (typeof TraceSpan)[TraceSpanKey];
+
+/** Readonly sorted list of every canonical span name. */
+export const TraceSpanValues: readonly TraceSpanValue[] = [
+${arrayEntries}
+] as const;
+`
+}
+
+async function main() {
+  const checkOnly = process.argv.includes('--check')
+  const inputArg = process.argv.find((a) => a.startsWith('--input='))
+  const inputPath = inputArg
+    ? resolve(ROOT, inputArg.slice('--input='.length))
+    : DEFAULT_CONTRACT_PATH
+
+  const raw = await readFile(inputPath, 'utf8')
+  const schema = JSON.parse(raw)
+  const spanNames = extractSpanNames(schema)
+  const rendered = render(spanNames)
+
+  if (checkOnly) {
+    const existing = await readFile(OUTPUT_PATH, 'utf8').catch(() => null)
+    if (existing !== rendered) {
+      throw new Error(
+        'Generated trace spans contract is stale.
Run: bun run trace-spans-contract:generate', + ) + } + console.log('Trace spans contract is up to date.') + return + } + + await mkdir(dirname(OUTPUT_PATH), { recursive: true }) + await writeFile(OUTPUT_PATH, rendered, 'utf8') + console.log(`Generated trace spans types -> ${OUTPUT_PATH}`) +} + +main().catch((err) => { + console.error(err) + process.exit(1) +})