Skip to content

Commit 4dc30bc

Browse files
committed
otimizações para diminuir a latência das respostas do agente
1 parent abf24db commit 4dc30bc

File tree

1 file changed

+76
-52
lines changed

1 file changed

+76
-52
lines changed

app/(chat)/api/chat/route.ts

Lines changed: 76 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,27 @@ export async function POST(request: Request) {
450450
const chatVisibility = "private" as const;
451451
const isAgentEngineRequest = isAgentEngineModel(selectedChatModel);
452452

453+
// Early promise kickoff (Agent Engine only): these have no dependency
454+
// on auth() and can run in parallel with it. Starting the service
455+
// account token early also warms the singleton cache used by all
456+
// downstream BigQuery operations (getBigQueryAccessToken delegates
457+
// to getServiceAccountAccessToken).
//
// Both values are null for non-Agent-Engine requests; the only consumer
// visible in this diff is the Promise.all inside the Agent Engine branch,
// which reads them with non-null assertions.
//
// NOTE(review): earlyServiceAccountTokenPromise has no rejection handler
// attached here. If the handler exits before that Promise.all is reached
// (for example at the session check right after auth()), a rejected token
// promise would presumably surface as an unhandled rejection — confirm,
// and consider attaching a no-op catch to a derived promise.
//
// NOTE(review): both lookups are started before auth() is awaited, so
// they also run for requests that turn out to be unauthenticated — confirm
// this is acceptable for the provider-session query.
//
// NOTE(review): the warning text below says "will retry later", but the
// visible consumer awaits this same promise and simply receives null; no
// second lookup appears in this diff. The structured
// provider_session_lookup_failed event that the previous code emitted is
// also gone from this path — verify that is intended.
458+
const earlyServiceAccountTokenPromise = isAgentEngineRequest
459+
? getServiceAccountAccessToken()
460+
: null;
461+
const earlyProviderSessionPromise = isAgentEngineRequest
462+
? getProviderSessionByChatId({
463+
chatId: id,
464+
provider: AGENT_ENGINE_PROVIDER_ID,
465+
}).catch((error) => {
466+
console.warn(
467+
"[chat/route] Early provider session lookup failed, will retry later:",
468+
error instanceof Error ? error.message : String(error)
469+
);
470+
return null;
471+
})
472+
: null;
473+
453474
const session = await auth();
454475

455476
if (!session?.user) {
@@ -491,6 +512,24 @@ export async function POST(request: Request) {
491512
const isToolApprovalFlow = Boolean(messages);
492513
let messageHistory: ChatMessage[] = [];
493514
let titlePromise: Promise<string> | null = null;
515+
const pendingPersistenceTasks: Promise<void>[] = [];
516+
// pendingPersistenceTasks collects the tracked promises queued below.
// Where (or whether) the list is awaited is not visible in this diff.
//
// queuePersistenceTask registers a best-effort persistence promise:
//   - task: the in-flight persistence operation to track.
//   - context: a short label copied into the warning event so the failing
//     call site can be identified.
// A rejection of `task` is caught and reported as a "message_persist_failed"
// warning through logAgentEngineEvent; the catch handler returns nothing, so
// the tracked promise pushed onto pendingPersistenceTasks fulfils even when
// `task` rejects (as long as the logging call itself does not throw).
//
// NOTE(review): the handler reads providerSessionIdForPersistence, whose
// declaration is not part of the visible hunks. This commit moves the helper
// earlier in the request handler, and it is now also used for the deferred
// saveChat call; confirm the variable is declared before any queued task can
// reject, otherwise the catch handler itself would throw a ReferenceError.
517+
const queuePersistenceTask = (task: Promise<void>, context: string) => {
518+
const trackedTask = task.catch((error) => {
519+
logAgentEngineEvent("warn", {
520+
event: "message_persist_failed",
521+
request_id: requestId,
522+
chat_id: id,
523+
session_id: providerSessionIdForPersistence,
524+
user_id: bigQueryUserId,
525+
model_id: selectedChatModel,
526+
context,
527+
reason: error instanceof Error ? error.message : String(error),
528+
});
529+
});
530+
531+
pendingPersistenceTasks.push(trackedTask);
532+
};
494533

495534
if (chat) {
496535
if (!chatOwnerIds.has(chat.userId)) {
@@ -505,12 +544,17 @@ export async function POST(request: Request) {
505544
});
506545
}
507546
} else if (message?.role === "user") {
508-
await saveChat({
547+
const saveChatTask = saveChat({
509548
id,
510549
userId: bigQueryUserId,
511550
title: "Nova Conversa",
512551
visibility: chatVisibility,
513552
});
553+
if (isAgentEngineRequest) {
554+
queuePersistenceTask(saveChatTask as Promise<void>, "deferred_save_chat");
555+
} else {
556+
await saveChatTask;
557+
}
514558
if (
515559
isAgentEngineModel(selectedChatModel) ||
516560
isDirectProviderModel(selectedChatModel)
@@ -587,24 +631,6 @@ export async function POST(request: Request) {
587631
let providerSessionReadyAtMs: number | null = null;
588632
let streamOpenedAtMs: number | null = null;
589633
let firstDeltaAtMs: number | null = null;
590-
const pendingPersistenceTasks: Promise<void>[] = [];
591-
592-
const queuePersistenceTask = (task: Promise<void>, context: string) => {
593-
const trackedTask = task.catch((error) => {
594-
logAgentEngineEvent("warn", {
595-
event: "message_persist_failed",
596-
request_id: requestId,
597-
chat_id: id,
598-
session_id: providerSessionIdForPersistence,
599-
user_id: bigQueryUserId,
600-
model_id: selectedChatModel,
601-
context,
602-
reason: error instanceof Error ? error.message : String(error),
603-
});
604-
});
605-
606-
pendingPersistenceTasks.push(trackedTask);
607-
};
608634

609635
const getPersistenceSessionId = () => {
610636
if (!isAgentEngineRequest) {
@@ -744,25 +770,22 @@ export async function POST(request: Request) {
744770
"No user message found for Agent Engine request."
745771
).toResponse();
746772
}
773+
// Await the early-started promises (started before auth, likely
774+
// already resolved by now).
747775
const [serviceAccountAccessToken, existingProviderSession] =
748776
await Promise.all([
749-
getServiceAccountAccessToken(),
750-
getProviderSessionByChatId({
751-
chatId: id,
752-
provider: AGENT_ENGINE_PROVIDER_ID,
753-
}).catch((error) => {
754-
logAgentEngineEvent("warn", {
755-
event: "provider_session_lookup_failed",
756-
request_id: requestId,
757-
chat_id: id,
758-
user_id: bigQueryUserId,
759-
model_id: selectedChatModel,
760-
reason: error instanceof Error ? error.message : String(error),
761-
});
762-
return null;
763-
}),
777+
earlyServiceAccountTokenPromise!,
778+
earlyProviderSessionPromise!,
764779
]);
765780

781+
// Start building the vertex message immediately — it depends only
782+
// on the user message and request signal, NOT on the provider session.
783+
// This overlaps file download/encoding with session creation.
784+
const vertexMessagePromise = buildVertexMessageFromUserMessage(
785+
latestUserMessage,
786+
request.signal
787+
);
788+
766789
let providerSessionId = existingProviderSession?.sessionId;
767790
const providerSessionSource = providerSessionId ? "existing" : "created";
768791

@@ -772,24 +795,26 @@ export async function POST(request: Request) {
772795
bigQueryUserId
773796
);
774797

775-
try {
776-
await upsertProviderSession({
798+
// Defer persistence — the stream only needs the in-memory session ID.
799+
queuePersistenceTask(
800+
upsertProviderSession({
777801
chatId: id,
778802
provider: AGENT_ENGINE_PROVIDER_ID,
779803
sessionId: providerSessionId,
780804
userId: bigQueryUserId,
781-
});
782-
} catch (error) {
783-
logAgentEngineEvent("warn", {
784-
event: "provider_session_persist_failed",
785-
request_id: requestId,
786-
chat_id: id,
787-
session_id: providerSessionId,
788-
user_id: bigQueryUserId,
789-
model_id: selectedChatModel,
790-
reason: error instanceof Error ? error.message : String(error),
791-
});
792-
}
805+
}).catch((error) => {
806+
logAgentEngineEvent("warn", {
807+
event: "provider_session_persist_failed",
808+
request_id: requestId,
809+
chat_id: id,
810+
session_id: providerSessionId,
811+
user_id: bigQueryUserId,
812+
model_id: selectedChatModel,
813+
reason: error instanceof Error ? error.message : String(error),
814+
});
815+
}) as Promise<void>,
816+
"provider_session_persist"
817+
);
793818
}
794819
if (!providerSessionId) {
795820
throw new Error(
@@ -827,10 +852,9 @@ export async function POST(request: Request) {
827852
);
828853
}
829854

830-
const vertexMessage = await buildVertexMessageFromUserMessage(
831-
latestUserMessage,
832-
request.signal
833-
);
855+
// Await the vertex message (likely already resolved if no attachments,
856+
// or ran in parallel with createVertexSession if session was new).
857+
const vertexMessage = await vertexMessagePromise;
834858
const allowTableChartFallback =
835859
shouldAllowTableChartFallback(latestUserMessage);
836860
const initialProviderSessionId: string = providerSessionId;

0 commit comments

Comments
 (0)