@@ -450,6 +450,27 @@ export async function POST(request: Request) {
450450 const chatVisibility = "private" as const ;
451451 const isAgentEngineRequest = isAgentEngineModel ( selectedChatModel ) ;
452452
// Early promise kickoff (Agent Engine only): neither call takes anything
// produced by auth(), so both are started first and overlap with it.
// Per the original author's note, starting the service-account token fetch
// early also warms the singleton cache used by downstream BigQuery
// operations (getBigQueryAccessToken delegates to
// getServiceAccountAccessToken).
//
// NOTE(review): because this runs before auth(), unauthenticated requests
// that select an Agent Engine model also trigger a token fetch and a
// provider-session lookup for the supplied chat id. Nothing is returned to
// the caller before auth succeeds, but confirm the extra work is acceptable.
const earlyServiceAccountTokenPromise = isAgentEngineRequest
  ? getServiceAccountAccessToken()
  : null;
// The handler can return (e.g. unauthenticated) or throw before this promise
// is awaited. Attach a rejection observer now so a failed token fetch is not
// reported as an unhandled rejection. The original promise is unchanged and
// still rejects with the same error for whoever awaits it later.
void earlyServiceAccountTokenPromise?.catch(() => undefined);

// A failed lookup is downgraded to `null`, i.e. "no existing provider
// session", so this promise never rejects.
// NOTE(review): the log text says "will retry later"; no retry is visible in
// this part of the handler — confirm one exists or reword the message.
const earlyProviderSessionPromise = isAgentEngineRequest
  ? getProviderSessionByChatId({
      chatId: id,
      provider: AGENT_ENGINE_PROVIDER_ID,
    }).catch((error: unknown) => {
      console.warn(
        "[chat/route] Early provider session lookup failed, will retry later:",
        error instanceof Error ? error.message : String(error)
      );
      return null;
    })
  : null;
473+
453474 const session = await auth ( ) ;
454475
455476 if ( ! session ?. user ) {
@@ -491,6 +512,24 @@ export async function POST(request: Request) {
491512 const isToolApprovalFlow = Boolean ( messages ) ;
492513 let messageHistory : ChatMessage [ ] = [ ] ;
493514 let titlePromise : Promise < string > | null = null ;
515+ const pendingPersistenceTasks : Promise < void > [ ] = [ ] ;
516+
/**
 * Registers a best-effort persistence promise so it can be awaited later via
 * `pendingPersistenceTasks` without ever rejecting.
 *
 * A rejection of `task` is reported as a "message_persist_failed" warning and
 * then swallowed; the tracked promise always fulfils with `undefined`.
 *
 * @param task    The in-flight persistence operation. Its resolved value is
 *                ignored, so any promise type is accepted and callers do not
 *                need to cast to `Promise<void>`.
 * @param context Short label identifying the call site in the warning.
 */
const queuePersistenceTask = (task: Promise<unknown>, context: string) => {
  const reportFailure = (error: unknown): void => {
    const reason = error instanceof Error ? error.message : String(error);
    try {
      logAgentEngineEvent("warn", {
        event: "message_persist_failed",
        request_id: requestId,
        chat_id: id,
        session_id: providerSessionIdForPersistence,
        user_id: bigQueryUserId,
        model_id: selectedChatModel,
        context,
        reason,
      });
    } catch (loggingError) {
      // The tracked task must never reject, so a failure while reporting is
      // contained here.
      // NOTE(review): this helper now sits above some of the state it reads
      // (providerSessionIdForPersistence is declared outside the visible
      // source); if that is a later `let`/`const`, an early failure would
      // throw here — confirm the declaration order.
      console.warn(
        "[chat/route] Failed to report persistence failure:",
        context,
        reason,
        loggingError instanceof Error
          ? loggingError.message
          : String(loggingError)
      );
    }
  };

  // Success discards the value; failure is reported and swallowed.
  const trackedTask: Promise<void> = task.then(() => undefined, reportFailure);

  pendingPersistenceTasks.push(trackedTask);
};
494533
495534 if ( chat ) {
496535 if ( ! chatOwnerIds . has ( chat . userId ) ) {
@@ -505,12 +544,17 @@ export async function POST(request: Request) {
505544 } ) ;
506545 }
507546 } else if ( message ?. role === "user" ) {
508- await saveChat ( {
547+ const saveChatTask = saveChat ( {
509548 id,
510549 userId : bigQueryUserId ,
511550 title : "Nova Conversa" ,
512551 visibility : chatVisibility ,
513552 } ) ;
553+ if ( isAgentEngineRequest ) {
554+ queuePersistenceTask ( saveChatTask as Promise < void > , "deferred_save_chat" ) ;
555+ } else {
556+ await saveChatTask ;
557+ }
514558 if (
515559 isAgentEngineModel ( selectedChatModel ) ||
516560 isDirectProviderModel ( selectedChatModel )
@@ -587,24 +631,6 @@ export async function POST(request: Request) {
587631 let providerSessionReadyAtMs : number | null = null ;
588632 let streamOpenedAtMs : number | null = null ;
589633 let firstDeltaAtMs : number | null = null ;
590- const pendingPersistenceTasks : Promise < void > [ ] = [ ] ;
591-
592- const queuePersistenceTask = ( task : Promise < void > , context : string ) => {
593- const trackedTask = task . catch ( ( error ) => {
594- logAgentEngineEvent ( "warn" , {
595- event : "message_persist_failed" ,
596- request_id : requestId ,
597- chat_id : id ,
598- session_id : providerSessionIdForPersistence ,
599- user_id : bigQueryUserId ,
600- model_id : selectedChatModel ,
601- context,
602- reason : error instanceof Error ? error . message : String ( error ) ,
603- } ) ;
604- } ) ;
605-
606- pendingPersistenceTasks . push ( trackedTask ) ;
607- } ;
608634
609635 const getPersistenceSessionId = ( ) => {
610636 if ( ! isAgentEngineRequest ) {
@@ -744,25 +770,22 @@ export async function POST(request: Request) {
744770 "No user message found for Agent Engine request."
745771 ) . toResponse ( ) ;
746772 }
773+ // Await the early-started promises (started before auth, likely
774+ // already resolved by now).
747775 const [ serviceAccountAccessToken , existingProviderSession ] =
748776 await Promise . all ( [
749- getServiceAccountAccessToken ( ) ,
750- getProviderSessionByChatId ( {
751- chatId : id ,
752- provider : AGENT_ENGINE_PROVIDER_ID ,
753- } ) . catch ( ( error ) => {
754- logAgentEngineEvent ( "warn" , {
755- event : "provider_session_lookup_failed" ,
756- request_id : requestId ,
757- chat_id : id ,
758- user_id : bigQueryUserId ,
759- model_id : selectedChatModel ,
760- reason : error instanceof Error ? error . message : String ( error ) ,
761- } ) ;
762- return null ;
763- } ) ,
777+ earlyServiceAccountTokenPromise ! ,
778+ earlyProviderSessionPromise ! ,
764779 ] ) ;
765780
// Start building the Vertex message immediately: the call takes only the
// latest user message and the request's abort signal, not the provider
// session, so it can overlap with the session handling that follows.
const vertexMessagePromise = buildVertexMessageFromUserMessage(
  latestUserMessage,
  request.signal
);
// The code between here and the point where this promise is awaited can
// throw first (for example the missing-provider-session check). Attach a
// rejection observer so a failed message build on that path is not reported
// as an unhandled rejection. The original promise is unchanged and still
// rejects with the same error when it is awaited.
void vertexMessagePromise.catch(() => undefined);
788+
766789 let providerSessionId = existingProviderSession ?. sessionId ;
767790 const providerSessionSource = providerSessionId ? "existing" : "created" ;
768791
@@ -772,24 +795,26 @@ export async function POST(request: Request) {
772795 bigQueryUserId
773796 ) ;
774797
775- try {
776- await upsertProviderSession ( {
798+ // Defer persistence — the stream only needs the in-memory session ID.
799+ queuePersistenceTask (
800+ upsertProviderSession ( {
777801 chatId : id ,
778802 provider : AGENT_ENGINE_PROVIDER_ID ,
779803 sessionId : providerSessionId ,
780804 userId : bigQueryUserId ,
781- } ) ;
782- } catch ( error ) {
783- logAgentEngineEvent ( "warn" , {
784- event : "provider_session_persist_failed" ,
785- request_id : requestId ,
786- chat_id : id ,
787- session_id : providerSessionId ,
788- user_id : bigQueryUserId ,
789- model_id : selectedChatModel ,
790- reason : error instanceof Error ? error . message : String ( error ) ,
791- } ) ;
792- }
805+ } ) . catch ( ( error ) => {
806+ logAgentEngineEvent ( "warn" , {
807+ event : "provider_session_persist_failed" ,
808+ request_id : requestId ,
809+ chat_id : id ,
810+ session_id : providerSessionId ,
811+ user_id : bigQueryUserId ,
812+ model_id : selectedChatModel ,
813+ reason : error instanceof Error ? error . message : String ( error ) ,
814+ } ) ;
815+ } ) as Promise < void > ,
816+ "provider_session_persist"
817+ ) ;
793818 }
794819 if ( ! providerSessionId ) {
795820 throw new Error (
@@ -827,10 +852,9 @@ export async function POST(request: Request) {
827852 ) ;
828853 }
829854
830- const vertexMessage = await buildVertexMessageFromUserMessage (
831- latestUserMessage ,
832- request . signal
833- ) ;
855+ // Await the vertex message (likely already resolved if no attachments,
856+ // or ran in parallel with createVertexSession if session was new).
857+ const vertexMessage = await vertexMessagePromise ;
834858 const allowTableChartFallback =
835859 shouldAllowTableChartFallback ( latestUserMessage ) ;
836860 const initialProviderSessionId : string = providerSessionId ;
0 commit comments