@@ -298,7 +298,7 @@ def transcribe(
298298 language_detection_segments : int = 1 ,
299299 ) -> Union [
300300 Tuple [Iterable [Segment ], TranscriptionInfo ],
301- List [ Tuple [List [Segment ], TranscriptionInfo ] ],
301+ Tuple [List [List [ Segment ]] , TranscriptionInfo ],
302302 ]:
303303 """Transcribe audio in chunks in batched fashion and return with language info.
304304
@@ -379,9 +379,9 @@ def transcribe(
379379 - a generator over transcribed segments
380380 - an instance of TranscriptionInfo
381381
382- For multiple audios: A list of tuples, each containing :
383- - a list of transcribed segments
384- - an instance of TranscriptionInfo
382+ For multiple audios: A tuple with :
383+ - a list of segment lists (one per audio)
384+ - an instance of TranscriptionInfo (duration and duration_after_vad are taken from the first audio only)
385385 """
386386
387387 is_batch = isinstance (audio , list )
@@ -595,7 +595,7 @@ def transcribe(
595595 clip_timestamps_provided = clip_timestamps is not None
596596
597597 if is_batch :
598- grouped_segments = self ._batched_segments_generator_grouped (
598+ segments = self ._batched_segments_generator_grouped (
599599 all_features ,
600600 tokenizer ,
601601 all_chunks_metadata ,
@@ -605,20 +605,17 @@ def transcribe(
605605 log_progress ,
606606 )
607607
608- results = []
609- for i , audio_segments in enumerate (grouped_segments ):
610- info = TranscriptionInfo (
611- language = language ,
612- language_probability = language_probability ,
613- duration = audio_infos [i ]["duration" ],
614- duration_after_vad = audio_infos [i ]["duration_after_vad" ],
615- transcription_options = options ,
616- vad_options = _vad_parameters ,
617- all_language_probs = all_language_probs ,
618- )
619- results .append ((audio_segments , info ))
608+ info = TranscriptionInfo (
609+ language = language ,
610+ language_probability = language_probability ,
611+ duration = audio_infos [0 ]["duration" ],
612+ duration_after_vad = audio_infos [0 ]["duration_after_vad" ],
613+ transcription_options = options ,
614+ vad_options = _vad_parameters ,
615+ all_language_probs = all_language_probs ,
616+ )
620617
621- return results
618+ return segments , info
622619 else :
623620 info = TranscriptionInfo (
624621 language = language ,
0 commit comments