From 718f8429a124f75db4a576c603f1155b0bca3111 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Wed, 12 Feb 2025 17:27:36 +0100 Subject: [PATCH 1/6] Select Whisper model size from model_id / size-string --- rust-executor/src/ai_service/mod.rs | 80 +++++++++++++++++++++++++---- 1 file changed, 70 insertions(+), 10 deletions(-) diff --git a/rust-executor/src/ai_service/mod.rs b/rust-executor/src/ai_service/mod.rs index 583f056e0..feafea234 100644 --- a/rust-executor/src/ai_service/mod.rs +++ b/rust-executor/src/ai_service/mod.rs @@ -212,7 +212,7 @@ impl AIService { match model.model_type { ModelType::Llm => self.spawn_llm_model(model, None).await?, ModelType::Embedding => self.spawn_embedding_model(model).await, - ModelType::Transcription => Self::load_transcriber_model(&model).await, + ModelType::Transcription => Self::load_transcriber_model(model.id.clone()).await, }; Ok(()) } @@ -868,7 +868,66 @@ impl AIService { // Whisper / Transcription // ------------------------------------- - pub async fn open_transcription_stream(&self, _model_id: String) -> Result { + + fn whisper_string_to_model(whisper_string: String) -> Result { + match whisper_string.as_str() { + "whisper_tiny" => Ok(WhisperSource::Tiny), + "whisper_tiny_quantized" => Ok(WhisperSource::QuantizedTiny), + "whisper_tiny_en" => Ok(WhisperSource::TinyEn), + "whisper_tiny_en_quantized" => Ok(WhisperSource::QuantizedTinyEn), + "whisper_base" => Ok(WhisperSource::Base), + "whisper_base_en" => Ok(WhisperSource::BaseEn), + "whisper_small" => Ok(WhisperSource::Small), + "whisper_small_en" => Ok(WhisperSource::SmallEn), + "whisper_medium" => Ok(WhisperSource::Medium), + "whisper_medium_en" => Ok(WhisperSource::MediumEn), + "whisper_medium_en_quantized_distil" => Ok(WhisperSource::QuantizedDistilMediumEn), + "whisper_large" => Ok(WhisperSource::Large), + "whisper_large_v2" => Ok(WhisperSource::LargeV2), + "whisper_distil_medium_en" => Ok(WhisperSource::DistilMediumEn), + "whisper_distil_large_v2" => 
Ok(WhisperSource::DistilLargeV2), + "whisper_distil_large_v3" => Ok(WhisperSource::DistilLargeV3), + "whisper_distil_large_v3_quantized" => Ok(WhisperSource::QuantizedDistilLargeV3), + "whisper_large_v3_turbo_quantized" => Ok(WhisperSource::QuantizedLargeV3Turbo), + _ => Err(anyhow!("Unknown whisper model: {}", whisper_string)) + } + } + + fn get_whisper_model_size(model_id: String) -> Result { + // Try to treat string as model size string first + if let Ok(model) = Self::whisper_string_to_model(model_id.clone()) { + return Ok(model); + } + + // Try to get model from DB by ID + if let Ok(Some(model)) = Ad4mDb::with_global_instance(|db| db.get_model(model_id.clone())) { + if model.model_type != ModelType::Transcription { + return Err(anyhow!("Model '{}' is not a transcription model", model_id)); + } + // Use filename from local model config + if let Some(local) = model.local { + return Self::whisper_string_to_model(local.file_name); + } + } + + // if nothing above works, see if we have a transcription model in the DB and use that + // Try to find first transcription model in DB + if let Ok(models) = Ad4mDb::with_global_instance(|db| db.get_models()) { + if let Some(model) = models.into_iter().find(|m| m.model_type == ModelType::Transcription) { + if let Some(local) = model.local { + return Self::whisper_string_to_model(local.file_name); + } + } + } + + // Default to tiny if nothing found + Ok(WhisperSource::Tiny) + + } + + + pub async fn open_transcription_stream(&self, model_id: String) -> Result { + let model_size = Self::get_whisper_model_size(model_id)?; let stream_id = uuid::Uuid::new_v4().to_string(); let stream_id_clone = stream_id.clone(); let (samples_tx, samples_rx) = futures_channel::mpsc::unbounded::>(); @@ -881,7 +940,7 @@ impl AIService { rt.block_on(async { let maybe_model = WhisperBuilder::default() - .with_source(WHISPER_MODEL) + .with_source(model_size) .with_device(Self::new_candle_device()) .build() .await; @@ -974,22 +1033,23 @@ impl AIService { 
} } - async fn load_transcriber_model(model: &crate::types::Model) { - let id = &model.id; - publish_model_status(id.clone(), 0.0, "Loading", false, false).await; + async fn load_transcriber_model(model_id: String) { + publish_model_status(model_id.clone(), 0.0, "Loading", false, false).await; + + let model_size = Self::get_whisper_model_size(model_id.clone()).ok().unwrap_or(WHISPER_MODEL); let _ = WhisperBuilder::default() - .with_source(WHISPER_MODEL) + .with_source(model_size) .with_device(Self::new_candle_device()) .build_with_loading_handler({ - let name = id.clone(); + let name = model_id.clone(); move |progress| { tokio::spawn(handle_progress(name.clone(), progress)); } }) .await; - publish_model_status(id.clone(), 100.0, "Loaded", true, false).await; + publish_model_status(model_id.clone(), 100.0, "Loaded", true, false).await; } pub async fn update_model(&self, model_id: String, model_config: ModelInput) -> Result<()> { @@ -1060,7 +1120,7 @@ impl AIService { // TODO: Handle embedding model updates } ModelType::Transcription => { - // TODO: Handle transcription model updates + Self::load_transcriber_model(updated_model.id.clone()).await; } } From f06bea04bbadf7521bc26d3c5e73930756056c12 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Wed, 12 Feb 2025 17:27:59 +0100 Subject: [PATCH 2/6] UI for whisper size selection --- ui/src/components/ModelCard.tsx | 5 +++++ ui/src/components/ModelModal.tsx | 25 ++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/ui/src/components/ModelCard.tsx b/ui/src/components/ModelCard.tsx index a745f1aed..50e299a9f 100644 --- a/ui/src/components/ModelCard.tsx +++ b/ui/src/components/ModelCard.tsx @@ -78,6 +78,11 @@ export default function ModelCard(props: { )} + {modelType == "TRANSCRIPTION" && ( + + + + )} diff --git a/ui/src/components/ModelModal.tsx b/ui/src/components/ModelModal.tsx index 518a76d26..4bf723e5a 100644 --- a/ui/src/components/ModelModal.tsx +++ 
b/ui/src/components/ModelModal.tsx @@ -29,7 +29,26 @@ const llmModels = [ "llama_34b_code", "llama_70b", ]; -const transcriptionModels = ["whisper"]; +const transcriptionModels = [ + "whisper_tiny", + "whisper_tiny_quantized", + "whisper_tiny_en", + "whisper_tiny_en_quantized", + "whisper_base", + "whisper_base_en", + "whisper_small", + "whisper_small_en", + "whisper_medium", + "whisper_medium_en", + "whisper_medium_en_quantized_distil", + "whisper_large", + "whisper_large_v2", + "whisper_distil_medium_en", + "whisper_distil_large_v2", + "whisper_distil_large_v3", + "whisper_distil_large_v3_quantized", + "whisper_large_v3_turbo_quantized" +]; const embeddingModels = ["bert"]; export default function ModelModal(props: { close: () => void; oldModel?: any }) { @@ -234,7 +253,7 @@ export default function ModelModal(props: { close: () => void; oldModel?: any }) setNewModel(oldModel.local.fileName); } else { setNewModels(transcriptionModels); - setNewModel(oldModel.local.fileName); + setNewModel(oldModel.local.fileName || "whisper_small"); } if (oldModel.api) { @@ -298,7 +317,7 @@ export default function ModelModal(props: { close: () => void; oldModel?: any }) setNewModel("bert"); } else { setNewModels(transcriptionModels); - setNewModel("whisper"); + setNewModel("whisper_small"); } closeMenu("ai-types"); }} From d94b4753b6f67e806d231a9303920914a2108bb9 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Wed, 12 Feb 2025 17:28:21 +0100 Subject: [PATCH 3/6] =?UTF-8?q?Bugfix:=20don=E2=80=99t=20set=20default=20m?= =?UTF-8?q?odel=20when=20other=20type=20of=20model=20gets=20changed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ui/src/components/ModelModal.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ui/src/components/ModelModal.tsx b/ui/src/components/ModelModal.tsx index 4bf723e5a..040b421b8 100644 --- a/ui/src/components/ModelModal.tsx +++ b/ui/src/components/ModelModal.tsx @@ -211,8 +211,10 @@ 
export default function ModelModal(props: { close: () => void; oldModel?: any }) else { const newModelId = await client!.ai.addModel(model); // if no default LLM set, mark new model as default - const defaultLLM = await client!.ai.getDefaultModel("LLM"); - if (!defaultLLM) client!.ai.setDefaultModel("LLM", newModelId); + if (newModelType === "LLM") { + const defaultLLM = await client!.ai.getDefaultModel("LLM"); + if (!defaultLLM) client!.ai.setDefaultModel("LLM", newModelId); + } } close(); } From be943787455397625ee02dd469329bdb090828f0 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Wed, 12 Feb 2025 18:38:25 +0100 Subject: [PATCH 4/6] fmt --- rust-executor/src/ai_service/mod.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/rust-executor/src/ai_service/mod.rs b/rust-executor/src/ai_service/mod.rs index feafea234..2313f7c52 100644 --- a/rust-executor/src/ai_service/mod.rs +++ b/rust-executor/src/ai_service/mod.rs @@ -868,7 +868,6 @@ impl AIService { // Whisper / Transcription // ------------------------------------- - fn whisper_string_to_model(whisper_string: String) -> Result { match whisper_string.as_str() { "whisper_tiny" => Ok(WhisperSource::Tiny), @@ -886,10 +885,10 @@ impl AIService { "whisper_large_v2" => Ok(WhisperSource::LargeV2), "whisper_distil_medium_en" => Ok(WhisperSource::DistilMediumEn), "whisper_distil_large_v2" => Ok(WhisperSource::DistilLargeV2), - "whisper_distil_large_v3" => Ok(WhisperSource::DistilLargeV3), + "whisper_distil_large_v3" => Ok(WhisperSource::DistilLargeV3), "whisper_distil_large_v3_quantized" => Ok(WhisperSource::QuantizedDistilLargeV3), "whisper_large_v3_turbo_quantized" => Ok(WhisperSource::QuantizedLargeV3Turbo), - _ => Err(anyhow!("Unknown whisper model: {}", whisper_string)) + _ => Err(anyhow!("Unknown whisper model: {}", whisper_string)), } } @@ -913,7 +912,10 @@ impl AIService { // if nothing above works, see if we have a transcription model in the DB and use that // Try to find 
first transcription model in DB if let Ok(models) = Ad4mDb::with_global_instance(|db| db.get_models()) { - if let Some(model) = models.into_iter().find(|m| m.model_type == ModelType::Transcription) { + if let Some(model) = models + .into_iter() + .find(|m| m.model_type == ModelType::Transcription) + { if let Some(local) = model.local { return Self::whisper_string_to_model(local.file_name); } @@ -922,10 +924,8 @@ impl AIService { // Default to tiny if nothing found Ok(WhisperSource::Tiny) - } - pub async fn open_transcription_stream(&self, model_id: String) -> Result { let model_size = Self::get_whisper_model_size(model_id)?; let stream_id = uuid::Uuid::new_v4().to_string(); @@ -1036,7 +1036,9 @@ impl AIService { async fn load_transcriber_model(model_id: String) { publish_model_status(model_id.clone(), 0.0, "Loading", false, false).await; - let model_size = Self::get_whisper_model_size(model_id.clone()).ok().unwrap_or(WHISPER_MODEL); + let model_size = Self::get_whisper_model_size(model_id.clone()) + .ok() + .unwrap_or(WHISPER_MODEL); let _ = WhisperBuilder::default() .with_source(model_size) From 9c82cf9fbade40a691e41eff3a99aeebcb807109 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Wed, 12 Feb 2025 18:49:52 +0100 Subject: [PATCH 5/6] Fix and update AI setup page --- ui/src/components/Login.tsx | 162 ++++++++++++++++++++++-------------- 1 file changed, 100 insertions(+), 62 deletions(-) diff --git a/ui/src/components/Login.tsx b/ui/src/components/Login.tsx index 0835b5f4c..11f1086c5 100644 --- a/ui/src/components/Login.tsx +++ b/ui/src/components/Login.tsx @@ -155,15 +155,13 @@ const Login = () => { } async function saveModels() { + let whisperModel = "whisper_small"; // add llm model if (aiMode !== "None") { const llm = { name: "LLM Model 1", modelType: "LLM" } as ModelInput; if (aiMode === "Local") { - llm.local = { - fileName: "Qwen2.5.1-Coder-7B-Instruct", - tokenizerSource: "", - modelParameters: "", - }; + llm.local = { fileName: 
"Qwen2.5.1-Coder-7B-Instruct" }; + whisperModel = "whisper_large_v3_turbo_quantized"; } else { llm.api = { baseUrl: apiUrl, @@ -179,23 +177,16 @@ const Login = () => { // add embedding model client!.ai.addModel({ name: "bert", - local: { - fileName: "bert", - tokenizerSource: "", - modelParameters: "", - }, + local: { fileName: "bert" }, modelType: "EMBEDDING", }); - // add transcription model + // add whisper transcription model (whisper_small by default, whisper_large_v3_turbo_quantized in Local mode) client!.ai.addModel({ - name: "Transcription Model 1", - local: { - fileName: "whisper", - tokenizerSource: "", - modelParameters: "", - }, + name: "Whisper", + local: { fileName: whisperModel }, modelType: "TRANSCRIPTION", }); + setCurrentIndex(6); } @@ -493,39 +484,6 @@ const Login = () => { Is your computer capabale of running Large Language Models locally? - - Regardless of your choice here, we will always download and use - small AI models (such as{" "} - - open("https://huggingface.co/openai/whisper-small") - } - style={{ cursor: "pointer" }} - > - Whisper small - {" "} - and an{" "} - - open( - "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs" - ) - } - style={{ cursor: "pointer" }} - > - Embedding model - - ) to handle basic tasks on all devices. -

-

- When it comes to LLMs, it depends on you having either an Apple - Silicon mac (M1 or better) or an nVidia GPU (with enough vRAM). -

-

- Alternatively, you can configure ADAM to out-source LLM tasks to a - remote API. If you unsure, you can select "None" now and add, - remove or change model settings later-on in the AI tab. -
@@ -594,18 +552,43 @@ const Login = () => { style={{ marginTop: 30, maxWidth: 350 }} > - This will download{" "} - - open( - "https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF" - ) - } - style={{ cursor: "pointer" }} - > - Qwen2.5 Coder 7B Instruct - - {" "}(4.68GB) + This will download +

+ + open( + "https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF" + ) + } + style={{ cursor: "pointer" }} + > + Qwen2.5 Coder 7B Instruct (4.68GB) + +

+ and +

+ + open( + "https://huggingface.co/openai/whisper-large-v3-turbo" + ) + } + style={{ cursor: "pointer" }} + >Whisper large v3 turbo (809MB) +

+ and +

+ + open( + "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs" + ) + } + style={{ cursor: "pointer" }} + > + Bert Embedding model (90MB) + +

)} @@ -690,6 +673,33 @@ const Login = () => { )} + + This will still download +

+ + open( + "https://huggingface.co/openai/whisper-small" + ) + } + style={{ cursor: "pointer" }} + >Whisper small (244MB) +

+ and +

+ + open( + "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs" + ) + } + style={{ cursor: "pointer" }} + > + Bert Embedding model (90MB) + +

+
+ {apiValid && ( @@ -730,6 +740,7 @@ const Login = () => { )} + )} {(!apiModelValid || !apiValid) && ( @@ -768,6 +779,33 @@ const Login = () => { Selecting None here and not having any LLM configured might result in new Synergy features not working in Flux... + + + This will still download +

+ + open( + "https://huggingface.co/openai/whisper-small" + ) + } + style={{ cursor: "pointer" }} + >Whisper small (244MB) +

+ and +

+ + open( + "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs" + ) + } + style={{ cursor: "pointer" }} + > + Bert Embedding model (90MB) + +

+
)} From 6dd8e90ca2a7148ffb443707c2c27cf3c9be4701 Mon Sep 17 00:00:00 2001 From: Nicolas Luck Date: Wed, 12 Feb 2025 18:51:17 +0100 Subject: [PATCH 6/6] changelog --- CHANGELOG | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG b/CHANGELOG index e9624cf4f..e1a6bee3f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -71,6 +71,7 @@ This project _loosely_ adheres to [Semantic Versioning](https://semver.org/spec/ - Added ability to handle multiple agents in launcher. [PR#459](https://github.com/coasys/ad4m/pull/459) - Added a way to show & add new `AgentInfo` in launcher. [PR#463](https://github.com/coasys/ad4m/pull/463) - `ad4m-executor` binary prints capability request challange to stdout to enable app hand-shake [PR#471](https://github.com/coasys/ad4m/pull/471) + - Add ability to select Whisper model size [PR#564](https://github.com/coasys/ad4m/pull/564) ### Changed - Much improved ADAM Launcher setup flow [PR#440](https://github.com/coasys/ad4m/pull/440) and [PR#444](https://github.com/coasys/ad4m/pull/444):