From 718f8429a124f75db4a576c603f1155b0bca3111 Mon Sep 17 00:00:00 2001
From: Nicolas Luck <nicolas@lucksus.eu>
Date: Wed, 12 Feb 2025 17:27:36 +0100
Subject: [PATCH 1/6] Select Whisper model size from model_id / size-string

---
 rust-executor/src/ai_service/mod.rs | 80 +++++++++++++++++++++++++----
 1 file changed, 70 insertions(+), 10 deletions(-)
diff --git a/rust-executor/src/ai_service/mod.rs b/rust-executor/src/ai_service/mod.rs
index 583f056e0..feafea234 100644
--- a/rust-executor/src/ai_service/mod.rs
+++ b/rust-executor/src/ai_service/mod.rs
@@ -212,7 +212,7 @@ impl AIService {
         match model.model_type {
             ModelType::Llm => self.spawn_llm_model(model, None).await?,
             ModelType::Embedding => self.spawn_embedding_model(model).await,
-            ModelType::Transcription => Self::load_transcriber_model(&model).await,
+            ModelType::Transcription => Self::load_transcriber_model(model.id.clone()).await,
         };
         Ok(())
     }
@@ -868,7 +868,66 @@ impl AIService {
     // Whisper / Transcription
     // -------------------------------------
 
-    pub async fn open_transcription_stream(&self, _model_id: String) -> Result<String> {
+
+    fn whisper_string_to_model(whisper_string: String) -> Result<WhisperSource> {
+        match whisper_string.as_str() {
+            "whisper_tiny" => Ok(WhisperSource::Tiny),
+            "whisper_tiny_quantized" => Ok(WhisperSource::QuantizedTiny),
+            "whisper_tiny_en" => Ok(WhisperSource::TinyEn),
+            "whisper_tiny_en_quantized" => Ok(WhisperSource::QuantizedTinyEn),
+            "whisper_base" => Ok(WhisperSource::Base),
+            "whisper_base_en" => Ok(WhisperSource::BaseEn),
+            "whisper_small" => Ok(WhisperSource::Small),
+            "whisper_small_en" => Ok(WhisperSource::SmallEn),
+            "whisper_medium" => Ok(WhisperSource::Medium),
+            "whisper_medium_en" => Ok(WhisperSource::MediumEn),
+            "whisper_medium_en_quantized_distil" => Ok(WhisperSource::QuantizedDistilMediumEn),
+            "whisper_large" => Ok(WhisperSource::Large),
+            "whisper_large_v2" => Ok(WhisperSource::LargeV2),
+            "whisper_distil_medium_en" => Ok(WhisperSource::DistilMediumEn),
+            "whisper_distil_large_v2" => Ok(WhisperSource::DistilLargeV2),
+            "whisper_distil_large_v3"  => Ok(WhisperSource::DistilLargeV3),
+            "whisper_distil_large_v3_quantized" => Ok(WhisperSource::QuantizedDistilLargeV3),
+            "whisper_large_v3_turbo_quantized" => Ok(WhisperSource::QuantizedLargeV3Turbo),
+            _ => Err(anyhow!("Unknown whisper model: {}", whisper_string))
+        }
+    }
+
+    fn get_whisper_model_size(model_id: String) -> Result<WhisperSource> {
+        // Try to treat string as model size string first
+        if let Ok(model) = Self::whisper_string_to_model(model_id.clone()) {
+            return Ok(model);
+        }
+
+        // Try to get model from DB by ID
+        if let Ok(Some(model)) = Ad4mDb::with_global_instance(|db| db.get_model(model_id.clone())) {
+            if model.model_type != ModelType::Transcription {
+                return Err(anyhow!("Model '{}' is not a transcription model", model_id));
+            }
+            // Use filename from local model config
+            if let Some(local) = model.local {
+                return Self::whisper_string_to_model(local.file_name);
+            }
+        }
+
+        // if nothing above works, see if we have a transcription model in the DB and use that
+        // Try to find first transcription model in DB
+        if let Ok(models) = Ad4mDb::with_global_instance(|db| db.get_models()) {
+            if let Some(model) = models.into_iter().find(|m| m.model_type == ModelType::Transcription) {
+                if let Some(local) = model.local {
+                    return Self::whisper_string_to_model(local.file_name);
+                }
+            }
+        }
+
+        // Default to tiny if nothing found
+        Ok(WhisperSource::Tiny)
+        
+    }
+
+
+    pub async fn open_transcription_stream(&self, model_id: String) -> Result<String> {
+        let model_size = Self::get_whisper_model_size(model_id)?;
         let stream_id = uuid::Uuid::new_v4().to_string();
         let stream_id_clone = stream_id.clone();
         let (samples_tx, samples_rx) = futures_channel::mpsc::unbounded::<Vec<f32>>();
@@ -881,7 +940,7 @@ impl AIService {
 
             rt.block_on(async {
                 let maybe_model = WhisperBuilder::default()
-                    .with_source(WHISPER_MODEL)
+                    .with_source(model_size)
                     .with_device(Self::new_candle_device())
                     .build()
                     .await;
@@ -974,22 +1033,23 @@ impl AIService {
         }
     }
 
-    async fn load_transcriber_model(model: &crate::types::Model) {
-        let id = &model.id;
-        publish_model_status(id.clone(), 0.0, "Loading", false, false).await;
+    async fn load_transcriber_model(model_id: String) {
+        publish_model_status(model_id.clone(), 0.0, "Loading", false, false).await;
+
+        let model_size = Self::get_whisper_model_size(model_id.clone()).ok().unwrap_or(WHISPER_MODEL);
 
         let _ = WhisperBuilder::default()
-            .with_source(WHISPER_MODEL)
+            .with_source(model_size)
             .with_device(Self::new_candle_device())
             .build_with_loading_handler({
-                let name = id.clone();
+                let name = model_id.clone();
                 move |progress| {
                     tokio::spawn(handle_progress(name.clone(), progress));
                 }
             })
             .await;
 
-        publish_model_status(id.clone(), 100.0, "Loaded", true, false).await;
+        publish_model_status(model_id.clone(), 100.0, "Loaded", true, false).await;
     }
 
     pub async fn update_model(&self, model_id: String, model_config: ModelInput) -> Result<()> {
@@ -1060,7 +1120,7 @@ impl AIService {
                 // TODO: Handle embedding model updates
             }
             ModelType::Transcription => {
-                // TODO: Handle transcription model updates
+                Self::load_transcriber_model(updated_model.id.clone()).await;
             }
         }
 

From f06bea04bbadf7521bc26d3c5e73930756056c12 Mon Sep 17 00:00:00 2001
From: Nicolas Luck <nicolas@lucksus.eu>
Date: Wed, 12 Feb 2025 17:27:59 +0100
Subject: [PATCH 2/6] UI for whisper size selection

---
 ui/src/components/ModelCard.tsx  |  5 +++++
 ui/src/components/ModelModal.tsx | 25 ++++++++++++++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/ui/src/components/ModelCard.tsx b/ui/src/components/ModelCard.tsx
index a745f1aed..50e299a9f 100644
--- a/ui/src/components/ModelCard.tsx
+++ b/ui/src/components/ModelCard.tsx
@@ -78,6 +78,11 @@ export default function ModelCard(props: {
               </j-button>
             </j-flex>
           )}
+          {modelType == "TRANSCRIPTION" && (
+            <j-button variant="subtle" size="sm" onClick={editModel}>
+              <j-icon name="pencil-square" size="sm" />
+            </j-button>
+          )}
         </j-flex>
       </j-box>
       <j-flex direction="column" gap="400">
diff --git a/ui/src/components/ModelModal.tsx b/ui/src/components/ModelModal.tsx
index 518a76d26..4bf723e5a 100644
--- a/ui/src/components/ModelModal.tsx
+++ b/ui/src/components/ModelModal.tsx
@@ -29,7 +29,26 @@ const llmModels = [
   "llama_34b_code",
   "llama_70b",
 ];
-const transcriptionModels = ["whisper"];
+const transcriptionModels = [
+  "whisper_tiny",
+  "whisper_tiny_quantized",
+  "whisper_tiny_en",
+  "whisper_tiny_en_quantized",
+  "whisper_base",
+  "whisper_base_en",
+  "whisper_small",
+  "whisper_small_en",
+  "whisper_medium",
+  "whisper_medium_en",
+  "whisper_medium_en_quantized_distil",
+  "whisper_large",
+  "whisper_large_v2",
+  "whisper_distil_medium_en",
+  "whisper_distil_large_v2",
+  "whisper_distil_large_v3",
+  "whisper_distil_large_v3_quantized",
+  "whisper_large_v3_turbo_quantized"
+];
 const embeddingModels = ["bert"];
 
 export default function ModelModal(props: { close: () => void; oldModel?: any }) {
@@ -234,7 +253,7 @@ export default function ModelModal(props: { close: () => void; oldModel?: any })
         setNewModel(oldModel.local.fileName);
       } else {
         setNewModels(transcriptionModels);
-        setNewModel(oldModel.local.fileName);
+        setNewModel(oldModel.local.fileName || "whisper_small");
       }
 
       if (oldModel.api) {
@@ -298,7 +317,7 @@ export default function ModelModal(props: { close: () => void; oldModel?: any })
                             setNewModel("bert");
                           } else {
                             setNewModels(transcriptionModels);
-                            setNewModel("whisper");
+                            setNewModel("whisper_small");
                           }
                           closeMenu("ai-types");
                         }}

From d94b4753b6f67e806d231a9303920914a2108bb9 Mon Sep 17 00:00:00 2001
From: Nicolas Luck <nicolas@lucksus.eu>
Date: Wed, 12 Feb 2025 17:28:21 +0100
Subject: [PATCH 3/6] =?UTF-8?q?Bugfix:=20don=E2=80=99t=20set=20default=20m?=
 =?UTF-8?q?odel=20when=20other=20type=20of=20model=20gets=20changed?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ui/src/components/ModelModal.tsx | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ui/src/components/ModelModal.tsx b/ui/src/components/ModelModal.tsx
index 4bf723e5a..040b421b8 100644
--- a/ui/src/components/ModelModal.tsx
+++ b/ui/src/components/ModelModal.tsx
@@ -211,8 +211,10 @@ export default function ModelModal(props: { close: () => void; oldModel?: any })
       else {
         const newModelId = await client!.ai.addModel(model);
         // if no default LLM set, mark new model as default
-        const defaultLLM = await client!.ai.getDefaultModel("LLM");
-        if (!defaultLLM) client!.ai.setDefaultModel("LLM", newModelId);
+        if (newModelType === "LLM") {
+          const defaultLLM = await client!.ai.getDefaultModel("LLM");
+          if (!defaultLLM) client!.ai.setDefaultModel("LLM", newModelId);
+        }
       }
       close();
     }

From be943787455397625ee02dd469329bdb090828f0 Mon Sep 17 00:00:00 2001
From: Nicolas Luck <nicolas@lucksus.eu>
Date: Wed, 12 Feb 2025 18:38:25 +0100
Subject: [PATCH 4/6] fmt

---
 rust-executor/src/ai_service/mod.rs | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/rust-executor/src/ai_service/mod.rs b/rust-executor/src/ai_service/mod.rs
index feafea234..2313f7c52 100644
--- a/rust-executor/src/ai_service/mod.rs
+++ b/rust-executor/src/ai_service/mod.rs
@@ -868,7 +868,6 @@ impl AIService {
     // Whisper / Transcription
     // -------------------------------------
 
-
     fn whisper_string_to_model(whisper_string: String) -> Result<WhisperSource> {
         match whisper_string.as_str() {
             "whisper_tiny" => Ok(WhisperSource::Tiny),
@@ -886,10 +885,10 @@ impl AIService {
             "whisper_large_v2" => Ok(WhisperSource::LargeV2),
             "whisper_distil_medium_en" => Ok(WhisperSource::DistilMediumEn),
             "whisper_distil_large_v2" => Ok(WhisperSource::DistilLargeV2),
-            "whisper_distil_large_v3"  => Ok(WhisperSource::DistilLargeV3),
+            "whisper_distil_large_v3" => Ok(WhisperSource::DistilLargeV3),
             "whisper_distil_large_v3_quantized" => Ok(WhisperSource::QuantizedDistilLargeV3),
             "whisper_large_v3_turbo_quantized" => Ok(WhisperSource::QuantizedLargeV3Turbo),
-            _ => Err(anyhow!("Unknown whisper model: {}", whisper_string))
+            _ => Err(anyhow!("Unknown whisper model: {}", whisper_string)),
         }
     }
 
@@ -913,7 +912,10 @@ impl AIService {
         // if nothing above works, see if we have a transcription model in the DB and use that
         // Try to find first transcription model in DB
         if let Ok(models) = Ad4mDb::with_global_instance(|db| db.get_models()) {
-            if let Some(model) = models.into_iter().find(|m| m.model_type == ModelType::Transcription) {
+            if let Some(model) = models
+                .into_iter()
+                .find(|m| m.model_type == ModelType::Transcription)
+            {
                 if let Some(local) = model.local {
                     return Self::whisper_string_to_model(local.file_name);
                 }
@@ -922,10 +924,8 @@ impl AIService {
 
         // Default to tiny if nothing found
         Ok(WhisperSource::Tiny)
-        
     }
 
-
     pub async fn open_transcription_stream(&self, model_id: String) -> Result<String> {
         let model_size = Self::get_whisper_model_size(model_id)?;
         let stream_id = uuid::Uuid::new_v4().to_string();
@@ -1036,7 +1036,9 @@ impl AIService {
     async fn load_transcriber_model(model_id: String) {
         publish_model_status(model_id.clone(), 0.0, "Loading", false, false).await;
 
-        let model_size = Self::get_whisper_model_size(model_id.clone()).ok().unwrap_or(WHISPER_MODEL);
+        let model_size = Self::get_whisper_model_size(model_id.clone())
+            .ok()
+            .unwrap_or(WHISPER_MODEL);
 
         let _ = WhisperBuilder::default()
             .with_source(model_size)

From 9c82cf9fbade40a691e41eff3a99aeebcb807109 Mon Sep 17 00:00:00 2001
From: Nicolas Luck <nicolas@lucksus.eu>
Date: Wed, 12 Feb 2025 18:49:52 +0100
Subject: [PATCH 5/6] Fix and update AI setup page

---
 ui/src/components/Login.tsx | 162 ++++++++++++++++++++++--------------
 1 file changed, 100 insertions(+), 62 deletions(-)

diff --git a/ui/src/components/Login.tsx b/ui/src/components/Login.tsx
index 0835b5f4c..11f1086c5 100644
--- a/ui/src/components/Login.tsx
+++ b/ui/src/components/Login.tsx
@@ -155,15 +155,13 @@ const Login = () => {
   }
 
   async function saveModels() {
+    let whisperModel = "whisper_small";
     // add llm model
     if (aiMode !== "None") {
       const llm = { name: "LLM Model 1", modelType: "LLM" } as ModelInput;
       if (aiMode === "Local") {
-        llm.local = {
-          fileName: "Qwen2.5.1-Coder-7B-Instruct",
-          tokenizerSource: "",
-          modelParameters: "",
-        };
+        llm.local = { fileName: "Qwen2.5.1-Coder-7B-Instruct" };
+        whisperModel = "whisper_large_v3_turbo_quantized";
       } else {
         llm.api = {
           baseUrl: apiUrl,
@@ -179,23 +177,16 @@ const Login = () => {
     // add embedding model
     client!.ai.addModel({
       name: "bert",
-      local: {
-        fileName: "bert",
-        tokenizerSource: "",
-        modelParameters: "",
-      },
+      local: { fileName: "bert" },
       modelType: "EMBEDDING",
     });
-    // add transcription model
+    // add whisper transcription model (whisper_small by default, quantized large-v3-turbo when the LLM runs locally)
     client!.ai.addModel({
-      name: "Transcription Model 1",
-      local: {
-        fileName: "whisper",
-        tokenizerSource: "",
-        modelParameters: "",
-      },
+      name: "Whisper",
+      local: { fileName: whisperModel },
       modelType: "TRANSCRIPTION",
     });
+
     setCurrentIndex(6);
   }
 
@@ -493,39 +484,6 @@ const Login = () => {
               Is your computer capabale of running Large Language Models
               locally?
             </j-text>
-            <j-text>
-              Regardless of your choice here, we will always download and use
-              small AI models (such as{" "}
-              <a
-                onClick={() =>
-                  open("https://huggingface.co/openai/whisper-small")
-                }
-                style={{ cursor: "pointer" }}
-              >
-                Whisper small
-              </a>{" "}
-              and an{" "}
-              <a
-                onClick={() =>
-                  open(
-                    "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
-                  )
-                }
-                style={{ cursor: "pointer" }}
-              >
-                Embedding model
-              </a>
-              ) to handle basic tasks on all devices.
-              <br></br>
-              <br></br>
-              When it comes to LLMs, it depends on you having either an Apple
-              Silicon mac (M1 or better) or an nVidia GPU (with enough vRAM).
-              <br></br>
-              <br></br>
-              Alternatively, you can configure ADAM to out-source LLM tasks to a
-              remote API. If you unsure, you can select "None" now and add,
-              remove or change model settings later-on in the <b>AI tab</b>.
-            </j-text>
           </j-flex>
 
           <j-flex gap="400" style={{ padding: "0 10px" }}>
@@ -594,18 +552,43 @@ const Login = () => {
               style={{ marginTop: 30, maxWidth: 350 }}
             >
               <j-text>
-                This will download{" "}
-                <a
-                  onClick={() =>
-                    open(
-                      "https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF"
-                    )
-                  }
-                  style={{ cursor: "pointer" }}
-                >
-                  Qwen2.5 Coder 7B Instruct
-                </a>
-                {" "}(4.68GB)
+                This will download
+                <p>
+                  <a
+                    onClick={() =>
+                      open(
+                        "https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF"
+                      )
+                    }
+                    style={{ cursor: "pointer" }}
+                  >
+                    Qwen2.5 Coder 7B Instruct (4.68GB)
+                  </a>
+                </p>
+                and
+                <p>
+                  <a
+                    onClick={() =>
+                      open(
+                        "https://huggingface.co/openai/whisper-large-v3-turbo"
+                      )
+                    }
+                    style={{ cursor: "pointer" }}
+                  >Whisper large v3 turbo (809MB)</a>
+                </p>
+                and
+                <p>
+                  <a
+                    onClick={() =>
+                      open(
+                        "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
+                      )
+                    }
+                    style={{ cursor: "pointer" }}
+                  >
+                    Bert Embedding model (90MB)
+                  </a>
+                </p>
               </j-text>
             </j-flex>
           )}
@@ -690,6 +673,33 @@ const Login = () => {
                   )}
                 </j-flex>
 
+                <j-text>
+                  This will still download
+                  <p>
+                    <a
+                      onClick={() =>
+                        open(
+                          "https://huggingface.co/openai/whisper-small"
+                        )
+                      }
+                      style={{ cursor: "pointer" }}
+                    >Whisper small (244MB)</a>
+                  </p>
+                  and
+                  <p>
+                    <a
+                      onClick={() =>
+                        open(
+                          "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
+                        )
+                      }
+                      style={{ cursor: "pointer" }}
+                    >
+                      Bert Embedding model (90MB)
+                    </a>
+                  </p>
+                </j-text>
+
                 {apiValid && (
                   <j-flex direction="column" a="center" gap="400">
                     <j-flex a="center" gap="400">
@@ -730,6 +740,7 @@ const Login = () => {
                       </j-text>
                     )}
                   </j-flex>
+                  
                 )}
 
                 {(!apiModelValid || !apiValid) && (
@@ -768,6 +779,33 @@ const Login = () => {
                 Selecting <b>None</b> here and not having any LLM configured
                 might result in new Synergy features not working in Flux...
               </j-text>
+
+              <j-text>
+                  This will still download
+                  <p>
+                    <a
+                      onClick={() =>
+                        open(
+                          "https://huggingface.co/openai/whisper-small"
+                        )
+                      }
+                      style={{ cursor: "pointer" }}
+                    >Whisper small (244MB)</a>
+                  </p>
+                  and
+                  <p>
+                    <a
+                      onClick={() =>
+                        open(
+                          "https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
+                        )
+                      }
+                      style={{ cursor: "pointer" }}
+                    >
+                      Bert Embedding model (90MB)
+                    </a>
+                  </p>
+                </j-text>
             </j-flex>
           )}
 

From 6dd8e90ca2a7148ffb443707c2c27cf3c9be4701 Mon Sep 17 00:00:00 2001
From: Nicolas Luck <nicolas@lucksus.eu>
Date: Wed, 12 Feb 2025 18:51:17 +0100
Subject: [PATCH 6/6] changelog

---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index e9624cf4f..e1a6bee3f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -71,6 +71,7 @@ This project _loosely_ adheres to [Semantic Versioning](https://semver.org/spec/
  - Added ability to handle multiple agents in launcher. [PR#459](https://github.com/coasys/ad4m/pull/459)
  - Added a way to show & add new `AgentInfo` in launcher. [PR#463](https://github.com/coasys/ad4m/pull/463)
  - `ad4m-executor` binary prints capability request challange to stdout to enable app hand-shake [PR#471](https://github.com/coasys/ad4m/pull/471)
+ - Added ability to select Whisper model size [PR#564](https://github.com/coasys/ad4m/pull/564)
 
 ### Changed
  - Much improved ADAM Launcher setup flow [PR#440](https://github.com/coasys/ad4m/pull/440) and [PR#444](https://github.com/coasys/ad4m/pull/444):