Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ This project _loosely_ adheres to [Semantic Versioning](https://semver.org/spec/
- Added ability to handle multiple agents in launcher. [PR#459](https://github.com/coasys/ad4m/pull/459)
- Added a way to show & add new `AgentInfo` in launcher. [PR#463](https://github.com/coasys/ad4m/pull/463)
- `ad4m-executor` binary prints capability request challenge to stdout to enable app hand-shake [PR#471](https://github.com/coasys/ad4m/pull/471)
- Add ability to select Whisper model size [PR#564](https://github.com/coasys/ad4m/pull/564)

### Changed
- Much improved ADAM Launcher setup flow [PR#440](https://github.com/coasys/ad4m/pull/440) and [PR#444](https://github.com/coasys/ad4m/pull/444):
Expand Down
82 changes: 72 additions & 10 deletions rust-executor/src/ai_service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ impl AIService {
match model.model_type {
ModelType::Llm => self.spawn_llm_model(model, None).await?,
ModelType::Embedding => self.spawn_embedding_model(model).await,
ModelType::Transcription => Self::load_transcriber_model(&model).await,
ModelType::Transcription => Self::load_transcriber_model(model.id.clone()).await,
};
Ok(())
}
Expand Down Expand Up @@ -868,7 +868,66 @@ impl AIService {
// Whisper / Transcription
// -------------------------------------

pub async fn open_transcription_stream(&self, _model_id: String) -> Result<String> {
/// Map a whisper model identifier string (e.g. "whisper_small") to the
/// corresponding `WhisperSource` variant.
///
/// Returns an error when the identifier does not name a known whisper model.
fn whisper_string_to_model(whisper_string: String) -> Result<WhisperSource> {
    let source = match whisper_string.as_str() {
        "whisper_tiny" => WhisperSource::Tiny,
        "whisper_tiny_quantized" => WhisperSource::QuantizedTiny,
        "whisper_tiny_en" => WhisperSource::TinyEn,
        "whisper_tiny_en_quantized" => WhisperSource::QuantizedTinyEn,
        "whisper_base" => WhisperSource::Base,
        "whisper_base_en" => WhisperSource::BaseEn,
        "whisper_small" => WhisperSource::Small,
        "whisper_small_en" => WhisperSource::SmallEn,
        "whisper_medium" => WhisperSource::Medium,
        "whisper_medium_en" => WhisperSource::MediumEn,
        "whisper_medium_en_quantized_distil" => WhisperSource::QuantizedDistilMediumEn,
        "whisper_large" => WhisperSource::Large,
        "whisper_large_v2" => WhisperSource::LargeV2,
        "whisper_distil_medium_en" => WhisperSource::DistilMediumEn,
        "whisper_distil_large_v2" => WhisperSource::DistilLargeV2,
        "whisper_distil_large_v3" => WhisperSource::DistilLargeV3,
        "whisper_distil_large_v3_quantized" => WhisperSource::QuantizedDistilLargeV3,
        "whisper_large_v3_turbo_quantized" => WhisperSource::QuantizedLargeV3Turbo,
        // Unknown identifier: surface it in the error so the caller can see
        // exactly what string failed to resolve.
        _ => return Err(anyhow!("Unknown whisper model: {}", whisper_string)),
    };
    Ok(source)
}

/// Resolve a `WhisperSource` for `model_id`.
///
/// Resolution order:
/// 1. Treat `model_id` itself as a whisper size string (e.g. "whisper_small").
/// 2. Look the model up in the DB by ID and use its local file name
///    (erroring if the DB entry is not a transcription model).
/// 3. Fall back to the first transcription model in the DB that has a
///    local config.
/// 4. Default to `WhisperSource::Tiny`.
fn get_whisper_model_size(model_id: String) -> Result<WhisperSource> {
    // Try to treat string as model size string first
    if let Ok(model) = Self::whisper_string_to_model(model_id.clone()) {
        return Ok(model);
    }

    // Try to get model from DB by ID
    if let Ok(Some(model)) = Ad4mDb::with_global_instance(|db| db.get_model(model_id.clone())) {
        if model.model_type != ModelType::Transcription {
            return Err(anyhow!("Model '{}' is not a transcription model", model_id));
        }
        // Use filename from local model config
        if let Some(local) = model.local {
            return Self::whisper_string_to_model(local.file_name);
        }
    }

    // If nothing above works, use the first transcription model in the DB
    // that actually has a local config. (Previously the scan stopped at the
    // first transcription model even when it had no local config, silently
    // falling through to the Tiny default.)
    if let Ok(models) = Ad4mDb::with_global_instance(|db| db.get_models()) {
        if let Some(local) = models
            .into_iter()
            .filter(|m| m.model_type == ModelType::Transcription)
            .find_map(|m| m.local)
        {
            return Self::whisper_string_to_model(local.file_name);
        }
    }

    // Default to tiny if nothing found
    Ok(WhisperSource::Tiny)
}

pub async fn open_transcription_stream(&self, model_id: String) -> Result<String> {
let model_size = Self::get_whisper_model_size(model_id)?;
let stream_id = uuid::Uuid::new_v4().to_string();
let stream_id_clone = stream_id.clone();
let (samples_tx, samples_rx) = futures_channel::mpsc::unbounded::<Vec<f32>>();
Expand All @@ -881,7 +940,7 @@ impl AIService {

rt.block_on(async {
let maybe_model = WhisperBuilder::default()
.with_source(WHISPER_MODEL)
.with_source(model_size)
.with_device(Self::new_candle_device())
.build()
.await;
Expand Down Expand Up @@ -974,22 +1033,25 @@ impl AIService {
}
}

async fn load_transcriber_model(model: &crate::types::Model) {
let id = &model.id;
publish_model_status(id.clone(), 0.0, "Loading", false, false).await;
async fn load_transcriber_model(model_id: String) {
publish_model_status(model_id.clone(), 0.0, "Loading", false, false).await;

let model_size = Self::get_whisper_model_size(model_id.clone())
.ok()
.unwrap_or(WHISPER_MODEL);

let _ = WhisperBuilder::default()
.with_source(WHISPER_MODEL)
.with_source(model_size)
.with_device(Self::new_candle_device())
.build_with_loading_handler({
let name = id.clone();
let name = model_id.clone();
move |progress| {
tokio::spawn(handle_progress(name.clone(), progress));
}
})
.await;

publish_model_status(id.clone(), 100.0, "Loaded", true, false).await;
publish_model_status(model_id.clone(), 100.0, "Loaded", true, false).await;
}

pub async fn update_model(&self, model_id: String, model_config: ModelInput) -> Result<()> {
Expand Down Expand Up @@ -1060,7 +1122,7 @@ impl AIService {
// TODO: Handle embedding model updates
}
ModelType::Transcription => {
// TODO: Handle transcription model updates
Self::load_transcriber_model(updated_model.id.clone()).await;
}
}

Expand Down
162 changes: 100 additions & 62 deletions ui/src/components/Login.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,13 @@ const Login = () => {
}

async function saveModels() {
let whisperModel = "whisper_small";
// add llm model
if (aiMode !== "None") {
const llm = { name: "LLM Model 1", modelType: "LLM" } as ModelInput;
if (aiMode === "Local") {
llm.local = {
fileName: "Qwen2.5.1-Coder-7B-Instruct",
tokenizerSource: "",
modelParameters: "",
};
llm.local = { fileName: "Qwen2.5.1-Coder-7B-Instruct" };
whisperModel = "whisper_large_v3_turbo_quantized";
} else {
llm.api = {
baseUrl: apiUrl,
Expand All @@ -179,23 +177,16 @@ const Login = () => {
// add embedding model
client!.ai.addModel({
name: "bert",
local: {
fileName: "bert",
tokenizerSource: "",
modelParameters: "",
},
local: { fileName: "bert" },
modelType: "EMBEDDING",
});
// add transcription model
// add medium whisper model
client!.ai.addModel({
name: "Transcription Model 1",
local: {
fileName: "whisper",
tokenizerSource: "",
modelParameters: "",
},
name: "Whisper",
local: { fileName: whisperModel },
modelType: "TRANSCRIPTION",
});

setCurrentIndex(6);
}

Expand Down Expand Up @@ -493,39 +484,6 @@ const Login = () => {
Is your computer capable of running Large Language Models
locally?
</j-text>
<j-text>
Regardless of your choice here, we will always download and use
small AI models (such as{" "}
<a
onClick={() =>
open("https://huggingface.co/openai/whisper-small")
}
style={{ cursor: "pointer" }}
>
Whisper small
</a>{" "}
and an{" "}
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Embedding model
</a>
) to handle basic tasks on all devices.
<br></br>
<br></br>
When it comes to LLMs, it depends on you having either an Apple
Silicon mac (M1 or better) or an nVidia GPU (with enough vRAM).
<br></br>
<br></br>
Alternatively, you can configure ADAM to out-source LLM tasks to a
remote API. If you're unsure, you can select "None" now and add,
remove or change model settings later-on in the <b>AI tab</b>.
</j-text>
</j-flex>

<j-flex gap="400" style={{ padding: "0 10px" }}>
Expand Down Expand Up @@ -594,18 +552,43 @@ const Login = () => {
style={{ marginTop: 30, maxWidth: 350 }}
>
<j-text>
This will download{" "}
<a
onClick={() =>
open(
"https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF"
)
}
style={{ cursor: "pointer" }}
>
Qwen2.5 Coder 7B Instruct
</a>
{" "}(4.68GB)
This will download
<p>
<a
onClick={() =>
open(
"https://huggingface.co/bartowski/Qwen2.5.1-Coder-7B-Instruct-GGUF"
)
}
style={{ cursor: "pointer" }}
>
Qwen2.5 Coder 7B Instruct (4.68GB)
</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/openai/whisper-large-v3-turbo"
)
}
style={{ cursor: "pointer" }}
>Whisper large v3 turbo (809MB)</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Bert Embedding model (90MB)
</a>
</p>
</j-text>
</j-flex>
)}
Expand Down Expand Up @@ -690,6 +673,33 @@ const Login = () => {
)}
</j-flex>

<j-text>
This will still download
<p>
<a
onClick={() =>
open(
"https://huggingface.co/openai/whisper-small"
)
}
style={{ cursor: "pointer" }}
>Whisper small (244MB)</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Bert Embedding model (90MB)
</a>
</p>
</j-text>

{apiValid && (
<j-flex direction="column" a="center" gap="400">
<j-flex a="center" gap="400">
Expand Down Expand Up @@ -730,6 +740,7 @@ const Login = () => {
</j-text>
)}
</j-flex>

)}

{(!apiModelValid || !apiValid) && (
Expand Down Expand Up @@ -768,6 +779,33 @@ const Login = () => {
Selecting <b>None</b> here and not having any LLM configured
might result in new Synergy features not working in Flux...
</j-text>

<j-text>
This will still download
<p>
<a
onClick={() =>
open(
"https://huggingface.co/openai/whisper-small"
)
}
style={{ cursor: "pointer" }}
>Whisper small (244MB)</a>
</p>
and
<p>
<a
onClick={() =>
open(
"https://huggingface.co/Snowflake/snowflake-arctic-embed-xs"
)
}
style={{ cursor: "pointer" }}
>
Bert Embedding model (90MB)
</a>
</p>
</j-text>
</j-flex>
)}

Expand Down
5 changes: 5 additions & 0 deletions ui/src/components/ModelCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ export default function ModelCard(props: {
</j-button>
</j-flex>
)}
{modelType == "TRANSCRIPTION" && (
<j-button variant="subtle" size="sm" onClick={editModel}>
<j-icon name="pencil-square" size="sm" />
</j-button>
)}
</j-flex>
</j-box>
<j-flex direction="column" gap="400">
Expand Down
Loading