[router] add minimax m2 reasoning parser (#13137)

slin1237 · web-flow · commit 3cdec20c6b34 · 2025-11-12T18:27:05.000+09:00
diff --git a/sgl-router/src/reasoning_parser/factory.rs b/sgl-router/src/reasoning_parser/factory.rs
@@ -10,7 +10,7 @@ use tokio::sync::Mutex;
 
 use crate::reasoning_parser::{
     parsers::{
-        BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, Qwen3Parser,
+        BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, MiniMaxParser, Qwen3Parser,
         QwenThinkingParser, Step3Parser,
     },
     traits::{ParseError, ParserConfig, ReasoningParser},
@@ -189,6 +189,9 @@ impl ParserFactory {
         // Register Step3 parser (same format as DeepSeek-R1 but separate for debugging)
         registry.register_parser("step3", || Box::new(Step3Parser::new()));
 
+        // Register MiniMax parser (prepends a <think> token to the model output)
+        registry.register_parser("minimax", || Box::new(MiniMaxParser::new()));
+
         // Register model patterns
         registry.register_pattern("deepseek-r1", "deepseek_r1");
         registry.register_pattern("qwen3-thinking", "qwen3_thinking");
@@ -198,6 +201,9 @@ impl ParserFactory {
         registry.register_pattern("glm45", "glm45");
         registry.register_pattern("kimi", "kimi");
         registry.register_pattern("step3", "step3");
+        registry.register_pattern("minimax", "minimax");
+        registry.register_pattern("minimax-m2", "minimax");
+        registry.register_pattern("mm-m2", "minimax");
 
         Self { registry }
     }
@@ -330,6 +336,17 @@ mod tests {
         assert_eq!(glm45.model_type(), "glm45");
     }
 
+    #[test]
+    fn test_minimax_model() {
+        let factory = ParserFactory::new();
+
+        // Both the `minimax-m2` id and an `mm-m2`-prefixed id must yield the MiniMax parser.
+        for model_id in ["minimax-m2", "mm-m2-chat"] {
+            let parser = factory.create(model_id).unwrap();
+            assert_eq!(parser.model_type(), "minimax");
+        }
+    }
+
     #[tokio::test]
     async fn test_pooled_parser_reuse() {
         let factory = ParserFactory::new();
diff --git a/sgl-router/src/reasoning_parser/mod.rs b/sgl-router/src/reasoning_parser/mod.rs
@@ -4,7 +4,7 @@ pub mod traits;
 
 pub use factory::{ParserFactory, ParserRegistry, PooledParser};
 pub use parsers::{
-    BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, Qwen3Parser,
+    BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, MiniMaxParser, Qwen3Parser,
     QwenThinkingParser, Step3Parser,
 };
 pub use traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
diff --git a/sgl-router/src/reasoning_parser/parsers/minimax.rs b/sgl-router/src/reasoning_parser/parsers/minimax.rs
@@ -0,0 +1,166 @@
+// MiniMax M2 specific reasoning parser.
+// This parser automatically prepends a <think> token to the text it parses,
+// similar to the Python MiniMaxAppendThinkDetector.
+
+use crate::reasoning_parser::{
+    parsers::BaseReasoningParser,
+    traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
+};
+
+/// MiniMax M2 reasoning parser.
+///
+/// Prepends a `<think>` token to one-shot input and to the first streaming chunk, then
+/// delegates to a base parser configured with `<think>` / `</think>` as delimiters.
+pub struct MiniMaxParser {
+    base: BaseReasoningParser,
+    is_first_chunk: bool,
+}
+
+impl MiniMaxParser {
+    /// Build a MiniMax M2 parser: `<think>` / `</think>` delimiters, streamed reasoning.
+    pub fn new() -> Self {
+        let base = BaseReasoningParser::new(ParserConfig {
+            think_start_token: "<think>".to_string(),
+            think_end_token: "</think>".to_string(),
+            stream_reasoning: true,
+            max_buffer_size: 65536,
+            initial_in_reasoning: false, // `<think>` is injected into the text instead
+        })
+        .with_model_type("minimax".to_string());
+        Self {
+            base,
+            is_first_chunk: true,
+        }
+    }
+}
+
+impl Default for MiniMaxParser {
+    fn default() -> Self {
+        MiniMaxParser::new() // identical to the explicit constructor
+    }
+}
+
+impl ReasoningParser for MiniMaxParser {
+    /// One-shot parse: `text` is handed to the base parser with `<think>` prepended.
+    fn detect_and_parse_reasoning(&mut self, text: &str) -> Result<ParserResult, ParseError> {
+        let modified_text = format!("<think>{}", text);
+        self.base.detect_and_parse_reasoning(&modified_text)
+    }
+
+    /// Streaming parse: only the first chunk since `new`/`reset` gets the `<think>` prefix.
+    fn parse_reasoning_streaming_incremental(
+        &mut self,
+        text: &str,
+    ) -> Result<ParserResult, ParseError> {
+        if self.is_first_chunk {
+            self.is_first_chunk = false;
+            let prefixed = format!("<think>{}", text);
+            return self.base.parse_reasoning_streaming_incremental(&prefixed);
+        }
+        self.base.parse_reasoning_streaming_incremental(text) // borrowed: no per-chunk copy
+    }
+
+    /// Resets the base parser and re-arms the first-chunk `<think>` prefix.
+    fn reset(&mut self) {
+        self.base.reset();
+        self.is_first_chunk = true;
+    }
+
+    /// Model type string, as reported by the base parser.
+    fn model_type(&self) -> &str {
+        self.base.model_type()
+    }
+
+    /// Reasoning-state flag, as reported by the base parser.
+    fn is_in_reasoning(&self) -> bool {
+        self.base.is_in_reasoning()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_minimax_append_think_oneshot() {
+        let mut minimax = MiniMaxParser::new();
+
+        // Input has no opening tag; the parser is expected to supply `<think>` itself.
+        let parsed = minimax
+            .detect_and_parse_reasoning("reasoning content</think>normal content")
+            .unwrap();
+        assert_eq!(parsed.normal_text, "normal content");
+        assert_eq!(parsed.reasoning_text, "reasoning content");
+    }
+
+    #[test]
+    fn test_minimax_without_end_token() {
+        let mut minimax = MiniMaxParser::new();
+
+        // With no `</think>` in the input, everything is expected to count as reasoning.
+        let parsed = minimax
+            .detect_and_parse_reasoning("all reasoning content")
+            .unwrap();
+        assert_eq!(parsed.normal_text, "");
+        assert_eq!(parsed.reasoning_text, "all reasoning content");
+    }
+
+    #[test]
+    fn test_minimax_streaming_first_chunk() {
+        let mut minimax = MiniMaxParser::new();
+
+        // Chunk 1: receives the implicit `<think>` prefix, so it is all reasoning.
+        let opening = minimax
+            .parse_reasoning_streaming_incremental("thinking about")
+            .unwrap();
+        assert_eq!(opening.reasoning_text, "thinking about");
+        assert_eq!(opening.normal_text, "");
+
+        // Chunk 2: no prefix is added; `</think>` closes the reasoning block.
+        let closing = minimax
+            .parse_reasoning_streaming_incremental(" the problem</think>answer")
+            .unwrap();
+        assert_eq!(closing.reasoning_text, "the problem"); // leading space is expected to be dropped
+        assert_eq!(closing.normal_text, "answer");
+    }
+
+    #[test]
+    fn test_minimax_reset() {
+        let mut minimax = MiniMaxParser::new();
+
+        // Consume the first-chunk prefix once.
+        let before_reset = minimax
+            .parse_reasoning_streaming_incremental("first")
+            .unwrap();
+        assert_eq!(before_reset.reasoning_text, "first");
+
+        // `reset` must re-arm the first-chunk behaviour.
+        minimax.reset();
+
+        // The next chunk is treated as a first chunk again.
+        let after_reset = minimax
+            .parse_reasoning_streaming_incremental("second")
+            .unwrap();
+        assert_eq!(after_reset.reasoning_text, "second");
+    }
+
+    #[test]
+    fn test_minimax_already_has_think() {
+        let mut minimax = MiniMaxParser::new();
+
+        // The parser prepends `<think>` even when the input already starts with one,
+        // mirroring the Python detector.
+        let parsed = minimax
+            .detect_and_parse_reasoning("<think>content</think>answer")
+            .unwrap();
+        // Expected: the doubled opening tag does not leak into either output field.
+        assert_eq!(parsed.normal_text, "answer");
+        assert_eq!(parsed.reasoning_text, "content");
+    }
+
+    #[test]
+    fn test_model_type() {
+        let minimax = MiniMaxParser::new();
+        assert_eq!(minimax.model_type(), "minimax");
+    }
+}
diff --git a/sgl-router/src/reasoning_parser/parsers/mod.rs b/sgl-router/src/reasoning_parser/parsers/mod.rs
@@ -2,12 +2,14 @@ pub mod base;
 pub mod deepseek_r1;
 pub mod glm45;
 pub mod kimi;
+pub mod minimax;
 pub mod qwen3;
 pub mod step3;
 
 pub use base::BaseReasoningParser;
 pub use deepseek_r1::DeepSeekR1Parser;
 pub use glm45::Glm45Parser;
 pub use kimi::KimiParser;
+pub use minimax::MiniMaxParser;
 pub use qwen3::{Qwen3Parser, QwenThinkingParser};
 pub use step3::Step3Parser;