|
| 1 | +//! LLM-driven memory ranking via sidequery. |
| 2 | +//! |
| 3 | +//! Gated behind the `mem-ranker` Cargo feature. Uses a lightweight LLM |
| 4 | +//! call to select the most relevant memories from a manifest. |
| 5 | +
|
| 6 | +use std::future::Future; |
| 7 | +use std::pin::Pin; |
| 8 | +use std::sync::Arc; |
| 9 | + |
| 10 | +use crab_api::LlmBackend; |
| 11 | +use crab_api::types::MessageRequest; |
| 12 | +use crab_core::message::Message; |
| 13 | +use crab_core::model::ModelId; |
| 14 | + |
| 15 | +use crate::relevance::{MemoryRanker, format_manifest}; |
| 16 | +use crate::store::MemoryFile; |
| 17 | + |
/// System prompt sent with every ranking request.
///
/// It instructs the model to answer with a JSON object of the form
/// `{"selected": ["file1.md", ...]}` and to return an empty list when no
/// memory is relevant. Each `\n\` pair emits a newline and then skips the
/// source line break, so the prompt is four lines with no indentation.
const SYSTEM_PROMPT: &str = "\
You select memories relevant to the user's query.\n\
Return JSON: {\"selected\": [\"file1.md\", \"file2.md\"]}\n\
Be selective — only include clearly relevant memories.\n\
If nothing matches, return {\"selected\": []}.\
";
| 24 | + |
| 25 | +/// LLM-driven memory ranker using a sidequery to a fast model. |
| 26 | +pub struct LlmMemoryRanker { |
| 27 | + backend: Arc<LlmBackend>, |
| 28 | + model: ModelId, |
| 29 | +} |
| 30 | + |
| 31 | +impl LlmMemoryRanker { |
| 32 | + /// Create a ranker using the given backend and model. |
| 33 | + pub fn new(backend: Arc<LlmBackend>, model: ModelId) -> Self { |
| 34 | + Self { backend, model } |
| 35 | + } |
| 36 | +} |
| 37 | + |
| 38 | +impl MemoryRanker for LlmMemoryRanker { |
| 39 | + fn rank( |
| 40 | + &self, |
| 41 | + query: &str, |
| 42 | + manifest: &str, |
| 43 | + max_count: usize, |
| 44 | + ) -> Pin<Box<dyn Future<Output = crab_common::Result<Vec<String>>> + Send + '_>> { |
| 45 | + let query = query.to_string(); |
| 46 | + let manifest = manifest.to_string(); |
| 47 | + Box::pin(async move { |
| 48 | + let user_msg = format!( |
| 49 | + "Query: {query}\n\nAvailable memories (select up to {max_count}):\n{manifest}" |
| 50 | + ); |
| 51 | + |
| 52 | + let req = MessageRequest { |
| 53 | + model: self.model.clone(), |
| 54 | + messages: std::borrow::Cow::Owned(vec![Message::user(&user_msg)]), |
| 55 | + system: Some(SYSTEM_PROMPT.to_string()), |
| 56 | + max_tokens: 256, |
| 57 | + tools: vec![], |
| 58 | + temperature: Some(0.0), |
| 59 | + cache_breakpoints: vec![], |
| 60 | + budget_tokens: None, |
| 61 | + response_format: None, |
| 62 | + tool_choice: None, |
| 63 | + }; |
| 64 | + |
| 65 | + let response = self.backend.send_message(req).await.map_err(|e| { |
| 66 | + crab_common::Error::Other(format!("memory ranker LLM call failed: {e}")) |
| 67 | + })?; |
| 68 | + |
| 69 | + let text = response.message.text(); |
| 70 | + parse_ranker_response(&text, &manifest) |
| 71 | + }) |
| 72 | + } |
| 73 | +} |
| 74 | + |
| 75 | +/// Parse the JSON response from the ranker and filter to valid filenames. |
| 76 | +/// |
| 77 | +/// Accepts: `{"selected": ["file1.md", "file2.md"]}` or just the array. |
| 78 | +/// Filters out any filenames not present in the manifest. |
| 79 | +fn parse_ranker_response(response_text: &str, manifest: &str) -> crab_common::Result<Vec<String>> { |
| 80 | + // Try to extract JSON from the response (may be wrapped in markdown code blocks) |
| 81 | + let json_text = extract_json(response_text); |
| 82 | + |
| 83 | + // Try parsing as {"selected": [...]} |
| 84 | + if let Ok(obj) = serde_json::from_str::<serde_json::Value>(&json_text) |
| 85 | + && let Some(arr) = obj.get("selected").and_then(|v| v.as_array()) |
| 86 | + { |
| 87 | + let filenames: Vec<String> = arr |
| 88 | + .iter() |
| 89 | + .filter_map(|v| v.as_str().map(String::from)) |
| 90 | + .collect(); |
| 91 | + return Ok(filter_valid_filenames(&filenames, manifest)); |
| 92 | + } |
| 93 | + |
| 94 | + // Try parsing as a plain array |
| 95 | + if let Ok(arr) = serde_json::from_str::<Vec<String>>(&json_text) { |
| 96 | + return Ok(filter_valid_filenames(&arr, manifest)); |
| 97 | + } |
| 98 | + |
| 99 | + // Couldn't parse β return empty (graceful degradation, no error) |
| 100 | + Ok(Vec::new()) |
| 101 | +} |
| 102 | + |
/// Extract the JSON payload from text that may be wrapped in prose or
/// markdown code fences.
///
/// Returns the span from the first `{` to the last `}` when both exist in
/// that order. Otherwise returns the span from the first `[` to the last `]`
/// under the same condition. Otherwise returns the trimmed input unchanged.
///
/// Delimiters that appear in the wrong order (for example `"} ... {"`) are
/// ignored rather than sliced, so arbitrary model output can never make this
/// function panic.
fn extract_json(text: &str) -> String {
    let text = text.trim();
    [('{', '}'), ('[', ']')]
        .iter()
        .find_map(|&(open, close)| {
            let start = text.find(open)?;
            let end = text.rfind(close)?;
            // `start > end` would be an invalid range and panic on slicing.
            // Both delimiters are one-byte ASCII, so `..=end` is a char boundary.
            (start < end).then(|| &text[start..=end])
        })
        .unwrap_or(text)
        .to_string()
}
| 119 | + |
/// Keep only the filenames that the manifest actually lists, in their
/// original order and without duplicates.
///
/// A name counts as listed when it occurs in `manifest` as a complete token,
/// i.e. the characters right before and after the occurrence are not
/// themselves filename characters (letters, digits, `_`, `-`, `.`, `/`, `\`).
/// This rejects empty strings and fragments such as `role.md` or `user_role`
/// when only `user_role.md` is listed, which a plain substring test would
/// accept.
///
/// The check is deliberately independent of the manifest's line layout, so a
/// word from a description can still pass. Callers must therefore resolve the
/// returned names against the real memory list by exact match.
fn filter_valid_filenames(filenames: &[String], manifest: &str) -> Vec<String> {
    let mut kept: Vec<String> = Vec::with_capacity(filenames.len());
    for name in filenames {
        // Selections are a handful of entries, so a linear duplicate check is fine.
        if !kept.contains(name) && manifest_mentions(manifest, name) {
            kept.push(name.clone());
        }
    }
    kept
}

/// Report whether `name` occurs in `manifest` delimited by non-filename characters.
fn manifest_mentions(manifest: &str, name: &str) -> bool {
    fn is_name_char(c: char) -> bool {
        c.is_alphanumeric() || matches!(c, '_' | '-' | '.' | '/' | '\\')
    }

    // An empty name is a substring of everything; never accept it.
    let step = match name.chars().next() {
        Some(first) => first.len_utf8(),
        None => return false,
    };

    let mut from = 0;
    while let Some(offset) = manifest[from..].find(name) {
        let start = from + offset;
        let end = start + name.len();
        let glued_before = manifest[..start].chars().next_back().is_some_and(is_name_char);
        let glued_after = manifest[end..].chars().next().is_some_and(is_name_char);
        if !glued_before && !glued_after {
            return true;
        }
        // Advance by one character so overlapping occurrences are also examined.
        from = start + step;
    }
    false
}
| 128 | + |
| 129 | +/// Select memories using LLM ranking, falling back to keyword scoring on error. |
| 130 | +pub async fn select_with_ranker( |
| 131 | + ranker: &LlmMemoryRanker, |
| 132 | + memories: &[MemoryFile], |
| 133 | + query: &str, |
| 134 | + max_count: usize, |
| 135 | +) -> Vec<MemoryFile> { |
| 136 | + let manifest = format_manifest(memories); |
| 137 | + |
| 138 | + match ranker.rank(query, &manifest, max_count).await { |
| 139 | + Ok(selected_filenames) if !selected_filenames.is_empty() => { |
| 140 | + // Return memories matching selected filenames, preserving ranker order |
| 141 | + selected_filenames |
| 142 | + .iter() |
| 143 | + .filter_map(|name| memories.iter().find(|m| m.filename == *name)) |
| 144 | + .cloned() |
| 145 | + .collect() |
| 146 | + } |
| 147 | + _ => { |
| 148 | + // Fallback to keyword scoring |
| 149 | + let selector = crate::relevance::MemorySelector { |
| 150 | + max_memories: max_count, |
| 151 | + ..Default::default() |
| 152 | + }; |
| 153 | + selector |
| 154 | + .select_by_keywords(memories, query) |
| 155 | + .into_iter() |
| 156 | + .map(|s| s.file) |
| 157 | + .collect() |
| 158 | + } |
| 159 | + } |
| 160 | +} |
| 161 | + |
// ── Tests ─────────────────────────────────────────────────────────────
| 163 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-entry manifest fixture: one `filename — description [type]`
    /// entry per line.
    const SAMPLE_MANIFEST: &str = "\
user_role.md — Senior Rust developer [user]\n\
feedback_style.md — Prefers terse responses [feedback]\n\
project_auth.md — Working on auth rewrite [project]\n\
";

    #[test]
    fn parse_valid_json_object() {
        let response = r#"{"selected": ["user_role.md", "feedback_style.md"]}"#;
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert_eq!(result, vec!["user_role.md", "feedback_style.md"]);
    }

    #[test]
    fn parse_json_in_code_fence() {
        let response = "```json\n{\"selected\": [\"user_role.md\"]}\n```";
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert_eq!(result, vec!["user_role.md"]);
    }

    #[test]
    fn parse_json_surrounded_by_prose() {
        let response = "Sure, here you go: {\"selected\": [\"project_auth.md\"]} Hope that helps";
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert_eq!(result, vec!["project_auth.md"]);
    }

    #[test]
    fn parse_plain_array() {
        let response = r#"["user_role.md", "project_auth.md"]"#;
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert_eq!(result, vec!["user_role.md", "project_auth.md"]);
    }

    #[test]
    fn parse_preserves_ranker_order() {
        let response = r#"{"selected": ["project_auth.md", "user_role.md"]}"#;
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert_eq!(result, vec!["project_auth.md", "user_role.md"]);
    }

    #[test]
    fn parse_skips_non_string_entries() {
        let response = r#"{"selected": ["user_role.md", 42, null]}"#;
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert_eq!(result, vec!["user_role.md"]);
    }

    #[test]
    fn parse_empty_selected() {
        let response = r#"{"selected": []}"#;
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn parse_invalid_json_returns_empty() {
        let response = "I don't know what to select";
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert!(result.is_empty());
    }

    #[test]
    fn filter_hallucinated_filenames() {
        let response = r#"{"selected": ["user_role.md", "nonexistent.md", "fake.md"]}"#;
        let result = parse_ranker_response(response, SAMPLE_MANIFEST).unwrap();
        assert_eq!(result, vec!["user_role.md"]);
    }

    #[test]
    fn extract_json_from_text() {
        assert_eq!(extract_json("  {\"a\": 1}  "), "{\"a\": 1}");
        assert_eq!(extract_json("```json\n[1,2]\n```"), "[1,2]");
        assert_eq!(extract_json("plain text"), "plain text");
    }
}
0 commit comments