diff --git a/cascadeflow/integrations/openclaw/openai_server.py b/cascadeflow/integrations/openclaw/openai_server.py
index 5e09995f..ddd4a30f 100644
--- a/cascadeflow/integrations/openclaw/openai_server.py
+++ b/cascadeflow/integrations/openclaw/openai_server.py
@@ -30,6 +30,18 @@
 
 oc_logger = logging.getLogger("cascadeflow.openclaw")
 
+_DEFAULT_SENTINELS = ("NO_REPLY",)
+
+
+def _strip_sentinel(content: str, sentinels: tuple[str, ...]) -> str:
+    """Strip known sentinel patterns from content. Returns empty string if only sentinels."""
+    if not content:
+        return content
+    cleaned = content.strip()
+    for sentinel in sentinels:
+        cleaned = cleaned.replace(sentinel, "")
+    return cleaned.strip()
+
 
 def _to_openai_tool_calls(
     tool_calls: list[dict[str, Any]],
@@ -403,7 +415,9 @@ def _handle_chat(self, server: OpenClawOpenAIServer, payload: dict[str, Any]) ->
             result=result,
             total_ms=total_ms,
         )
-        log_decision(trace)
+        # Skip ghost traces where cascade wasn't initialised (null models)
+        if trace.get("draft", {}).get("model"):
+            log_decision(trace)
         accepted = meta.get("draft_accepted", False)
         model_used = getattr(result, "model_used", "unknown")
         oc_logger.info(
@@ -614,11 +628,11 @@ async def _produce() -> None:
         if "error" in error_box:
             self.log_error("Streaming error: %s", error_box["error"])
 
-        full_content = "".join(chunk_parts)
+        full_content = _strip_sentinel("".join(chunk_parts), _DEFAULT_SENTINELS)
         if not full_content:
             completion_content = completion_result.get("content")
             if isinstance(completion_content, str):
-                full_content = completion_content
+                full_content = _strip_sentinel(completion_content, _DEFAULT_SENTINELS)
 
         # If no content was produced and there's an upstream error, send an
         # error event so the client gets a meaningful failure instead of an
@@ -759,7 +773,9 @@ async def _produce() -> None:
                 decision_data=captured_decision,
                 total_ms=total_ms,
             )
-            log_decision(trace)
+            # Skip ghost traces where cascade wasn't initialised (null models)
+            if trace.get("draft", {}).get("model"):
+                log_decision(trace)
             accepted = captured_decision.get("accepted", False)
             model_used = completion_result.get("model_used", "unknown")
             total_cost = completion_result.get("total_cost", 0.0)
@@ -885,9 +901,11 @@ def _extract_upstream_error(result) -> dict[str, Any] | None:
 
 
 def _has_content(result) -> bool:
-    """Check if a result has non-empty content."""
+    """Check if a result has non-empty content (excluding sentinels)."""
     content = getattr(result, "content", None)
-    return isinstance(content, str) and bool(content.strip())
+    if not isinstance(content, str) or not content.strip():
+        return False
+    return bool(_strip_sentinel(content, _DEFAULT_SENTINELS))
 
 
 def _run_agent(
@@ -1094,16 +1112,19 @@ def _build_openai_response(model: str, result) -> dict[str, Any]:
     content = getattr(result, "content", "") or ""
     if not isinstance(content, str):
         content = str(content)
+    content = _strip_sentinel(content, _DEFAULT_SENTINELS)
 
     # Never return an empty assistant message if we have usable content in metadata.
     # This can happen when an upstream verifier returns only reasoning output.
     if not tool_calls and not content.strip():
         for source_key in ("verifier_response", "draft_response"):
             candidate = meta.get(source_key)
-            if isinstance(candidate, str) and candidate.strip():
-                meta.setdefault("openclaw_content_fallback", source_key)
-                content = candidate
-                break
+            if isinstance(candidate, str):
+                candidate = _strip_sentinel(candidate, _DEFAULT_SENTINELS)
+                if candidate.strip():
+                    meta.setdefault("openclaw_content_fallback", source_key)
+                    content = candidate
+                    break
 
     message: dict[str, Any] = {"role": "assistant", "content": content}
     if tool_calls:
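Note: a minimal standalone sketch of the sentinel-stripping semantics introduced above. The helper body is copied from the hunk; the assertions are illustrative only and are not part of the patch.

_DEFAULT_SENTINELS = ("NO_REPLY",)

def _strip_sentinel(content: str, sentinels: tuple[str, ...]) -> str:
    """Strip known sentinel patterns from content. Returns empty string if only sentinels."""
    if not content:
        return content
    cleaned = content.strip()
    for sentinel in sentinels:
        cleaned = cleaned.replace(sentinel, "")
    return cleaned.strip()

# Pure sentinel input collapses to "", so _has_content() now rejects it.
assert _strip_sentinel("NO_REPLY", _DEFAULT_SENTINELS) == ""
assert _strip_sentinel("  NO_REPLY  ", _DEFAULT_SENTINELS) == ""
# The sentinel is removed anywhere in the string; surrounding text survives.
assert _strip_sentinel("Hello NO_REPLY", _DEFAULT_SENTINELS) == "Hello"
assert _strip_sentinel("Hello", _DEFAULT_SENTINELS) == "Hello"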
diff --git a/cascadeflow/utils/presets.py b/cascadeflow/utils/presets.py
index 959e50db..969d86ef 100644
--- a/cascadeflow/utils/presets.py
+++ b/cascadeflow/utils/presets.py
@@ -189,6 +189,8 @@ def get_cost_optimized_agent(
         quality_config=quality_config,
         enable_cascade=enable_cascade,
         verbose=verbose,
+        enable_domain_detection=use_hybrid,
+        use_hybrid=use_hybrid,
     )
 
 
@@ -300,6 +302,8 @@ def get_balanced_agent(
         quality_config=quality_config,
         enable_cascade=enable_cascade,
         verbose=verbose,
+        enable_domain_detection=use_hybrid,
+        use_hybrid=use_hybrid,
     )
 
 
@@ -410,6 +414,8 @@ def get_speed_optimized_agent(
         quality_config=quality_config,
         enable_cascade=enable_cascade,
         verbose=verbose,
+        enable_domain_detection=use_hybrid,
+        use_hybrid=use_hybrid,
     )
 
 
@@ -524,6 +530,8 @@ def get_quality_optimized_agent(
         quality_config=quality_config,
         enable_cascade=enable_cascade,
         verbose=verbose,
+        enable_domain_detection=use_hybrid,
+        use_hybrid=use_hybrid,
     )
 
 
@@ -622,6 +630,8 @@ def get_development_agent(
         quality_config=quality_config,
         enable_cascade=enable_cascade,
         verbose=verbose,
+        enable_domain_detection=use_hybrid,
+        use_hybrid=use_hybrid,
     )
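Note: the presets change forwards each preset's existing use_hybrid flag to the agent constructor twice, as use_hybrid and as enable_domain_detection, so hybrid presets also get domain detection. A hedged usage sketch; the hunks imply use_hybrid is already a parameter of these factory functions, but the full signature is not shown here.

from cascadeflow.utils.presets import get_balanced_agent

# With this patch, enabling hybrid routing on a preset also enables domain
# detection on the underlying agent (enable_domain_detection=use_hybrid).
agent = get_balanced_agent(use_hybrid=True)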