Skip to content

Commit e2c8d80

Browse files
fix: integration audit — 6 fixes across LiteLLM, n8n, LangChain, Vercel AI, OpenRouter (#118)
P0 (broken at runtime):
- LiteLLM BudgetTracker: replace broken BudgetManager API calls with an internal _user_budgets dict for reliable budget tracking
- n8n node: replace the nonexistent DomainDetector with DomainRouter; remove the nonexistent CircuitBreaker (use direct model.invoke)

P1 (significant user impact):
- LangChain Python + TS: add modern models to MODEL_PRICING (GPT-5, GPT-5-mini, Claude Sonnet 4, Claude Opus 4.5, Claude Haiku 3.5/4.5, DeepSeek)
- Vercel AI: forward request.extra.tools in generate() and stream(); extract tool_calls from the generate() result
- OpenRouter: parse tool call arguments from a JSON string to a dict, falling back to the raw string when parsing fails

P2 (consistency):
- Add openrouter and deepseek to PROVIDER_CAPABILITIES in base.py
1 parent 351f616 commit e2c8d80

8 files changed

Lines changed: 113 additions & 135 deletions

File tree

cascadeflow/integrations/langchain/utils.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,17 +8,27 @@
88
# Model pricing per 1M tokens (input/output)
99
MODEL_PRICING: dict[str, dict[str, float]] = {
1010
# OpenAI
11+
"gpt-5": {"input": 10.00, "output": 30.00},
12+
"gpt-5-mini": {"input": 0.20, "output": 0.80},
1113
"gpt-4o-mini": {"input": 0.150, "output": 0.600},
1214
"gpt-4o": {"input": 2.50, "output": 10.00},
1315
"gpt-4-turbo": {"input": 10.00, "output": 30.00},
1416
"gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
15-
# Anthropic
17+
# Anthropic (4.x)
18+
"claude-opus-4-5": {"input": 5.00, "output": 25.00},
19+
"claude-sonnet-4": {"input": 3.00, "output": 15.00},
1620
"claude-sonnet-4-5-20250929": {"input": 3.00, "output": 15.00},
21+
"claude-haiku-3-5": {"input": 1.00, "output": 5.00},
22+
"claude-haiku-4-5": {"input": 1.00, "output": 5.00},
23+
# Anthropic (3.x)
1724
"claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
1825
"claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
1926
"claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
2027
"claude-3-sonnet-20240229": {"input": 3.00, "output": 15.00},
2128
"claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
29+
# DeepSeek
30+
"deepseek-chat": {"input": 0.14, "output": 0.28},
31+
"deepseek-coder": {"input": 0.14, "output": 0.28},
2232
}
2333

2434

cascadeflow/integrations/litellm.py

Lines changed: 45 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -601,6 +601,7 @@ def __init__(self, fallback_to_cascadeflow: bool = True):
601601
self.fallback_to_cascadeflow = fallback_to_cascadeflow
602602
self.budget_manager = None
603603
self.cost_provider = LiteLLMCostProvider()
604+
self._user_budgets: dict[str, dict] = {}
604605

605606
if BUDGET_MANAGER_AVAILABLE:
606607
self.budget_manager = BudgetManager(project_name="cascadeflow")
@@ -633,11 +634,18 @@ def set_user_budget(self, user: str, max_budget: float) -> None:
633634
Example:
634635
>>> tracker.set_user_budget("user_123", max_budget=10.0)
635636
"""
637+
self._user_budgets[user] = {
638+
"max_budget": max_budget,
639+
"current_cost": 0.0,
640+
}
641+
636642
if self.budget_manager:
637-
self.budget_manager.create_budget(user=user, max_budget=max_budget)
638-
logger.info(f"Set budget for {user}: ${max_budget:.2f}")
639-
else:
640-
logger.warning(f"Cannot set budget for {user} - BudgetManager unavailable")
643+
try:
644+
self.budget_manager.create_budget(user=user, total_budget=max_budget)
645+
except Exception as e:
646+
logger.debug(f"BudgetManager.create_budget failed for {user}: {e}")
647+
648+
logger.info(f"Set budget for {user}: ${max_budget:.2f}")
641649

642650
def update_cost(
643651
self,
@@ -677,51 +685,36 @@ def update_cost(
677685
... response=api_response
678686
... )
679687
"""
680-
if self.budget_manager:
681-
try:
682-
# If we have actual API response, use it
683-
if response:
684-
cost = self.budget_manager.update_cost(completion_obj=response, user=user)
685-
else:
686-
# Calculate cost from tokens
687-
cost = self.cost_provider.calculate_cost(
688-
model=model,
689-
input_tokens=prompt_tokens,
690-
output_tokens=completion_tokens,
691-
)
692-
693-
# Update budget manager
694-
self.budget_manager.update_cost(user=user, cost=cost)
695-
696-
logger.debug(f"Updated cost for {user}: ${cost:.6f}")
697-
return cost
698-
699-
except Exception as e:
700-
logger.error(f"Error updating cost for {user}: {e}")
701-
# Fall through to fallback
702-
703-
# Fallback to cascadeflow CostTracker
704-
if self.fallback_to_cascadeflow and hasattr(self, "cost_tracker"):
688+
# Calculate cost from tokens or response
689+
if response:
690+
cost = self.cost_provider.calculate_cost(
691+
model=model,
692+
input_tokens=prompt_tokens,
693+
output_tokens=completion_tokens,
694+
)
695+
else:
705696
cost = self.cost_provider.calculate_cost(
706697
model=model,
707698
input_tokens=prompt_tokens,
708699
output_tokens=completion_tokens,
709700
)
701+
702+
# Track in internal budget dict
703+
if user in self._user_budgets:
704+
self._user_budgets[user]["current_cost"] += cost
705+
706+
# Also track in cascadeflow CostTracker if available
707+
if self.fallback_to_cascadeflow and hasattr(self, "cost_tracker") and self.cost_tracker:
710708
self.cost_tracker.add_cost(
711709
model=model,
712710
provider="",
713711
tokens=prompt_tokens + completion_tokens,
714712
cost=cost,
715713
user_id=user,
716714
)
717-
return cost
718715

719-
# Just calculate cost without tracking
720-
return self.cost_provider.calculate_cost(
721-
model=model,
722-
input_tokens=prompt_tokens,
723-
output_tokens=completion_tokens,
724-
)
716+
logger.debug(f"Updated cost for {user}: ${cost:.6f}")
717+
return cost
725718

726719
def get_user_budget(self, user: str) -> dict:
727720
"""
@@ -742,23 +735,19 @@ def get_user_budget(self, user: str) -> dict:
742735
>>> print(f"Spent: ${info['current_cost']:.2f}")
743736
>>> print(f"Remaining: ${info['remaining']:.2f}")
744737
"""
745-
if self.budget_manager:
746-
try:
747-
budget = self.budget_manager.get_budget(user)
748-
749-
max_budget = budget.get("max_budget", 0)
750-
current_cost = budget.get("current_cost", 0)
751-
remaining = max_budget - current_cost
752-
exceeded = current_cost > max_budget
753-
754-
return {
755-
"max_budget": max_budget,
756-
"current_cost": current_cost,
757-
"remaining": remaining,
758-
"exceeded": exceeded,
759-
}
760-
except Exception as e:
761-
logger.error(f"Error getting budget for {user}: {e}")
738+
budget = self._user_budgets.get(user)
739+
if budget:
740+
max_budget = budget["max_budget"]
741+
current_cost = budget["current_cost"]
742+
remaining = max_budget - current_cost
743+
exceeded = current_cost > max_budget
744+
745+
return {
746+
"max_budget": max_budget,
747+
"current_cost": current_cost,
748+
"remaining": remaining,
749+
"exceeded": exceeded,
750+
}
762751

763752
return {
764753
"max_budget": 0,
@@ -805,12 +794,9 @@ def reset_user_budget(self, user: str) -> None:
805794
Example:
806795
>>> tracker.reset_user_budget("user_123")
807796
"""
808-
if self.budget_manager:
809-
try:
810-
self.budget_manager.reset_cost(user=user)
811-
logger.info(f"Reset budget for {user}")
812-
except Exception as e:
813-
logger.error(f"Error resetting budget for {user}: {e}")
797+
if user in self._user_budgets:
798+
self._user_budgets[user]["current_cost"] = 0.0
799+
logger.info(f"Reset budget for {user}")
814800

815801

816802
# ============================================================================

cascadeflow/providers/base.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1623,6 +1623,20 @@ def _calculate_confidence_legacy(
16231623
"max_top_logprobs": 20,
16241624
"has_cost_tracking": True,
16251625
},
1626+
"openrouter": {
1627+
"supports_logprobs": True,
1628+
"supports_streaming": True,
1629+
"supports_tools": True,
1630+
"max_top_logprobs": 20,
1631+
"has_cost_tracking": True,
1632+
},
1633+
"deepseek": {
1634+
"supports_logprobs": False,
1635+
"supports_streaming": True,
1636+
"supports_tools": True,
1637+
"max_top_logprobs": 0,
1638+
"has_cost_tracking": True,
1639+
},
16261640
}
16271641

16281642

cascadeflow/providers/openrouter.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -556,11 +556,19 @@ def _parse_tool_calls(self, choice: dict[str, Any]) -> Optional[list[dict[str, A
556556
tool_calls = []
557557
for tc in raw_tool_calls:
558558
func = tc.get("function", {})
559+
raw_args = func.get("arguments", "{}")
560+
if isinstance(raw_args, str):
561+
try:
562+
parsed_args = json.loads(raw_args)
563+
except (json.JSONDecodeError, TypeError):
564+
parsed_args = raw_args
565+
else:
566+
parsed_args = raw_args
559567
tool_calls.append(
560568
{
561569
"id": tc.get("id", ""),
562570
"name": func.get("name", ""),
563-
"arguments": func.get("arguments", "{}"),
571+
"arguments": parsed_args,
564572
}
565573
)
566574

packages/core/src/providers/vercel-ai.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,6 +217,7 @@ export class VercelAISDKProvider extends BaseProvider {
217217
messages,
218218
maxTokens: request.maxTokens,
219219
temperature: request.temperature,
220+
...(request.extra?.tools ? { tools: request.extra.tools } : {}),
220221
...(request.extra?.aiSdkOptions ?? {}),
221222
});
222223

@@ -225,6 +226,7 @@ export class VercelAISDKProvider extends BaseProvider {
225226
model: request.model,
226227
usage: buildUsage(result.usage),
227228
finish_reason: result.finishReason,
229+
tool_calls: (result as any).toolCalls,
228230
raw: result,
229231
};
230232
}
@@ -239,6 +241,7 @@ export class VercelAISDKProvider extends BaseProvider {
239241
messages,
240242
maxTokens: request.maxTokens,
241243
temperature: request.temperature,
244+
...(request.extra?.tools ? { tools: request.extra.tools } : {}),
242245
...(request.extra?.aiSdkOptions ?? {}),
243246
});
244247

packages/integrations/n8n/nodes/CascadeFlowAgent/CascadeFlowAgent.node.ts

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -446,14 +446,6 @@ export class CascadeFlowAgent implements INodeType {
446446
default: true,
447447
description: 'Whether to route complex queries directly to the verifier',
448448
},
449-
{
450-
displayName: 'Enable Circuit Breaker',
451-
name: 'useCircuitBreaker',
452-
type: 'boolean',
453-
default: true,
454-
description:
455-
'Whether to use circuit breaker for fault tolerance (auto-fallback on repeated failures)',
456-
},
457449
{
458450
displayName: 'Max Tool Iterations',
459451
name: 'maxIterations',
@@ -513,7 +505,6 @@ export class CascadeFlowAgent implements INodeType {
513505
const useAlignmentScoring = this.getNodeParameter('useAlignmentScoring', 0, true) as boolean;
514506
const useComplexityRouting = this.getNodeParameter('useComplexityRouting', 0, true) as boolean;
515507
const useComplexityThresholds = this.getNodeParameter('useComplexityThresholds', 0, true) as boolean;
516-
const useCircuitBreaker = this.getNodeParameter('useCircuitBreaker', 0, true) as boolean;
517508
const maxIterations = this.getNodeParameter('maxIterations', 0, 3) as number;
518509

519510
const confidenceThresholds = useComplexityThresholds
@@ -575,7 +566,6 @@ export class CascadeFlowAgent implements INodeType {
575566
[],
576567
new Map(),
577568
new Map(),
578-
useCircuitBreaker,
579569
confidenceThresholds
580570
);
581571

0 commit comments

Comments
 (0)