Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/crewai/src/crewai/llms/providers/anthropic/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1580,10 +1580,12 @@ def _extract_anthropic_token_usage(
usage = response.usage
input_tokens = getattr(usage, "input_tokens", 0)
output_tokens = getattr(usage, "output_tokens", 0)
cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0
return {
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"total_tokens": input_tokens + output_tokens,
"cached_prompt_tokens": cache_read_tokens,
}
return {"total_tokens": 0}

Expand Down
15 changes: 14 additions & 1 deletion lib/crewai/src/crewai/llms/providers/azure/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,9 @@ def _prepare_completion_params(
"stream": self.stream,
}

model_extras: dict[str, Any] = {}
if self.stream:
params["model_extras"] = {"stream_options": {"include_usage": True}}
model_extras["stream_options"] = {"include_usage": True}

if response_model and self.is_openai_model:
model_description = generate_model_description(response_model)
Expand Down Expand Up @@ -464,6 +465,13 @@ def _prepare_completion_params(
params["tools"] = self._convert_tools_for_interference(tools)
params["tool_choice"] = "auto"

prompt_cache_key = self.additional_params.get("prompt_cache_key")
if prompt_cache_key:
model_extras["prompt_cache_key"] = prompt_cache_key

if model_extras:
params["model_extras"] = model_extras

additional_params = self.additional_params
additional_drop_params = additional_params.get("additional_drop_params")
drop_params = additional_params.get("drop_params")
def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]:
    """Extract token usage from Azure response.

    Args:
        response: Azure chat-completions response. Its ``usage`` attribute
            may be absent or empty.

    Returns:
        A dict with ``prompt_tokens``, ``completion_tokens``,
        ``total_tokens`` and ``cached_prompt_tokens``. Any counter that is
        missing or ``None`` on the usage object is reported as ``0``. When
        the response carries no usage, ``{"total_tokens": 0}`` is returned.
    """
    usage = getattr(response, "usage", None)
    if not usage:
        return {"total_tokens": 0}

    # Cached-token counts live on the optional ``prompt_tokens_details``
    # object; getattr on a missing (None) details object falls back to 0.
    prompt_details = getattr(usage, "prompt_tokens_details", None)
    cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0

    # ``or 0`` guards against attributes that exist but are None, which the
    # getattr default alone does not cover.
    return {
        "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
        "completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
        "total_tokens": getattr(usage, "total_tokens", 0) or 0,
        "cached_prompt_tokens": cached_tokens,
    }

Expand Down
2 changes: 2 additions & 0 deletions lib/crewai/src/crewai/llms/providers/gemini/completion.py
Original file line number Diff line number Diff line change
def _extract_token_usage(response: GenerateContentResponse) -> dict[str, Any]:
    """Extract token usage from Gemini response.

    Args:
        response: Gemini generate-content response exposing
            ``usage_metadata``.

    Returns:
        A dict with ``prompt_token_count``, ``candidates_token_count``,
        ``total_token_count``, ``total_tokens`` (same value as
        ``total_token_count``) and ``cached_prompt_tokens``. Counters that
        are missing or ``None`` are reported as ``0``. When the response has
        no usage metadata, ``{"total_tokens": 0}`` is returned.
    """
    usage = response.usage_metadata
    if not usage:
        return {"total_tokens": 0}

    # ``or 0`` covers counters that are present on the object but set to
    # None; the getattr default only covers missing attributes.
    total = getattr(usage, "total_token_count", 0) or 0
    return {
        "prompt_token_count": getattr(usage, "prompt_token_count", 0) or 0,
        "candidates_token_count": getattr(usage, "candidates_token_count", 0) or 0,
        "total_token_count": total,
        "total_tokens": total,
        "cached_prompt_tokens": getattr(usage, "cached_content_token_count", 0) or 0,
    }

Expand Down
30 changes: 18 additions & 12 deletions lib/crewai/src/crewai/llms/providers/openai/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1094,11 +1094,7 @@ def _handle_streaming_responses(
if reasoning_items:
self._last_reasoning_items = reasoning_items
if event.response and event.response.usage:
usage = {
"prompt_tokens": event.response.usage.input_tokens,
"completion_tokens": event.response.usage.output_tokens,
"total_tokens": event.response.usage.total_tokens,
}
usage = self._extract_responses_token_usage(event.response)
self._track_token_usage_internal(usage)

# If parse_tool_outputs is enabled, return structured result
Expand Down Expand Up @@ -1222,11 +1218,7 @@ async def _ahandle_streaming_responses(
if reasoning_items:
self._last_reasoning_items = reasoning_items
if event.response and event.response.usage:
usage = {
"prompt_tokens": event.response.usage.input_tokens,
"completion_tokens": event.response.usage.output_tokens,
"total_tokens": event.response.usage.total_tokens,
}
usage = self._extract_responses_token_usage(event.response)
self._track_token_usage_internal(usage)

# If parse_tool_outputs is enabled, return structured result
Expand Down Expand Up @@ -1310,11 +1302,18 @@ def _extract_function_calls_from_response(
def _extract_responses_token_usage(self, response: Response) -> dict[str, Any]:
    """Extract token usage from Responses API response.

    Args:
        response: Responses API response object exposing ``usage``.

    Returns:
        A dict with ``prompt_tokens``, ``completion_tokens``,
        ``total_tokens`` and ``cached_prompt_tokens``. The cached count is
        read from ``usage.input_tokens_details.cached_tokens`` and is ``0``
        when the details object or the counter is missing or ``None``. When
        the response has no usage, ``{"total_tokens": 0}`` is returned.
    """
    usage = response.usage
    if not usage:
        return {"total_tokens": 0}

    # Always report the cached-token key (defaulting to 0) so the shape of
    # the result matches the other provider extractors.
    input_details = getattr(usage, "input_tokens_details", None)
    return {
        "prompt_tokens": usage.input_tokens,
        "completion_tokens": usage.output_tokens,
        "total_tokens": usage.total_tokens,
        "cached_prompt_tokens": getattr(input_details, "cached_tokens", 0) or 0,
    }

def _extract_builtin_tool_outputs(self, response: Response) -> ResponsesAPIResult:
Expand Down Expand Up @@ -2264,11 +2263,18 @@ def _extract_openai_token_usage(
"""Extract token usage from OpenAI ChatCompletion or ChatCompletionChunk response."""
if hasattr(response, "usage") and response.usage:
usage = response.usage
return {
result = {
"prompt_tokens": getattr(usage, "prompt_tokens", 0),
"completion_tokens": getattr(usage, "completion_tokens", 0),
"total_tokens": getattr(usage, "total_tokens", 0),
}
# Extract cached prompt tokens from prompt_tokens_details
prompt_details = getattr(usage, "prompt_tokens_details", None)
if prompt_details:
result["cached_prompt_tokens"] = (
getattr(prompt_details, "cached_tokens", 0) or 0
)
return result
return {"total_tokens": 0}

def _format_messages(self, messages: str | list[LLMMessage]) -> list[LLMMessage]:
Expand Down
Loading
Loading