Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libs/agno/agno/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,7 @@ def _populate_assistant_message(
# Add usage metrics if provided
if provider_response.response_usage is not None:
assistant_message.metrics += provider_response.response_usage
assistant_message.metrics.calculate_cost(self.id)

return assistant_message

Expand Down Expand Up @@ -1322,6 +1323,7 @@ def _populate_assistant_message_from_stream_data(
assistant_message.role = stream_data.response_role
if stream_data.response_metrics is not None:
assistant_message.metrics = stream_data.response_metrics
assistant_message.metrics.calculate_cost(self.id)
if stream_data.response_content:
assistant_message.content = stream_data.response_content
if stream_data.response_reasoning_content:
Expand Down
75 changes: 64 additions & 11 deletions libs/agno/agno/models/metrics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import asdict, dataclass
from typing import Any, Dict, Optional

from agno.utils.pricing import PricingConfig
from agno.utils.timer import Timer


Expand All @@ -25,12 +26,17 @@ class Metrics:
# Tokens employed in reasoning
reasoning_tokens: int = 0

# -- Cost Estimates --
input_cost: Optional[float] = None
output_cost: Optional[float] = None
cache_read_cost: Optional[float] = None
cache_write_cost: Optional[float] = None
total_cost: Optional[float] = None
currency: Optional[str] = None

# Time metrics
# Internal timer utility for tracking execution time
timer: Optional[Timer] = None
# Time from run start to first token generation, in seconds
time_to_first_token: Optional[float] = None
# Total run time, in seconds
duration: Optional[float] = None

# Provider-specific metrics
Expand All @@ -39,20 +45,60 @@ class Metrics:
# Any additional metrics
additional_metrics: Optional[dict] = None

def calculate_cost(self, model_id: str):
    """Fill in the cost fields of this instance from the pricing registry.

    The token counts currently stored on this instance are passed to
    ``PricingConfig.calculate_cost`` together with ``model_id``. When that
    call returns a falsy result, the existing cost fields are left untouched.

    Args:
        model_id: Identifier used to look up the model's pricing.
    """
    computed = PricingConfig.calculate_cost(
        model=model_id,
        input_tokens=self.input_tokens,
        output_tokens=self.output_tokens,
        cache_read_tokens=self.cache_read_tokens,
        cache_write_tokens=self.cache_write_tokens,
    )
    if not computed:
        return

    # Result keys are named exactly like the fields they populate.
    for field_name in (
        "input_cost",
        "output_cost",
        "cache_read_cost",
        "cache_write_cost",
        "total_cost",
        "currency",
    ):
        setattr(self, field_name, computed[field_name])

def to_dict(self) -> Dict[str, Any]:
    """Serialize the metrics to a plain dict, omitting empty values.

    Entries are dropped when the value is ``None``, a numeric zero, or an
    empty dict. The internal ``timer`` helper is never included.

    Float values whose key contains ``"cost"`` are rendered as fixed-point
    strings with up to ten decimals and trailing zeros stripped, so small
    amounts do not appear in scientific notation (``2.6e-05`` becomes
    ``"0.000026"``).

    Returns:
        A dict mapping field names to their non-empty values.
    """
    metrics_dict = asdict(self)
    # The timer is an internal utility, not a reportable metric.
    metrics_dict.pop("timer", None)

    cleaned_dict: Dict[str, Any] = {}
    for k, v in metrics_dict.items():
        if v is None:
            continue
        if isinstance(v, (int, float)) and v == 0:
            continue
        if isinstance(v, dict) and len(v) == 0:
            continue

        if "cost" in k and isinstance(v, float):
            # Fixed-point with ten decimals; anything that rounds to zero
            # at that precision ends up as "0".
            cleaned_dict[k] = f"{v:.10f}".rstrip("0").rstrip(".")
        else:
            cleaned_dict[k] = v
    return cleaned_dict

def _sum_optional_floats(self, val1: Optional[float], val2: Optional[float]) -> Optional[float]:
"""Helper to sum costs safely."""
if val1 is None and val2 is None:
return None
return (val1 or 0.0) + (val2 or 0.0)

def __add__(self, other: "Metrics") -> "Metrics":
# Create new instance of the same type as self
result_class = type(self)

# Calculate sums for costs
new_input_cost = self._sum_optional_floats(self.input_cost, other.input_cost)
new_output_cost = self._sum_optional_floats(self.output_cost, other.output_cost)
new_cache_read_cost = self._sum_optional_floats(self.cache_read_cost, other.cache_read_cost)
new_cache_write_cost = self._sum_optional_floats(self.cache_write_cost, other.cache_write_cost)
new_total_cost = self._sum_optional_floats(self.total_cost, other.total_cost)
new_currency = self.currency or other.currency
result = result_class(
input_tokens=self.input_tokens + other.input_tokens,
output_tokens=self.output_tokens + other.output_tokens,
Expand All @@ -63,6 +109,13 @@ def __add__(self, other: "Metrics") -> "Metrics":
cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
cache_write_tokens=self.cache_write_tokens + other.cache_write_tokens,
reasoning_tokens=self.reasoning_tokens + other.reasoning_tokens,
# Pass calculated costs
input_cost=new_input_cost,
output_cost=new_output_cost,
cache_read_cost=new_cache_read_cost,
cache_write_cost=new_cache_write_cost,
total_cost=new_total_cost,
currency=new_currency,
)

# Handle provider_metrics
Expand Down
80 changes: 80 additions & 0 deletions libs/agno/agno/utils/pricing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from typing import Any, Dict, Optional, Union


class PricingConfig:
    """Class-level registry of per-model token prices.

    Prices are registered per one million tokens through ``set_price`` and
    stored as per-token rates, so ``calculate_cost`` only has to multiply.
    All state lives on the class itself; no instance is required.
    """

    # Storage format (rates are PER TOKEN, already divided by 1_000_000):
    # {"model_id": {"input": rate, "output": rate, "cache_read": rate,
    #               "cache_write": rate, "currency": str}}
    # Inner values are typed Any because "currency" is a str while the
    # other entries are floats.
    _prices: Dict[str, Dict[str, Any]] = {}

    @staticmethod
    def _resolve_model_id(model: Union[str, Any]) -> str:
        """Return the registry key for a model ID or a model-like object.

        Objects exposing an ``id`` attribute are keyed by that attribute,
        plain strings are used as-is, and anything else falls back to
        ``str(model)``.
        """
        if hasattr(model, "id"):
            return model.id
        if isinstance(model, str):
            return model
        return str(model)

    @classmethod
    def set_price(
        cls,
        model: Union[str, Any],
        input_price_per_million: float,
        output_price_per_million: float,
        cache_read_price_per_million: Optional[float] = None,
        cache_write_price_per_million: Optional[float] = None,
        currency: str = "USD",
    ):
        """Register (or overwrite) the pricing for a model.

        All prices are expressed per 1 million tokens, in ``currency``.

        Args:
            model: Model identifier (e.g., "gpt-5.1") or an object exposing
                an ``id`` attribute.
            input_price_per_million: Price per 1M input tokens.
            output_price_per_million: Price per 1M output tokens.
            cache_read_price_per_million: Price per 1M cache-read tokens.
                ``None`` is stored as a rate of 0.
            cache_write_price_per_million: Price per 1M cache-write tokens.
                ``None`` is stored as a rate of 0.
            currency: Currency code reported alongside computed costs
                (default: "USD").
        """
        cls._prices[cls._resolve_model_id(model)] = {
            "input": input_price_per_million / 1_000_000,
            "output": output_price_per_million / 1_000_000,
            "cache_read": (cache_read_price_per_million or 0.0) / 1_000_000,
            "cache_write": (cache_write_price_per_million or 0.0) / 1_000_000,
            "currency": currency,
        }

    @classmethod
    def get_price(cls, model: str) -> Optional[Dict[str, Any]]:
        """Return the stored per-token rates for ``model``, or ``None``.

        ``model`` is normalised the same way as in ``set_price``, so a
        model-like object with an ``id`` attribute is accepted as well as
        the plain ID string.
        """
        return cls._prices.get(cls._resolve_model_id(model))

    @classmethod
    def calculate_cost(
        cls,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> Optional[Dict[str, Any]]:
        """Compute the cost of a token usage for ``model``.

        Args:
            model: Model identifier used to look up the pricing.
            input_tokens: Number of input tokens.
            output_tokens: Number of output tokens.
            cache_read_tokens: Number of cache-read tokens.
            cache_write_tokens: Number of cache-write tokens.

        Returns:
            ``None`` if no pricing is registered for ``model``; otherwise a
            dict with ``input_cost``, ``output_cost``, ``cache_read_cost``,
            ``cache_write_cost``, their sum as ``total_cost``, and the
            registered ``currency``.
        """
        price = cls.get_price(model)
        if not price:
            return None

        input_cost = input_tokens * price["input"]
        output_cost = output_tokens * price["output"]
        cache_read_cost = cache_read_tokens * price["cache_read"]
        cache_write_cost = cache_write_tokens * price["cache_write"]

        return {
            "input_cost": input_cost,
            "output_cost": output_cost,
            "cache_read_cost": cache_read_cost,
            "cache_write_cost": cache_write_cost,
            "total_cost": input_cost + output_cost + cache_read_cost + cache_write_cost,
            "currency": price["currency"],
        }