Commits (27)
- bc3cc01 Update (Mustafa-Esoofally, Dec 3, 2025)
- 517f2d7 Update (Mustafa-Esoofally, Dec 3, 2025)
- 22ab15d Update (Mustafa-Esoofally, Dec 3, 2025)
- 7011c06 Update (Mustafa-Esoofally, Dec 4, 2025)
- 8a43877 Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 4, 2025)
- 4c73db2 update (Mustafa-Esoofally, Dec 4, 2025)
- 6f43ed5 Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 4, 2025)
- ff1e84a update (Mustafa-Esoofally, Dec 4, 2025)
- f6e7200 update (Mustafa-Esoofally, Dec 4, 2025)
- 259b5a7 Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 4, 2025)
- 8728502 Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 5, 2025)
- 3e51b13 Update (Mustafa-Esoofally, Dec 5, 2025)
- 2fec0e9 Update (Mustafa-Esoofally, Dec 5, 2025)
- 4269391 Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 5, 2025)
- 131f190 Update tests (Mustafa-Esoofally, Dec 5, 2025)
- 5d1ed33 Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 5, 2025)
- 5f09d4b Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 8, 2025)
- 6458a30 Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 8, 2025)
- be4e3c1 Update (Mustafa-Esoofally, Dec 8, 2025)
- 0f17f6d Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 8, 2025)
- d568ff0 Update (Mustafa-Esoofally, Dec 8, 2025)
- 4dc5a2b Update tests and resolve comments (Mustafa-Esoofally, Dec 8, 2025)
- 5e7dbeb Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 9, 2025)
- bb73ed7 Update libs/agno/agno/utils/tokens.py (Mustafa-Esoofally, Dec 9, 2025)
- 7f4498e Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 9, 2025)
- c4a74aa update ouput_schema tokens (Mustafa-Esoofally, Dec 9, 2025)
- b38b84b Merge branch 'main' into feat/improve-token-counting (Mustafa-Esoofally, Dec 9, 2025)
cookbook/agents/context_compression/token_based_tool_call_compression.py (new file)
@@ -0,0 +1,72 @@
"""
This example shows how to set a token-based context limit for tool-call compression.
Run: `python cookbook/agents/context_compression/token_based_tool_call_compression.py`
"""

from agno.agent import Agent
from agno.compression.manager import CompressionManager
from agno.db.sqlite import SqliteDb
from agno.models.openai import OpenAIChat
from agno.tools.duckduckgo import DuckDuckGoTools

compression_prompt = """
You are a compression expert. Your goal is to compress web search results for a competitive intelligence analyst.

YOUR GOAL: Extract only actionable competitive insights while being extremely concise.

MUST PRESERVE:
- Competitor names and specific actions (product launches, partnerships, acquisitions, pricing changes)
- Exact numbers (revenue, market share, growth rates, pricing, headcount)
- Precise dates (announcement dates, launch dates, deal dates)
- Direct quotes from executives or official statements
- Funding rounds and valuations

MUST REMOVE:
- Company history and background information
- General industry trends (unless competitor-specific)
- Analyst opinions and speculation (keep only facts)
- Detailed product descriptions (keep only key differentiators and pricing)
- Marketing fluff and promotional language

OUTPUT FORMAT:
Return a bullet-point list where each line follows this format:
"[Company Name] - [Date]: [Action/Event] ([Key Numbers/Details])"

Keep it under 200 words total. Be ruthlessly concise. Facts only.

Example:
- Acme Corp - Mar 15, 2024: Launched AcmeGPT at $99/user/month, targeting enterprise market
- TechCo - Feb 10, 2024: Acquired DataStart for $150M, gaining 500 enterprise customers
"""

compression_manager = CompressionManager(
    model=OpenAIChat(id="gpt-5-mini"),
    compress_token_limit=5000,
    compress_tool_call_instructions=compression_prompt,
)

agent = Agent(
    model=OpenAIChat(id="gpt-4o-mini"),
    tools=[DuckDuckGoTools()],
    description="Specialized in tracking competitor activities",
    instructions="Use the search tools and always use the latest information and data.",
    db=SqliteDb(db_file="tmp/dbs/token_based_tool_call_compression.db"),
    compression_manager=compression_manager,
    add_history_to_context=True,  # Add history to context
    num_history_runs=3,
    session_id="token_based_tool_call_compression",
)

agent.print_response(
    """
    Use the search tools and always use the latest information and data.
    Research recent activities (last 3 months) for these AI companies:

    1. OpenAI - product launches, partnerships, pricing
    2. Anthropic - new features, enterprise deals, funding
    3. Google DeepMind - research breakthroughs, product releases
    4. Meta AI - open source releases, research papers

    For each, find specific actions with dates and numbers.""",
    stream=True,
)
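After the run, the manager's `stats` dict (populated by `compress`/`acompress` in this PR) can be inspected. A minimal sketch; the printed values below are illustrative, not real output:

    print(compression_manager.stats)
    # e.g. {"messages_compressed": 2, "tool_results_compressed": 3,
    #       "original_size": 18400, "compressed_size": 950}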
@@ -44,7 +44,7 @@

compression_manager = CompressionManager(
    model=OpenAIChat(id="gpt-4o-mini"),
    compress_tool_results_limit=1,
    compress_tool_results_limit=4,
    compress_tool_call_instructions=compression_prompt,
)
1 change: 0 additions & 1 deletion libs/agno/agno/agent/agent.py
@@ -5552,7 +5552,6 @@ def _handle_model_response_chunk(
                    run_response.files = []
                run_response.files.append(file_obj)


            reasoning_step: Optional[ReasoningStep] = None

            tool_executions_list = model_response_event.tool_executions
95 changes: 80 additions & 15 deletions libs/agno/agno/compression/manager.py
@@ -46,29 +46,54 @@

@dataclass
class CompressionManager:
    model: Optional[Model] = None
    model: Optional[Model] = None  # model used for compression
    compress_tool_results: bool = True
    compress_tool_results_limit: int = 3
    compress_tool_results_limit: Optional[int] = None
    compress_token_limit: Optional[int] = None
    compress_tool_call_instructions: Optional[str] = None

    stats: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        if self.compress_tool_results_limit is None and self.compress_token_limit is None:
            self.compress_tool_results_limit = 3

    def _is_tool_result_message(self, msg: Message) -> bool:
        return msg.role == "tool"

    def should_compress(self, messages: List[Message]) -> bool:
    def should_compress(
        self,
        messages: List[Message],
        tools: Optional[List] = None,
        model: Optional[Model] = None,
    ) -> bool:
        """Check if tool results should be compressed.

        Args:
            messages: List of messages to check.
            tools: Optional list of tools for token counting.
            model: The Agent / Team model.
        """
        if not self.compress_tool_results:
            return False

        uncompressed_tools_count = len(
            [m for m in messages if self._is_tool_result_message(m) and m.compressed_content is None]
        )
        should_compress = uncompressed_tools_count >= self.compress_tool_results_limit

        if should_compress:
            log_info(f"Tool call compression threshold hit. Compressing {uncompressed_tools_count} tool results")
        # Token-based threshold check
        if self.compress_token_limit is not None and model is not None:
            tokens = model.count_tokens(messages, tools)
            if tokens >= self.compress_token_limit:
                log_info(f"Token limit hit: {tokens} >= {self.compress_token_limit}")
                return True

        # Count-based threshold check
        if self.compress_tool_results_limit is not None:
            uncompressed_tools_count = len(
                [m for m in messages if self._is_tool_result_message(m) and m.compressed_content is None]
            )
            if uncompressed_tools_count >= self.compress_tool_results_limit:
                log_info(f"Tool count limit hit: {uncompressed_tools_count} >= {self.compress_tool_results_limit}")
                return True

        return should_compress
        return False

    def _compress_tool_result(self, tool_result: Message) -> Optional[str]:
        if not tool_result:
@@ -112,14 +137,51 @@ def compress(self, messages: List[Message]) -> None:
            compressed = self._compress_tool_result(tool_msg)
            if compressed:
                tool_msg.compressed_content = compressed
                # Track stats
                self.stats["messages_compressed"] = self.stats.get("messages_compressed", 0) + 1
                # Count actual tool results (Gemini combines multiple in one message)
                tool_results_count = len(tool_msg.tool_calls) if tool_msg.tool_calls else 1
                self.stats["tool_results_compressed"] = (
                    self.stats.get("tool_results_compressed", 0) + tool_results_count
                )
                self.stats["original_size"] = self.stats.get("original_size", 0) + original_len
                self.stats["compressed_size"] = self.stats.get("compressed_size", 0) + len(compressed)
            else:
                log_warning(f"Compression failed for {tool_msg.tool_name}")

    # * Async methods *#
    async def ashould_compress(
        self,
        messages: List[Message],
        tools: Optional[List] = None,
        model: Optional[Model] = None,
    ) -> bool:
        """Async check if tool results should be compressed.

        Args:
            messages: List of messages to check.
            tools: Optional list of tools for token counting.
            model: The Agent / Team model.
        """
        if not self.compress_tool_results:
            return False

        # Token-based threshold check
        if self.compress_token_limit is not None and model is not None:
            tokens = await model.acount_tokens(messages, tools)
            if tokens >= self.compress_token_limit:
                log_info(f"Token limit hit: {tokens} >= {self.compress_token_limit}")
                return True

        # Count-based threshold check
        if self.compress_tool_results_limit is not None:
            uncompressed_tools_count = len(
                [m for m in messages if self._is_tool_result_message(m) and m.compressed_content is None]
            )
            if uncompressed_tools_count >= self.compress_tool_results_limit:
                log_info(f"Tool count limit hit: {uncompressed_tools_count} >= {self.compress_tool_results_limit}")
                return True

        return False

    async def _acompress_tool_result(self, tool_result: Message) -> Optional[str]:
        """Async compress a single tool result"""
        if not tool_result:
@@ -168,8 +230,11 @@ async def acompress(self, messages: List[Message]) -> None:
        for msg, compressed, original_len in zip(uncompressed_tools, results, original_sizes):
            if compressed:
                msg.compressed_content = compressed
                # Track stats
                self.stats["messages_compressed"] = self.stats.get("messages_compressed", 0) + 1
                # Count actual tool results (Gemini combines multiple in one message)
                tool_results_count = len(msg.tool_calls) if msg.tool_calls else 1
                self.stats["tool_results_compressed"] = (
                    self.stats.get("tool_results_compressed", 0) + tool_results_count
                )
                self.stats["original_size"] = self.stats.get("original_size", 0) + original_len
                self.stats["compressed_size"] = self.stats.get("compressed_size", 0) + len(compressed)
            else:
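For reference, a minimal sketch of how the two thresholds combine (hypothetical wiring; inside agno the Agent passes its own model and tools to should_compress):

    manager = CompressionManager(
        model=OpenAIChat(id="gpt-4o-mini"),  # model used for compression
        compress_token_limit=8000,           # trigger once the prompt reaches ~8k tokens
        compress_tool_results_limit=5,       # or once 5 uncompressed tool results accumulate
    )
    if manager.should_compress(messages, tools=tools, model=agent_model):
        manager.compress(messages)  # sets .compressed_content on tool-result messages

Either threshold can fire independently; if neither is configured, __post_init__ defaults the count limit to 3.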
5 changes: 3 additions & 2 deletions libs/agno/agno/media.py
@@ -1,10 +1,11 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from uuid import uuid4
from agno.utils.log import log_error

from pydantic import BaseModel, field_validator, model_validator

from agno.utils.log import log_error


class Image(BaseModel):
"""Unified Image class for all use cases (input, output, artifacts)"""
@@ -397,7 +398,7 @@ def from_base64(
        format: Optional[str] = None,
    ) -> "File":
        """Create File from base64 encoded content or plain text.

        Handles both base64-encoded binary content and plain text content
        (which is stored as UTF-8 strings for text/* MIME types).
        """
7 changes: 3 additions & 4 deletions libs/agno/agno/memory/strategies/base.py
@@ -3,7 +3,7 @@

from agno.db.schemas import UserMemory
from agno.models.base import Model
from agno.utils.tokens import count_tokens as count_text_tokens
from agno.utils.tokens import count_text_tokens


class MemoryOptimizationStrategy(ABC):
@@ -60,8 +60,7 @@ def count_tokens(self, memories: List[UserMemory]) -> int:

        Args:
            memories: List of UserMemory objects

        Returns:
            Total token count using tiktoken (or fallback estimation)
            Total token count
        """
        return sum(count_text_tokens(mem.memory or "") for mem in memories)
        return sum(count_text_tokens(m.memory or "") for m in memories)
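A quick sketch of the renamed helper in use (import path taken from this diff; per the old docstring it counts with tiktoken, with a fallback estimation):

    from agno.utils.tokens import count_text_tokens

    n = count_text_tokens("User prefers dark mode and metric units")
    print(n)  # token count of the memory text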
41 changes: 41 additions & 0 deletions libs/agno/agno/models/anthropic/claude.py
@@ -13,6 +13,7 @@
from agno.models.metrics import Metrics
from agno.models.response import ModelResponse
from agno.run.agent import RunOutput
from agno.tools.function import Function
from agno.utils.http import get_default_async_client, get_default_sync_client
from agno.utils.log import log_debug, log_error, log_warning
from agno.utils.models.claude import MCPServerConfiguration, format_messages, format_tools_for_model
@@ -399,6 +400,46 @@ def get_async_client(self) -> AsyncAnthropicClient:
        self.async_client = AsyncAnthropicClient(**_client_params)
        return self.async_client

    def count_tokens(
        self,
        messages: List[Message],
        tools: Optional[List[Union[Function, Dict[str, Any]]]] = None,
    ) -> int:
        anthropic_messages, system_prompt = format_messages(messages, compress_tool_results=True)
        anthropic_tools = None
        if tools:
            formatted_tools = self._format_tools(tools)
            anthropic_tools = format_tools_for_model(formatted_tools)

        kwargs: Dict[str, Any] = {"messages": anthropic_messages, "model": self.id}
        if system_prompt:
            kwargs["system"] = system_prompt
        if anthropic_tools:
            kwargs["tools"] = anthropic_tools

        response = self.get_client().messages.count_tokens(**kwargs)
        return response.input_tokens

    async def acount_tokens(
        self,
        messages: List[Message],
        tools: Optional[List[Union[Function, Dict[str, Any]]]] = None,
    ) -> int:
        anthropic_messages, system_prompt = format_messages(messages, compress_tool_results=True)
        anthropic_tools = None
        if tools:
            formatted_tools = self._format_tools(tools)
            anthropic_tools = format_tools_for_model(formatted_tools)

        kwargs: Dict[str, Any] = {"messages": anthropic_messages, "model": self.id}
        if system_prompt:
            kwargs["system"] = system_prompt
        if anthropic_tools:
            kwargs["tools"] = anthropic_tools

        response = await self.get_async_client().messages.count_tokens(**kwargs)
        return response.input_tokens

    def get_request_params(
        self,
        response_format: Optional[Union[Dict, Type[BaseModel]]] = None,
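Both methods delegate to Anthropic's count_tokens endpoint and return its input_tokens. A minimal usage sketch (the model id, message, and import paths are assumptions, not taken from this diff):

    from agno.models.anthropic import Claude
    from agno.models.message import Message

    model = Claude(id="claude-sonnet-4-5")
    tokens = model.count_tokens([Message(role="user", content="Hello!")])
    print(tokens)  # input tokens as reported by the Anthropic API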
53 changes: 53 additions & 0 deletions libs/agno/agno/models/aws/bedrock.py
@@ -357,6 +357,59 @@ def _format_messages(
        # TODO: Add caching: https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-call.html
        return formatted_messages, system_message

    def count_tokens(
        self,
        messages: List[Message],
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> int:
        try:
            formatted_messages, system_message = self._format_messages(messages, compress_tool_results=True)
            converse_input: Dict[str, Any] = {"messages": formatted_messages}
            if system_message:
                converse_input["system"] = system_message

            response = self.get_client().count_tokens(modelId=self.id, input={"converse": converse_input})
            tokens = response.get("inputTokens", 0)

            # Count tool tokens
            if tools:
                from agno.utils.tokens import count_tool_tokens

                includes_system = any(m.role == "system" for m in messages)
                tokens += count_tool_tokens(tools, self.id, includes_system)

            return tokens
        except Exception as e:
            log_warning(f"Failed to count tokens via Bedrock API: {e}")
            return super().count_tokens(messages, tools)

Review discussion:

Contributor: Can't we just use this? Probably the same counting mechanism, as it should just depend on the model encoding?

Contributor Author (Mustafa-Esoofally): The token counting logic won't work for our Claude models.

Contributor: Why can't we use the count_tokens fn of the base Claude class, libs/agno/agno/models/anthropic/claude.py?

Contributor Author (Mustafa-Esoofally, Dec 9, 2025): Bedrock supports other, non-Anthropic models as well, so the count_tokens fn of the base Claude class won't work. Also, I am not sure the token counting is the same here, because Claude has intelligent caching.

    async def acount_tokens(
        self,
        messages: List[Message],
        tools: Optional[List[Dict[str, Any]]] = None,
    ) -> int:
        try:
            formatted_messages, system_message = self._format_messages(messages, compress_tool_results=True)
            converse_input: Dict[str, Any] = {"messages": formatted_messages}
            if system_message:
                converse_input["system"] = system_message

            async with self.get_async_client() as client:
                response = await client.count_tokens(modelId=self.id, input={"converse": converse_input})
            tokens = response.get("inputTokens", 0)

            # Count tool tokens
            if tools:
                from agno.utils.tokens import count_tool_tokens

                includes_system = any(m.role == "system" for m in messages)
                tokens += count_tool_tokens(tools, self.id, includes_system)

            return tokens
        except Exception as e:
            log_warning(f"Failed to count tokens via Bedrock API: {e}")
            return await super().acount_tokens(messages, tools)

    def invoke(
        self,
        messages: List[Message],
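Usage mirrors the Claude variant; a short sketch (the model id and import paths are assumptions). On any API error, both methods log a warning and fall back to the base model's token estimate:

    from agno.models.aws import AwsBedrock
    from agno.models.message import Message

    model = AwsBedrock(id="anthropic.claude-sonnet-4-5-20250929-v1:0")  # illustrative Bedrock model id
    tokens = model.count_tokens([Message(role="user", content="Hello!")])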