Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
fb5ad1e
✨ feat(core): implement native function calling support for AI Studio
MasuRii Dec 23, 2025
227b1dd
🐛 fix(core): disable google search grounding during function calling …
MasuRii Dec 23, 2025
2103cf8
🚦 test(core): add unit tests and e2e script for native function calling
MasuRii Dec 23, 2025
e3de821
🐛 fix(browser): resolve conflicts between function calling and search…
MasuRii Dec 23, 2025
deca8e5
🐛 fix(function-calling): improve schema conversion for Gemini compati…
MasuRii Dec 23, 2025
3ad2659
✨ feat(function-calling): add native response parsing with dual-mode …
MasuRii Dec 23, 2025
4796ed2
⚡️ feat(function-calling): add toggle state caching to skip redundant…
MasuRii Dec 23, 2025
b000cd8
🐛 fix(function-calling): handle variable nesting depth in wire format…
MasuRii Dec 23, 2025
bfbd6d8
🐛 fix(function-calling): add fallback parsing for text-formatted tool…
MasuRii Dec 24, 2025
f89ecea
🐛 fix(function-calling): resolve 6 emulated FC parsing edge cases
MasuRii Dec 24, 2025
ee2ac83
🐛 fix(function-calling): resolve race conditions in native FC detection
MasuRii Dec 24, 2025
75e158a
🐛 fix(function-calling): handle wire format edge cases with empty arg…
MasuRii Dec 24, 2025
97a3531
🐛 fix(function-calling): resolve AUTO mode fallback and client switch…
MasuRii Dec 24, 2025
441a94e
🐛 fix(function-calling): verify UI toggle state on cache HIT to preve…
MasuRii Dec 24, 2025
9c11e50
✨ feat(logging): add modular FC debug logging system
MasuRii Dec 25, 2025
1099f8a
🐛 fix(function-calling): correct array param parsing for objects in w…
MasuRii Dec 25, 2025
f62fd0d
⚡️ perf(logging): gate all FC debug logs behind FUNCTION_CALLING_DEBU…
MasuRii Dec 25, 2025
b99a3cc
✨ feat(function-calling): add tool name caching and fuzzy matching fo…
MasuRii Dec 25, 2025
e237964
📚 docs: consolidate function calling documentation and cleanup obsole…
MasuRii Dec 25, 2025
b318a55
🐛 fix(function-calling): resolve test suite desync and namespaced too…
MasuRii Dec 25, 2025
4f674da
🐛 fix(ci): resolve extensive linting errors and test suite failures
MasuRii Dec 26, 2025
844e723
Minor Recommended Setting
MasuRii Dec 26, 2025
f6b1490
🐛 fix(stream): deduplicate function calls in native FC wire parsing
MasuRii Dec 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 154 additions & 6 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ AUTO_CONFIRM_LOGIN=true

# Quota Rotation Thresholds (Graceful Rotation)
# Soft Limit: Triggers "Rotation Pending". Current stream finishes, then rotates.
QUOTA_SOFT_LIMIT=450000
QUOTA_SOFT_LIMIT=850000
# Hard Limit: Triggers immediate "Kill Signal" to prevent hard bans.
QUOTA_HARD_LIMIT=550000
QUOTA_HARD_LIMIT=950000

# =============================================================================
# 5. Browser & Model Configuration
Expand Down Expand Up @@ -115,13 +115,13 @@ DEFAULT_TOP_P=0.95
DEFAULT_STOP_SEQUENCES=["User:"]

# Thinking Model Configuration (e.g., gemini-2.0-flash-thinking)
ENABLE_THINKING_BUDGET=false
ENABLE_THINKING_BUDGET=true
DEFAULT_THINKING_BUDGET=8192

# Thinking Budget Level Values (tokens) - Used for "low", "medium", "high" presets
THINKING_BUDGET_LOW=8000
THINKING_BUDGET_MEDIUM=16000
THINKING_BUDGET_HIGH=32000
THINKING_BUDGET_LOW=10923
THINKING_BUDGET_MEDIUM=21845
THINKING_BUDGET_HIGH=32768

# Gemini 3 Default Thinking Levels
DEFAULT_THINKING_LEVEL_PRO=high
Expand All @@ -134,6 +134,154 @@ DISABLE_THINKING_BUDGET_ON_STREAMING_DISABLE=false
ENABLE_GOOGLE_SEARCH=false
ENABLE_URL_CONTEXT=false

# =============================================================================
# Function Calling Configuration (Native vs Emulated)
# =============================================================================
# This configures how OpenAI-compatible tool calls (tools parameter) are handled.
# NOTE: XML-based tools in prompts are NOT affected - they pass through as plain text.

# Function Calling Mode: "auto" | "native" | "emulated"
# - "auto": (RECOMMENDED) Tries native first, then automatically falls back to emulated on failure
# - "native": AI Studio UI-driven function calling (best reliability)
# - "emulated": Text-based prompt injection (legacy, backwards compatible)
#
# When to use each:
# - "auto": Best for most users - resilient with automatic fallback
# - "native": When you need maximum reliability and your models support it
# - "emulated": For older setups or if native mode causes issues
FUNCTION_CALLING_MODE=auto

# Enable automatic fallback to emulated mode when native mode fails
# Only applies when FUNCTION_CALLING_MODE=native (auto mode always has fallback)
FUNCTION_CALLING_NATIVE_FALLBACK=true

# Timeout for function calling UI operations (milliseconds)
FUNCTION_CALLING_UI_TIMEOUT=10000

# Native mode retry attempts before fallback
FUNCTION_CALLING_NATIVE_RETRY_COUNT=3

# Clear function definitions between requests (stateless behavior)
# Set to false if you want to reuse tool definitions across requests
FUNCTION_CALLING_CLEAR_BETWEEN_REQUESTS=true

# -----------------------------------------------------------------------------
# Function Calling Debug Logging (Master Switch)
# -----------------------------------------------------------------------------
# When false, ALL function calling logs (console & modular) are disabled.
# Useful for production to minimize noise and disk I/O.
FUNCTION_CALLING_DEBUG=false

# Enable function calling state caching for performance
# Reduces UI operations when same tools are used in subsequent requests
FUNCTION_CALLING_CACHE_ENABLED=true

# Cache TTL in seconds (0 = no expiration within session)
FUNCTION_CALLING_CACHE_TTL=0

# -----------------------------------------------------------------------------
# Modular Per-Component Logging (Active only if FUNCTION_CALLING_DEBUG=true)
# -----------------------------------------------------------------------------
# Fine-grained logging with separate log files in logs/fc_debug/<module>.log
# See docs/architecture/FC_DEBUG_LOGGING_DESIGN.md for full documentation.

# Enable individual modules for targeted debugging. Only enabled modules
# will create log files. All default to false to minimize disk I/O.
#
# ORCHESTRATOR: Mode selection, fallback logic, high-level flow
# UI: Browser UI automation (toggle, dialog, paste)
# CACHE: Cache hits/misses/invalidation
# WIRE: Wire format parsing from network responses
# DOM: DOM-based function call extraction
# SCHEMA: Tool schema conversion and validation
# RESPONSE: Response formatting for OpenAI compatibility

FC_DEBUG_ORCHESTRATOR=false
FC_DEBUG_UI=false
FC_DEBUG_CACHE=false
FC_DEBUG_WIRE=false
FC_DEBUG_DOM=false
FC_DEBUG_SCHEMA=false
FC_DEBUG_RESPONSE=false

# -----------------------------------------------------------------------------
# Per-Module Log Levels
# -----------------------------------------------------------------------------
# Available levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
# Only affects modules that are enabled above.
# Recommendation: Use DEBUG for troubleshooting, INFO for general monitoring.

FC_DEBUG_LEVEL_ORCHESTRATOR=DEBUG
FC_DEBUG_LEVEL_UI=DEBUG
FC_DEBUG_LEVEL_CACHE=DEBUG
FC_DEBUG_LEVEL_WIRE=DEBUG
FC_DEBUG_LEVEL_DOM=DEBUG
FC_DEBUG_LEVEL_SCHEMA=DEBUG
FC_DEBUG_LEVEL_RESPONSE=DEBUG

# -----------------------------------------------------------------------------
# Log File Rotation
# -----------------------------------------------------------------------------
# Configure rotation to prevent unbounded disk usage.
# Max file size in bytes (default: 5 MiB = 5 * 1024 * 1024 = 5242880)
# Backup count determines how many rotated files to keep.

FC_DEBUG_LOG_MAX_BYTES=5242880
FC_DEBUG_LOG_BACKUP_COUNT=3

# -----------------------------------------------------------------------------
# Payload Truncation
# -----------------------------------------------------------------------------
# Large payloads (tool definitions, arguments) can clutter logs.
# Enable truncation to keep logs readable while preserving key info.

# Master switch for truncation
FC_DEBUG_TRUNCATE_ENABLED=true

# Maximum characters for different payload types:
# - TOOL_DEF: Tool/function schema definitions (often 10KB+)
# - ARGS: Function call arguments
# - RESPONSE: Response bodies

FC_DEBUG_TRUNCATE_MAX_TOOL_DEF=500
FC_DEBUG_TRUNCATE_MAX_ARGS=1000
FC_DEBUG_TRUNCATE_MAX_RESPONSE=2000

# -----------------------------------------------------------------------------
# Combined Log (Optional)
# -----------------------------------------------------------------------------
# Additionally write all FC debug logs to a single combined file.
# Useful for seeing cross-module request flow in one place.
# File: logs/fc_debug/fc_combined.log

FC_DEBUG_COMBINED_LOG=false

# =============================================================================
# Quick Start Examples:
# =============================================================================
#
# Example 1: Debug cache issues only
# FUNCTION_CALLING_DEBUG=true
# FC_DEBUG_CACHE=true
#
# Example 2: Full FC debugging with combined log
# FUNCTION_CALLING_DEBUG=true
# FC_DEBUG_ORCHESTRATOR=true
# FC_DEBUG_UI=true
# FC_DEBUG_CACHE=true
# FC_DEBUG_WIRE=true
# FC_DEBUG_DOM=true
# FC_DEBUG_SCHEMA=true
# FC_DEBUG_RESPONSE=true
# FC_DEBUG_COMBINED_LOG=true
#
# Example 3: Production monitoring (errors only)
# FUNCTION_CALLING_DEBUG=true
# FC_DEBUG_ORCHESTRATOR=true
# FC_DEBUG_LEVEL_ORCHESTRATOR=ERROR
# =============================================================================


# =============================================================================
# 7. Advanced Timeout Configuration (milliseconds)
# =============================================================================
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,12 @@ At the same time, we sincerely thank every friend who has silently contributed t
## Main Features

- **OpenAI Compatible API**: Supports `/v1/chat/completions` endpoint, fully compatible with OpenAI clients and third-party tools
- **Native Function Calling**: Full OpenAI-compatible `tools`/`tool_calls` support via AI Studio's built-in function calling UI, with automatic fallback to text-based emulation 🆕
- **Three-Layer Streaming Response Mechanism**: Integrated streaming proxy → external Helper service → Playwright page interaction, providing multiple layers of redundancy
- **Smart Model Switching**: Dynamically switch models in AI Studio through the `model` field in API requests
- **Complete Parameter Control**: Supports all major parameters like `temperature`, `max_output_tokens`, `top_p`, `stop`, `reasoning_effort`
- **Anti-Fingerprinting Detection**: Uses Camoufox browser to reduce the risk of being detected as automated scripts
- **Script Injection Feature v3.0**: Uses Playwright native network interception, supports Tampermonkey script dynamic mounting, 100% reliable 🆕
- **Script Injection Feature v3.0**: Uses Playwright native network interception, supports Tampermonkey script dynamic mounting, 100% reliable
- **Modern Web UI**: Built-in testing interface, supports real-time chat, status monitoring, hierarchical API key management
- **Graphical Interface Launcher**: Provides feature-rich GUI launcher, simplifying configuration and process management
- **Flexible Authentication System**: Supports optional API key authentication, fully compatible with OpenAI standard Bearer token format
Expand Down Expand Up @@ -309,10 +310,11 @@ nano .env # or use other editor
#### 🔧 Feature Usage

- [API Usage Guide](docs/guides/api-usage.md) - API endpoints and client configuration
- **[Native Function Calling Guide](docs/guides/native-function-calling.md)** - Complete guide for OpenAI-compatible tool calls 🆕
- **[OpenAI Compatibility Guide](docs/guides/openai-compatibility.md)** - Differences and limitations with OpenAI API 🔄
- **[Client Integration Examples](docs/guides/client-examples.md)** - Python, JavaScript, cURL and other example code 💻
- [Web UI Usage Guide](docs/guides/webui-guide.md) - Web interface feature description
- [Script Injection Guide](docs/guides/script_injection_guide.md) - Tampermonkey script dynamic mounting feature usage guide (v3.0) 🆕
- [Script Injection Guide](docs/guides/script_injection_guide.md) - Tampermonkey script dynamic mounting feature usage guide (v3.0)

#### 🌍 Platform & Deployment

Expand Down
107 changes: 100 additions & 7 deletions api_utils/request_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@
prepare_combined_prompt,
)
from .utils_ext.files import collect_and_validate_attachments
from .utils_ext.function_calling_orchestrator import (
FunctionCallingState,
get_function_calling_orchestrator,
)
from .utils_ext.stream import use_stream_response
from .utils_ext.tokens import calculate_usage_stats
from .utils_ext.usage_tracker import increment_profile_usage
Expand Down Expand Up @@ -157,8 +161,9 @@ async def _prepare_and_validate_request(
req_id: str,
request: ChatCompletionRequest,
check_client_disconnected: Callable,
) -> Tuple[str, List[str]]:
"""Prepare and validate request, return (combined prompt, attachment path list)."""
fc_state: Optional[FunctionCallingState] = None,
) -> Tuple[str, List[str], Optional[List[Dict[str, Any]]]]:
"""Prepare and validate request, return (combined prompt, attachment path list, tool_exec_results)."""
try:
validate_chat_request(request.messages, req_id)
except ValueError as e:
Expand All @@ -169,6 +174,7 @@ async def _prepare_and_validate_request(
req_id,
getattr(request, "tools", None),
getattr(request, "tool_choice", None),
fc_state=fc_state,
)
# Active function execution based on tools/tool_choice (supports per-request MCP endpoints)
try:
Expand Down Expand Up @@ -205,7 +211,7 @@ async def _prepare_and_validate_request(
request, req_id, attachments_list
)

return prepared_prompt, final_attachments
return prepared_prompt, final_attachments, tool_exec_results


async def _handle_response_processing(
Expand Down Expand Up @@ -578,8 +584,22 @@ def playwright_stream_factory(event_to_signal: Event):
await increment_profile_usage(state.current_auth_profile_path, total_tokens)

model_name_for_json = current_ai_studio_model_id or MODEL_NAME
message_payload = {"role": "assistant", "content": consolidated_content}
finish_reason_val = "stop"

# Handle function calls if detected
if response_data.get("has_function_calls"):
from api_utils.utils_ext.function_calling_orchestrator import (
get_function_calling_orchestrator,
)

orchestrator = get_function_calling_orchestrator()
message_payload, finish_reason_val = (
orchestrator.format_function_calls_for_response(
response_data.get("function_calls", []), consolidated_content
)
)
else:
message_payload = {"role": "assistant", "content": consolidated_content}
finish_reason_val = "stop"

response_payload = build_chat_completion_response_json(
req_id,
Expand Down Expand Up @@ -776,10 +796,83 @@ async def _process_request_refactored(
await _handle_model_switching(req_id, context, check_client_disconnected)
await _handle_parameter_cache(req_id, context)

prepared_prompt, attachments_list = await _prepare_and_validate_request(
req_id, request, check_client_disconnected
# --- Native Function Calling Setup (Phase 3) ---
# Configure native function calling if mode is native/auto and tools are present
fc_orchestrator = get_function_calling_orchestrator()
fc_state: Optional[FunctionCallingState] = None

if getattr(request, "tools", None):
try:
fc_state = await fc_orchestrator.prepare_request(
tools=request.tools,
tool_choice=getattr(request, "tool_choice", None),
page_controller=page_controller,
check_client_disconnected=check_client_disconnected,
req_id=req_id,
)
except Exception as fc_err:
logger.warning(
f"[{req_id}] Function calling setup failed: {fc_err}, continuing with emulated mode"
)
# Continue with request - fallback to emulated mode happens in prepare_combined_prompt

(
prepared_prompt,
attachments_list,
tool_exec_results,
) = await _prepare_and_validate_request(
req_id, request, check_client_disconnected, fc_state=fc_state
)

# [TOOL-FORCED] If tool was executed locally (forced), return immediately bypassing AI Studio flow
if tool_exec_results:
logger.info(
f"[{req_id}] Active tool execution detected, returning results immediately."
)
tool_calls_list = []
for res in tool_exec_results:
tool_calls_list.append(
{
"id": f"call_{_random_id()}",
"type": "function",
"function": {
"name": res["name"],
"arguments": res["arguments"],
},
}
)

message_payload = {
"role": "assistant",
"content": None,
"tool_calls": tool_calls_list,
}

usage_stats = calculate_usage_stats(
[msg.model_dump() for msg in request.messages],
"",
"",
)

response_payload = build_chat_completion_response_json(
req_id,
request.model or MODEL_NAME,
message_payload,
"tool_calls",
usage_stats,
seed=request.seed
if hasattr(request, "seed") and request.seed is not None
else 0,
)

if not result_future.done():
result_future.set_result(JSONResponse(content=response_payload))

# Return dummy event for forced tool execution to satisfy type requirement
dummy_event = Event()
dummy_event.set()
return dummy_event, submit_button_locator, check_client_disconnected

request_params = request.model_dump(exclude_none=True)
if "stop" in request.model_fields_set and request.stop is None:
request_params["stop"] = None
Expand Down
Loading