From dee2e387ea109078569e109a8dd423dd80fc7489 Mon Sep 17 00:00:00 2001 From: Ahmed Sadiki Date: Wed, 24 Jun 2026 13:52:56 +0100 Subject: [PATCH 1/2] fix(tools): return structured JSON error data from tool exceptions Replace generic "Error executing tool: {e}" strings with structured JSON containing exception type, message, and retryability hint. This gives agents the information needed to decide whether to retry, fix their input, or skip the tool entirely. Closes #6262 Co-Authored-By: Claude Opus 4.6 --- .../src/crewai/agents/crew_agent_executor.py | 3 +- .../src/crewai/experimental/agent_executor.py | 10 +- lib/crewai/src/crewai/llm.py | 6 +- .../src/crewai/utilities/agent_utils.py | 3 +- .../src/crewai/utilities/tool_errors.py | 29 +++++ lib/crewai/tests/test_tool_errors.py | 121 ++++++++++++++++++ 6 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 lib/crewai/src/crewai/utilities/tool_errors.py create mode 100644 lib/crewai/tests/test_tool_errors.py diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index de2315e3a9..66a17915d4 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -80,6 +80,7 @@ aexecute_tool_and_check_finality, execute_tool_and_check_finality, ) +from crewai.utilities.tool_errors import format_tool_error from crewai.utilities.training_handler import CrewTrainingHandler @@ -1006,7 +1007,7 @@ def _execute_single_native_tool_call( result = format_native_tool_output_for_agent(output_tool, raw_result) except Exception as e: - result = f"Error executing tool: {e}" + result = format_tool_error(e) raw_tool_result = result if self.task: self.task.increment_tools_errors() diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index 303330dc6a..3111283eda 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ 
b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -108,6 +108,7 @@ ) from crewai.utilities.step_execution_context import StepExecutionContext, StepResult from crewai.utilities.string_utils import sanitize_tool_name +from crewai.utilities.tool_errors import format_tool_error from crewai.utilities.tool_utils import execute_tool_and_check_finality from crewai.utilities.training_handler import CrewTrainingHandler from crewai.utilities.types import LLMMessage @@ -1615,9 +1616,10 @@ def execute_tool_action(self) -> Literal["tool_completed", "tool_result_is_final if self.task: self.task.increment_tools_errors() - error_observation = f"\nObservation: Error executing tool: {e}" + structured_error = format_tool_error(e) + error_observation = f"\nObservation: {structured_error}" action.text += error_observation - action.result = str(e) + action.result = structured_error self._append_message_to_state(action.text) reasoning_prompt = I18N_DEFAULT.slice("post_tool_reasoning") @@ -1736,7 +1738,7 @@ def execute_native_tool( ordered_results[idx] = { "call_id": call_id, "func_name": func_name, - "result": f"Error executing tool: {e}", + "result": format_tool_error(e), "from_cache": False, "original_tool": None, } @@ -1999,7 +2001,7 @@ def _execute_single_native_tool_call(self, tool_call: Any) -> dict[str, Any]: output_tool, raw_result ) except Exception as e: - result = f"Error executing tool: {e}" + result = format_tool_error(e) raw_tool_result = result if self.task: self.task.increment_tools_errors() diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 153bbd2d73..3af96fde82 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -52,6 +52,7 @@ from crewai.utilities.logger_utils import suppress_warnings from crewai.utilities.string_utils import sanitize_tool_name from crewai.utilities.token_counter_callback import TokenCalcHandler +from crewai.utilities.tool_errors import format_tool_error try: @@ -1755,11 +1756,12 @@ def 
_handle_tool_call( return result except Exception as e: fn = available_functions.get(function_name, lambda: None) + structured_error = format_tool_error(e) logging.error(f"Error executing function '{function_name}': {e}") crewai_event_bus.emit( self, event=LLMCallFailedEvent( - error=f"Tool execution error: {e!s}", + error=structured_error, from_task=from_task, from_agent=from_agent, call_id=get_current_call_id(), @@ -1770,7 +1772,7 @@ def _handle_tool_call( event=ToolUsageErrorEvent( tool_name=function_name, tool_args=function_args, - error=f"Tool execution error: {e!s}", + error=structured_error, from_task=from_task, from_agent=from_agent, ), diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py index e933a38a80..cba9ca157a 100644 --- a/lib/crewai/src/crewai/utilities/agent_utils.py +++ b/lib/crewai/src/crewai/utilities/agent_utils.py @@ -37,6 +37,7 @@ from crewai.utilities.pydantic_schema_utils import generate_model_description from crewai.utilities.string_utils import sanitize_tool_name from crewai.utilities.token_counter_callback import TokenCalcHandler +from crewai.utilities.tool_errors import format_tool_error from crewai.utilities.types import LLMMessage @@ -1546,7 +1547,7 @@ def execute_single_native_tool_call( result = format_native_tool_output_for_agent(output_tool, raw_result) except Exception as e: - result = f"Error executing tool: {e}" + result = format_tool_error(e) raw_tool_result = result if task: task.increment_tools_errors() diff --git a/lib/crewai/src/crewai/utilities/tool_errors.py b/lib/crewai/src/crewai/utilities/tool_errors.py new file mode 100644 index 0000000000..2c8ac3230b --- /dev/null +++ b/lib/crewai/src/crewai/utilities/tool_errors.py @@ -0,0 +1,29 @@ +"""Structured tool error formatting for agent consumption. + +When a tool raises an exception, the agent needs structured information +to decide whether to retry, fix its input, or skip the tool entirely. 
import json
import traceback

# Exception families treated as transient. ``TimeoutError`` and
# ``ConnectionError`` are themselves ``OSError`` subclasses; they are listed
# explicitly so the intent stays readable.
RETRYABLE_EXCEPTIONS = (TimeoutError, ConnectionError, OSError)

# ``OSError`` subclasses that describe a deterministic problem with the
# request itself (wrong path, missing permission). Repeating the same call
# cannot succeed, so they must not be advertised as retryable even though
# they match ``RETRYABLE_EXCEPTIONS`` through inheritance.
_NON_RETRYABLE_OS_ERRORS = (
    FileNotFoundError,
    FileExistsError,
    PermissionError,
    IsADirectoryError,
    NotADirectoryError,
)


def format_tool_error(exception: Exception, include_traceback: bool = False) -> str:
    """Format a tool execution error as structured JSON for the agent.

    Args:
        exception: The exception raised by the tool.
        include_traceback: When true, add a ``"traceback"`` entry holding at
            most three stack frames of ``exception``'s own traceback.

    Returns:
        The literal prefix ``"Error executing tool: "`` followed by a JSON
        object with the keys ``error`` (always ``true``), ``type`` (the
        exception class name), ``message`` (``str(exception)``),
        ``retryable`` (bool hint) and, optionally, ``traceback``.
    """
    is_retryable = isinstance(exception, RETRYABLE_EXCEPTIONS) and not isinstance(
        exception, _NON_RETRYABLE_OS_ERRORS
    )
    error_data: dict[str, bool | str] = {
        "error": True,
        "type": type(exception).__name__,
        "message": str(exception),
        "retryable": is_retryable,
    }
    if include_traceback:
        # Format the traceback attached to *this* exception object.
        # ``traceback.format_exc()`` would instead report whatever exception
        # the interpreter is currently handling, which is "NoneType: None"
        # outside an ``except`` block and the wrong error inside an
        # unrelated one.
        error_data["traceback"] = "".join(
            traceback.format_exception(
                type(exception), exception, exception.__traceback__, limit=3
            )
        )
    return f"Error executing tool: {json.dumps(error_data)}"
KeyError("missing_key") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["type"] == "KeyError" + + def test_preserves_exception_message(self): + err = ValueError("count must be positive") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["message"] == "count must be positive" + + def test_retryable_true_for_timeout(self): + err = TimeoutError("connection timed out") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["retryable"] is True + + def test_retryable_true_for_connection_error(self): + err = ConnectionError("refused") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["retryable"] is True + + def test_retryable_true_for_os_error(self): + err = OSError("disk full") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["retryable"] is True + + def test_retryable_false_for_value_error(self): + err = ValueError("invalid") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["retryable"] is False + + def test_retryable_false_for_type_error(self): + err = TypeError("wrong type") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["retryable"] is False + + def test_retryable_false_for_key_error(self): + err = KeyError("not found") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["retryable"] is False + + def test_no_traceback_by_default(self): + err = ValueError("test") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert "traceback" not in parsed + + def test_traceback_included_when_requested(self): + try: + raise 
ValueError("deliberate error") + except ValueError as e: + result = format_tool_error(e, include_traceback=True) + parsed = json.loads(result[len("Error executing tool: "):]) + assert "traceback" in parsed + assert "ValueError" in parsed["traceback"] + + def test_handles_exception_with_special_characters(self): + err = ValueError('path "C:\\Users\\test" not found') + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert 'C:\\Users\\test' in parsed["message"] + + def test_handles_exception_with_empty_message(self): + err = RuntimeError() + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["type"] == "RuntimeError" + assert parsed["message"] == "" + + def test_handles_custom_exception(self): + class MyToolError(Exception): + pass + + err = MyToolError("custom failure") + result = format_tool_error(err) + parsed = json.loads(result[len("Error executing tool: "):]) + assert parsed["type"] == "MyToolError" + assert parsed["message"] == "custom failure" + assert parsed["retryable"] is False + + def test_retryable_exceptions_tuple_contains_expected_types(self): + assert TimeoutError in RETRYABLE_EXCEPTIONS + assert ConnectionError in RETRYABLE_EXCEPTIONS + assert OSError in RETRYABLE_EXCEPTIONS From 37fc0a663efcbfddecb637668073ab81f730a4ae Mon Sep 17 00:00:00 2001 From: Ahmed Sadiki Date: Wed, 24 Jun 2026 14:06:10 +0100 Subject: [PATCH 2/2] feat(llm): add native Groq provider support and fix cache_breakpoint for non-Anthropic Adds Groq as a natively supported OpenAI-compatible provider, routing groq/* models directly to https://api.groq.com/openai/v1 without requiring the heavier LiteLLM fallback. Also fixes the cache_breakpoint bug (#5886) where the Anthropic-specific cache_breakpoint key was sent to non-Anthropic providers, causing BadRequestError on Groq, OpenAI-compatible, and other providers. 
Closes #6286 Fixes #5886 Co-Authored-By: Claude Opus 4.6 --- lib/crewai/src/crewai/llm.py | 20 ++++ .../providers/openai_compatible/completion.py | 6 ++ lib/crewai/tests/llms/test_groq_provider.py | 98 +++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 lib/crewai/tests/llms/test_groq_provider.py diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 3af96fde82..29e34d4f12 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -343,6 +343,7 @@ def _ensure_litellm() -> bool: "cerebras", "dashscope", "snowflake", + "groq", ] @@ -432,6 +433,7 @@ def __new__(cls, model: str, is_litellm: bool = False, **kwargs: Any) -> LLM: "cerebras": "cerebras", "dashscope": "dashscope", "snowflake": "snowflake", + "groq": "groq", } canonical_provider = provider_mapping.get(prefix.lower()) @@ -554,6 +556,12 @@ def _matches_provider_pattern(cls, model: str, provider: str) -> bool: if provider == "snowflake": return True + if provider == "groq": + return any( + model_lower.startswith(prefix) + for prefix in ["llama", "gemma", "mixtral", "whisper", "deepseek"] + ) + return False @classmethod @@ -665,6 +673,7 @@ def _get_native_provider(cls, provider: str) -> type | None: "hosted_vllm", "cerebras", "dashscope", + "groq", } if provider in openai_compatible_providers: from crewai.llms.providers.openai_compatible.completion import ( @@ -2288,6 +2297,17 @@ def _format_messages_for_provider( "Invalid message format. Each message must be a dict with 'role' and 'content' keys" ) + # Strip cache_breakpoint from messages for non-Anthropic providers. + # This key is only meaningful for Anthropic's prompt caching API; + # other providers (Groq, OpenAI-compatible) reject unknown fields. 
+ if not self.is_anthropic: + from crewai.llms.cache import CACHE_BREAKPOINT_KEY + + messages = [ + {k: v for k, v in msg.items() if k != CACHE_BREAKPOINT_KEY} + for msg in messages + ] # type: ignore[assignment] + if "o1" in self.model.lower(): formatted_messages = [] for msg in messages: diff --git a/lib/crewai/src/crewai/llms/providers/openai_compatible/completion.py b/lib/crewai/src/crewai/llms/providers/openai_compatible/completion.py index da4cfd03db..0fd64b14f6 100644 --- a/lib/crewai/src/crewai/llms/providers/openai_compatible/completion.py +++ b/lib/crewai/src/crewai/llms/providers/openai_compatible/completion.py @@ -89,6 +89,12 @@ class ProviderConfig: base_url_env="DASHSCOPE_BASE_URL", api_key_required=True, ), + "groq": ProviderConfig( + base_url="https://api.groq.com/openai/v1", + api_key_env="GROQ_API_KEY", + base_url_env="GROQ_BASE_URL", + api_key_required=True, + ), } diff --git a/lib/crewai/tests/llms/test_groq_provider.py b/lib/crewai/tests/llms/test_groq_provider.py new file mode 100644 index 0000000000..9df42691d0 --- /dev/null +++ b/lib/crewai/tests/llms/test_groq_provider.py @@ -0,0 +1,98 @@ +"""Tests for native Groq provider support and cache_breakpoint stripping.""" + +import pytest + + +class TestGroqNativeRouting: + """Test that Groq models route to the native OpenAI-compatible provider.""" + + def test_groq_in_supported_providers(self): + from crewai.llm import SUPPORTED_NATIVE_PROVIDERS + + assert "groq" in SUPPORTED_NATIVE_PROVIDERS + + def test_groq_provider_config_exists(self): + from crewai.llms.providers.openai_compatible.completion import ( + OPENAI_COMPATIBLE_PROVIDERS, + ) + + assert "groq" in OPENAI_COMPATIBLE_PROVIDERS + config = OPENAI_COMPATIBLE_PROVIDERS["groq"] + assert config.base_url == "https://api.groq.com/openai/v1" + assert config.api_key_env == "GROQ_API_KEY" + assert config.api_key_required is True + + def test_groq_model_pattern_matching(self): + from crewai.llm import LLM + + assert 
LLM._matches_provider_pattern("llama-3.3-70b-versatile", "groq") is True + assert LLM._matches_provider_pattern("mixtral-8x7b-32768", "groq") is True + assert LLM._matches_provider_pattern("gemma-7b-it", "groq") is True + assert LLM._matches_provider_pattern("whisper-large-v3", "groq") is True + assert ( + LLM._matches_provider_pattern("deepseek-r1-distill-llama-70b", "groq") + is True + ) + assert LLM._matches_provider_pattern("gpt-4o", "groq") is False + + def test_groq_routes_to_openai_compatible(self): + from crewai.llm import LLM + from crewai.llms.providers.openai_compatible.completion import ( + OpenAICompatibleCompletion, + ) + + provider_class = LLM._get_native_provider("groq") + assert provider_class is OpenAICompatibleCompletion + + +class TestCacheBreakpointStripping: + """Test that cache_breakpoint is stripped for non-Anthropic providers.""" + + def test_strip_cache_breakpoint_for_non_anthropic(self): + from crewai.llm import LLM + + llm = LLM.__new__(LLM, model="groq/llama-3.3-70b-versatile") + llm.model = "groq/llama-3.3-70b-versatile" + llm.is_anthropic = False + + messages = [ + {"role": "system", "content": "You are helpful.", "cache_breakpoint": True}, + {"role": "user", "content": "Hello", "cache_breakpoint": True}, + ] + + result = llm._format_messages_for_provider(messages) + + for msg in result: + assert "cache_breakpoint" not in msg + + def test_preserve_cache_breakpoint_for_anthropic(self): + from crewai.llm import LLM + + llm = LLM.__new__(LLM, model="anthropic/claude-sonnet-4-20250514") + llm.model = "anthropic/claude-sonnet-4-20250514" + llm.is_anthropic = True + + messages = [ + {"role": "user", "content": "Hello", "cache_breakpoint": True}, + ] + + result = llm._format_messages_for_provider(messages) + + assert result[0].get("cache_breakpoint") is True + + def test_strip_does_not_remove_role_or_content(self): + from crewai.llm import LLM + + llm = LLM.__new__(LLM, model="groq/llama-3.3-70b-versatile") + llm.model = 
"groq/llama-3.3-70b-versatile" + llm.is_anthropic = False + + messages = [ + {"role": "user", "content": "Test message", "cache_breakpoint": True}, + ] + + result = llm._format_messages_for_provider(messages) + + assert result[0]["role"] == "user" + assert result[0]["content"] == "Test message" + assert "cache_breakpoint" not in result[0]