diff --git a/strands-py/src/strands/event_loop/event_loop.py b/strands-py/src/strands/event_loop/event_loop.py index 5d28879c7d..c5612b0df6 100644 --- a/strands-py/src/strands/event_loop/event_loop.py +++ b/strands-py/src/strands/event_loop/event_loop.py @@ -304,16 +304,11 @@ async def event_loop_cycle( try: if stop_reason == "max_tokens": - """ - Handle max_tokens limit reached by the model. - - When the model reaches its maximum token limit, this represents a potentially unrecoverable - state where the model's response was truncated. By default, Strands fails hard with an - MaxTokensReachedException to maintain consistency with other failure types. - """ raise MaxTokensReachedException( message=( - "Agent has reached an unrecoverable state due to max_tokens limit. " + "Model stopped generating due to maximum token limit. " + "The partial message has been added to the conversation history. " + "You can continue by calling the agent again. " "For more information see: " "https://strandsagents.com/docs/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" ) diff --git a/strands-py/src/strands/types/exceptions.py b/strands-py/src/strands/types/exceptions.py index 7d621191d7..c4dc4655b5 100644 --- a/strands-py/src/strands/types/exceptions.py +++ b/strands-py/src/strands/types/exceptions.py @@ -22,12 +22,15 @@ class MaxTokensReachedException(Exception): """Exception raised when the model reaches its maximum token generation limit. This exception is raised when the model stops generating tokens because it has reached the maximum number of - tokens allowed for output generation. This can occur when the model's max_tokens parameter is set too low for - the complexity of the response, or when the model naturally reaches its configured output limit during generation. + tokens allowed for output generation. The partial message is automatically added to agent.messages and you can + continue the conversation by calling the agent again. + + This can occur when the model's max_tokens parameter is set too low for the complexity of the response, or when + the model naturally reaches its configured output limit during generation. """ def __init__(self, message: str): - """Initialize the exception with an error message and the incomplete message object. + """Initialize the exception with an error message. Args: message: The error message describing the token limit issue diff --git a/strands-py/tests/strands/event_loop/test_event_loop.py b/strands-py/tests/strands/event_loop/test_event_loop.py index 48efbc6d11..21fb445e71 100644 --- a/strands-py/tests/strands/event_loop/test_event_loop.py +++ b/strands-py/tests/strands/event_loop/test_event_loop.py @@ -635,7 +635,9 @@ async def test_event_loop_cycle_max_tokens_exception( # Call event_loop_cycle, expecting it to raise MaxTokensReachedException expected_message = ( - "Agent has reached an unrecoverable state due to max_tokens limit. " + "Model stopped generating due to maximum token limit. " + "The partial message has been added to the conversation history. " + "You can continue by calling the agent again. " "For more information see: " "https://strandsagents.com/docs/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception" )