diff --git a/strands-py/src/strands/event_loop/event_loop.py b/strands-py/src/strands/event_loop/event_loop.py
index 5d28879c7d..c5612b0df6 100644
--- a/strands-py/src/strands/event_loop/event_loop.py
+++ b/strands-py/src/strands/event_loop/event_loop.py
@@ -304,16 +304,11 @@ async def event_loop_cycle(
 
         try:
             if stop_reason == "max_tokens":
-                """
-                Handle max_tokens limit reached by the model.
-
-                When the model reaches its maximum token limit, this represents a potentially unrecoverable
-                state where the model's response was truncated. By default, Strands fails hard with an
-                MaxTokensReachedException to maintain consistency with other failure types.
-                """
                 raise MaxTokensReachedException(
                     message=(
-                        "Agent has reached an unrecoverable state due to max_tokens limit. "
+                        "Model stopped generating due to maximum token limit. "
+                        "The partial message has been added to the conversation history. "
+                        "You can continue by calling the agent again. "
                         "For more information see: "
                         "https://strandsagents.com/docs/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
                     )
diff --git a/strands-py/src/strands/types/exceptions.py b/strands-py/src/strands/types/exceptions.py
index 7d621191d7..c4dc4655b5 100644
--- a/strands-py/src/strands/types/exceptions.py
+++ b/strands-py/src/strands/types/exceptions.py
@@ -22,12 +22,15 @@ class MaxTokensReachedException(Exception):
     """Exception raised when the model reaches its maximum token generation limit.
 
     This exception is raised when the model stops generating tokens because it has reached the maximum number of
-    tokens allowed for output generation. This can occur when the model's max_tokens parameter is set too low for
-    the complexity of the response, or when the model naturally reaches its configured output limit during generation.
+    tokens allowed for output generation. The partial message is automatically added to agent.messages and you can
+    continue the conversation by calling the agent again.
+
+    This can occur when the model's max_tokens parameter is set too low for the complexity of the response, or when
+    the model naturally reaches its configured output limit during generation.
     """
 
     def __init__(self, message: str):
-        """Initialize the exception with an error message and the incomplete message object.
+        """Initialize the exception with an error message.
 
         Args:
             message: The error message describing the token limit issue
diff --git a/strands-py/tests/strands/event_loop/test_event_loop.py b/strands-py/tests/strands/event_loop/test_event_loop.py
index 48efbc6d11..21fb445e71 100644
--- a/strands-py/tests/strands/event_loop/test_event_loop.py
+++ b/strands-py/tests/strands/event_loop/test_event_loop.py
@@ -635,7 +635,9 @@ async def test_event_loop_cycle_max_tokens_exception(
 
     # Call event_loop_cycle, expecting it to raise MaxTokensReachedException
     expected_message = (
-        "Agent has reached an unrecoverable state due to max_tokens limit. "
+        "Model stopped generating due to maximum token limit. "
+        "The partial message has been added to the conversation history. "
+        "You can continue by calling the agent again. "
         "For more information see: "
         "https://strandsagents.com/docs/user-guide/concepts/agents/agent-loop/#maxtokensreachedexception"
     )