Skip to content
39 changes: 27 additions & 12 deletions src/google/adk/models/lite_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,9 @@ def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
continue
thinking_text = block.get("thinking", "")
signature = block.get("signature", "")
if not thinking_text:
# Anthropic streams a signature in a final chunk with empty text.
# Preserve signature-only blocks so the signature survives aggregation.
if not thinking_text and not signature:
continue
part = types.Part(text=thinking_text, thought=True)
if signature:
Expand Down Expand Up @@ -892,25 +894,33 @@ async def _content_to_message_param(
# For Anthropic models, rebuild thinking_blocks with signatures so that
# thinking is preserved across tool call boundaries. Without this,
# Anthropic silently drops thinking after the first turn.
#
# Streaming splits one Anthropic thinking block across many deltas:
# text-only chunks followed by a signature-only chunk at block_stop.
# Aggregate them back into one thinking block for outbound.
if model and _is_anthropic_model(model) and reasoning_parts:
thinking_blocks = []
combined_text_parts: List[str] = []
block_signature: Optional[str] = None
for part in reasoning_parts:
if part.text and part.thought_signature:
if part.text:
combined_text_parts.append(part.text)
if part.thought_signature:
sig = part.thought_signature
if isinstance(sig, bytes):
sig = sig.decode("utf-8")
thinking_blocks.append({
"type": "thinking",
"thinking": part.text,
"signature": sig,
})
if thinking_blocks:
block_signature = (
sig.decode("utf-8") if isinstance(sig, bytes) else sig
)
combined_text = "".join(combined_text_parts)
if combined_text and block_signature:
msg = ChatCompletionAssistantMessage(
role=role,
content=final_content,
tool_calls=tool_calls or None,
)
msg["thinking_blocks"] = thinking_blocks # type: ignore[typeddict-unknown-key]
msg["thinking_blocks"] = [{ # type: ignore[typeddict-unknown-key]
"type": "thinking",
"thinking": combined_text,
"signature": block_signature,
}]
return msg

reasoning_texts = []
Expand Down Expand Up @@ -1534,6 +1544,11 @@ def _has_meaningful_signal(message: Message | Delta | None) -> bool:
or message.get("function_call")
or message.get("reasoning_content")
or message.get("reasoning")
# Anthropic streams the thinking block's signature in a final delta
# where content/reasoning_content are empty and only thinking_blocks
# carries the signature. Without this, the delta is discarded before
# _extract_reasoning_value can preserve the signature.
or message.get("thinking_blocks")
)

if isinstance(response, ModelResponseStream):
Expand Down
92 changes: 88 additions & 4 deletions tests/unittests/models/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4834,15 +4834,29 @@ def test_convert_reasoning_value_to_parts_skips_redacted_blocks():
assert parts[0].text == "visible"


def test_convert_reasoning_value_to_parts_preserves_signature_only_blocks():
  """Signature-only blocks (empty text) are preserved for streaming aggregation.

  Anthropic emits the block_stop signature as a delta with empty thinking text.
  Dropping it would lose the signature, breaking multi-turn thinking continuity.
  Blocks with neither text nor signature are still skipped.
  """
  thinking_blocks = [
      # Signature-only block (what a streaming block_stop delta yields).
      {"type": "thinking", "thinking": "", "signature": "sig1"},
      # Normal block with both text and signature.
      {"type": "thinking", "thinking": "real thought", "signature": "sig2"},
      # Neither text nor signature: still dropped.
      {
          "type": "thinking",
          "thinking": "",
          "signature": "",
      },
  ]
  parts = _convert_reasoning_value_to_parts(thinking_blocks)
  # The fully-empty block is dropped; the other two survive in order.
  assert len(parts) == 2
  assert parts[0].text == ""
  assert parts[0].thought is True
  assert parts[0].thought_signature == b"sig1"
  assert parts[1].text == "real thought"
  assert parts[1].thought_signature == b"sig2"


def test_convert_reasoning_value_to_parts_flat_string_unchanged():
Expand Down Expand Up @@ -4954,3 +4968,73 @@ async def test_content_to_message_param_anthropic_no_signature_falls_back():
# Falls back to reasoning_content when no signatures present
assert result.get("reasoning_content") == "thinking without sig"
assert "thinking_blocks" not in result


@pytest.mark.asyncio
async def test_content_to_message_param_anthropic_aggregates_streaming_split_thinking():
  """Streaming splits one Anthropic thinking block across many parts:
  text-only chunks followed by a signature-only chunk at block_stop.
  _content_to_message_param must re-join them into one thinking_block.
  """
  # Text-only chunks as produced by streaming deltas (no signature yet).
  streamed_parts = [
      types.Part(text="The user wants ", thought=True),
      types.Part(text="GST research ", thought=True),
      types.Part(text="on secondment.", thought=True),
  ]
  # Final block_stop delta: empty text, the signature covers the whole block.
  streamed_parts.append(
      types.Part(
          text="", thought=True, thought_signature=b"ErEDClsIDBACGAIfull"
      )
  )
  # Non-thought response content following the thinking block.
  streamed_parts.append(
      types.Part.from_function_call(name="create_plan", args={"q": "test"})
  )
  message = await _content_to_message_param(
      types.Content(role="model", parts=streamed_parts),
      model="anthropic/claude-4-sonnet",
  )
  # Exactly one aggregated thinking block: combined text plus the signature.
  aggregated = message["thinking_blocks"]
  assert len(aggregated) == 1
  (block,) = aggregated
  assert block["type"] == "thinking"
  assert block["thinking"] == "The user wants GST research on secondment."
  assert block["signature"] == "ErEDClsIDBACGAIfull"
  # The Anthropic branch does not set legacy reasoning_content.
  assert message.get("reasoning_content") is None


def test_model_response_to_chunk_preserves_signature_only_delta():
  """Anthropic streams a final thinking delta where content and
  reasoning_content are empty but thinking_blocks carries the signature.
  _has_meaningful_signal must recognize thinking_blocks as signal so the
  signature survives into a ReasoningChunk.
  """
  # Delta with no text signal at all — only thinking_blocks is populated.
  signature_only_delta = Delta(
      role=None,
      content="",
      reasoning_content="",
      thinking_blocks=[{
          "type": "thinking",
          "thinking": "",
          "signature": "SignatureOnlyChunk",
      }],
  )
  stream = ModelResponseStream(
      id="x",
      created=0,
      model="claude",
      choices=[StreamingChoices(index=0, delta=signature_only_delta)],
  )
  reasoning = [
      chunk
      for chunk, _ in _model_response_to_chunk(stream)
      if isinstance(chunk, ReasoningChunk)
  ]
  # The signature-only delta must yield exactly one reasoning chunk whose
  # single part carries the signature as bytes.
  assert len(reasoning) == 1
  assert len(reasoning[0].parts) == 1
  part = reasoning[0].parts[0]
  assert part.text == ""
  assert part.thought is True
  assert part.thought_signature == b"SignatureOnlyChunk"
Loading