Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/google/adk/models/google_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@
"""


def _remove_old_thought_signatures(contents: list[types.Content]) -> None:
  """Keeps only the latest thought signature in Gemini request contents.

  Walks `contents` from the last entry to the first, and the parts of each
  entry from last to first. The first part encountered whose
  `thought_signature` is not None keeps its value; every other part that
  carries a signature has `thought_signature` reset to None.

  The parts are modified in place, so any other holder of the same part
  objects observes the change.

  Args:
    contents: Request contents to scrub. Entries whose `parts` is None or
      empty are skipped. A None or empty `contents` is accepted and is a
      no-op.
  """
  # NOTE(review): dropping every signature except the newest may conflict with
  # models that expect signatures for earlier function calls of the same turn
  # to be sent back -- confirm against the Gemini thought-signature docs.
  if not contents:
    # Nothing to scan; also avoids reversed(None) raising TypeError.
    return

  latest_signature_seen = False
  for content in reversed(contents):
    # `parts` may be None or empty; both mean there is nothing to inspect.
    for part in reversed(content.parts or ()):
      if part.thought_signature is None:
        continue
      if latest_signature_seen:
        # An older signature: the newest one was already found further back.
        part.thought_signature = None
      else:
        # Scanning backwards, the first signature found is the latest one.
        latest_signature_seen = True


class _ResourceExhaustedError(ClientError):
"""Represents a resources exhausted error received from the Model."""

Expand Down Expand Up @@ -195,6 +210,7 @@ async def generate_content_async(
"""
await self._preprocess_request(llm_request)
self._maybe_append_user_content(llm_request)
_remove_old_thought_signatures(llm_request.contents)

# Handle context caching if configured
cache_metadata = None
Expand Down
47 changes: 47 additions & 0 deletions tests/unittests/models/test_google_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,53 @@ async def mock_coro():
mock_client.aio.models.generate_content.assert_called_once()


@pytest.mark.asyncio
async def test_generate_content_async_keeps_only_latest_thought_signature(
    gemini_llm, generate_content_response
):
  """Only the most recent thought signature is sent in the Gemini request."""

  def _signed_call(tool_name, sig):
    # A function-call part that carries the given thought signature.
    return Part(
        function_call=types.FunctionCall(name=tool_name, args={}),
        thought_signature=sig,
    )

  # Three signed function calls spread over two model turns, separated by a
  # user turn that carries no signature at all.
  first_call = _signed_call("first_tool", b"old")
  second_call = _signed_call("second_tool", b"newer")
  third_call = _signed_call("third_tool", b"latest")
  earlier_model_turn = Content(role="model", parts=[first_call])
  user_turn = Content(role="user", parts=[Part.from_text(text="tool result")])
  final_model_turn = Content(role="model", parts=[second_call, third_call])
  llm_request = LlmRequest(
      model="gemini-2.5-flash",
      contents=[earlier_model_turn, user_turn, final_model_turn],
  )

  with mock.patch.object(gemini_llm, "api_client") as mock_client:

    async def mock_coro():
      return generate_content_response

    # The mocked API call hands back a coroutine resolving to the fixture.
    mock_client.aio.models.generate_content.return_value = mock_coro()

    responses = []
    async for resp in gemini_llm.generate_content_async(
        llm_request, stream=False
    ):
      responses.append(resp)

    assert len(responses) == 1

    # Inspect the contents that were actually passed to the API client.
    sent_kwargs = mock_client.aio.models.generate_content.call_args.kwargs
    request_contents = sent_kwargs["contents"]
    earlier_parts = request_contents[0].parts
    final_parts = request_contents[2].parts
    assert earlier_parts[0].thought_signature is None
    assert final_parts[0].thought_signature is None
    assert final_parts[1].thought_signature == b"latest"


@pytest.mark.asyncio
async def test_generate_content_async_stream(gemini_llm, llm_request):
with mock.patch.object(gemini_llm, "api_client") as mock_client:
Expand Down