diff --git a/contributing/samples/gepa/experiment.py b/contributing/samples/gepa/experiment.py
index f3751206a8..2710c3894c 100644
--- a/contributing/samples/gepa/experiment.py
+++ b/contributing/samples/gepa/experiment.py
@@ -43,7 +43,6 @@
 from tau_bench.types import EnvRunResult
 from tau_bench.types import RunConfig
 import tau_bench_agent as tau_bench_agent_lib
-
 import utils
 
 
diff --git a/contributing/samples/gepa/run_experiment.py b/contributing/samples/gepa/run_experiment.py
index d857da9635..e31db15788 100644
--- a/contributing/samples/gepa/run_experiment.py
+++ b/contributing/samples/gepa/run_experiment.py
@@ -25,7 +25,6 @@
 from absl import flags
 import experiment
 from google.genai import types
-
 import utils
 
 _OUTPUT_DIR = flags.DEFINE_string(
diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py
index f8fb6795aa..fafd1d28c6 100644
--- a/src/google/adk/evaluation/evaluation_generator.py
+++ b/src/google/adk/evaluation/evaluation_generator.py
@@ -280,7 +280,7 @@ def convert_events_to_eval_invocations(
     invocations = []
     for invocation_id, events in events_by_invocation_id.items():
       final_response = None
-      final_event = None
+      final_event: Optional[Event] = None
       user_content = Content(parts=[])
       invocation_timestamp = 0
       app_details = None
@@ -315,7 +315,9 @@ def convert_events_to_eval_invocations(
       invocation_events = [
           InvocationEvent(author=e.author, content=e.content)
           for e in events_to_add
-          if e is not final_event
+          if final_event is None
+          or e is not final_event
+          or e.get_function_calls()
       ]
       invocations.append(
           Invocation(
diff --git a/src/google/adk/flows/llm_flows/functions.py b/src/google/adk/flows/llm_flows/functions.py
index eda8474c01..f9393c4986 100644
--- a/src/google/adk/flows/llm_flows/functions.py
+++ b/src/google/adk/flows/llm_flows/functions.py
@@ -68,6 +68,13 @@
 _TOOL_THREAD_POOLS: dict[int, ThreadPoolExecutor] = {}
 _TOOL_THREAD_POOL_LOCK = threading.Lock()
 
+# Sentinel object used to distinguish a FunctionTool that legitimately returns
+# None from a non-FunctionTool sync tool that skips thread-pool execution.
+# Using None as a sentinel would cause tools whose underlying function has no
+# explicit return statement (implicit None) to fall through to the async
+# fallback path and execute a second time.
+_SYNC_TOOL_RESULT_UNSET = object()
+
 
 def _is_live_request_queue_annotation(param: inspect.Parameter) -> bool:
   """Check whether a parameter is annotated as LiveRequestQueue.
@@ -159,13 +166,14 @@ def run_sync_tool():
        }
        return tool.func(**args_to_call)
      else:
-        # For other sync tool types, we can't easily run them in thread pool
-        return None
+        # For other sync tool types, we can't easily run them in thread pool.
+        # Return the sentinel so the caller knows to fall back to run_async.
+        return _SYNC_TOOL_RESULT_UNSET
 
    result = await loop.run_in_executor(
        executor, lambda: ctx.run(run_sync_tool)
    )
-    if result is not _SYNC_TOOL_RESULT_UNSET:
-    if result is not None:
+    if result is not _SYNC_TOOL_RESULT_UNSET:
      return result
  else:
    # For async tools, run them in a new event loop in a background thread.
diff --git a/tests/unittests/evaluation/test_evaluation_generator.py b/tests/unittests/evaluation/test_evaluation_generator.py
index a4aa8691fd..122d3b7c97 100644
--- a/tests/unittests/evaluation/test_evaluation_generator.py
+++ b/tests/unittests/evaluation/test_evaluation_generator.py
@@ -16,12 +16,14 @@
 from google.adk.evaluation.app_details import AgentDetails
 from google.adk.evaluation.app_details import AppDetails
+from google.adk.evaluation.eval_case import get_all_tool_calls
 from google.adk.evaluation.evaluation_generator import EvaluationGenerator
 from google.adk.evaluation.request_intercepter_plugin import _RequestIntercepterPlugin
 from google.adk.evaluation.simulation.user_simulator import NextUserMessage
 from google.adk.evaluation.simulation.user_simulator import Status as UserSimulatorStatus
 from google.adk.evaluation.simulation.user_simulator import UserSimulator
 from google.adk.events.event import Event
+from google.adk.events.event_actions import EventActions
 from google.adk.models.llm_request import LlmRequest
 from google.genai import types
 import pytest
@@ -479,3 +481,48 @@ async def mock_generate_inferences_side_effect(
   mock_generate_inferences.assert_called_once()
   called_with_content = mock_generate_inferences.call_args.args[3]
   assert called_with_content.parts[0].text == "message 1"
+
+
+def test_convert_events_preserves_tool_calls_when_skip_summarization():
+  """Regression test for #5410.
+
+  When an event has skip_summarization=True, is_final_response() returns True
+  even if the event contains function calls. Previously such an event was
+  treated as final_event and excluded from invocation_events, causing
+  get_all_tool_calls() to return an empty list and tool_trajectory_avg_score
+  to always be 0.0 despite matching tool calls.
+ """ + events = [ + Event( + invocation_id="inv1", + author="user", + content=types.Content( + parts=[types.Part(text="run a query")], role="user" + ), + timestamp=1000.0, + ), + Event( + invocation_id="inv1", + author="agent", + content=types.Content( + parts=[ + types.Part( + function_call=types.FunctionCall( + id="call_01", + name="execute_sql", + args={"project_id": "my-proj", "query": "SELECT 1"}, + ) + ) + ] + ), + actions=EventActions(skip_summarization=True), + ), + ] + + invocations = EvaluationGenerator.convert_events_to_eval_invocations(events) + assert len(invocations) == 1 + + tool_calls = get_all_tool_calls(invocations[0].intermediate_data) + assert len(tool_calls) == 1 + assert tool_calls[0].name == "execute_sql" + assert tool_calls[0].args == {"project_id": "my-proj", "query": "SELECT 1"} diff --git a/tests/unittests/evaluation/test_trajectory_evaluator.py b/tests/unittests/evaluation/test_trajectory_evaluator.py index 0fa3fa5a73..8a3dae02a7 100644 --- a/tests/unittests/evaluation/test_trajectory_evaluator.py +++ b/tests/unittests/evaluation/test_trajectory_evaluator.py @@ -16,6 +16,8 @@ from google.adk.evaluation.eval_case import IntermediateData from google.adk.evaluation.eval_case import Invocation +from google.adk.evaluation.eval_case import InvocationEvent +from google.adk.evaluation.eval_case import InvocationEvents from google.adk.evaluation.eval_metrics import EvalMetric from google.adk.evaluation.eval_metrics import PrebuiltMetrics from google.adk.evaluation.eval_metrics import ToolTrajectoryCriterion @@ -462,3 +464,62 @@ def test_evaluate_invocations_no_invocations(evaluator: TrajectoryEvaluator): assert result.overall_score is None assert result.overall_eval_status == EvalStatus.NOT_EVALUATED assert not result.per_invocation_results + + +def _make_invocation_events( + *tool_calls: genai_types.FunctionCall, +) -> Invocation: + """Returns an Invocation using InvocationEvents intermediate_data format.""" + return Invocation( + user_content=_USER_CONTENT, + intermediate_data=InvocationEvents( + invocation_events=[ + InvocationEvent( + author="agent", + content=genai_types.Content( + parts=[genai_types.Part(function_call=tc)] + ), + ) + for tc in tool_calls + ] + ), + ) + + +def test_evaluate_invocations_invocation_events_format_exact_match( + evaluator: TrajectoryEvaluator, +): + """InvocationEvents intermediate_data format should score 1.0 on exact match. + + Regression test for #5410: tool_trajectory_avg_score returned 0.0 even when + tool name and args were identical because function-call events with + skip_summarization=True were incorrectly excluded from invocation_events. 
+ """ + tool_call = genai_types.FunctionCall( + id="toolu_01", name="execute_sql", args={"query": "SELECT 1"} + ) + expected_tool_call = genai_types.FunctionCall( + name="execute_sql", args={"query": "SELECT 1"} + ) + actual = _make_invocation_events(tool_call) + expected = _make_invocation_events(expected_tool_call) + + result = evaluator.evaluate_invocations([actual], [expected]) + assert result.overall_score == 1.0 + assert result.overall_eval_status == EvalStatus.PASSED + + +def test_evaluate_invocations_invocation_events_format_mismatch( + evaluator: TrajectoryEvaluator, +): + """InvocationEvents format should score 0.0 when tool calls differ.""" + actual = _make_invocation_events( + genai_types.FunctionCall(name="tool_a", args={"x": "1"}) + ) + expected = _make_invocation_events( + genai_types.FunctionCall(name="tool_b", args={"x": "1"}) + ) + + result = evaluator.evaluate_invocations([actual], [expected]) + assert result.overall_score == 0.0 + assert result.overall_eval_status == EvalStatus.FAILED diff --git a/tests/unittests/flows/llm_flows/test_functions_thread_pool.py b/tests/unittests/flows/llm_flows/test_functions_thread_pool.py index e134a555c1..f129d7c3c3 100644 --- a/tests/unittests/flows/llm_flows/test_functions_thread_pool.py +++ b/tests/unittests/flows/llm_flows/test_functions_thread_pool.py @@ -277,6 +277,56 @@ def blocking_sleep() -> dict: event_loop_ticks >= 5 ), f'Event loop should have ticked at least 5 times, got {event_loop_ticks}' + @pytest.mark.asyncio + @pytest.mark.parametrize( + 'return_value,use_implicit_return', + [ + (None, True), # implicit None (no return statement) + (None, False), # explicit `return None` + (0, False), # falsy int + ('', False), # falsy str + ({}, False), # falsy dict + (False, False), # falsy bool + ], + ) + async def test_sync_tool_falsy_return_executes_exactly_once( + self, return_value, use_implicit_return + ): + """FunctionTools returning None or other falsy values must execute exactly once. + + Regression test for https://github.com/google/adk-python/issues/5284. + Previously, a None return was mistaken for the internal sentinel used to + signal 'non-FunctionTool, fall back to run_async', causing a second + invocation. The fix uses an identity-based sentinel so that None and other + falsy values (0, '', {}, False) are treated as valid results. + """ + call_count = 0 + + def sync_func(): + nonlocal call_count + call_count += 1 + if not use_implicit_return: + return return_value + # implicit None — no return statement + + tool = FunctionTool(sync_func) + model = testing_utils.MockModel.create(responses=[]) + agent = Agent(name='test_agent', model=model, tools=[tool]) + invocation_context = await testing_utils.create_invocation_context( + agent=agent, user_content='' + ) + tool_context = ToolContext( + invocation_context=invocation_context, + function_call_id='test_id', + ) + + result = await _call_tool_in_thread_pool(tool, {}, tool_context) + + assert result == return_value + assert ( + call_count == 1 + ), f'Tool function executed {call_count} time(s); expected exactly 1.' + @pytest.mark.asyncio async def test_sync_tool_exception_propagates(self): """Test that exceptions from sync tools propagate correctly."""