Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ Kevin J. Foley
Kian Eliasi
Kian-Meng Ang
Kim Soo
Kiril Klein
Kodi B. Arfer
Kojo Idrissa
Kostis Anagnostopoulos
Expand Down
3 changes: 3 additions & 0 deletions changelog/8998.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Assertion failures comparing very large strings, lists, or dataclasses no longer hang for a long time (sometimes minutes) while building the diff.

When the inputs are large enough that :func:`difflib.ndiff` would be pathologically slow, pytest now runs it over a bounded prefix of the input instead, so the detailed (character-level) diff is kept for the part shown while the rest is truncated with a note.
5 changes: 5 additions & 0 deletions src/_pytest/assertion/_compare_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from _pytest._io.pprint import PrettyPrinter
from _pytest._io.saferepr import saferepr
from _pytest.assertion._diff import ndiff_too_slow_for_lines
from _pytest.assertion._diff import truncated_ndiff
from _pytest.assertion._typing import _HighlightFunc
from _pytest.compat import running_on_ci

Expand All @@ -27,6 +29,9 @@ def _compare_eq_iterable(

yield ""
yield "Full diff:"
if ndiff_too_slow_for_lines(left_formatting, right_formatting):
yield from truncated_ndiff(left_formatting, right_formatting, highlighter)
return
# "right" is the expected base against which we compare "left",
# see https://github.com/pytest-dev/pytest/issues/3333
yield from highlighter(
Expand Down
94 changes: 94 additions & 0 deletions src/_pytest/assertion/_diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from __future__ import annotations

from collections.abc import Iterator
from collections.abc import Sequence
from itertools import chain

from _pytest.assertion._typing import _HighlightFunc


# Past these limits ``difflib.ndiff`` becomes pathologically slow: its
# character-level "fancy replace" step compares every pair of similar lines in a
# differing block, so its cost grows with the *product* of the line count and
# the character count. A few hundred similar lines can already take seconds, and
# the pretty-printed form of a large list/dataclass takes minutes (see issue
# #8998). The limits below keep ``ndiff`` under roughly a second in the worst
# case. Above them we still run ``ndiff`` -- so the detailed diff is kept -- but
# only over a bounded prefix of the input.
#
# Both limits are budgets for the two compared sides *combined*; when a diff is
# truncated, ``truncated_ndiff`` gives each side half of each budget.
NDIFF_MAX_INPUT_SIZE = 10_000  # characters (left + right)
DIFF_MAX_LINES = 100  # lines (left + right)


def ndiff_too_slow_for_text(left: str, right: str) -> bool:
    """Tell whether diffing two raw strings with ``ndiff`` would be too slow.

    The number of lines is approximated by counting ``"\\n"`` characters, which
    avoids materialising a list of lines for very large inputs.

    :param left: One side of the comparison.
    :param right: The other side of the comparison.
    :returns:
        ``True`` when the combined newline count exceeds ``DIFF_MAX_LINES`` or
        the combined length exceeds ``NDIFF_MAX_INPUT_SIZE``.
    """
    newline_total = left.count("\n") + right.count("\n")
    # ``or`` short-circuits, so the length check only runs when the line
    # check did not already decide the answer.
    return (
        newline_total > DIFF_MAX_LINES
        or len(left) + len(right) > NDIFF_MAX_INPUT_SIZE
    )


def ndiff_too_slow_for_lines(
    left_lines: Sequence[str], right_lines: Sequence[str]
) -> bool:
    """Tell whether diffing two line sequences with ``ndiff`` would be too slow.

    :param left_lines: Lines of one side of the comparison.
    :param right_lines: Lines of the other side of the comparison.
    :returns:
        ``True`` when the combined line count exceeds ``DIFF_MAX_LINES`` or the
        combined character count exceeds ``NDIFF_MAX_INPUT_SIZE``.
    """
    total_lines = len(left_lines) + len(right_lines)
    if total_lines > DIFF_MAX_LINES:
        return True
    # Spend a character budget line by line and stop at the first line that
    # overdraws it, so the remainder of a huge input is never measured.
    remaining = NDIFF_MAX_INPUT_SIZE
    for text in chain(left_lines, right_lines):
        remaining -= len(text)
        if remaining < 0:
            return True
    return False


def truncated_ndiff(
    left_lines: Sequence[str],
    right_lines: Sequence[str],
    highlighter: _HighlightFunc,
) -> Iterator[str]:
    """Yield an ``ndiff`` over a bounded prefix of the input (issue #8998).

    The character-level diff is kept, but only for a slice small enough to
    compute quickly; the rest of the input is dropped. Each side is cut to
    half of ``DIFF_MAX_LINES`` and half of ``NDIFF_MAX_INPUT_SIZE``.

    :param left_lines: Lines of the left-hand (actual) value.
    :param right_lines: Lines of the right-hand (expected) value.
    :param highlighter:
        Callable applied to the joined diff text with ``lexer="diff"``; its
        result is split back into lines.
    :returns:
        An iterator yielding first a one-line notice that starts with
        ``"Diff too large to show in full"`` and then the diff lines.
    """
    from difflib import ndiff

    per_side_lines = DIFF_MAX_LINES // 2
    per_side_chars = NDIFF_MAX_INPUT_SIZE // 2
    left = _bounded_prefix(left_lines, per_side_lines, per_side_chars)
    right = _bounded_prefix(right_lines, per_side_lines, per_side_chars)

    # Review feedback: a blanket "over N characters or M lines" notice does not
    # tell the reader which limit was hit. Name the limit(s) actually exceeded.
    exceeded: list[str] = []
    if len(left_lines) + len(right_lines) > DIFF_MAX_LINES:
        exceeded.append(f"over {DIFF_MAX_LINES} lines")
    total_chars = sum(map(len, left_lines)) + sum(map(len, right_lines))
    if total_chars > NDIFF_MAX_INPUT_SIZE:
        exceeded.append(f"over {NDIFF_MAX_INPUT_SIZE} characters")
    if exceeded:
        reason = " and ".join(exceeded)
    else:
        # The caller's own check may count differently (for example it may
        # include line separators that are no longer part of these lines), so
        # keep the generic wording when neither limit can be confirmed here.
        reason = (
            f"over {NDIFF_MAX_INPUT_SIZE} characters or {DIFF_MAX_LINES} lines"
        )
    yield f"Diff too large to show in full ({reason}); showing a truncated diff:"

    # "right" is the expected base against which we compare "left",
    # see https://github.com/pytest-dev/pytest/issues/3333
    diff_text = "\n".join(line.rstrip("\n") for line in ndiff(right, left))
    yield from highlighter(diff_text, lexer="diff").splitlines()


def _bounded_prefix(lines: Sequence[str], max_lines: int, max_chars: int) -> list[str]:
"""Return the longest prefix of ``lines`` within both limits.

The line that would cross the character limit is included truncated, so a
single huge line still yields some (bounded) output.
"""
kept: list[str] = []
chars = 0
for line in lines:
if len(kept) >= max_lines:
break
room = max_chars - chars
if len(line) > room:
if room > 0:
kept.append(line[:room])
break
kept.append(line)
chars += len(line)
return kept
7 changes: 7 additions & 0 deletions src/_pytest/assertion/compare_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from collections.abc import Iterator

from _pytest._io.saferepr import saferepr
from _pytest.assertion._diff import ndiff_too_slow_for_text
from _pytest.assertion._diff import truncated_ndiff
from _pytest.assertion._typing import _AssertionTextDiffStyle
from _pytest.assertion._typing import _HighlightFunc
from _pytest.assertion.highlight import dummy_highlighter
Expand Down Expand Up @@ -75,6 +77,11 @@ def _diff_text(
left = repr(str(left))
right = repr(str(right))
yield "Strings contain only whitespace, escaping them using repr()"
if ndiff_too_slow_for_text(left, right):
yield from truncated_ndiff(
left.splitlines(keepends), right.splitlines(keepends), highlighter
)
return
# "right" is the expected base against which we compare "left",
# see https://github.com/pytest-dev/pytest/issues/3333
yield from highlighter(
Expand Down
84 changes: 84 additions & 0 deletions testing/test_assertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@

from _pytest import outcomes
import _pytest.assertion as plugin
from _pytest.assertion import _diff
from _pytest.assertion import truncate
from _pytest.assertion import util
from _pytest.assertion._compare_any import _compare_eq_cls
from _pytest.assertion._diff import ndiff_too_slow_for_lines
from _pytest.assertion._diff import ndiff_too_slow_for_text
from _pytest.assertion.compare_text import _compare_eq_text
from _pytest.config import Config as _Config
from _pytest.monkeypatch import MonkeyPatch
Expand Down Expand Up @@ -459,6 +462,45 @@ def callequal(
)


class TestNdiffTooSlow:
    """Checks for the size heuristic that avoids very slow diffs (#8998)."""

    def test_small_input_is_not_too_slow(self) -> None:
        # Tiny inputs are far below both limits.
        text_verdict = ndiff_too_slow_for_text("spam", "eggs")
        assert text_verdict is False
        lines_verdict = ndiff_too_slow_for_lines(["spam"], ["eggs"])
        assert lines_verdict is False

    def test_too_many_characters(self, monkeypatch: MonkeyPatch) -> None:
        # Seven characters in total against a budget of five.
        monkeypatch.setattr(_diff, "NDIFF_MAX_INPUT_SIZE", 5)
        text_verdict = ndiff_too_slow_for_text("abc", "abcd")
        assert text_verdict is True
        lines_verdict = ndiff_too_slow_for_lines(["abc"], ["abcd"])
        assert lines_verdict is True

    def test_too_many_lines(self, monkeypatch: MonkeyPatch) -> None:
        # More lines in total than the lowered limit of three.
        monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 3)
        text_verdict = ndiff_too_slow_for_text("a\nb\nc\nd\ne", "f")
        assert text_verdict is True
        lines_verdict = ndiff_too_slow_for_lines(["a", "b", "c", "d"], ["e"])
        assert lines_verdict is True

    def test_bounded_prefix(self) -> None:
        # Nothing is dropped when the input fits within both limits.
        everything = _diff._bounded_prefix(["a", "b"], max_lines=10, max_chars=100)
        assert everything == ["a", "b"]

        # Collection stops once the line limit is reached.
        line_capped = _diff._bounded_prefix(
            ["a", "b", "c"], max_lines=2, max_chars=100
        )
        assert line_capped == ["a", "b"]

        # The line that crosses the character limit is kept, cut to fit.
        cut_short = _diff._bounded_prefix(
            ["abc", "defgh"], max_lines=10, max_chars=4
        )
        assert cut_short == ["abc", "d"]

        # With the character limit used up exactly, the next line is dropped.
        exactly_full = _diff._bounded_prefix(
            ["abcd", "e"], max_lines=10, max_chars=4
        )
        assert exactly_full == ["abcd"]


class TestAssert_reprcompare:
def test_different_types(self) -> None:
assert callequal([0, 1], "foo") is None
Expand Down Expand Up @@ -513,6 +555,34 @@ def test_text_skipping_verbose(self) -> None:
assert "- " + "a" * 50 + "eggs" in lines
assert "+ " + "a" * 50 + "spam" in lines

def test_text_diff_large_input_is_truncated(self, monkeypatch: MonkeyPatch) -> None:
    # Lower the character budget so these strings exceed it and get the
    # truncated diff rather than the slow full ndiff (#8998).
    monkeypatch.setattr(_diff, "NDIFF_MAX_INPUT_SIZE", 40)
    padding = "x" * 100
    left = "the answer is 41\nand a tail" + padding
    right = "the answer is 42\nand a tail" + padding
    explanation = callequal(left, right, verbose=1)
    assert explanation is not None
    notices = [
        entry for entry in explanation if "Diff too large to show in full" in entry
    ]
    assert notices
    # The output still comes from ndiff, so its "? " intraline hint rows show up.
    hints = [entry for entry in explanation if entry.startswith("? ")]
    assert hints

def test_text_diff_many_lines_is_truncated(self, monkeypatch: MonkeyPatch) -> None:
    # Lower the line budget so 50-line inputs exceed it; only a bounded prefix
    # should then be diffed (#8998).
    monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 4)
    left = "\n".join([f"left line {i}" for i in range(50)])
    right = "\n".join([f"right line {i}" for i in range(50)])
    explanation = callequal(left, right, verbose=1)
    assert explanation is not None
    notices = [
        entry for entry in explanation if "Diff too large to show in full" in entry
    ]
    assert notices
    # The first differing lines are still reported by the fallback.
    assert "- right line 0" in explanation
    assert "+ left line 0" in explanation
    # Far fewer than all 50 lines are diffed.
    changed_count = sum(
        1 for entry in explanation if entry.startswith(("- ", "+ "))
    )
    assert 0 < changed_count < 50

def test_multiline_text_diff(self) -> None:
left = "foo\nspam\nbar"
right = "foo\neggs\nbar"
Expand Down Expand Up @@ -673,6 +743,20 @@ def test_iterable_quiet(self) -> None:
"Use -v to get more diff",
]

def test_iterable_large_input_is_truncated(self, monkeypatch: MonkeyPatch) -> None:
    # Lower the line budget so the pprint output of these lists exceeds it and
    # only a bounded prefix is diffed instead of the slow full ndiff (#8998).
    monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 6)
    left = ["item-" + str(i) for i in range(50)]
    right = ["other-" + str(i) for i in range(50)]
    explanation = callequal(left, right, verbose=1)
    assert explanation is not None
    assert "Full diff:" in explanation
    notices = [
        entry for entry in explanation if "Diff too large to show in full" in entry
    ]
    assert notices
    # Far fewer than the 50+ pprint lines are diffed.
    changed_count = sum(
        1 for entry in explanation if entry.startswith(("- ", "+ "))
    )
    assert 0 < changed_count < 50

def test_iterable_full_diff_ci(
self, monkeypatch: MonkeyPatch, pytester: Pytester
) -> None:
Expand Down
Loading