Coverage for jsonschema_diff/color/stages/replace.py: 99%
69 statements
« prev ^ index » next coverage.py v7.10.5, created at 2025-08-25 07:00 +0000
« prev ^ index » next coverage.py v7.10.5, created at 2025-08-25 07:00 +0000
1from __future__ import annotations
3"""
4Token-level diff highlighter
5=============================
7A Rich-native replacement for the original ``ReplaceGenericHighlighter`` that
8marks *token-by-token* differences inside a ``OLD -> NEW`` tail. It operates
9directly on :class:`rich.text.Text` so you can embed the result in Rich tables
10or live dashboards without ANSI parsing.
12Detection strategy
13------------------
14#) Split *OLD* and *NEW* into tokens (numbers, words, spaces, punctuation).
15#) Run :class:`difflib.SequenceMatcher` to classify *replace*, *delete*,
16 *insert* spans.
17#) Apply background colour (and optional underline) only to the differing tokens.
19Everything left of the first ``:`` is treated as an opaque *head*.
20"""
21import difflib
22import re
23from typing import List, Optional, Tuple
25from rich.style import Style
26from rich.text import Text
28from ..abstraction import LineHighlighter
class ReplaceGenericHighlighter(LineHighlighter):
    """Highlight token differences in ``OLD -> NEW`` tails.

    Everything left of the first ``:`` in a line is treated as an opaque
    head; the tail is parsed as ``OLD -> NEW`` and only the tokens that
    differ between the two sides receive a background colour (and,
    optionally, an underline).

    Parameters
    ----------
    bg_color :
        Background colour used to mark differing spans.
    arrow_color :
        Optional foreground colour for the ``->`` arrow.
    case_sensitive :
        Compare tokens case-sensitively when *True* (default).
    underline_changes :
        Underline differing spans in addition to background colour.
    """

    # -- regex patterns & helpers -------------------------------------
    # Captures every whitespace run around OLD / -> / NEW so that the
    # absolute character offsets of each piece can be reconstructed.
    _TAIL_PATTERN = re.compile(
        r"(?P<left_ws>\s*)"  # leading spaces
        r"(?P<old>.*?)"  # OLD
        r"(?P<between_ws>\s*)"
        r"(?P<arrow>->)"
        r"(?P<right_ws>\s*)"
        r"(?P<new>.*?)"  # NEW
        r"(?P<trailing_ws>\s*)$",
    )

    # BUGFIX: the catch-all alternative used to be ``.?``, which also
    # matches the empty string, so ``finditer`` emitted a spurious
    # zero-width token at the end of every tokenised string and
    # ``_tokenize`` appended a phantom ("", n, n, "") entry on each call.
    # ``.`` is behaviour-identical for every real character (newlines are
    # consumed by the ``space`` alternative) but drops the phantom token.
    _TOKEN_RE = re.compile(
        r"""
        (?P<num>[+-]?\d+(?:[.,]\d+)?(?:[a-z%]+)?|∞) |
        (?P<word>\w+) |
        (?P<space>\s+) |
        (?P<punc>.)
        """,
        re.VERBOSE | re.UNICODE,
    )

    # -----------------------------------------------------------------
    # Construction
    # -----------------------------------------------------------------
    def __init__(
        self,
        *,
        bg_color: str = "grey35",
        arrow_color: Optional[str] = None,
        case_sensitive: bool = True,
        underline_changes: bool = False,
    ) -> None:
        self.bg_color = bg_color
        self.arrow_color = arrow_color
        self.case_sensitive = case_sensitive
        self.underline_changes = underline_changes

        # Build the Rich styles once; they are reused for every line.
        self._bg_style = Style(bgcolor=self.bg_color, underline=self.underline_changes)
        self._arrow_style = Style(color=self.arrow_color) if self.arrow_color else None

    # -----------------------------------------------------------------
    # Public API
    # -----------------------------------------------------------------
    def colorize_line(self, line: Text) -> Text:
        """Apply diff-based styling **in place**.

        Parameters
        ----------
        line :
            The :class:`rich.text.Text` instance containing a diff line.

        Returns
        -------
        rich.text.Text
            The same object, now decorated with background and/or underline
            spans on the differing tokens.
        """
        plain = line.plain

        # 1) locate first ':' — tail is everything to its right
        colon_idx = plain.find(":")
        if colon_idx == -1:
            return line

        head_plain = plain[: colon_idx + 1]
        tail_plain = plain[colon_idx + 1 :]

        m = self._TAIL_PATTERN.match(tail_plain)
        if not m:
            return line  # format didn't match

        # 2) extract tail pieces
        left_ws = m.group("left_ws")
        old_text = m.group("old")
        between_ws = m.group("between_ws")
        arrow = m.group("arrow")
        right_ws = m.group("right_ws")
        new_text = m.group("new")

        # 3) absolute indices within *plain* string
        base = len(head_plain)
        old_start = base + len(left_ws)
        old_end = old_start + len(old_text)

        arrow_start = old_end + len(between_ws)
        arrow_end = arrow_start + len(arrow)

        new_start = arrow_end + len(right_ws)

        # 4) diff tokens — compare on the (possibly lower-cased) 4th field
        old_tokens = self._tokenize(old_text)
        new_tokens = self._tokenize(new_text)

        sm = difflib.SequenceMatcher(
            a=[t[3] for t in old_tokens],
            b=[t[3] for t in new_tokens],
        )

        for tag, i1, i2, j1, j2 in sm.get_opcodes():
            # OLD side: replace/delete
            if tag in ("replace", "delete"):
                span = self._span_from_tokens(old_tokens, i1, i2)
                if span:
                    s, e = span
                    line.stylize(self._bg_style, old_start + s, old_start + e)
            # NEW side: replace/insert
            if tag in ("replace", "insert"):
                span = self._span_from_tokens(new_tokens, j1, j2)
                if span:
                    s, e = span
                    line.stylize(self._bg_style, new_start + s, new_start + e)

        # 5) recolour arrow if requested
        if self._arrow_style:
            line.stylize(self._arrow_style, arrow_start, arrow_end)

        return line

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _tokenize(self, s: str) -> List[Tuple[str, int, int, str]]:
        """Return token list: ``(raw, start, end, cmp)``.

        ``cmp`` is the comparison key fed to :class:`difflib.SequenceMatcher`
        — the raw token, lower-cased when the highlighter is
        case-insensitive.
        """
        toks: List[Tuple[str, int, int, str]] = []
        for m in self._TOKEN_RE.finditer(s):
            raw = m.group(0)
            cmpv = raw if self.case_sensitive else raw.lower()
            toks.append((raw, m.start(), m.end(), cmpv))
        return toks

    @staticmethod
    def _span_from_tokens(
        tokens: List[Tuple[str, int, int, str]],
        i1: int,
        i2: int,
    ) -> Optional[Tuple[int, int]]:
        """Character span covering ``tokens[i1:i2]``, or ``None`` if empty."""
        if i1 >= i2:
            return None
        return tokens[i1][1], tokens[i2 - 1][2]