Coverage for jsonschema_diff/color/stages/replace.py: 99%

1from __future__ import annotations

3"""

4Token-level diff highlighter

5=============================

7A Rich-native replacement for the original ``ReplaceGenericHighlighter`` that

8marks *token-by-token* differences inside a ``OLD -> NEW`` tail. It operates

9directly on :class:`rich.text.Text` so you can embed the result in Rich tables

10or live dashboards without ANSI parsing.

12Detection strategy

13------------------

14#) Split *OLD* and *NEW* into tokens (numbers, words, spaces, punctuation).

15#) Run :class:`difflib.SequenceMatcher` to classify *replace*, *delete*,

16 *insert* spans.

17#) Apply background colour ± underline only to the differing tokens.

19Everything left of the first ``:`` is treated as an opaque *head*.

20"""

21import difflib

22import re

23from typing import List, Optional, Tuple

25from rich.style import Style

26from rich.text import Text

28from ..abstraction import LineHighlighter

class ReplaceGenericHighlighter(LineHighlighter):
    """Highlight token differences in ``OLD -> NEW`` tails.

    The text to the right of the first ``:`` in a line is split around a
    ``->`` arrow into *OLD* and *NEW*. Both sides are tokenised, compared with
    :class:`difflib.SequenceMatcher`, and only the tokens that differ are
    stylised on the :class:`rich.text.Text` passed in.

    Parameters
    ----------
    bg_color :
        Background colour used to mark differing spans.
    arrow_color :
        Optional foreground colour for the ``->`` arrow.
    case_sensitive :
        Compare tokens case-sensitively when *True* (default).
    underline_changes :
        Underline differing spans in addition to background colour.
    """

    # -- regex patterns & helpers -------------------------------------
    # Splits the tail into whitespace / OLD / arrow / NEW pieces. OLD is
    # matched lazily, so the split happens at the earliest possible ``->``.
    _TAIL_PATTERN = re.compile(
        r"(?P<left_ws>\s*)"  # leading spaces
        r"(?P<old>.*?)"  # OLD
        r"(?P<between_ws>\s*)"
        r"(?P<arrow>->)"
        r"(?P<right_ws>\s*)"
        r"(?P<new>.*?)"  # NEW
        r"(?P<trailing_ws>\s*)$",
    )

    # Token classes, tried in order: number (optional sign, decimal part and
    # lowercase/percent unit, or the infinity sign), word, whitespace run,
    # then any single remaining character. The last alternative must consume
    # exactly one character: an optional ``.?`` would also match the empty
    # string and make ``finditer`` emit a zero-width token at the end of
    # every input.
    _TOKEN_RE = re.compile(
        r"""
        (?P<num>[+-]?\d+(?:[.,]\d+)?(?:[a-z%]+)?|∞) |
        (?P<word>\w+) |
        (?P<space>\s+) |
        (?P<punc>.)
        """,
        re.VERBOSE | re.UNICODE,
    )

    # -----------------------------------------------------------------
    # Construction
    # -----------------------------------------------------------------
    def __init__(
        self,
        *,
        bg_color: str = "grey35",
        arrow_color: Optional[str] = None,
        case_sensitive: bool = True,
        underline_changes: bool = False,
    ) -> None:
        """Store the options and pre-build the Rich styles used for marking."""
        self.bg_color = bg_color
        self.arrow_color = arrow_color
        self.case_sensitive = case_sensitive
        self.underline_changes = underline_changes

        # Styles are built once here and reused for every line.
        self._bg_style = Style(bgcolor=self.bg_color, underline=self.underline_changes)
        self._arrow_style = Style(color=self.arrow_color) if self.arrow_color else None

    # -----------------------------------------------------------------
    # Public API
    # -----------------------------------------------------------------
    def colorize_line(self, line: Text) -> Text:
        """Apply diff-based styling **in place**.

        Parameters
        ----------
        line :
            The :class:`rich.text.Text` instance containing a diff line.

        Returns
        -------
        rich.text.Text
            The same object, now decorated with background and/or underline
            spans on the differing tokens. The line is returned untouched
            when it has no ``:`` or when the text after the first ``:`` does
            not match the ``OLD -> NEW`` pattern.
        """
        plain = line.plain

        # 1) locate first ':' — tail is everything to its right
        colon_idx = plain.find(":")
        if colon_idx == -1:
            return line

        head_plain = plain[: colon_idx + 1]
        tail_plain = plain[colon_idx + 1 :]

        m = self._TAIL_PATTERN.match(tail_plain)
        if not m:
            return line  # format didn't match

        # 2) extract tail pieces
        left_ws = m.group("left_ws")
        old_text = m.group("old")
        between_ws = m.group("between_ws")
        arrow = m.group("arrow")
        right_ws = m.group("right_ws")
        new_text = m.group("new")

        # 3) absolute indices within *plain* string
        base = len(head_plain)
        old_start = base + len(left_ws)
        old_end = old_start + len(old_text)

        arrow_start = old_end + len(between_ws)
        arrow_end = arrow_start + len(arrow)

        new_start = arrow_end + len(right_ws)

        # 4) diff tokens (compared by their normalised ``cmp`` field)
        old_tokens = self._tokenize(old_text)
        new_tokens = self._tokenize(new_text)

        sm = difflib.SequenceMatcher(
            a=[t[3] for t in old_tokens],
            b=[t[3] for t in new_tokens],
        )

        for tag, i1, i2, j1, j2 in sm.get_opcodes():
            # OLD side: replace/delete
            if tag in ("replace", "delete"):
                span = self._span_from_tokens(old_tokens, i1, i2)
                if span:
                    s, e = span
                    line.stylize(self._bg_style, old_start + s, old_start + e)
            # NEW side: replace/insert
            if tag in ("replace", "insert"):
                span = self._span_from_tokens(new_tokens, j1, j2)
                if span:
                    s, e = span
                    line.stylize(self._bg_style, new_start + s, new_start + e)

        # 5) recolour arrow if requested
        if self._arrow_style:
            line.stylize(self._arrow_style, arrow_start, arrow_end)

        return line

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _tokenize(self, s: str) -> List[Tuple[str, int, int, str]]:
        """Return token list: ``(raw, start, end, cmp)``.

        ``start``/``end`` are offsets into *s*; ``cmp`` is the value used for
        comparison, i.e. ``raw`` itself or its lower-cased form when the
        highlighter is case-insensitive. Every token is non-empty, so an
        empty *s* yields an empty list.
        """
        toks: List[Tuple[str, int, int, str]] = []
        for m in self._TOKEN_RE.finditer(s):
            raw = m.group(0)
            cmpv = raw if self.case_sensitive else raw.lower()
            toks.append((raw, m.start(), m.end(), cmpv))
        return toks

    @staticmethod
    def _span_from_tokens(
        tokens: List[Tuple[str, int, int, str]],
        i1: int,
        i2: int,
    ) -> Optional[Tuple[int, int]]:
        """Return the ``(start, end)`` character span covered by ``tokens[i1:i2]``.

        Returns ``None`` when the token range is empty (``i1 >= i2``).
        """
        if i1 >= i2:
            return None
        return tokens[i1][1], tokens[i2 - 1][2]

184 return tokens[i1][1], tokens[i2 - 1][2]