Coverage for jsonschema_diff/color/stages/replace.py: 99%

69 statements  

« prev     ^ index     » next       coverage.py v7.10.5, created at 2025-08-25 07:00 +0000

1from __future__ import annotations 

2 

3""" 

4Token-level diff highlighter 

5============================= 

6 

7A Rich-native replacement for the original ``ReplaceGenericHighlighter`` that 

8marks *token-by-token* differences inside a ``OLD -> NEW`` tail. It operates 

9directly on :class:`rich.text.Text` so you can embed the result in Rich tables 

10or live dashboards without ANSI parsing. 

11 

12Detection strategy 

13------------------ 

14#) Split *OLD* and *NEW* into tokens (numbers, words, spaces, punctuation). 

15#) Run :class:`difflib.SequenceMatcher` to classify *replace*, *delete*, 

16 *insert* spans. 

17#) Apply background colour ± underline only to the differing tokens. 

18 

19Everything left of the first ``:`` is treated as an opaque *head*. 

20""" 

21import difflib 

22import re 

23from typing import List, Optional, Tuple 

24 

25from rich.style import Style 

26from rich.text import Text 

27 

28from ..abstraction import LineHighlighter 

29 

30 

class ReplaceGenericHighlighter(LineHighlighter):
    """Highlight token differences in ``OLD -> NEW`` tails.

    The text to the right of the first ``:`` is parsed as ``OLD -> NEW``.
    Both sides are split into tokens, compared with
    :class:`difflib.SequenceMatcher`, and only the tokens that differ are
    styled.  Lines without a ``:`` or without an ``->`` arrow in the tail are
    returned untouched.

    Parameters
    ----------
    bg_color :
        Background colour used to mark differing spans.
    arrow_color :
        Optional foreground colour for the ``->`` arrow.
    case_sensitive :
        Compare tokens case-sensitively when *True* (default).
    underline_changes :
        Underline differing spans in addition to background colour.
    """

    # -- regex patterns & helpers -------------------------------------
    # Tail layout: <ws> OLD <ws> -> <ws> NEW <ws>.  OLD and NEW are lazy so
    # the surrounding whitespace ends up in the dedicated *_ws groups and is
    # never highlighted.
    _TAIL_PATTERN = re.compile(
        r"(?P<left_ws>\s*)"  # leading spaces
        r"(?P<old>.*?)"  # OLD
        r"(?P<between_ws>\s*)"
        r"(?P<arrow>->)"
        r"(?P<right_ws>\s*)"
        r"(?P<new>.*?)"  # NEW
        r"(?P<trailing_ws>\s*)$",
    )

    # Every alternative must consume at least one character.  A ``punc``
    # alternative that can match the empty string makes ``finditer`` emit a
    # zero-width token at the end of every input, which then takes part in
    # the sequence comparison as a phantom common element.
    _TOKEN_RE = re.compile(
        r"""
        (?P<num>[+-]?\d+(?:[.,]\d+)?(?:[a-z%]+)?|∞) |
        (?P<word>\w+) |
        (?P<space>\s+) |
        (?P<punc>.)
        """,
        re.VERBOSE | re.UNICODE,
    )

    # -----------------------------------------------------------------
    # Construction
    # -----------------------------------------------------------------
    def __init__(
        self,
        *,
        bg_color: str = "grey35",
        arrow_color: Optional[str] = None,
        case_sensitive: bool = True,
        underline_changes: bool = False,
    ) -> None:
        self.bg_color = bg_color
        self.arrow_color = arrow_color
        self.case_sensitive = case_sensitive
        self.underline_changes = underline_changes

        # Styles are built once here; later changes to the public attributes
        # above do not rebuild them.
        self._bg_style = Style(bgcolor=self.bg_color, underline=self.underline_changes)
        self._arrow_style = Style(color=self.arrow_color) if self.arrow_color else None

    # -----------------------------------------------------------------
    # Public API
    # -----------------------------------------------------------------
    def colorize_line(self, line: Text) -> Text:
        """Apply diff-based styling **in place**.

        Parameters
        ----------
        line :
            The :class:`rich.text.Text` instance containing a diff line.

        Returns
        -------
        rich.text.Text
            The same object, now decorated with background and/or underline
            spans on the differing tokens.  The object is returned unchanged
            when the line has no ``:`` or its tail has no ``->`` arrow.
        """
        plain = line.plain

        # 1) locate first ':' — tail is everything to its right
        colon_idx = plain.find(":")
        if colon_idx == -1:
            return line

        base = colon_idx + 1  # absolute offset of the tail inside *plain*
        m = self._TAIL_PATTERN.match(plain[base:])
        if m is None:
            return line  # format didn't match

        # 2) absolute indices within *plain*; the match is anchored at the
        #    start of the tail, so group offsets are tail-relative.
        old_start = base + m.start("old")
        new_start = base + m.start("new")

        # 3) diff tokens (compared by their normalised ``cmp`` field)
        old_tokens = self._tokenize(m.group("old"))
        new_tokens = self._tokenize(m.group("new"))

        sm = difflib.SequenceMatcher(
            a=[tok[3] for tok in old_tokens],
            b=[tok[3] for tok in new_tokens],
        )

        for tag, i1, i2, j1, j2 in sm.get_opcodes():
            # OLD side: replace/delete
            if tag in ("replace", "delete"):
                span = self._span_from_tokens(old_tokens, i1, i2)
                if span:
                    start, end = span
                    line.stylize(self._bg_style, old_start + start, old_start + end)
            # NEW side: replace/insert
            if tag in ("replace", "insert"):
                span = self._span_from_tokens(new_tokens, j1, j2)
                if span:
                    start, end = span
                    line.stylize(self._bg_style, new_start + start, new_start + end)

        # 4) recolour arrow if requested
        if self._arrow_style:
            line.stylize(
                self._arrow_style,
                base + m.start("arrow"),
                base + m.end("arrow"),
            )

        return line

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    def _tokenize(self, s: str) -> List[Tuple[str, int, int, str]]:
        """Return token list: ``(raw, start, end, cmp)``.

        ``start``/``end`` are offsets into *s*; ``cmp`` is the value used for
        comparison (``raw`` itself, or its lower-cased form when the
        highlighter is case-insensitive).  Tokens are never empty, so an
        empty *s* yields an empty list.
        """
        toks: List[Tuple[str, int, int, str]] = []
        for m in self._TOKEN_RE.finditer(s):
            raw = m.group(0)
            cmpv = raw if self.case_sensitive else raw.lower()
            toks.append((raw, m.start(), m.end(), cmpv))
        return toks

    @staticmethod
    def _span_from_tokens(
        tokens: List[Tuple[str, int, int, str]],
        i1: int,
        i2: int,
    ) -> Optional[Tuple[int, int]]:
        """Return the character span covered by ``tokens[i1:i2]``.

        The span runs from the start of the first token to the end of the
        last one; ``None`` is returned for an empty token range.
        """
        if i1 >= i2:
            return None
        return tokens[i1][1], tokens[i2 - 1][2]

182 if i1 >= i2: 

183 return None 

184 return tokens[i1][1], tokens[i2 - 1][2]