Coverage for pytest_jsonschema_snapshot/tools/genson_addon/to_schema_converter.py: 75%

51 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-02 00:37 +0000

1"""Json → Schema with optional format handling. 

2 

3`format_mode` options 

4--------------------- 

5* ``"on"`` – detect formats and let validators assert them (default). 

6* ``"off"`` – ignore formats entirely. 

7* ``"safe"`` – keep the annotations but embed a ``$vocabulary`` block that 

8 **disables** the draft‑2020‑12 *format‑assertion* vocabulary. 

9 This makes every ``format`` purely informational, regardless 

10 of validator settings. 

11""" 

12 

13from typing import Any, Dict, Literal 

14 

15from genson import SchemaBuilder # type: ignore[import-untyped] 

16 

17from .format_detector import FormatDetector 

18 

# Accepted values for ``format_mode``; the constructor re-checks them at runtime.
_FormatMode = Literal["on", "off", "safe"]


class JsonToSchemaConverter(SchemaBuilder):
    """A thin wrapper around :class:`genson.SchemaBuilder`.

    While objects are added, every string value is passed to
    ``FormatDetector.detect_format`` and the result is remembered per data
    path (``root.user.email``, ``root.tags[2]`` ...).  ``to_schema`` then
    annotates string subschemas with a ``format`` keyword, but only when
    *every* string observed at that location agreed on one single format.
    """

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------
    def __init__(
        self,
        schema_uri: str = "https://json-schema.org/draft/2020-12/schema",
        *,
        format_mode: _FormatMode = "on",
    ) -> None:
        """Create a converter.

        Args:
            schema_uri: Forwarded to the parent builder.  A falsy value
                makes the parent constructor run without arguments.
            format_mode: ``"on"``, ``"off"`` or ``"safe"``.

        Raises:
            ValueError: If ``format_mode`` is not one of the three modes.
        """
        if schema_uri:
            super().__init__(schema_uri)
        else:
            super().__init__()
        if format_mode not in {"on", "off", "safe"}:
            raise ValueError("format_mode must be 'on', 'off', or 'safe'.")
        self._format_mode: _FormatMode = format_mode
        # Data path -> formats detected for the strings seen at that path.
        self._format_cache: Dict[str, set[str]] = {}
        # Data paths where at least one string had no detectable format.
        self._unformatted_paths: set[str] = set()
        # Array data path -> longest array seen there (drives index expansion).
        self._array_lengths: Dict[str, int] = {}

    # ------------------------------------------------------------------
    # Public API (overrides)
    # ------------------------------------------------------------------
    def add_object(self, obj: Any, path: str = "root") -> None:
        """Feed *obj* to the parent builder and record its string formats.

        Args:
            obj: The JSON-like sample to learn from.
            path: Data path under which format observations are stored.
                ``to_schema`` looks formats up starting from ``"root"``.
        """
        super().add_object(obj)
        if self._format_mode != "off":
            self._collect_formats(obj, path)

    def to_schema(self) -> Dict[str, Any]:
        """Return the generated schema, with formats applied per the mode.

        In ``"safe"`` mode a ``$vocabulary`` block that switches off the
        draft 2020-12 format-assertion vocabulary is added unless the schema
        already carries a ``$vocabulary`` key.
        """
        schema = dict(super().to_schema())  # shallow copy of the top level

        if self._format_mode != "off":
            self._inject_formats(schema, "root")

        if self._format_mode == "safe":
            # NOTE(review): the JSON Schema core spec describes ``$vocabulary``
            # as a meta-schema keyword; confirm that target validators honour
            # it when it appears in an ordinary schema like this one.
            schema.setdefault(
                "$vocabulary",
                {
                    "https://json-schema.org/draft/2020-12/vocab/core": True,
                    "https://json-schema.org/draft/2020-12/vocab/applicator": True,
                    "https://json-schema.org/draft/2020-12/vocab/format-annotation": True,
                    "https://json-schema.org/draft/2020-12/vocab/format-assertion": False,
                },
            )

        return schema

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------
    def _collect_formats(self, obj: Any, path: str) -> None:
        """Walk *obj* recursively and record what was seen at each data path.

        Strings contribute either a detected format or an "unformatted"
        marker; dicts and sequences are descended into.  Other values are
        ignored.
        """
        if isinstance(obj, str):
            fmt = FormatDetector.detect_format(obj)
            if fmt:
                self._format_cache.setdefault(path, set()).add(fmt)
            else:
                # Remember plain strings too: a location that mixes formatted
                # and unformatted values must not be given a ``format``, or
                # the schema would reject samples it was built from.
                self._unformatted_paths.add(path)
        elif isinstance(obj, dict):
            for key, value in obj.items():
                self._collect_formats(value, f"{path}.{key}")
        elif isinstance(obj, (list, tuple)):
            known = self._array_lengths.get(path, 0)
            self._array_lengths[path] = max(known, len(obj))
            for index, item in enumerate(obj):
                self._collect_formats(item, f"{path}[{index}]")

    def _inject_formats(self, schema: Dict[str, Any], path: str) -> None:
        """Add ``format`` keywords to *schema* in place, starting at *path*."""
        self._inject_for_paths(schema, [path])

    def _inject_for_paths(self, schema: Any, paths: list[str]) -> None:
        """Annotate *schema* using every concrete data path that maps onto it.

        One subschema can describe many data locations (all elements of an
        array share the ``items`` subschema), hence a list of paths.
        """
        if not isinstance(schema, dict):
            # Boolean subschemas (possible with a seeded schema) carry nothing
            # to annotate.
            return

        schema_type = schema.get("type")
        if schema_type == "string":
            fmt = self._agreed_format(paths)
            if fmt is not None:
                schema["format"] = fmt
        elif schema_type == "object" and "properties" in schema:
            for name, subschema in schema["properties"].items():
                self._inject_for_paths(subschema, [f"{p}.{name}" for p in paths])
        elif schema_type == "array" and "items" in schema:
            items_schema = schema["items"]
            if isinstance(items_schema, dict):
                # List-style items: a single subschema covers *all* elements,
                # so every observed index has to take part in the decision.
                element_paths = [
                    f"{p}[{index}]"
                    for p in paths
                    for index in range(self._array_lengths.get(p, 0))
                ]
                self._inject_for_paths(items_schema, element_paths)
            elif isinstance(items_schema, (list, tuple)):
                # Tuple-style items: subschema N describes position N only.
                for index, subschema in enumerate(items_schema):
                    self._inject_for_paths(
                        subschema, [f"{p}[{index}]" for p in paths]
                    )
        elif "anyOf" in schema:
            for subschema in schema["anyOf"]:
                self._inject_for_paths(subschema, paths)

    def _agreed_format(self, paths: list[str]) -> Any:
        """Return the one format shared by all strings at *paths*, else ``None``."""
        formats: set[str] = set()
        for data_path in paths:
            if data_path in self._unformatted_paths:
                return None
            formats.update(self._format_cache.get(data_path, ()))
        if len(formats) == 1:
            return next(iter(formats))
        return None

100 self._inject_formats(subschema, path)