Source code for pytest_jsonschema_snapshot.tools.genson_addon.to_schema_converter

"""Json → Schema with optional format handling.

`format_mode` options
---------------------
* ``"on"``   – detect formats and let validators assert them (default).
* ``"off"``  – ignore formats entirely.
* ``"safe"`` – keep the annotations but embed a ``$vocabulary`` block that
                **disables** the draft‑2020‑12 *format‑assertion* vocabulary.
                This makes every ``format`` purely informational, regardless
                of validator settings.
"""

from typing import Any, Dict, Literal

from genson import SchemaBuilder  # type: ignore[import-untyped]

from .format_detector import FormatDetector

# Type alias for the accepted ``format_mode`` values; see the module docstring
# for what each of "on", "off" and "safe" means.
_FormatMode = Literal["on", "off", "safe"]


class JsonToSchemaConverter(SchemaBuilder):
    """A thin wrapper around :class:`genson.SchemaBuilder`.

    On top of the schema inferred by the parent builder, string values passed
    to :meth:`add_object` are run through ``FormatDetector.detect_format`` and
    the detected formats are remembered per JSON path.  :meth:`to_schema` then
    adds a ``format`` keyword to a string sub-schema only when *every* string
    observed at that path was detected as the same format.
    """

    # Sentinel stored in the format cache for strings in which no format was
    # detected.  Recording these lets a path that mixes formatted and plain
    # strings be recognised as inconsistent instead of looking uniform.
    _NO_FORMAT = ""

    # Placeholder used in cache paths for "any array index"; consulted when an
    # array schema has a single ``items`` sub-schema shared by all elements.
    _ANY_INDEX = "*"

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------
    def __init__(
        self,
        schema_uri: str = "https://json-schema.org/draft/2020-12/schema",
        *,
        format_mode: _FormatMode = "on",
    ) -> None:
        """Create a converter.

        Args:
            schema_uri: Passed to the parent builder; when falsy the parent is
                initialised without arguments so it uses its own default.
            format_mode: One of ``"on"``, ``"off"`` or ``"safe"``.

        Raises:
            ValueError: If ``format_mode`` is not one of the accepted values.
        """
        if schema_uri:
            super().__init__(schema_uri)
        else:
            super().__init__()

        if format_mode not in {"on", "off", "safe"}:
            raise ValueError("format_mode must be 'on', 'off', or 'safe'.")

        self._format_mode: _FormatMode = format_mode
        # Maps a JSON path to the set of formats seen for strings at that
        # path (``_NO_FORMAT`` stands for "a string with no detected format").
        self._format_cache: Dict[str, set[str]] = {}

    # ------------------------------------------------------------------
    # Public API (overrides)
    # ------------------------------------------------------------------
    def add_object(self, obj: Any, path: str = "root") -> None:
        """Feed ``obj`` to the parent builder and record string formats.

        Args:
            obj: The JSON-like value to learn the schema from.
            path: Path prefix under which formats are cached.  Note that
                :meth:`to_schema` always looks formats up starting at
                ``"root"``.
        """
        super().add_object(obj)
        if self._format_mode != "off":
            self._collect_formats(obj, path)

    def to_schema(self) -> Dict[str, Any]:
        """Return the parent's schema, enriched with ``format`` keywords.

        In ``"off"`` mode the parent schema is returned as a shallow copy
        without changes.  In ``"safe"`` mode a ``$vocabulary`` block with the
        format-assertion vocabulary set to ``False`` is added unless the
        schema already has a ``$vocabulary`` key.
        """
        schema = dict(super().to_schema())  # shallow-copy

        if self._format_mode != "off":
            self._inject_formats(schema, "root")

        if self._format_mode == "safe":
            schema.setdefault(
                "$vocabulary",
                {
                    "https://json-schema.org/draft/2020-12/vocab/core": True,
                    "https://json-schema.org/draft/2020-12/vocab/applicator": True,
                    "https://json-schema.org/draft/2020-12/vocab/format-annotation": True,
                    "https://json-schema.org/draft/2020-12/vocab/format-assertion": False,
                },
            )

        return schema

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------
    def _collect_formats(self, obj: Any, path: str) -> None:
        """Record the formats of every string found in ``obj`` under ``path``."""
        self._collect_at(obj, [path])

    def _collect_at(self, obj: Any, paths: "list[str]") -> None:
        """Record formats of ``obj`` under each of the equivalent ``paths``.

        Array elements are registered both under their concrete index and
        under the ``_ANY_INDEX`` placeholder, so that lookups work for
        positional ``items`` lists as well as for a single shared ``items``
        schema.  Carrying all aliases together means each string is run
        through the detector only once.
        """
        if isinstance(obj, str):
            # Strings with no detected format are recorded too; otherwise a
            # path mixing e.g. e-mails and plain text would look uniform.
            fmt = FormatDetector.detect_format(obj) or self._NO_FORMAT
            for p in paths:
                self._format_cache.setdefault(p, set()).add(fmt)
        elif isinstance(obj, dict):
            for key, value in obj.items():
                self._collect_at(value, [f"{p}.{key}" for p in paths])
        elif isinstance(obj, (list, tuple)):
            for index, item in enumerate(obj):
                child_paths = []
                for p in paths:
                    child_paths.append(f"{p}[{index}]")
                    child_paths.append(f"{p}[{self._ANY_INDEX}]")
                self._collect_at(item, child_paths)

    def _inject_formats(self, schema: Dict[str, Any], path: str) -> None:
        """Add ``format`` to string sub-schemas of ``schema`` in place.

        A format is written only when all strings recorded for ``path``
        agreed on one detected format.
        """
        t = schema.get("type")
        if t == "string":
            fmts = self._format_cache.get(path)
            if fmts and len(fmts) == 1:
                (fmt,) = fmts
                # A lone sentinel means only unformatted strings were seen.
                if fmt != self._NO_FORMAT:
                    schema["format"] = fmt
        elif t == "object" and "properties" in schema:
            for name, subschema in schema["properties"].items():
                self._inject_formats(subschema, f"{path}.{name}")
        elif t == "array" and "items" in schema:
            items_schema = schema["items"]
            if isinstance(items_schema, dict):
                # One schema describes every element, so consult the formats
                # gathered across all indices rather than index 0 alone.
                self._inject_formats(
                    items_schema, f"{path}[{self._ANY_INDEX}]"
                )
            else:
                for idx, subschema in enumerate(items_schema):
                    self._inject_formats(subschema, f"{path}[{idx}]")
        elif "anyOf" in schema:
            # Alternatives describe the same location, so the path is reused.
            for subschema in schema["anyOf"]:
                self._inject_formats(subschema, path)