Coverage for pytest_jsonschema_snapshot/tools/genson_addon/format_detector.py: 58%

26 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-02 00:37 +0000

1import re 

2from typing import Optional 

3 

4 

class FormatDetector:
    """Detect well-known string formats using precompiled regular expressions.

    The format names returned by :meth:`detect_format` are ``"email"``,
    ``"uuid"``, ``"date-time"``, ``"date"``, ``"uri"`` and ``"ipv4"``.
    """

    # Regular expressions for various formats
    EMAIL_PATTERN = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
    UUID_PATTERN = re.compile(
        r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
        re.I,
    )
    # re.ASCII restricts \d to 0-9; without it \d also accepts any other
    # Unicode decimal digit (e.g. Arabic-Indic digits).
    DATE_PATTERN = re.compile(r"^\d{4}-\d{2}-\d{2}$", re.ASCII)
    DATETIME_PATTERN = re.compile(
        r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$",
        re.ASCII,
    )
    URI_PATTERN = re.compile(r"^https?://[^\s/$.?#].[^\s]*$", re.I)
    IPV4_PATTERN = re.compile(
        r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}"
        r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
    )

    @classmethod
    def detect_format(cls, value: str) -> Optional[str]:
        """Detect the format of a string.

        Args:
            value: The string to analyze.

        Returns:
            The name of the first matching format (``"email"``, ``"uuid"``,
            ``"date-time"``, ``"date"``, ``"uri"`` or ``"ipv4"``), or ``None``
            when ``value`` is not a ``str``, is empty, or matches no pattern.
        """
        if not isinstance(value, str) or not value:
            return None

        # Check formats from more specific to less specific.  Patterns are
        # looked up through ``cls`` so a subclass can override any of them.
        candidates = (
            (cls.EMAIL_PATTERN, "email"),
            (cls.UUID_PATTERN, "uuid"),
            (cls.DATETIME_PATTERN, "date-time"),
            (cls.DATE_PATTERN, "date"),
            (cls.URI_PATTERN, "uri"),
            (cls.IPV4_PATTERN, "ipv4"),
        )
        for pattern, format_name in candidates:
            # fullmatch() rather than match(): "$" also matches just before a
            # trailing newline, so match() would accept e.g. "2024-01-15\n".
            if pattern.fullmatch(value):
                return format_name

        return None