Coverage for human_requests/impersonation.py: 89%

82 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-13 21:41 +0000

1from __future__ import annotations 

2 

3import random 

4from dataclasses import dataclass, field 

5from enum import Enum, auto 

6from typing import Callable, Iterable, Sequence, get_args 

7 

8from browserforge.headers import HeaderGenerator 

9from browserforge.headers.generator import SUPPORTED_BROWSERS as HD_BROWSERS 

10from curl_cffi import requests as cffi_requests 

11 

12# --------------------------------------------------------------------------- 

13# Доступные профили curl_cffi (динамически, без хардкода) 

14# --------------------------------------------------------------------------- 

# Impersonation targets known to the installed curl_cffi, taken from the type
# arguments of ``BrowserTypeLiteral`` so that nothing is hard-coded here.
_ALL_PROFILES: list[str] = list(get_args(cffi_requests.impersonate.BrowserTypeLiteral))
_ALL_PROFILES.sort()

# Browser engine name -> profile family used when ``sync_with_engine`` is on.
_ENGINE_FAM: dict[str, str] = {
    # engines mapped to the "chrome" family
    "chromium": "chrome",
    "patchright": "chrome",
    "edge": "chrome",
    "opera": "chrome",
    "yandex": "chrome",
    # engines mapped to the "safari" family
    "webkit": "safari",
    # engines mapped to the "firefox" family
    "firefox": "firefox",
    "camoufox": "firefox",
    "tor": "firefox",
}

# Family prefixes recognised by ``_family``; they are tried in this order.
_SPOOF_ENGINES_FAM: list[str] = ["chrome", "firefox", "safari", "edge", "opera", "tor"]

28 

29 

def _family(profile: str) -> str:
    """Return the browser family prefix of *profile*, e.g. 'chrome122' -> 'chrome'.

    The prefixes in ``_SPOOF_ENGINES_FAM`` are tried in order and the first one
    that *profile* starts with wins; ``"other"`` is returned when none match.
    """
    matching = (prefix for prefix in _SPOOF_ENGINES_FAM if profile.startswith(prefix))
    return next(matching, "other")

35 

36 

37# --------------------------------------------------------------------------- 

38# Политика выбора профиля для impersonate() 

39# --------------------------------------------------------------------------- 

class Policy(Enum):
    """When ``ImpersonationConfig`` picks an impersonation profile."""

    # Pick once, on first use, and keep returning the same profile afterwards.
    INIT_RANDOM = auto()
    """Profile is selected at session creation and then does not change"""

    # Pick a fresh profile on every call.
    RANDOM_EACH_REQUEST = auto()
    """Profile is selected for every request"""

47 

48 

49# --------------------------------------------------------------------------- 

50# Dataclass config 

51# --------------------------------------------------------------------------- 

52def _always(_: str) -> bool: 

53 """Default filter for ImpersonationConfig.custom_filter""" 

54 return True 

55 

56 

@dataclass(slots=True)
class ImpersonationConfig:
    """
    Spoofing settings for curl_cffi **and** browser header generation.

    Example::

        cfg = ImpersonationConfig(
            policy=Policy.RANDOM_EACH_REQUEST,
            browser_family=["chrome", "edge"],
            min_version=120,
            geo_country="de-DE",
            sync_with_engine=True,
        )
    """

    # --- main policy -------------------------------------------------------
    policy: Policy = Policy.INIT_RANDOM
    """Policy for when a profile is selected"""

    # --- profile selection filters ----------------------------------------
    browser_family: str | Sequence[str] | None = None  # 'chrome' or ['chrome','edge']
    """Browser family (chrome, edge, opera, firefox, safari)"""
    min_version: int | None = None  # >=
    """Minimum browser version.

    Compared against all digits of the profile name joined together
    (``'chrome120'`` -> 120, ``'safari15_3'`` -> 153). Profiles whose name
    contains no digits are dropped while this filter is active."""
    custom_filter: Callable[[str], bool] = _always
    """Custom script for filtering impersonation profiles.
    Must return a bool"""

    # --- additional parameters --------------------------------------------
    geo_country: str = "en-US"
    """Language tag in BCP 47 format (en-US, ru-RU, etc.)"""
    sync_with_engine: bool = True  # restrict to Playwright engine family
    """Restrict to the current Playwright engine family (chromium, firefox, webkit),
    or camoufox=firefox"""
    rotate_headers: bool = True  # use HeaderGenerator
    """Whether to generate browser-like headers (user-agent, accept-language, etc.)"""

    # --- internal -----------------------------------------------------------
    # Profile remembered by ``choose`` for every policy except RANDOM_EACH_REQUEST.
    _cached: str = field(default="", init=False, repr=False)

    # ------------------------------------------------------------------ utils
    def _filter_pool(self, engine: str) -> list[str]:
        """Return the impersonation profiles that pass every configured filter.

        Filters run in this order: ``browser_family``, ``min_version``,
        engine family (only when ``sync_with_engine`` is set), ``custom_filter``.

        :param engine: browser engine name; translated through ``_ENGINE_FAM``
            and used as-is when it has no entry there.
        :raises RuntimeError: if no profile is left after filtering.
        """
        if isinstance(self.browser_family, str):
            families: set[str] = {self.browser_family}
        else:
            families = set(self.browser_family or [])

        pool: list[str] = list(_ALL_PROFILES)
        if families:
            pool = [p for p in pool if _family(p) in families]

        # Truthiness on purpose: ``None`` and ``0`` both mean "no version filter".
        if self.min_version:
            kept: list[str] = []
            for name in pool:
                digits = "".join(filter(str.isdigit, name))
                # A name without digits has no version to compare;
                # int("") would raise ValueError, so skip such names.
                if digits and int(digits) >= self.min_version:
                    kept.append(name)
            pool = kept

        if self.sync_with_engine:
            need = _ENGINE_FAM.get(engine, engine)
            same_family = [p for p in pool if _family(p) == need]
            # Fallback: keep the unrestricted pool when nothing matches the
            # engine family (e.g. no profile found for "webkit").
            if same_family:
                pool = same_family

        pool = [p for p in pool if self.custom_filter(p)]
        if not pool:
            raise RuntimeError("No impersonation profile satisfies filters")
        return pool

    # ---------------------------------------------------------------- public
    def choose(self, engine: str) -> str:
        """
        Returns the impersonation profile name for the current request.

        With ``Policy.RANDOM_EACH_REQUEST`` a new random profile is drawn on
        every call. Otherwise the first drawn profile is stored and returned on
        all later calls, whatever *engine* is passed.

        :raises RuntimeError: if no profile satisfies the filters.
        """
        if self.policy is Policy.RANDOM_EACH_REQUEST:
            return random.choice(self._filter_pool(engine))
        if not self._cached:
            self._cached = random.choice(self._filter_pool(engine))
        return self._cached

    def forge_headers(self, profile: str) -> dict[str, str]:
        """
        Generates a set of real-browser headers for *the same* profile,
        using *browserforge.HeaderGenerator*.

        :param profile: impersonation profile name, e.g. ``'chrome120'``.
        :returns: headers with lower-cased names, or an empty dict when
            ``rotate_headers`` is off.
        :raises ValueError: if *profile* starts with none of the browsers
            supported by browserforge.
        :raises RuntimeError: if the header generator raises ``ValueError``.
        """
        if not self.rotate_headers:
            return {}

        real_browser = next((b for b in HD_BROWSERS if profile.startswith(b)), None)
        if real_browser is None:
            raise ValueError(f"Unknown impersonation profile: {profile}")

        try:
            hg = HeaderGenerator(
                browser=[real_browser],
                locale=[self.geo_country] if self.geo_country else "en-US",
            )
            hdrs = hg.generate()
        except ValueError as e:
            # Chain explicitly so the generator's error stays the direct cause.
            raise RuntimeError(
                f"Failed to generate headers for `{profile}` as `{real_browser}`: {e}"
            ) from e

        # Collapse the UA into one lower-case key: "User-Agent" is always
        # removed, and its value is used only when "user-agent" is absent.
        title_case_ua = hdrs.pop("User-Agent", None)
        ua = hdrs.get("user-agent", title_case_ua)
        if ua:
            hdrs["user-agent"] = ua
        return {k.lower(): v for k, v in hdrs.items()}