Coverage for human_requests/impersonation.py: 89%
82 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-13 21:41 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-13 21:41 +0000
1from __future__ import annotations
3import random
4from dataclasses import dataclass, field
5from enum import Enum, auto
6from typing import Callable, Iterable, Sequence, get_args
8from browserforge.headers import HeaderGenerator
9from browserforge.headers.generator import SUPPORTED_BROWSERS as HD_BROWSERS
10from curl_cffi import requests as cffi_requests
# ---------------------------------------------------------------------------
# Available curl_cffi profiles (discovered dynamically, nothing hard-coded)
# ---------------------------------------------------------------------------
_ALL_PROFILES: list[str] = sorted(get_args(cffi_requests.impersonate.BrowserTypeLiteral))

# Engine name -> family prefix that _family() reports for matching profiles.
# Built grouped by family; the resulting mapping (and its insertion order) is
# the same as a flat literal listing each engine individually.
_ENGINE_FAM = {
    engine: family
    for family, engines in (
        ("chrome", ("chromium", "patchright", "edge", "opera", "yandex")),
        ("safari", ("webkit",)),
        ("firefox", ("firefox", "camoufox", "tor")),
    )
    for engine in engines
}

# Family prefixes recognised by _family(), tried in this order.
_SPOOF_ENGINES_FAM = ["chrome", "firefox", "safari", "edge", "opera", "tor"]
def _family(profile: str) -> str:
    """Return the family prefix of a profile name, e.g. 'chrome122' -> 'chrome'.

    Prefixes are tried in the order given by ``_SPOOF_ENGINES_FAM``; the first
    one that *profile* starts with wins. ``"other"`` is returned when none match.
    """
    candidates = (prefix for prefix in _SPOOF_ENGINES_FAM if profile.startswith(prefix))
    return next(candidates, "other")
# ---------------------------------------------------------------------------
# Profile selection policy for impersonate()
# ---------------------------------------------------------------------------
class Policy(Enum):
    """Decides when ImpersonationConfig picks an impersonation profile."""

    INIT_RANDOM = auto()
    """The profile is picked once and then reused for later requests."""

    RANDOM_EACH_REQUEST = auto()
    """A new profile is picked before every request."""
49# ---------------------------------------------------------------------------
50# Dataclass config
51# ---------------------------------------------------------------------------
def _always(_: str) -> bool:
    """Accept every profile.

    Used as the default value of ``ImpersonationConfig.custom_filter``; the
    argument is ignored.
    """
    return True
@dataclass(slots=True)
class ImpersonationConfig:
    """
    Spoofing settings for curl_cffi **and** browser header generation.

    Example::

        cfg = ImpersonationConfig(
            policy=Policy.RANDOM_EACH_REQUEST,
            browser_family=["chrome", "edge"],
            min_version=120,
            geo_country="de-DE",
            sync_with_engine=True,
        )
    """

    # --- main policy -------------------------------------------------------
    policy: Policy = Policy.INIT_RANDOM
    """Policy for when a profile is selected"""

    # --- profile selection filters ----------------------------------------
    browser_family: str | Sequence[str] | None = None  # 'chrome' or ['chrome','edge']
    """Browser family (chrome, edge, opera, firefox, safari)"""
    min_version: int | None = None  # >=
    """Minimum browser version"""
    custom_filter: Callable[[str], bool] = _always
    """Custom script for filtering impersonation profiles.
    Must return a bool"""

    # --- additional parameters --------------------------------------------
    geo_country: str = "en-US"
    """Language tag in BCP 47 format (en-US, ru-RU, etc.)"""
    sync_with_engine: bool = True  # restrict to Playwright engine family
    """Restrict to the current Playwright engine family (chromium, firefox, webkit),
    or camoufox=firefox"""
    rotate_headers: bool = True  # use HeaderGenerator
    """Whether to generate browser-like headers (user-agent, accept-language, etc.)"""

    # --- internal ----------------------------------------------------------
    # Profile remembered by choose() when the policy is not RANDOM_EACH_REQUEST.
    _cached: str = field(default="", init=False, repr=False)

    # ------------------------------------------------------------------ utils
    @staticmethod
    def _profile_version(profile: str) -> int | None:
        """Return the number formed by all digits in *profile*, or None if it has none.

        Digits are concatenated as-is, so ``'chrome120'`` gives 120 and
        ``'safari17_2_ios'`` gives 172.
        """
        digits = "".join(filter(str.isdigit, profile))
        return int(digits) if digits else None

    def _filter_pool(self, engine: str) -> list[str]:
        """Return the impersonation profiles that pass every configured filter.

        Filters are applied in this order: ``browser_family``, ``min_version``,
        engine family (only when ``sync_with_engine`` is set) and finally
        ``custom_filter``.

        Args:
            engine: Engine name; translated through ``_ENGINE_FAM`` and used
                as-is when it is not a key of that mapping.

        Raises:
            RuntimeError: If no profile is left after filtering.
        """
        fam_set: set[str] = (
            {self.browser_family}
            if isinstance(self.browser_family, str)
            else set(self.browser_family or [])
        )

        pool: list[str] = list(_ALL_PROFILES)
        if fam_set:
            pool = [p for p in pool if _family(p) in fam_set]
        if self.min_version:
            minimum = self.min_version
            # Profile names without any digit carry no comparable version.
            # They are skipped here instead of crashing on int("").
            pool = [
                p
                for p in pool
                if (version := self._profile_version(p)) is not None and version >= minimum
            ]

        if self.sync_with_engine:
            need = _ENGINE_FAM.get(engine, engine)
            first_pass = [p for p in pool if _family(p) == need]
            # Fall back to the unrestricted pool when no profile matches the
            # engine family (e.g. nothing found for "webkit").
            pool = first_pass or pool

        pool = [p for p in pool if self.custom_filter(p)]
        if not pool:
            raise RuntimeError("No impersonation profile satisfies filters")
        return pool

    # ---------------------------------------------------------------- public
    def choose(self, engine: str) -> str:
        """
        Returns the impersonation profile name for the current request.

        With ``Policy.RANDOM_EACH_REQUEST`` a profile is drawn at random on
        every call. Otherwise the first drawn profile is stored and returned
        by all later calls, whatever *engine* they pass.

        Raises:
            RuntimeError: If no profile satisfies the configured filters.
        """
        if self.policy is Policy.RANDOM_EACH_REQUEST:
            return random.choice(self._filter_pool(engine))
        if not self._cached:
            self._cached = random.choice(self._filter_pool(engine))
        return self._cached

    def forge_headers(self, profile: str) -> dict[str, str]:
        """
        Generates a set of real-browser headers for *the same* profile,
        using *browserforge.HeaderGenerator*.

        Returns:
            The generated headers with every key lower-cased, or an empty dict
            when ``rotate_headers`` is disabled.

        Raises:
            ValueError: If *profile* does not start with any browser name
                listed in browserforge's ``SUPPORTED_BROWSERS``.
            RuntimeError: If header generation raises ``ValueError``.
        """
        if not self.rotate_headers:
            return {}

        real_browser = next((brow for brow in HD_BROWSERS if profile.startswith(brow)), None)
        if real_browser is None:
            raise ValueError(f"Unknown impersonation profile: {profile}")

        try:
            hg = HeaderGenerator(
                browser=[real_browser],
                locale=[self.geo_country] if self.geo_country else "en-US",
            )
            hdrs = hg.generate()
        except ValueError as e:
            # Chain explicitly so the original failure stays attached as the cause.
            raise RuntimeError(
                f"Failed to generate headers for `{profile}` as `{real_browser}`: {e}"
            ) from e

        # Normalise the User-Agent key. The pop() is the eagerly evaluated
        # default of get(), so a capitalised "User-Agent" entry is always
        # removed and, when no lowercase entry exists, stored as "user-agent".
        ua = hdrs.get("user-agent", hdrs.pop("User-Agent", None))
        if ua:
            hdrs["user-agent"] = ua
        return {k.lower(): v for k, v in hdrs.items()}