Source code for human_requests.impersonation

from __future__ import annotations

import random
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Callable, Iterable, Sequence, get_args

from browserforge.headers import HeaderGenerator
from browserforge.headers.generator import SUPPORTED_BROWSERS as HD_BROWSERS
from curl_cffi import requests as cffi_requests

# ---------------------------------------------------------------------------
# Доступные профили curl_cffi (динамически, без хардкода)
# ---------------------------------------------------------------------------
_ALL_PROFILES: list[str] = sorted(get_args(cffi_requests.impersonate.BrowserTypeLiteral))
_ENGINE_FAM = {
    "chromium": "chrome",
    "patchright": "chrome",
    "edge": "chrome",
    "opera": "chrome",
    "yandex": "chrome",
    "webkit": "safari",
    "firefox": "firefox",
    "camoufox": "firefox",
    "tor": "firefox",
}
_SPOOF_ENGINES_FAM = ["chrome", "firefox", "safari", "edge", "opera", "tor"]


def _family(profile: str) -> str:  # 'chrome122' -> 'chrome'
    for fam in _SPOOF_ENGINES_FAM:
        if profile.startswith(fam):
            return fam
    return "other"


# ---------------------------------------------------------------------------
# Политика выбора профиля для impersonate()
# ---------------------------------------------------------------------------
[docs] class Policy(Enum): """Policy for selecting a profile in ImpersonationConfig"""
[docs] INIT_RANDOM = auto() # profile is selected when the session is created
"""Profile is selected at session creation and then does not change"""
[docs] RANDOM_EACH_REQUEST = auto() # new profile before each request
"""Profile is selected for every request"""
# --------------------------------------------------------------------------- # Dataclass config # --------------------------------------------------------------------------- def _always(_: str) -> bool: """Default filter for ImpersonationConfig.custom_filter""" return True @dataclass(slots=True)
[docs] class ImpersonationConfig: """ Spoofing settings for curl_cffi **and** browser header generation. Example:: cfg = ImpersonationConfig( policy=Policy.RANDOM_EACH_REQUEST, browser_family=["chrome", "edge"], min_version=120, geo_country="DE", sync_with_engine=True, ) """ # --- main policy -------------------------------------------------------
[docs] policy: Policy = Policy.INIT_RANDOM
"""Policy for when a profile is selected""" # --- profile selection filters ----------------------------------------
[docs] browser_family: str | Sequence[str] | None = None # 'chrome' or ['chrome','edge']
"""Browser family (chrome, edge, opera, firefox, safari)"""
[docs] min_version: int | None = None # >=
"""Minimum browser version"""
[docs] custom_filter: Callable[[str], bool] = _always
"""Custom script for filtering impersonation profiles. Must return a bool""" # --- additional parameters --------------------------------------------
[docs] geo_country: str = "en-US"
"""Language tag in BCP 47 format (en-US, ru-RU, etc.)"""
[docs] sync_with_engine: bool = True # restrict to Playwright engine family
"""Restrict to the current Playwright engine family (chromium, firefox, webkit), or camoufox=firefox"""
[docs] rotate_headers: bool = True # use HeaderGenerator
"""Whether to generate browser-like headers (user-agent, accept-language, etc.)""" # --- внутреннее -------------------------------------------------------- _cached: str = field(default="", init=False, repr=False) # ------------------------------------------------------------------ utils def _filter_pool(self, engine: str) -> list[str]: """Filters available impersonation profiles by Playwright engine""" fam_set: set[str] = ( {self.browser_family} if isinstance(self.browser_family, str) else set(self.browser_family or []) ) pool: Iterable[str] = _ALL_PROFILES if fam_set: pool = [p for p in pool if _family(p) in fam_set] if self.min_version: pool = [p for p in pool if int("".join(filter(str.isdigit, p))) >= self.min_version] if self.sync_with_engine: need = _ENGINE_FAM.get(engine, engine) first_pass = [p for p in pool if _family(p) == need] pool = first_pass or list(pool) # ← fallback если «webkit» не нашёлся pool = [p for p in pool if self.custom_filter(p)] pool = list(pool) if not pool: raise RuntimeError("No impersonation profile satisfies filters") return pool # ---------------------------------------------------------------- public
[docs] def choose(self, engine: str) -> str: """ Returns the impersonation profile name for the current request. """ def _pick(engine: str) -> str: return random.choice(self._filter_pool(engine)) if self.policy is Policy.RANDOM_EACH_REQUEST: return _pick(engine) if not self._cached: self._cached = _pick(engine) return self._cached
[docs] def forge_headers(self, profile: str) -> dict[str, str]: """ Generates a set of real-browser headers for *the same* profile, using *browserforge.HeaderGenerator*. """ if not self.rotate_headers: return {} real_browser = "unknown" for brow in HD_BROWSERS: if profile.startswith(brow): real_browser = brow break else: raise ValueError(f"Unknown impersonation profile: {profile}") try: hg = HeaderGenerator( browser=[real_browser], locale=[self.geo_country] if self.geo_country else "en-US", ) hdrs = hg.generate() except ValueError as e: raise RuntimeError( f"Failed to generate headers for `{profile}` as `{real_browser}`: {e}" ) # HeaderGenerator возвращает UA отдельным полем (не всегда кладёт в dict) ua = hdrs.get("user-agent", hdrs.pop("User-Agent", None)) if ua: hdrs["user-agent"] = ua return {k.lower(): v for k, v in hdrs.items()}