Source code for human_requests.session

"""
core.session — unified stateful session for *curl_cffi* and *Playwright*-compatible engines.

Main Methods
============
* ``Session.request``   — low-level HTTP request (curl_cffi) with cookie jar.
* ``Session.goto_page`` — opens a URL in the browser, returns a Page inside
  a context manager; upon exit synchronizes cookies + localStorage.
* ``Response.render``   — offline render of a pre-fetched Response.

Optional Dependencies
=====================
- playwright-stealth: enabled via `playwright_stealth=True`.
  If the package is not installed and the flag is set — raises RuntimeError
  with installation instructions.
- camoufox: selected with `browser='camoufox'`.
- patchright: selected with `browser='patchright'`.
- Incompatibility: camoufox/patchright + playwright_stealth cannot be used together.
  Raises RuntimeError.


Additional
==========
- Browser launch arguments are assembled via `Session._make_browser_launch_opts()` from:
  - `browser_launch_opts` (arbitrary dict)
  - `headless` (always overrides the key of the same name)
  - `proxy` (string URL or dict) → adapted for Playwright/Patchright/Camoufox
- Proxy is also applied to curl_cffi (if no custom `proxy` is passed in .request()).
"""

from __future__ import annotations

from contextlib import asynccontextmanager
from time import perf_counter
from types import TracebackType
from typing import Any, AsyncGenerator, Literal, Mapping, Optional, cast
from urllib.parse import urlsplit

from curl_cffi import requests as cffi_requests
from playwright.async_api import BrowserContext, Page
from playwright.async_api import Request as PWRequest
from playwright.async_api import Route

from .abstraction.cookies import CookieManager
from .abstraction.http import URL, HttpMethod
from .abstraction.proxy_manager import ParsedProxy
from .abstraction.request import Request
from .abstraction.response import Response
from .browsers import BrowserMaster, Engine
from .impersonation import ImpersonationConfig
from .tools.helper_tools import (
    build_storage_state_for_context,
    handle_nav_with_retries,
    merge_storage_state_from_context,
)
from .tools.http_utils import (
    collect_set_cookie_headers,
    compose_cookie_header,
    guess_encoding,
    parse_set_cookie,
)

__all__ = ["Session"]


class Session:
    """curl_cffi.AsyncSession + BrowserMaster + CookieManager.

    A single stateful object that can issue direct HTTP requests through
    curl_cffi (``request``) and open pages in a browser (``goto_page``),
    sharing one cookie store and one localStorage snapshot between both.

    Usable as an async context manager; leaving the ``async with`` block
    calls :meth:`close`.
    """

    def __init__(
        self,
        *,
        timeout: float = 15.0,
        headless: bool = True,
        browser: Engine = "chromium",
        spoof: ImpersonationConfig | None = None,
        playwright_stealth: bool = True,
        page_retry: int = 2,
        direct_retry: int = 1,
        browser_launch_opts: Mapping[str, Any] | None = None,
        proxy: str | dict[str, str] | None = None,
    ) -> None:
        """
        Args:
            timeout: default timeout for both direct and goto requests
            headless: launch mode (passed into browser launch arguments)
            browser: chromium/firefox/webkit — standard; camoufox/patchright — special builds
            spoof: configuration for direct requests
            playwright_stealth: hides certain automation browser signatures
            page_retry: number of "soft" retries for page navigation (after the initial attempt)
            direct_retry: retries for direct requests on curl_cffi Timeout (after first attempt)
            browser_launch_opts: arbitrary extra browser launch arguments;
                ``None`` means "no extra arguments"
            proxy: proxy URL string or playwright-like dict

        Raises:
            RuntimeError: if ``playwright_stealth`` is enabled together with
                ``browser='camoufox'`` or ``browser='patchright'``.
        """
        self.timeout: float = timeout
        """Timeout for goto/direct requests."""

        self.headless: bool = bool(headless)
        """Whether to run the browser in headless mode."""

        self.browser_name: Engine = browser
        """Current browser (chromium/firefox/webkit/camoufox/patchright)."""

        self.spoof: ImpersonationConfig = spoof or ImpersonationConfig()
        """Impersonation settings (user-agent, TLS, client-hello)."""

        self.playwright_stealth: bool = bool(playwright_stealth)
        """Hide certain automation signatures?
        Implemented via JS injection. Some sites may detect this."""

        self.page_retry: int = int(page_retry)
        """If a timeout occurs after N seconds — retry with page.reload()."""

        self.direct_retry: int = int(direct_retry)
        """If a timeout occurs after N seconds — retry the direct request."""

        if self.browser_name in ("camoufox", "patchright") and self.playwright_stealth:
            raise RuntimeError(
                "playwright_stealth=True is incompatible with browser='camoufox'/'patchright'. "
                "Disable stealth or use chromium/firefox/webkit."
            )

        # Custom browser launch parameters + proxy.
        # A fresh dict per instance: a shared ``{}`` default would be visible
        # to (and mutable from) every Session created without this argument.
        self.browser_launch_opts: Mapping[str, Any] = (
            browser_launch_opts if browser_launch_opts is not None else {}
        )
        """Browser launch arguments (arbitrary keys)."""

        self.proxy: str | dict[str, str] | None = proxy
        """
        Proxy server, one of:

        a. URL string in the form: `schema://user:pass@host:port`

        b. playwright-like dict
        """

        # Cookie/localStorage state
        self.cookies: CookieManager = CookieManager([])
        """Storage of all active cookies."""

        self.local_storage: dict[str, dict[str, str]] = {}
        """localStorage from the last browser context (goto run)."""

        # Low-level HTTP session, created lazily on the first request().
        self._curl: Optional[cffi_requests.AsyncSession] = None

        # Browser engine, managed through BrowserMaster.
        self._bm: BrowserMaster = BrowserMaster(
            engine=self.browser_name,
            stealth=self.playwright_stealth,
            launch_opts=self._make_browser_launch_opts(),  # initial snapshot
        )

    # ──────────────── Launch args & proxy helpers ────────────────
    def _make_browser_launch_opts(self) -> dict[str, Any]:
        """
        Merges launch arguments for BrowserMaster from Session settings.

        Sources:
          - self.browser_launch_opts (arbitrary keys)
          - self.headless (overrides the key of the same name)
          - self.proxy (URL string or dict) → converted to Playwright-style proxy

        Returns:
            A new dict; the stored ``browser_launch_opts`` is not modified.
        """
        opts = dict(self.browser_launch_opts)
        opts["headless"] = bool(self.headless)

        pw_proxy = ParsedProxy.from_any(self.proxy)
        if pw_proxy is not None:
            opts["proxy"] = pw_proxy.for_playwright()

        return opts

    # ────── HTTP via curl_cffi ──────
    async def request(
        self,
        method: HttpMethod | str,
        url: str,
        *,
        headers: Optional[Mapping[str, str]] = None,
        retry: int | None = None,
        **kwargs: Any,
    ) -> Response:
        """
        Standard fast request via curl_cffi.

        You must provide either an HttpMethod or its string representation, as well as a URL.
        Optionally, you can pass additional headers.

        Extra parameters can be passed through **kwargs to curl_cffi.AsyncSession.request
        (see their documentation for details). Two keys are handled here:

        * ``proxy``   — overrides ``Session.proxy`` for this request;
        * ``timeout`` — overrides ``Session.timeout`` for this request.

        Retries are performed ONLY on cffi Timeout: ``curl_cffi.requests.exceptions.Timeout``.

        Args:
            method: HTTP method (enum member or its name, case-insensitive).
            url: target URL.
            headers: extra request headers; keys are lower-cased.
            retry: number of retries after the first attempt; defaults to
                ``Session.direct_retry``.

        Returns:
            A ``Response`` model; cookies set by the server are also added to
            ``Session.cookies``.

        Raises:
            KeyError: if ``method`` is a string that is not an ``HttpMethod`` name.
            curl_cffi.requests.exceptions.Timeout: if every attempt timed out.
        """
        method_enum = method if isinstance(method, HttpMethod) else HttpMethod[str(method).upper()]
        base_headers = {k.lower(): v for k, v in (headers or {}).items()}

        # Lazy curl session; binding it to a local gives a non-Optional name
        # without relying on ``assert`` (which is stripped under -O).
        curl = self._curl
        if curl is None:
            curl = self._curl = cffi_requests.AsyncSession()

        # Spoofed UA / headers
        imper_profile = self.spoof.choose(self.browser_name)
        base_headers.update(self.spoof.forge_headers(imper_profile))

        # Cookie header (computed once, before the first attempt)
        url_parts = urlsplit(url)
        cookie_header, sent_cookies = compose_cookie_header(
            url_parts, base_headers, list(self.cookies)
        )
        if cookie_header:
            base_headers["cookie"] = cookie_header

        # Proxy: the per-request value wins, otherwise fall back to Session.proxy
        pp_user_proxies = ParsedProxy.from_any(kwargs.pop("proxy", None))
        user_proxies = pp_user_proxies.for_curl() if pp_user_proxies else None

        pp_default_proxies = ParsedProxy.from_any(self.proxy)
        default_proxies = pp_default_proxies.for_curl() if pp_default_proxies else None

        effective_proxy = user_proxies if user_proxies is not None else default_proxies

        # Popped so that a caller-supplied ``timeout=`` overrides the session
        # default instead of colliding with it as a duplicate keyword argument.
        timeout = kwargs.pop("timeout", self.timeout)

        retries_allowed = self.direct_retry if retry is None else int(retry)

        async def _do_request() -> tuple[Any, float]:
            req_headers = dict(base_headers)  # fresh copy per attempt
            t0 = perf_counter()
            r = await curl.request(
                method_enum.value,
                url,
                headers=req_headers,
                impersonate=cast(  # narrow the type to curl_cffi's Literal set
                    "cffi_requests.impersonate.BrowserTypeLiteral", imper_profile
                ),
                timeout=timeout,
                proxy=effective_proxy,
                **kwargs,
            )
            duration = perf_counter() - t0
            return r, duration

        # First attempt + soft retries on Timeout; the last Timeout is re-raised.
        retries_used = 0
        while True:
            try:
                r, duration = await _do_request()
                break
            except cffi_requests.exceptions.Timeout:
                if retries_used >= retries_allowed:
                    raise
                retries_used += 1

        # response → cookies
        resp_headers = {k.lower(): v for k, v in r.headers.items()}
        raw_sc = collect_set_cookie_headers(r.headers)
        resp_cookies = parse_set_cookie(raw_sc, url_parts.hostname or "")
        self.cookies.add(resp_cookies)

        charset = guess_encoding(resp_headers)
        body_text = r.content.decode(charset, errors="replace")

        data = kwargs.get("data")
        json_body = kwargs.get("json")
        files = kwargs.get("files")

        # models
        req_model = Request(
            method=method_enum,
            url=URL(full_url=url),
            headers=dict(base_headers),
            body=data or json_body or files or None,
            cookies=sent_cookies,
        )
        resp_model = Response(
            request=req_model,
            url=URL(full_url=str(r.url)),
            headers=resp_headers,
            cookies=resp_cookies,
            body=body_text,
            status_code=r.status_code,
            duration=duration,
            _render_callable=self._render_response,
        )
        return resp_model

    # ────── browser helpers ──────
    async def _sync_and_close(self, ctx: BrowserContext, page: Optional[Page]) -> None:
        """Pull cookies/localStorage back from ``ctx`` and release page and context.

        The context is closed even if the storage sync or ``page.close()``
        raises. When ``page`` is ``None`` (page creation failed) the sync step
        is skipped and only the context is closed.
        """
        if page is None:
            await ctx.close()
            return

        try:
            self.local_storage = await merge_storage_state_from_context(
                ctx, cookie_manager=self.cookies
            )
        finally:
            try:
                await page.close()
            finally:
                await ctx.close()

    # ────── browser nav ──────
    @asynccontextmanager
    async def goto_page(
        self,
        url: str,
        *,
        wait_until: Literal["commit", "load", "domcontentloaded", "networkidle"] = "commit",
        retry: int | None = None,
    ) -> AsyncGenerator[Page, None]:
        """
        Opens a page in the browser using a one-time context.
        Retries perform a "soft reload" without recreating the context.

        Args:
            url: address to navigate to.
            wait_until: navigation milestone to wait for.
            retry: number of retries after the first attempt; defaults to
                ``Session.page_retry``.

        Yields:
            The opened ``Page``. On exit, cookies and localStorage are merged
            back into the session and the page and context are closed.
        """
        # Refresh the launch arguments in the master before starting
        self._bm.launch_opts = self._make_browser_launch_opts()
        await self._bm.start()

        storage_state = build_storage_state_for_context(
            local_storage=self.local_storage,
            cookie_manager=self.cookies,
        )
        ctx = await self._bm.new_context(storage_state=storage_state)

        timeout_ms = int(self.timeout * 1000)
        attempts_left = self.page_retry if retry is None else int(retry)

        # Everything after context creation lives inside ``try`` so that the
        # context is released even if the page cannot be created.
        page: Optional[Page] = None
        try:
            page = await ctx.new_page()
            await handle_nav_with_retries(
                page,
                target_url=url,
                wait_until=wait_until,
                timeout_ms=timeout_ms,
                attempts=attempts_left,
                on_retry=None,
            )
            yield page
        finally:
            await self._sync_and_close(ctx, page)

    # ────── Offline render ──────
    @asynccontextmanager
    async def _render_response(
        self,
        response: Response,
        *,
        wait_until: Literal["load", "domcontentloaded", "networkidle"] = "domcontentloaded",
        retry: int | None = None,
    ) -> AsyncGenerator[Page, None]:
        """
        Offline render of a Response: creates a temporary context (with our storage_state),
        intercepts the first request and responds with the prepared body.
        Retries do not recreate the context/page — instead a "soft reload" is performed,
        reattaching the route on retry.

        Args:
            response: previously fetched response to replay into the browser.
            wait_until: navigation milestone to wait for.
            retry: number of retries after the first attempt; defaults to
                ``Session.page_retry``.

        Yields:
            The ``Page`` showing the replayed response. On exit, cookies and
            localStorage are merged back and the page and context are closed.
        """
        # Refresh the launch arguments in the master before starting
        self._bm.launch_opts = self._make_browser_launch_opts()
        await self._bm.start()

        storage_state = build_storage_state_for_context(
            local_storage=self.local_storage,
            cookie_manager=self.cookies,
        )
        ctx: BrowserContext = await self._bm.new_context(storage_state=cast(Any, storage_state))

        timeout_ms = int(self.timeout * 1000)
        attempts_left = self.page_retry if retry is None else int(retry)

        async def _attach_route_once() -> None:
            # Drop any previous catch-all route, then register a one-shot
            # handler that answers with the stored response.
            await ctx.unroute("**/*")

            async def handler(route: Route, _req: PWRequest) -> None:
                # NOTE(review): headers are forwarded verbatim while the body is
                # re-encoded as UTF-8; headers such as content-encoding /
                # content-length / a non-UTF-8 charset may not match — confirm.
                await route.fulfill(
                    status=response.status_code,
                    headers=dict(response.headers),
                    body=response.body.encode("utf-8"),
                )

            await ctx.route("**/*", handler, times=1)

        async def _on_retry() -> None:
            await _attach_route_once()

        # Route attach and page creation are inside ``try`` so the context is
        # released if either of them fails.
        page: Optional[Page] = None
        try:
            await _attach_route_once()
            page = await ctx.new_page()
            await handle_nav_with_retries(
                page,
                target_url=response.url.full_url,
                wait_until=wait_until,
                timeout_ms=timeout_ms,
                attempts=attempts_left,
                on_retry=_on_retry,
            )
            yield page
        finally:
            await self._sync_and_close(ctx, page)

    # ────── cleanup ──────
    async def close(self) -> None:
        """Close the browser engine and the HTTP session.

        The HTTP session is closed even if closing the browser raises, and
        ``_curl`` is reset so a later ``request()`` creates a new session.
        """
        try:
            # Close the browser engines
            await self._bm.close()
        finally:
            # Close the HTTP session
            curl, self._curl = self._curl, None
            if curl is not None:
                await curl.close()

    # "async with" support
    async def __aenter__(self) -> "Session":
        """Return the session itself; nothing is started eagerly."""
        return self

    async def __aexit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc: Optional[BaseException],
        tb: Optional[TracebackType],
    ) -> None:
        """Close the session on leaving the ``async with`` block."""
        await self.close()