Coverage for human_requests / human_context.py: 50%
44 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 10:02 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 10:02 +0000
1from __future__ import annotations
3import json
4from pathlib import Path
5from typing import Any, Literal
7from playwright.async_api import BrowserContext
8from playwright.async_api import Request as PWRequest
9from playwright.async_api import Route
11from .fingerprint import Fingerprint
12from .human_page import HumanPage
14# ---- tiny helper to avoid repeating "get-or-create" for page wrappers ----
class HumanContext(BrowserContext):
    """
    A type-compatible wrapper over Playwright's BrowserContext.

    Instances are not constructed directly: an existing Playwright context is
    re-classed in place with :meth:`replace`. Pages returned by this class are
    passed through ``HumanPage.replace``.
    """

    @staticmethod
    def replace(playwright_context: BrowserContext) -> HumanContext:
        """Re-class ``playwright_context`` as ``HumanContext`` in place and return it."""
        playwright_context.__class__ = HumanContext
        return playwright_context  # type: ignore[return-value]

    async def fingerprint(
        self,
        *,
        wait_until: Literal["commit", "load", "domcontentloaded", "networkidle"] = "load",
        origin: str = "https://example.com",
    ) -> Fingerprint:
        """
        Collect a snapshot of the current browser **fingerprint** as seen by
        web pages and network endpoints, and return it as a `Fingerprint` object.

        The snapshot aggregates:

        - **UA string**: `user_agent`
        - **User-Agent Client Hints (UA-CH)**: `user_agent_client_hints`
          (low-entropy values and the high-entropy values obtained through
          `navigator.userAgentData.getHighEntropyValues(...)`)
        - **Request headers** captured from the intercepted requests
          (e.g. `sec-ch-ua`, `sec-ch-ua-platform`, `accept`,
          `upgrade-insecure-requests`, etc.) in `headers`
        - **Runtime details** reported by the page:
          `platform`, `vendor`, `languages`, `timezone`
        - **Device/environment details**: `screen`, `window`,
          `hardware_concurrency`, `device_memory`, `cookies_enabled`,
          `local_storage`, `session_storage`, `do_not_track`, `touch_support`,
          `orientation`, `battery`, `canvas_fingerprint`, `webgl_fingerprint`,
          `audio_fingerprint`, `fonts`

        Parameters
        ----------
        wait_until
            Navigation lifecycle event to wait for, forwarded to ``page.goto``.
        origin
            Origin the generator page is served under. Every request matching
            ``{origin}/**`` is intercepted and answered with the bundled
            ``fingerprint/fingerprint_gen.html``, so no request for that origin
            is sent to the network. A trailing ``/`` is tolerated.

        Notes
        -----
        - A temporary page is opened in this context and is always closed
          before the method returns or raises.
        - The result is read from the page's local storage under the key
          ``fingerprint`` and must be a JSON object; keys missing from it
          become ``None`` in the returned `Fingerprint`.
        - If several requests are intercepted, later header values overwrite
          earlier ones for the same header name.
        - Headless/headful indicators (e.g., `HeadlessChrome/...`) are reported *as is*.
          If you need spoofing/stealth, configure it **before** calling this method.
        - Navigation uses a fixed 1000 ms timeout.

        Returns
        -------
        Fingerprint
            An object built from the fields listed above.

        Raises
        ------
        RuntimeError
            If the ``fingerprint`` entry cannot be read from local storage or
            is not valid JSON.

        Navigation and routing errors raised by Playwright propagate unchanged.

        Examples
        --------
        >>> fp = await context.fingerprint()
        >>> fp.user_agent
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)
        HeadlessChrome/140.0.7339.16 Safari/537.36'
        >>> fp.headers["sec-ch-ua"]
        '"Chromium";v="140", "Not=A?Brand";v="24", "HeadlessChrome";v="140"'
        """
        html_path = Path(__file__).parent / "fingerprint" / "fingerprint_gen.html"
        fingerprint_html = html_path.read_text(encoding="utf-8")
        headers: dict[str, str] = {}

        async def handler(route: Route, request: PWRequest) -> None:
            # Record what the browser actually sent, then answer locally so the
            # request never leaves the machine.
            headers.update(request.headers)
            await route.fulfill(
                status=200, content_type="text/html; charset=utf-8", body=fingerprint_html
            )

        page = await self.new_page()
        try:
            # Strip a trailing slash so "https://host/" does not yield the glob
            # "https://host//**", which would not match the navigated URL.
            await page.route(f"{origin.rstrip('/')}/**", handler)
            await page.goto(origin, wait_until=wait_until, timeout=1000)
            try:
                storage = await page.local_storage()
                raw = storage.get("fingerprint", "")
                data = json.loads(raw)
            except Exception as e:
                raise RuntimeError("fingerprint отсутствует или битый JSON") from e
        finally:
            # Covers routing and navigation failures too, so the temporary page
            # is never leaked.
            await page.close()

        return Fingerprint(
            user_agent=data.get("user_agent"),
            user_agent_client_hints=data.get("user_agent_client_hints"),
            headers=headers,
            platform=data.get("platform"),
            vendor=data.get("vendor"),
            languages=data.get("languages"),
            timezone=data.get("timezone"),
            # newer fields
            screen=data.get("screen"),
            window=data.get("window"),
            hardware_concurrency=data.get("hardware_concurrency"),
            device_memory=data.get("device_memory"),
            cookies_enabled=data.get("cookies_enabled"),
            local_storage=data.get("local_storage"),
            session_storage=data.get("session_storage"),
            do_not_track=data.get("do_not_track"),
            touch_support=data.get("touch_support"),
            orientation=data.get("orientation"),
            battery=data.get("battery"),
            canvas_fingerprint=data.get("canvas_fingerprint"),
            webgl_fingerprint=data.get("webgl_fingerprint"),
            audio_fingerprint=data.get("audio_fingerprint"),
            fonts=data.get("fonts"),
        )

    @property
    def pages(self) -> list["HumanPage"]:  # type: ignore[override]
        """Return the context's pages, each passed through ``HumanPage.replace``."""
        return [HumanPage.replace(p) for p in super().pages]

    async def new_page(self) -> "HumanPage":
        """Open a new page in this context and return it as a ``HumanPage``."""
        p = await super().new_page()
        return HumanPage.replace(p)

    # ---------- new funcs ----------

    async def local_storage(self, **kwargs: Any) -> dict[str, dict[str, str]]:
        """
        Return local-storage entries grouped by origin.

        ``kwargs`` are forwarded to ``storage_state``. The result maps each
        origin to a ``{name: value}`` dict built from its ``localStorage`` list;
        an origin without that list maps to an empty dict.
        """
        ls = await self.storage_state(**kwargs)
        return {
            o["origin"]: {e["name"]: e["value"] for e in o.get("localStorage", [])}
            for o in ls.get("origins", [])
        }

    def __repr__(self) -> str:
        """Return a debug representation that embeds the base-class repr."""
        return f"<HumanContext wrapping {super().__repr__()!r}>"