Coverage for human_requests / human_context.py: 50%

44 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-25 10:02 +0000

1from __future__ import annotations 

2 

3import json 

4from pathlib import Path 

5from typing import Any, Literal 

6 

7from playwright.async_api import BrowserContext 

8from playwright.async_api import Request as PWRequest 

9from playwright.async_api import Route 

10 

11from .fingerprint import Fingerprint 

12from .human_page import HumanPage 

13 

14# ---- tiny helper to avoid repeating "get-or-create" for page wrappers ---- 

15 

16 

class HumanContext(BrowserContext):
    """
    A type-compatible wrapper over Playwright's BrowserContext.

    Instances are not constructed directly: an existing ``BrowserContext`` is
    re-typed in place via :meth:`replace`, so every Playwright method stays
    available while pages handed out by this class are ``HumanPage`` objects.
    """

    @staticmethod
    def replace(playwright_context: BrowserContext) -> HumanContext:
        """Re-type an existing Playwright context as a ``HumanContext``.

        The object's ``__class__`` is swapped in place; no copy is made and the
        very same object is returned.
        """
        playwright_context.__class__ = HumanContext
        return playwright_context  # type: ignore[return-value]

    async def fingerprint(
        self,
        *,
        wait_until: Literal["commit", "load", "domcontentloaded", "networkidle"] = "load",
        origin: str = "https://example.com",
        timeout: float = 1000,
    ) -> Fingerprint:
        """
        Collect a normalized snapshot of the current browser **fingerprint** as seen by
        web pages and network endpoints, and return it as a `Fingerprint` object.

        The snapshot aggregates:

        - **UA string**: `user_agent` (mirrors `headers["user-agent"]`)
        - **User-Agent Client Hints (UA-CH)**:
            - `user_agent_client_hints.low_entropy` — values available
              without JS `getHighEntropyValues`
            - `user_agent_client_hints.high_entropy` — values from
              `navigator.userAgentData.getHighEntropyValues(...)`
        - **Request headers** used for navigation/fetch (e.g. `sec-ch-ua`, `sec-ch-ua-platform`,
          `accept`, `upgrade-insecure-requests`, etc.) in `headers`
        - **Runtime details** inferred from JS/Navigator:
            - `platform`, `vendor`, `languages`, `timezone`
        - **Parsed/browser meta** derived from UA + UA-CH:
            - `browser_name`, `browser_version`, `os_name`, `os_version`,
              `device_type`, `engine`
        - **Helpers**:
            - `uach`: structured/parsed UA-CH view (including `brands`, `uaFullVersion`,
              `platformVersion`, etc.)
            - `ua`: parsed UA string view (browser/engine/device breakdown)

        Parameters
        ----------
        wait_until
            Navigation milestone forwarded to ``page.goto``.
        origin
            Origin that is intercepted and answered locally with the bundled
            ``fingerprint/fingerprint_gen.html`` page; requests matching
            ``{origin}/**`` are fulfilled by this method's route handler.
        timeout
            Navigation timeout forwarded to ``page.goto`` (Playwright expresses
            it in milliseconds). Defaults to 1000, the value that used to be
            hard-coded.

        Notes
        -----
        - Values are gathered from the **current browser context** using standard
          Navigator/APIs and the context's default request headers. No state is mutated.
        - Consistency is enforced where possible:
            - `headers["user-agent"] == user_agent`
            - `headers["sec-ch-ua*"]` reflect `user_agent_client_hints`
        - Headless/headful indicators (e.g., `HeadlessChrome/...`) are reported *as is*.
          If you need spoofing/stealth, configure it **before** calling this method.
        - The temporary page opened for the probe is always closed, including when
          routing or navigation fails.

        Returns
        -------
        Fingerprint
            A dataclass encapsulating the fields listed above.

        Raises
        ------
        RuntimeError
            If the ``"fingerprint"`` local-storage entry cannot be read or is not
            valid JSON. Errors raised by routing/navigation propagate unchanged.

        Examples
        --------
        >>> fp = await browser.fingerprint()
        >>> fp.user_agent
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)
        HeadlessChrome/140.0.7339.16 Safari/537.36'
        >>> fp.headers["sec-ch-ua"]
        '"Chromium";v="140", "Not=A?Brand";v="24", "HeadlessChrome";v="140"'
        >>> fp.uach.platform, fp.uach.platform_version
        ('Linux', '6.8.0')
        >>> fp.browser_name, fp.browser_version
        ('Chromium', '140.0.7339.16')
        """
        html_path = Path(__file__).parent / "fingerprint" / "fingerprint_gen.html"
        fingerprint_html = html_path.read_text(encoding="utf-8")
        # Filled by the route handler with the headers of every intercepted request.
        headers: dict[str, str] = {}

        async def handler(route: Route, _req: PWRequest) -> None:
            # Record what the browser actually sent, then answer locally so the
            # probe never touches the network.
            headers.update(_req.headers)
            await route.fulfill(
                status=200, content_type="text/html; charset=utf-8", body=fingerprint_html
            )

        page = await self.new_page()
        try:
            # Routing and navigation live inside the try/finally so the page is
            # closed even when goto() times out; their errors are deliberately
            # NOT converted to RuntimeError.
            await page.route(f"{origin}/**", handler)
            await page.goto(origin, wait_until=wait_until, timeout=timeout)
            try:
                storage = await page.local_storage()
                # A missing key yields "", which json.loads rejects -> RuntimeError.
                raw = storage.get("fingerprint", "")
                data = json.loads(raw)
            except Exception as e:
                raise RuntimeError("fingerprint отсутствует или битый JSON") from e
        finally:
            await page.close()

        return Fingerprint(
            user_agent=data.get("user_agent"),
            user_agent_client_hints=data.get("user_agent_client_hints"),
            headers=headers,
            platform=data.get("platform"),
            vendor=data.get("vendor"),
            languages=data.get("languages"),
            timezone=data.get("timezone"),
            # newly added fields
            screen=data.get("screen"),
            window=data.get("window"),
            hardware_concurrency=data.get("hardware_concurrency"),
            device_memory=data.get("device_memory"),
            cookies_enabled=data.get("cookies_enabled"),
            local_storage=data.get("local_storage"),
            session_storage=data.get("session_storage"),
            do_not_track=data.get("do_not_track"),
            touch_support=data.get("touch_support"),
            orientation=data.get("orientation"),
            battery=data.get("battery"),
            canvas_fingerprint=data.get("canvas_fingerprint"),
            webgl_fingerprint=data.get("webgl_fingerprint"),
            audio_fingerprint=data.get("audio_fingerprint"),
            fonts=data.get("fonts"),
        )

    @property
    def pages(self) -> list["HumanPage"]:  # type: ignore[override]
        """Return the context's pages, each passed through ``HumanPage.replace``."""
        return [HumanPage.replace(p) for p in super().pages]

    async def new_page(self) -> "HumanPage":
        """Open a new page in this context and return it as a ``HumanPage``."""
        p = await super().new_page()
        return HumanPage.replace(p)

    # ---------- new funcs ----------

    async def local_storage(self, **kwargs: Any) -> dict[str, dict[str, str]]:
        """Return localStorage contents grouped by origin.

        ``kwargs`` are forwarded to ``storage_state``. The result maps each
        entry of the state's ``"origins"`` list to a ``{name: value}`` dict
        built from that entry's ``"localStorage"`` items; origins without a
        ``"localStorage"`` key map to an empty dict.
        """
        ls = await self.storage_state(**kwargs)
        return {
            o["origin"]: {e["name"]: e["value"] for e in o.get("localStorage", [])}
            for o in ls.get("origins", [])
        }

    def __repr__(self) -> str:
        """Return a debug representation embedding the parent class's repr."""
        return f"<HumanContext wrapping {super().__repr__()!r}>"