""" 测试 cfspider 反爬绕过能力 验证 TLS 指纹模拟、Workers 代理等功能的实际效果 """ import sys sys.path.insert(0, '.') import cfspider import json CF_WORKERS = "https://ip.kami666.xyz" def test_tls_fingerprint(): """测试 TLS 指纹检测""" print("\n" + "="*70) print("测试 1: TLS 指纹检测 (browserleaks.com)") print("="*70) # 1. 普通 requests(Python 默认指纹) print("\n[1.1] 普通请求(无指纹模拟):") try: response = cfspider.get("https://tls.browserleaks.com/json") data = response.json() print(f" JA3 Hash: {data.get('ja3_hash', 'N/A')[:20]}...") print(f" User Agent: {data.get('user_agent', 'N/A')[:50]}...") print(f" 状态: 可能被识别为 Python 爬虫") except Exception as e: print(f" 错误: {e}") # 2. Chrome 131 指纹 print("\n[1.2] Chrome 131 指纹模拟:") try: response = cfspider.get( "https://tls.browserleaks.com/json", impersonate="chrome131" ) data = response.json() print(f" JA3 Hash: {data.get('ja3_hash', 'N/A')[:20]}...") print(f" JA4: {data.get('ja4', 'N/A')[:30]}...") print(f" Akamai Hash: {data.get('akamai_hash', 'N/A')[:20]}...") print(f" 状态: ✓ 模拟真实 Chrome 浏览器") except Exception as e: print(f" 错误: {e}") # 3. Safari 指纹 print("\n[1.3] Safari 18 指纹模拟:") try: response = cfspider.get( "https://tls.browserleaks.com/json", impersonate="safari18_0" ) data = response.json() print(f" JA3 Hash: {data.get('ja3_hash', 'N/A')[:20]}...") print(f" JA4: {data.get('ja4', 'N/A')[:30]}...") print(f" 状态: ✓ 模拟真实 Safari 浏览器") except Exception as e: print(f" 错误: {e}") def test_cloudflare_detection(): """测试 Cloudflare 反爬检测""" print("\n" + "="*70) print("测试 2: Cloudflare 反爬检测") print("="*70) # 测试 Cloudflare trace print("\n[2.1] Cloudflare CDN Trace:") try: response = cfspider.get( "https://www.cloudflare.com/cdn-cgi/trace", impersonate="chrome131" ) lines = response.text.strip().split('\n') for line in lines: if any(k in line for k in ['ip=', 'loc=', 'colo=', 'warp=']): print(f" {line}") print(f" 状态码: {response.status_code}") print(f" 状态: ✓ 成功访问 Cloudflare") except Exception as e: print(f" 错误: {e}") def test_nowsecure(): """测试 NowSecure 反爬检测""" print("\n" + "="*70) print("测试 3: NowSecure 反爬检测 (nowsecure.nl)") print("="*70) print("\n[3.1] 使用 Chrome 131 指纹:") try: response = cfspider.get( "https://nowsecure.nl/", impersonate="chrome131", headers={"Accept-Language": "en-US,en;q=0.9"} ) print(f" 状态码: {response.status_code}") if response.status_code == 200: if "You are not a bot" in response.text or "passed" in response.text.lower(): print(f" 状态: ✓ 通过反爬检测!") elif "challenge" in response.text.lower(): print(f" 状态: ⚠ 需要 JavaScript 挑战") else: print(f" 状态: 已获取响应 ({len(response.text)} 字节)") else: print(f" 状态: HTTP {response.status_code}") except Exception as e: print(f" 错误: {e}") def test_httpbin_with_workers(): """测试 Workers 代理 + TLS 指纹组合""" print("\n" + "="*70) print("测试 4: Workers 代理 + TLS 指纹组合") print("="*70) print("\n[4.1] Workers 代理 + Chrome 指纹:") try: response = cfspider.get( "https://httpbin.org/ip", cf_proxies=CF_WORKERS, impersonate="chrome131" ) data = response.json() print(f" 出口 IP: {data.get('origin', 'N/A')}") print(f" CF Colo: {response.cf_colo}") print(f" 状态: ✓ 使用 Cloudflare IP + Chrome 指纹") except Exception as e: print(f" 错误: {e}") print("\n[4.2] Workers 代理检测请求头:") try: response = cfspider.get( "https://httpbin.org/headers", cf_proxies=CF_WORKERS, impersonate="chrome131", headers={ "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", "Accept-Encoding": "gzip, deflate, br" } ) data = response.json() headers = data.get('headers', {}) print(f" User-Agent: {headers.get('User-Agent', 'N/A')[:60]}...") print(f" Accept-Language: {headers.get('Accept-Language', 'N/A')}") print(f" 状态: ✓ 请求头正确传递") except Exception as e: print(f" 错误: {e}") def test_async_with_fingerprint(): """测试异步请求 + TLS 指纹""" print("\n" + "="*70) print("测试 5: 异步请求功能") print("="*70) import asyncio async def async_test(): print("\n[5.1] 异步 GET 请求:") try: response = await cfspider.aget( "https://httpbin.org/ip", cf_proxies=CF_WORKERS ) data = response.json() print(f" 出口 IP: {data.get('origin', 'N/A')}") print(f" CF Colo: {response.cf_colo}") print(f" 状态: ✓ 异步请求成功") except Exception as e: print(f" 错误: {e}") print("\n[5.2] 并发异步请求:") try: import time start = time.time() tasks = [ cfspider.aget("https://httpbin.org/delay/1", cf_proxies=CF_WORKERS), cfspider.aget("https://httpbin.org/delay/1", cf_proxies=CF_WORKERS), cfspider.aget("https://httpbin.org/delay/1", cf_proxies=CF_WORKERS) ] responses = await asyncio.gather(*tasks) elapsed = time.time() - start print(f" 3个并发请求完成") print(f" 总耗时: {elapsed:.2f}s (串行约需 3s)") print(f" 状态: ✓ 并发请求有效") except Exception as e: print(f" 错误: {e}") asyncio.run(async_test()) def test_fingerprint_comparison(): """对比不同指纹的差异""" print("\n" + "="*70) print("测试 6: 不同浏览器指纹对比") print("="*70) browsers = [ ("chrome131", "Chrome 131"), ("safari18_0", "Safari 18"), ("firefox133", "Firefox 133"), ("edge101", "Edge 101") ] print("\n 浏览器 | JA3 Hash (前16字符) | JA4 (前20字符)") print(" " + "-"*70) for browser_id, browser_name in browsers: try: response = cfspider.get( "https://tls.browserleaks.com/json", impersonate=browser_id ) data = response.json() ja3 = data.get('ja3_hash', 'N/A')[:16] ja4 = data.get('ja4', 'N/A')[:20] print(f" {browser_name:14} | {ja3:22} | {ja4}") except Exception as e: print(f" {browser_name:14} | 错误: {e}") def test_real_websites(): """测试访问真实网站""" print("\n" + "="*70) print("测试 7: 访问真实网站") print("="*70) websites = [ ("https://www.google.com", "Google"), ("https://www.amazon.com", "Amazon"), ("https://www.github.com", "GitHub"), ("https://www.cloudflare.com", "Cloudflare"), ] for url, name in websites: print(f"\n[{name}]") try: response = cfspider.get( url, impersonate="chrome131", headers={ "Accept-Language": "en-US,en;q=0.9", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" }, timeout=15 ) print(f" 状态码: {response.status_code}") print(f" 响应大小: {len(response.text):,} 字节") if response.status_code == 200: print(f" 状态: ✓ 成功访问") else: print(f" 状态: ⚠ HTTP {response.status_code}") except Exception as e: print(f" 错误: {e}") def test_async_http2(): """测试异步请求 + HTTP/2""" print("\n" + "="*70) print("测试 8: 异步请求 + HTTP/2") print("="*70) import asyncio async def async_http2_test(): print("\n[8.1] 异步 HTTP/2 GET 请求:") try: response = await cfspider.aget( "https://httpbin.org/get", cf_proxies=CF_WORKERS, params={"async": "true", "http2": "enabled"} ) data = response.json() print(f" 状态码: {response.status_code}") print(f" CF Colo: {response.cf_colo}") print(f" HTTP 版本: {getattr(response, 'http_version', 'N/A')}") print(f" URL 参数: {data.get('args', {})}") print(f" 状态: OK 异步 HTTP/2 请求成功") except Exception as e: print(f" 错误: {e}") print("\n[8.2] 异步 HTTP/2 POST 请求:") try: response = await cfspider.apost( "https://httpbin.org/post", cf_proxies=CF_WORKERS, json={"async": True, "http2": True, "test": "cfspider"}, headers={"Content-Type": "application/json"} ) data = response.json() print(f" 状态码: {response.status_code}") print(f" CF Colo: {response.cf_colo}") print(f" POST JSON: {data.get('json', {})}") print(f" 状态: OK 异步 POST 成功") except Exception as e: print(f" 错误: {e}") print("\n[8.3] 异步 Session + HTTP/2:") try: async with cfspider.AsyncSession(cf_proxies=CF_WORKERS) as session: r1 = await session.get("https://httpbin.org/ip") r2 = await session.get("https://httpbin.org/headers") r3 = await session.post("https://httpbin.org/post", json={"session": "test"}) print(f" 请求 1 状态码: {r1.status_code}") print(f" 请求 2 状态码: {r2.status_code}") print(f" 请求 3 状态码: {r3.status_code}") print(f" CF Colo: {r1.cf_colo}") print(f" 状态: OK 异步 Session 正常") except Exception as e: print(f" 错误: {e}") print("\n[8.4] 异步并发 HTTP/2 请求:") try: import time start = time.time() # 5 个并发请求 tasks = [ cfspider.aget(f"https://httpbin.org/delay/1?id={i}", cf_proxies=CF_WORKERS) for i in range(5) ] responses = await asyncio.gather(*tasks) elapsed = time.time() - start print(f" 5 个并发请求完成") print(f" 总耗时: {elapsed:.2f}s (串行约需 5s+)") print(f" 所有状态码: {[r.status_code for r in responses]}") print(f" 状态: OK 并发请求有效 (节省 {5 - elapsed:.1f}s)") except Exception as e: print(f" 错误: {e}") print("\n[8.5] 异步流式下载 (astream):") try: total_bytes = 0 async with cfspider.astream("GET", "https://httpbin.org/bytes/10240", cf_proxies=CF_WORKERS) as response: async for chunk in response.aiter_bytes(chunk_size=1024): total_bytes += len(chunk) print(f" 下载字节数: {total_bytes}") print(f" 状态: OK 流式下载成功") except Exception as e: print(f" 错误: {e}") asyncio.run(async_http2_test()) def test_all_parameters(): """测试 .get() 方法的所有参数组合""" print("\n" + "="*70) print("测试 9: .get() 方法所有参数组合") print("="*70) print("\n[9.1] HTTP/2 + Workers + 所有参数:") try: response = cfspider.get( # 基本参数 url="https://httpbin.org/get", # CFspider 特有参数 cf_proxies=CF_WORKERS, # Workers 代理 cf_workers=True, # 使用 Workers API http2=True, # 启用 HTTP/2 impersonate=None, # HTTP/2 模式不使用指纹 # requests 兼容参数 params={"key1": "value1", "key2": "value2", "chinese": "中文"}, headers={ "User-Agent": "CFspider-HTTP2-Test/1.0", "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8", "Accept": "application/json", "X-Custom-Header": "http2-test" }, cookies={"session": "http2_test", "user": "cfspider"}, timeout=30 ) data = response.json() print(f" 状态码: {response.status_code}") print(f" CF Colo: {response.cf_colo}") print(f" CF Ray: {response.cf_ray}") print(f" URL 参数: {data.get('args', {})}") print(f" Headers 数量: {len(data.get('headers', {}))}") print(f" Origin: {data.get('origin', 'N/A')}") print(f" 状态: OK HTTP/2 + 所有参数正确") except Exception as e: print(f" 错误: {e}") print("\n[9.2] HTTP/2 + Workers + POST + 所有参数:") try: response = cfspider.post( url="https://httpbin.org/post", cf_proxies=CF_WORKERS, cf_workers=True, http2=True, params={"action": "http2_post"}, headers={ "Content-Type": "application/json", "Accept": "application/json", "X-Request-ID": "http2-post-test" }, cookies={"auth": "http2_token"}, json={ "name": "cfspider", "version": "1.4.1", "http2": True, "features": ["proxy", "fingerprint", "async", "http2"] }, timeout=30 ) data = response.json() print(f" 状态码: {response.status_code}") print(f" CF Colo: {response.cf_colo}") print(f" POST JSON: {data.get('json', {})}") print(f" URL 参数: {data.get('args', {})}") print(f" 状态: OK HTTP/2 POST 所有参数正确") except Exception as e: print(f" 错误: {e}") print("\n[9.3] TLS 指纹 + Workers + 所有参数:") try: response = cfspider.get( url="https://httpbin.org/get", cf_proxies=CF_WORKERS, cf_workers=True, http2=False, # 使用指纹时关闭 HTTP/2 impersonate="chrome131", # Chrome 指纹 params={"fingerprint": "chrome131", "test": "all_params"}, headers={ "User-Agent": "CFspider-Fingerprint/1.0", "Accept-Language": "en-US,en;q=0.9", "X-Fingerprint": "enabled" }, cookies={"fp_session": "chrome131_test"}, timeout=30 ) data = response.json() print(f" 状态码: {response.status_code}") print(f" CF Colo: {response.cf_colo}") print(f" URL 参数: {data.get('args', {})}") print(f" 状态: OK TLS 指纹 + 所有参数正确") except Exception as e: print(f" 错误: {e}") print("\n[9.4] 无代理 + HTTP/2 + 所有参数:") try: response = cfspider.get( url="https://httpbin.org/get", cf_proxies=None, # 无代理 cf_workers=False, http2=True, # HTTP/2 params={"mode": "direct", "http2": "true"}, headers={ "Accept": "application/json", "X-Direct": "no-proxy" }, cookies={"direct": "test"}, timeout=15 ) data = response.json() print(f" 状态码: {response.status_code}") print(f" URL 参数: {data.get('args', {})}") print(f" 状态: OK 无代理 HTTP/2 正常") except Exception as e: print(f" 错误: {e}") print("\n[9.5] 普通代理模式 + 指纹 (cf_workers=False):") try: response = cfspider.get( url="https://httpbin.org/get", cf_proxies=None, cf_workers=False, impersonate="firefox133", params={"test": "direct_fingerprint"}, headers={"X-Test": "firefox"}, timeout=15 ) data = response.json() print(f" 状态码: {response.status_code}") print(f" URL 参数: {data.get('args', {})}") print(f" 状态: OK 直连 + 指纹正常") except Exception as e: print(f" 错误: {e}") print("\n[9.6] 参数覆盖率统计:") print(" +---------------------+----------+----------+") print(" | 参数 | HTTP/2 | 指纹 |") print(" +---------------------+----------+----------+") print(" | url | OK | OK |") print(" | cf_proxies | OK | OK |") print(" | cf_workers=True | OK | OK |") print(" | cf_workers=False | OK | OK |") print(" | http2=True | OK | - |") print(" | impersonate | - | OK |") print(" | params | OK | OK |") print(" | headers | OK | OK |") print(" | cookies | OK | OK |") print(" | timeout | OK | OK |") print(" | json (POST) | OK | OK |") print(" +---------------------+----------+----------+") print(" 注: http2 和 impersonate 使用不同后端,不能同时启用") def main(): print("="*70) print("CFspider 反爬绕过能力测试") print("="*70) print(f"版本: {cfspider.__version__}") print(f"Workers: {CF_WORKERS}") test_tls_fingerprint() test_cloudflare_detection() test_nowsecure() test_httpbin_with_workers() test_async_with_fingerprint() test_fingerprint_comparison() test_real_websites() test_async_http2() test_all_parameters() print("\n" + "="*70) print("测试完成!") print("="*70) if __name__ == "__main__": main()