Files
CFspider/test_antibot.py
2026-01-04 10:27:41 +08:00

539 lines
19 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
测试 cfspider 反爬绕过能力
验证 TLS 指纹模拟、Workers 代理等功能的实际效果
"""
import sys
sys.path.insert(0, '.')
import cfspider
import json
# Cloudflare Workers endpoint URL; the proxy-based tests in this file pass it
# to cfspider as the ``cf_proxies`` argument.
CF_WORKERS = "https://ip.kami666.xyz"
def test_tls_fingerprint():
    """Print the TLS fingerprint tls.browserleaks.com reports for three request modes.

    The same JSON endpoint is queried without ``impersonate``, with
    ``impersonate="chrome131"`` and with ``impersonate="safari18_0"``.
    Each probe has its own ``try``/``except``: a failing probe prints its
    error and the remaining probes still run. Nothing is returned or
    asserted; the output is meant to be read by a person.
    """
    leak_url = "https://tls.browserleaks.com/json"
    rule = "=" * 70

    print("\n" + rule)
    print("测试 1: TLS 指纹检测 (browserleaks.com)")
    print(rule)

    # 1. Plain request: no fingerprint impersonation is requested.
    print("\n[1.1] 普通请求(无指纹模拟):")
    try:
        report = cfspider.get(leak_url).json()
        ja3 = report.get('ja3_hash', 'N/A')[:20]
        print(f" JA3 Hash: {ja3}...")
        agent = report.get('user_agent', 'N/A')[:50]
        print(f" User Agent: {agent}...")
        print(" 状态: 可能被识别为 Python 爬虫")
    except Exception as exc:
        print(f" 错误: {exc}")

    # 2. Chrome 131 fingerprint.
    print("\n[1.2] Chrome 131 指纹模拟:")
    try:
        report = cfspider.get(leak_url, impersonate="chrome131").json()
        ja3 = report.get('ja3_hash', 'N/A')[:20]
        print(f" JA3 Hash: {ja3}...")
        ja4 = report.get('ja4', 'N/A')[:30]
        print(f" JA4: {ja4}...")
        akamai = report.get('akamai_hash', 'N/A')[:20]
        print(f" Akamai Hash: {akamai}...")
        print(" 状态: ✓ 模拟真实 Chrome 浏览器")
    except Exception as exc:
        print(f" 错误: {exc}")

    # 3. Safari 18 fingerprint.
    print("\n[1.3] Safari 18 指纹模拟:")
    try:
        report = cfspider.get(leak_url, impersonate="safari18_0").json()
        ja3 = report.get('ja3_hash', 'N/A')[:20]
        print(f" JA3 Hash: {ja3}...")
        ja4 = report.get('ja4', 'N/A')[:30]
        print(f" JA4: {ja4}...")
        print(" 状态: ✓ 模拟真实 Safari 浏览器")
    except Exception as exc:
        print(f" 错误: {exc}")
def test_cloudflare_detection():
    """Fetch Cloudflare's ``/cdn-cgi/trace`` page with a Chrome 131 fingerprint.

    Prints every trace line that contains one of the markers ``ip=``,
    ``loc=``, ``colo=`` or ``warp=`` (substring match), followed by the HTTP
    status code. Any exception is caught and printed instead of propagating.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("测试 2: Cloudflare 反爬检测")
    print(rule)

    # Query the Cloudflare trace endpoint.
    print("\n[2.1] Cloudflare CDN Trace:")
    markers = ('ip=', 'loc=', 'colo=', 'warp=')
    try:
        trace = cfspider.get(
            "https://www.cloudflare.com/cdn-cgi/trace",
            impersonate="chrome131",
        )
        for entry in trace.text.strip().split('\n'):
            for marker in markers:
                if marker in entry:
                    print(f" {entry}")
                    break  # print a matching line once, like any() did
        print(f" 状态码: {trace.status_code}")
        print(" 状态: ✓ 成功访问 Cloudflare")
    except Exception as exc:
        print(f" 错误: {exc}")
def test_nowsecure():
    """Request nowsecure.nl with a Chrome 131 fingerprint and classify the reply.

    For an HTTP 200 reply the body is inspected: the text "You are not a bot"
    or a case-insensitive "passed" counts as passing, a case-insensitive
    "challenge" is reported as a JavaScript challenge, and anything else is
    reported with its length. Other status codes are printed as-is.
    Exceptions are caught and printed.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("测试 3: NowSecure 反爬检测 (nowsecure.nl)")
    print(rule)

    print("\n[3.1] 使用 Chrome 131 指纹:")
    try:
        response = cfspider.get(
            "https://nowsecure.nl/",
            impersonate="chrome131",
            headers={"Accept-Language": "en-US,en;q=0.9"},
        )
        print(f" 状态码: {response.status_code}")
        if response.status_code != 200:
            print(f" 状态: HTTP {response.status_code}")
        else:
            body = response.text
            if "You are not a bot" in body or "passed" in body.lower():
                print(" 状态: ✓ 通过反爬检测!")
            elif "challenge" in body.lower():
                print(" 状态: ⚠ 需要 JavaScript 挑战")
            else:
                print(f" 状态: 已获取响应 ({len(body)} 字节)")
    except Exception as exc:
        print(f" 错误: {exc}")
def test_httpbin_with_workers():
    """Combine the Workers proxy (``cf_proxies=CF_WORKERS``) with a Chrome 131 fingerprint.

    Step 4.1 asks httpbin.org for the caller's IP and prints it together with
    the response's ``cf_colo`` attribute. Step 4.2 sends two custom headers to
    httpbin.org/headers and prints the ``User-Agent`` and ``Accept-Language``
    values that httpbin echoes back. Each step catches and prints its own
    exceptions.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("测试 4: Workers 代理 + TLS 指纹组合")
    print(rule)

    print("\n[4.1] Workers 代理 + Chrome 指纹:")
    try:
        ip_reply = cfspider.get(
            "https://httpbin.org/ip",
            cf_proxies=CF_WORKERS,
            impersonate="chrome131",
        )
        ip_info = ip_reply.json()
        print(f" 出口 IP: {ip_info.get('origin', 'N/A')}")
        print(f" CF Colo: {ip_reply.cf_colo}")
        print(" 状态: ✓ 使用 Cloudflare IP + Chrome 指纹")
    except Exception as exc:
        print(f" 错误: {exc}")

    print("\n[4.2] Workers 代理检测请求头:")
    try:
        sent_headers = {
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
        }
        header_reply = cfspider.get(
            "https://httpbin.org/headers",
            cf_proxies=CF_WORKERS,
            impersonate="chrome131",
            headers=sent_headers,
        )
        echoed = header_reply.json().get('headers', {})
        print(f" User-Agent: {echoed.get('User-Agent', 'N/A')[:60]}...")
        print(f" Accept-Language: {echoed.get('Accept-Language', 'N/A')}")
        print(" 状态: ✓ 请求头正确传递")
    except Exception as exc:
        print(f" 错误: {exc}")
def test_async_with_fingerprint():
    """Exercise cfspider's async GET through the Workers proxy.

    Step 5.1 awaits a single ``cfspider.aget`` call and prints the reported
    origin IP and the response's ``cf_colo`` attribute. Step 5.2 runs three
    requests to httpbin's one-second delay endpoint concurrently and prints
    the total elapsed time. Each step catches and prints its own exceptions.

    NOTE(review): despite the function name, no ``impersonate`` argument is
    passed to any request here.
    """
    import asyncio
    import time

    print("\n" + "=" * 70)
    print("测试 5: 异步请求功能")
    print("=" * 70)

    async def async_test():
        print("\n[5.1] 异步 GET 请求:")
        try:
            response = await cfspider.aget(
                "https://httpbin.org/ip",
                cf_proxies=CF_WORKERS,
            )
            data = response.json()
            print(f" 出口 IP: {data.get('origin', 'N/A')}")
            print(f" CF Colo: {response.cf_colo}")
            print(" 状态: ✓ 异步请求成功")
        except Exception as e:
            print(f" 错误: {e}")

        print("\n[5.2] 并发异步请求:")
        try:
            delay_url = "https://httpbin.org/delay/1"
            # perf_counter() is monotonic, so the measured interval cannot be
            # skewed by a system clock adjustment the way time.time() can.
            start = time.perf_counter()
            tasks = [
                cfspider.aget(delay_url, cf_proxies=CF_WORKERS)
                for _ in range(3)
            ]
            # The responses themselves are not inspected; only timing matters.
            await asyncio.gather(*tasks)
            elapsed = time.perf_counter() - start
            print(" 3个并发请求完成")
            print(f" 总耗时: {elapsed:.2f}s (串行约需 3s)")
            print(" 状态: ✓ 并发请求有效")
        except Exception as e:
            print(f" 错误: {e}")

    asyncio.run(async_test())
def test_fingerprint_comparison():
    """Print a table comparing the fingerprints of several impersonation profiles.

    For each profile the tls.browserleaks.com JSON endpoint is requested with
    ``impersonate`` set to the profile id. The first 16 characters of
    ``ja3_hash`` and the first 20 of ``ja4`` are printed in one row. A profile
    whose request fails gets an error row and the loop continues.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("测试 6: 不同浏览器指纹对比")
    print(rule)

    # impersonate id -> label shown in the table (insertion order is kept).
    profiles = {
        "chrome131": "Chrome 131",
        "safari18_0": "Safari 18",
        "firefox133": "Firefox 133",
        "edge101": "Edge 101",
    }

    print("\n 浏览器 | JA3 Hash (前16字符) | JA4 (前20字符)")
    print(" " + "-" * 70)

    for profile_id, label in profiles.items():
        try:
            report = cfspider.get(
                "https://tls.browserleaks.com/json",
                impersonate=profile_id,
            ).json()
            ja3_prefix = report.get('ja3_hash', 'N/A')[:16]
            ja4_prefix = report.get('ja4', 'N/A')[:20]
            print(f" {label:14} | {ja3_prefix:22} | {ja4_prefix}")
        except Exception as exc:
            print(f" {label:14} | 错误: {exc}")
def test_real_websites():
    """Fetch a handful of well-known sites with a Chrome 131 fingerprint.

    For each site the HTTP status code and the length of the response text
    are printed, followed by a success line for status 200 or a warning line
    otherwise. A failing site prints its error and the loop moves on.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("测试 7: 访问真实网站")
    print(rule)

    targets = (
        ("Google", "https://www.google.com"),
        ("Amazon", "https://www.amazon.com"),
        ("GitHub", "https://www.github.com"),
        ("Cloudflare", "https://www.cloudflare.com"),
    )

    for site_name, site_url in targets:
        print(f"\n[{site_name}]")
        try:
            # A fresh headers dict is built per request, as in the original.
            browser_headers = {
                "Accept-Language": "en-US,en;q=0.9",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            }
            page = cfspider.get(
                site_url,
                impersonate="chrome131",
                headers=browser_headers,
                timeout=15,
            )
            print(f" 状态码: {page.status_code}")
            print(f" 响应大小: {len(page.text):,} 字节")
            if page.status_code != 200:
                print(f" 状态: ⚠ HTTP {page.status_code}")
            else:
                print(" 状态: ✓ 成功访问")
        except Exception as exc:
            print(f" 错误: {exc}")
def test_async_http2():
    """Exercise cfspider's async API (GET, POST, session, concurrency, streaming).

    All requests go through the Workers proxy (``cf_proxies=CF_WORKERS``) and
    target httpbin.org. The five steps are independent: each has its own
    ``try``/``except`` that prints the error, so one failure does not stop
    the rest. Nothing is returned or asserted.

    NOTE(review): the headings mention HTTP/2, but no ``http2`` argument is
    passed here; whether HTTP/2 is actually negotiated depends on cfspider.
    """
    import asyncio
    import time

    print("\n" + "=" * 70)
    print("测试 8: 异步请求 + HTTP/2")
    print("=" * 70)

    async def async_http2_test():
        print("\n[8.1] 异步 HTTP/2 GET 请求:")
        try:
            response = await cfspider.aget(
                "https://httpbin.org/get",
                cf_proxies=CF_WORKERS,
                params={"async": "true", "http2": "enabled"},
            )
            data = response.json()
            print(f" 状态码: {response.status_code}")
            print(f" CF Colo: {response.cf_colo}")
            # http_version may not exist on the response object; fall back to N/A.
            print(f" HTTP 版本: {getattr(response, 'http_version', 'N/A')}")
            print(f" URL 参数: {data.get('args', {})}")
            print(" 状态: OK 异步 HTTP/2 请求成功")
        except Exception as e:
            print(f" 错误: {e}")

        print("\n[8.2] 异步 HTTP/2 POST 请求:")
        try:
            response = await cfspider.apost(
                "https://httpbin.org/post",
                cf_proxies=CF_WORKERS,
                json={"async": True, "http2": True, "test": "cfspider"},
                headers={"Content-Type": "application/json"},
            )
            data = response.json()
            print(f" 状态码: {response.status_code}")
            print(f" CF Colo: {response.cf_colo}")
            print(f" POST JSON: {data.get('json', {})}")
            print(" 状态: OK 异步 POST 成功")
        except Exception as e:
            print(f" 错误: {e}")

        print("\n[8.3] 异步 Session + HTTP/2:")
        try:
            async with cfspider.AsyncSession(cf_proxies=CF_WORKERS) as session:
                r1 = await session.get("https://httpbin.org/ip")
                r2 = await session.get("https://httpbin.org/headers")
                r3 = await session.post("https://httpbin.org/post", json={"session": "test"})
                print(f" 请求 1 状态码: {r1.status_code}")
                print(f" 请求 2 状态码: {r2.status_code}")
                print(f" 请求 3 状态码: {r3.status_code}")
                print(f" CF Colo: {r1.cf_colo}")
                print(" 状态: OK 异步 Session 正常")
        except Exception as e:
            print(f" 错误: {e}")

        print("\n[8.4] 异步并发 HTTP/2 请求:")
        try:
            # One name for the request count keeps the range, the messages
            # and the "saved" estimate in agreement. Each request sleeps one
            # second server-side, so serial execution needs about
            # request_count seconds.
            request_count = 5
            # perf_counter() is monotonic, unlike the wall clock time.time().
            start = time.perf_counter()
            tasks = [
                cfspider.aget(f"https://httpbin.org/delay/1?id={i}", cf_proxies=CF_WORKERS)
                for i in range(request_count)
            ]
            responses = await asyncio.gather(*tasks)
            elapsed = time.perf_counter() - start
            print(f" {request_count} 个并发请求完成")
            print(f" 总耗时: {elapsed:.2f}s (串行约需 {request_count}s+)")
            print(f" 所有状态码: {[r.status_code for r in responses]}")
            print(f" 状态: OK 并发请求有效 (节省 {request_count - elapsed:.1f}s)")
        except Exception as e:
            print(f" 错误: {e}")

        print("\n[8.5] 异步流式下载 (astream):")
        try:
            total_bytes = 0
            async with cfspider.astream("GET", "https://httpbin.org/bytes/10240", cf_proxies=CF_WORKERS) as response:
                async for chunk in response.aiter_bytes(chunk_size=1024):
                    total_bytes += len(chunk)
            print(f" 下载字节数: {total_bytes}")
            print(" 状态: OK 流式下载成功")
        except Exception as e:
            print(f" 错误: {e}")

    asyncio.run(async_http2_test())
def test_all_parameters():
    """Call ``cfspider.get`` / ``cfspider.post`` with several keyword combinations.

    Five requests are sent to httpbin.org, covering combinations of
    ``cf_proxies``, ``cf_workers``, ``http2``, ``impersonate``, ``params``,
    ``headers``, ``cookies``, ``timeout`` and ``json``. Selected fields of each
    reply are printed. Every request has its own ``try``/``except`` that
    prints the error and lets the next request run.

    NOTE(review): the table printed in step 9.6 is static text; it is printed
    unchanged even when the requests above failed.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("测试 9: .get() 方法所有参数组合")
    print(rule)

    print("\n[9.1] HTTP/2 + Workers + 所有参数:")
    try:
        options = {
            # Basic argument
            "url": "https://httpbin.org/get",
            # cfspider-specific arguments
            "cf_proxies": CF_WORKERS,   # Workers proxy
            "cf_workers": True,         # use the Workers API
            "http2": True,              # enable HTTP/2
            "impersonate": None,        # no fingerprint in HTTP/2 mode
            # requests-compatible arguments
            "params": {"key1": "value1", "key2": "value2", "chinese": "中文"},
            "headers": {
                "User-Agent": "CFspider-HTTP2-Test/1.0",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
                "Accept": "application/json",
                "X-Custom-Header": "http2-test",
            },
            "cookies": {"session": "http2_test", "user": "cfspider"},
            "timeout": 30,
        }
        reply = cfspider.get(**options)
        payload = reply.json()
        print(f" 状态码: {reply.status_code}")
        print(f" CF Colo: {reply.cf_colo}")
        print(f" CF Ray: {reply.cf_ray}")
        print(f" URL 参数: {payload.get('args', {})}")
        print(f" Headers 数量: {len(payload.get('headers', {}))}")
        print(f" Origin: {payload.get('origin', 'N/A')}")
        print(" 状态: OK HTTP/2 + 所有参数正确")
    except Exception as exc:
        print(f" 错误: {exc}")

    print("\n[9.2] HTTP/2 + Workers + POST + 所有参数:")
    try:
        options = {
            "url": "https://httpbin.org/post",
            "cf_proxies": CF_WORKERS,
            "cf_workers": True,
            "http2": True,
            "params": {"action": "http2_post"},
            "headers": {
                "Content-Type": "application/json",
                "Accept": "application/json",
                "X-Request-ID": "http2-post-test",
            },
            "cookies": {"auth": "http2_token"},
            "json": {
                "name": "cfspider",
                "version": "1.4.1",
                "http2": True,
                "features": ["proxy", "fingerprint", "async", "http2"],
            },
            "timeout": 30,
        }
        reply = cfspider.post(**options)
        payload = reply.json()
        print(f" 状态码: {reply.status_code}")
        print(f" CF Colo: {reply.cf_colo}")
        print(f" POST JSON: {payload.get('json', {})}")
        print(f" URL 参数: {payload.get('args', {})}")
        print(" 状态: OK HTTP/2 POST 所有参数正确")
    except Exception as exc:
        print(f" 错误: {exc}")

    print("\n[9.3] TLS 指纹 + Workers + 所有参数:")
    try:
        options = {
            "url": "https://httpbin.org/get",
            "cf_proxies": CF_WORKERS,
            "cf_workers": True,
            "http2": False,               # HTTP/2 is switched off when a fingerprint is used
            "impersonate": "chrome131",   # Chrome fingerprint
            "params": {"fingerprint": "chrome131", "test": "all_params"},
            "headers": {
                "User-Agent": "CFspider-Fingerprint/1.0",
                "Accept-Language": "en-US,en;q=0.9",
                "X-Fingerprint": "enabled",
            },
            "cookies": {"fp_session": "chrome131_test"},
            "timeout": 30,
        }
        reply = cfspider.get(**options)
        payload = reply.json()
        print(f" 状态码: {reply.status_code}")
        print(f" CF Colo: {reply.cf_colo}")
        print(f" URL 参数: {payload.get('args', {})}")
        print(" 状态: OK TLS 指纹 + 所有参数正确")
    except Exception as exc:
        print(f" 错误: {exc}")

    print("\n[9.4] 无代理 + HTTP/2 + 所有参数:")
    try:
        options = {
            "url": "https://httpbin.org/get",
            "cf_proxies": None,   # no proxy
            "cf_workers": False,
            "http2": True,        # HTTP/2
            "params": {"mode": "direct", "http2": "true"},
            "headers": {
                "Accept": "application/json",
                "X-Direct": "no-proxy",
            },
            "cookies": {"direct": "test"},
            "timeout": 15,
        }
        reply = cfspider.get(**options)
        payload = reply.json()
        print(f" 状态码: {reply.status_code}")
        print(f" URL 参数: {payload.get('args', {})}")
        print(" 状态: OK 无代理 HTTP/2 正常")
    except Exception as exc:
        print(f" 错误: {exc}")

    # NOTE(review): the heading below mentions proxy mode, but the request is
    # sent with cf_proxies=None and the success line reports a direct
    # connection.
    print("\n[9.5] 普通代理模式 + 指纹 (cf_workers=False):")
    try:
        options = {
            "url": "https://httpbin.org/get",
            "cf_proxies": None,
            "cf_workers": False,
            "impersonate": "firefox133",
            "params": {"test": "direct_fingerprint"},
            "headers": {"X-Test": "firefox"},
            "timeout": 15,
        }
        reply = cfspider.get(**options)
        payload = reply.json()
        print(f" 状态码: {reply.status_code}")
        print(f" URL 参数: {payload.get('args', {})}")
        print(" 状态: OK 直连 + 指纹正常")
    except Exception as exc:
        print(f" 错误: {exc}")

    print("\n[9.6] 参数覆盖率统计:")
    coverage_table = (
        " +---------------------+----------+----------+",
        " | 参数 | HTTP/2 | 指纹 |",
        " +---------------------+----------+----------+",
        " | url | OK | OK |",
        " | cf_proxies | OK | OK |",
        " | cf_workers=True | OK | OK |",
        " | cf_workers=False | OK | OK |",
        " | http2=True | OK | - |",
        " | impersonate | - | OK |",
        " | params | OK | OK |",
        " | headers | OK | OK |",
        " | cookies | OK | OK |",
        " | timeout | OK | OK |",
        " | json (POST) | OK | OK |",
        " +---------------------+----------+----------+",
        " 注: http2 和 impersonate 使用不同后端,不能同时启用",
    )
    for table_line in coverage_table:
        print(table_line)
def main():
    """Print a banner with the cfspider version and Workers URL, then run every test.

    The tests run in a fixed order. Each test function prints its own
    results, so this function only adds the opening and closing banners.
    """
    rule = "=" * 70
    print(rule)
    print("CFspider 反爬绕过能力测试")
    print(rule)
    print(f"版本: {cfspider.__version__}")
    print(f"Workers: {CF_WORKERS}")

    suite = (
        test_tls_fingerprint,
        test_cloudflare_detection,
        test_nowsecure,
        test_httpbin_with_workers,
        test_async_with_fingerprint,
        test_fingerprint_comparison,
        test_real_websites,
        test_async_http2,
        test_all_parameters,
    )
    for run_test in suite:
        run_test()

    print("\n" + rule)
    print("测试完成!")
    print(rule)


# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()