diff --git a/.gitignore b/.gitignore index 8648faa..047fd85 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ edgetunnel_proxy.py test.py test_*.py *.html + # ======================================== # 机密文件 - 绝对不上传 GitHub # ======================================== @@ -30,7 +31,7 @@ mirror/ *_mirror/ test_mirror_*/ -#混淆脚本 +# 混淆脚本 obfuscate_pages.py obfuscate_config.json advanced_obfuscate.js @@ -47,25 +48,21 @@ workers_en.js !破皮版workers_明文.js !破皮版workers_超明文.js !vless_workers.js +!爬楼梯workers.js -#示例文件 +# 示例文件 examples/ -#视频生成脚本 +# 视频生成脚本 create_video.py temp_obfuscate.js -#视频文件(排除普通版本,保留高亮模糊版本) -media/videos/1080p60/CameraFollowCursorCVScene.mp4 -# 允许提交高亮模糊版本 -!media/videos/1080p60/CameraFollowCursorCV.mp4 - -#视频文件目录 -media/images/ -media/text/ -media/videos/1080p60/partial_movie_files/ +# 视频文件目录 +media/ # 大视频文件 cfspider教程.mp4 *.mp4 -!media/videos/1080p60/CameraFollowCursorCV.mp4 + +# Remotion 视频项目 +cfspider-video/ diff --git a/README.md b/README.md index 711f044..252e72c 100644 --- a/README.md +++ b/README.md @@ -4,22 +4,39 @@ [![Python](https://img.shields.io/pypi/pyversions/cfspider)](https://pypi.org/project/cfspider/) [![License](https://img.shields.io/github/license/violettoolssite/CFspider)](LICENSE) -**v1.8.9** - 基于 VLESS 协议的免费代理 IP 池,利用 Cloudflare 全球 300+ 边缘节点作为出口,**完全隐藏 CF 特征**,支持隐身模式、TLS 指纹模拟、网页镜像和浏览器自动化。 +**v1.9.0** - 基于 Cloudflare Workers 的免费代理 IP 池,支持 **VLESS 协议**(完全隐藏特征)和 **HTTP 代理**(轻量爬虫),利用全球 300+ 边缘节点作为出口,支持隐身模式、TLS 指纹模拟、网页镜像和浏览器自动化。 --- -## v1.8.9 重大更新:一键自动部署 Workers +## v1.9.0 重大更新:双模式 Workers + 一键自动部署 > **无需手动部署!** 只需 API Token 和 Account ID,即可自动创建、部署和管理 Cloudflare Workers。 +> +> **新增双模式选择:** VLESS 模式(完全隐藏特征)或 HTTP 模式(轻量爬虫) ```python import cfspider -# 一行代码,自动部署破皮版 Workers +# 方式 1:运行时交互式选择模式 workers = cfspider.make_workers( api_token="your-api-token", account_id="your-account-id" ) +# 运行后弹出选择菜单:[1] VLESS模式 [2] HTTP模式 + +# 方式 2:代码中指定 VLESS 模式(代理软件推荐) +workers = cfspider.make_workers( + api_token="your-api-token", + 
account_id="your-account-id", + mode='vless' # 完全隐藏 CF 特征 +) + +# 方式 3:代码中指定 HTTP 模式(爬虫推荐) +workers = cfspider.make_workers( + api_token="your-api-token", + account_id="your-account-id", + mode='http' # 轻量快速,随机 UA/Referer +) # 直接使用代理 response = cfspider.get("https://httpbin.org/ip", cf_proxies=workers) @@ -30,9 +47,9 @@ print(response.json()) # 显示 Cloudflare IP | 功能 | 说明 | |------|------| +| **双模式选择** | VLESS(隐藏特征)或 HTTP(轻量爬虫),运行时选择或代码指定 | | **一键部署** | 自动创建 Workers,无需手动复制代码 | -| **破皮版内置** | 自动部署带 Nginx 伪装的反检测版本 | -| **自动重建** | Workers 失效时自动重新创建(可配置) | +| **自动重建** | Workers 失效时自动重新创建(保持相同模式) | | **环境变量** | 支持 UUID、PROXYIP、KEY 等配置 | | **自定义域名** | 支持 `my_domain` 参数自动配置域名 | @@ -147,21 +164,87 @@ print(workers.custom_url) # 自定义域名 URL > 使用 X27CN 在线工具解密破皮版加密数据,获取 VLESS 链接的完整流程演示 -### Workers 版本对比 +### Workers 双模式(v1.9.0 新增) + +CFspider 现支持两种 Workers 模式,可通过 `mode` 参数选择: + +| 模式 | 文件 | 特点 | CF特征头 | 适用场景 | +|------|------|------|----------|----------| +| **VLESS 模式** | `破皮版workers.js` | 完整代理功能 | **完全隐藏** | V2Ray/Clash 代理软件、敏感网站 | +| **HTTP 模式** | `爬楼梯workers.js` | 轻量 HTTP 代理 | 会暴露 | 普通网页爬虫、不严格检测的网站 | + +**自动部署时选择模式:** + +```python +import cfspider + +# 方式 1:运行时交互式选择(推荐新手) +workers = cfspider.make_workers( + api_token="your-api-token", + account_id="your-account-id" +) +# 运行后会弹出菜单: +# [1] VLESS 模式 (推荐) +# [2] HTTP 模式 (轻量) + +# 方式 2:代码中直接指定 VLESS 模式 +workers = cfspider.make_workers( + api_token="your-api-token", + account_id="your-account-id", + mode='vless' # 破皮版,完全隐藏 CF 特征 +) + +# 方式 3:代码中直接指定 HTTP 模式 +workers = cfspider.make_workers( + api_token="your-api-token", + account_id="your-account-id", + mode='http' # 爬楼梯版,轻量爬虫 +) +``` + +**模式详细对比:** + +| 特性 | VLESS 模式 | HTTP 模式 | +|------|-----------|-----------| +| Workers 文件 | `破皮版workers.js` | `爬楼梯workers.js` | +| Cloudflare 特征头 | 完全隐藏 | 暴露(Cf-Ray、Cf-Worker 等) | +| 代理软件支持 | 是(V2Ray、Clash) | 否 | +| 需要 UUID | 是(自动获取或手动指定) | 否 | +| 随机 User-Agent | 取决于客户端 | 是(35+ 浏览器) | +| 随机 Referer | 否 | 是(13+ 来源) | +| 随机 Accept-Language | 否 | 
是(23+ 语言) | +| 首页伪装 | Nginx 页面 | 简洁状态页 | +| 检测风险 | 低 | 中(目标网站可识别来自 CF Workers) | +| 复杂度 | 需要 UUID | 简单 | +| 推荐场景 | 代理软件、严格检测网站 | 普通爬虫、快速测试 | + +**安全提醒:** 无论选择哪种模式,都**强烈建议使用 Cloudflare 小号**部署! + +### Workers 版本对比(手动部署) | 版本 | 文件名 | 首页 | API入口 | 数据加密 | 密钥验证 | 适用场景 | |------|--------|------|---------|----------|----------|----------| -| **标准版** | `workers/workers.js` | 配置页面 | `/api/*` | 无 | 无 | 开发测试、快速部署 | -| **破皮版** | `workers/破皮版workers.js` | Nginx伪装 | `/x2727admin` | X27CN加密 | 需要密钥 | 生产环境、反检测 | -| **明文版** | `workers/破皮版workers_明文.js` | Nginx伪装 | `/x2727admin` | X27CN加密 | 需要密钥 | 调试参考、学习代码 | -| **超明文版** | `workers/破皮版workers_超明文.js` | Nginx伪装 | `/admin` | **无加密** | **无需密钥** | 快速测试、内网使用 | +| **爬楼梯版** | `workers/爬楼梯workers.js` | 状态页 | `/proxy` `/batch` | 无 | 可选TOKEN | **普通爬虫(推荐)** | +| **破皮版** | `workers/破皮版workers.js` | Nginx伪装 | `/x2727admin` | X27CN加密 | 需要密钥 | **代理软件(推荐)** | +| **标准版** | `workers/workers.js` | 配置页面 | `/api/*` | 无 | 无 | 开发测试 | +| **明文版** | `workers/破皮版workers_明文.js` | Nginx伪装 | `/x2727admin` | X27CN加密 | 需要密钥 | 代码学习 | +| **超明文版** | `workers/破皮版workers_超明文.js` | Nginx伪装 | `/admin` | 无 | 无 | 快速测试 | **版本选择建议:** +- **普通爬虫**:使用 `workers/爬楼梯workers.js`,轻量快速,随机 UA/Referer +- **代理软件**:使用 `workers/破皮版workers.js`,VLESS 协议,隐藏 CF 特征 - **开发测试**:使用 `workers/workers.js` 标准版,配置页面方便调试 -- **生产部署**:使用 `workers/破皮版workers.js`,混淆代码 + 加密响应,降低被检测风险 -- **内网/私有环境**:使用 `workers/破皮版workers_超明文.js`,无加密、无密钥,直接返回JSON -- **代码学习**:参考 `workers/破皮版workers_明文.js`,可读的完整代码实现 +- **代码学习**:参考 `workers/破皮版workers_明文.js`,可读的完整代码 + +**爬楼梯版路由:** +``` +/ → 状态页 +/proxy?url=TARGET → 代理请求 +/batch → 批量请求(POST JSON) +/ip → 查看出口 IP +/health → 健康检查 +``` **超明文版路由:** ``` diff --git a/cfspider/__init__.py b/cfspider/__init__.py index 8c7d710..db8c397 100644 --- a/cfspider/__init__.py +++ b/cfspider/__init__.py @@ -57,7 +57,7 @@ UUID 使用说明: ... 
def _normalize_workers_url(address):
    """Return a Workers address as an absolute URL without a trailing slash.

    Args:
        address: Workers address, either a full ``http(s)://`` URL or a bare
            host name such as ``"my-proxy.workers.dev"``.

    Returns:
        The address with an ``https://`` scheme added when none is present
        and any trailing ``/`` removed.
    """
    # Test for a real scheme prefix: a plain ``startswith('http')`` would
    # wrongly treat hosts such as "httpbin-proxy.workers.dev" as absolute.
    if not address.lower().startswith(('http://', 'https://')):
        address = f'https://{address}'
    return address.rstrip('/')


def _is_http_workers_reply(endpoint, status_code, payload):
    """Tell whether a probe reply looks like the HTTP-proxy Workers script.

    Args:
        endpoint: Probed path, ``'/health'`` or ``'/proxy'``.
        status_code: HTTP status of the reply (``None`` if the probe failed).
        payload: Decoded JSON body, or ``None`` when it was not JSON.

    Returns:
        ``True`` when the reply matches what the HTTP-proxy script sends.
    """
    if not isinstance(payload, dict):
        return False
    if endpoint == '/health':
        if status_code == 200 and payload.get('status') == 'ok':
            return True
        # With TOKEN configured the HTTP-proxy script answers every path,
        # /health included, with 401 {"error": "Unauthorized"}.
        return status_code == 401 and payload.get('error') == 'Unauthorized'
    if endpoint == '/proxy':
        # Without a target the script replies 400 "Missing url parameter".
        return (status_code == 400
                and 'url' in str(payload.get('error', '')).lower())
    return False


def _probe_workers_endpoint(base_url, endpoint, timeout=5):
    """GET ``base_url + endpoint`` and return ``(status_code, json_or_None)``.

    Network failures yield ``(None, None)``; a non-JSON body yields
    ``(status_code, None)``. Detection is best-effort, so nothing is raised
    for these expected failure modes.
    """
    try:
        response = requests.get(f'{base_url}{endpoint}', timeout=timeout)
    except requests.RequestException:
        return None, None
    try:
        payload = response.json()
    except ValueError:
        payload = None
    return response.status_code, payload


def _detect_workers_type(cf_proxies):
    """Detect which kind of Workers script is deployed at *cf_proxies*.

    Probes ``/health`` and then ``/proxy``, two endpoints served by the
    HTTP-proxy Workers script, and falls back to VLESS when neither answers
    in the expected way.

    Args:
        cf_proxies: Workers address (URL or bare host name).

    Returns:
        ``'http'`` for the HTTP-proxy Workers, ``'vless'`` otherwise.
    """
    # NOTE(review): this performs up to two extra HTTP requests (5 s timeout
    # each) on every call; consider caching the result per address.
    base_url = _normalize_workers_url(cf_proxies)
    for endpoint in ('/health', '/proxy'):
        status_code, payload = _probe_workers_endpoint(base_url, endpoint)
        if _is_http_workers_reply(endpoint, status_code, payload):
            return 'http'

    # Default to VLESS.
    return 'vless'


def _build_http_proxy_payload(method, url, headers=None, data=None,
                              json_data=None, params=None, cookies=None):
    """Build the JSON document posted to the Workers ``/proxy`` endpoint.

    Args:
        method: HTTP method for the target request (upper-cased here).
        url: Target URL.
        headers: Headers for the target request; copied, never mutated.
        data: Request body. ``str`` is sent as-is, ``bytes`` are decoded as
            UTF-8, a dict or sequence of pairs is form-encoded.
        json_data: Object serialised as a JSON body when *data* is empty.
        params: Extra query parameters; they override same-named parameters
            already present in *url*. ``None`` values are skipped.
        cookies: Mapping rendered into a ``Cookie`` header.

    Returns:
        A dict with ``url``, ``method``, ``headers`` and, when a body was
        supplied, ``body``.
    """
    import json
    from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

    out_headers = dict(headers or {})

    def _has_header(name):
        wanted = name.lower()
        return any(str(key).lower() == wanted for key in out_headers)

    payload = {
        'url': url,
        'method': method.upper(),
        'headers': out_headers,
    }

    if data:
        if isinstance(data, (bytes, bytearray)):
            # The proxy protocol is JSON, so the body must be text.
            payload['body'] = bytes(data).decode('utf-8')
        elif isinstance(data, (dict, list, tuple)):
            payload['body'] = urlencode(data, doseq=True)
            if not _has_header('Content-Type'):
                out_headers['Content-Type'] = (
                    'application/x-www-form-urlencoded')
        else:
            payload['body'] = data
    elif json_data is not None:
        # ``is not None`` keeps falsy-but-valid documents such as {} or [].
        payload['body'] = json.dumps(json_data)
        if not _has_header('Content-Type'):
            out_headers['Content-Type'] = 'application/json'

    if params:
        parsed = urlparse(url)
        # keep_blank_values: do not silently drop "?flag=" from the URL.
        query = parse_qs(parsed.query, keep_blank_values=True)
        extra = params if isinstance(params, dict) else dict(params)
        query.update({k: v for k, v in extra.items() if v is not None})
        payload['url'] = urlunparse(
            parsed._replace(query=urlencode(query, doseq=True)))

    if cookies:
        out_headers['Cookie'] = '; '.join(
            f'{k}={v}' for k, v in cookies.items())

    return payload


def _request_http_proxy(method, url, http_proxy,
                        http2=False, impersonate=None,
                        map_output=False, map_file="cfspider_map.html",
                        stealth=False, stealth_browser='chrome', **kwargs):
    """Send a request through an HTTP-proxy Workers deployment.

    The target request is described as JSON and POSTed to the Workers
    ``/proxy`` endpoint; no VLESS tunnel is involved.

    Args:
        method: HTTP method for the target request.
        url: Target URL.
        http_proxy: Workers address (URL or bare host name).
        http2: Accepted for signature parity with ``request()``; not used.
        impersonate: Accepted for signature parity; not used.
        map_output: Forwarded to ``_handle_map_output``.
        map_file: Forwarded to ``_handle_map_output``.
        stealth: When true, browser-like headers from
            ``get_stealth_headers`` are used as defaults under the caller's
            own headers.
        stealth_browser: Browser profile passed to ``get_stealth_headers``.
        **kwargs: ``headers``, ``data``, ``json``, ``params``, ``cookies``,
            ``timeout`` (default 30) and ``token`` are consumed here; any
            remaining keyword is passed to ``requests.post``.

    Returns:
        A ``CFSpiderResponse`` wrapping the Workers reply.
    """
    start_time = time.time()
    proxy_url = f'{_normalize_workers_url(http_proxy)}/proxy'

    # Copy so the caller's dict is never modified; tolerate headers=None.
    headers = dict(kwargs.pop('headers', None) or {})
    if stealth:
        from .stealth import get_stealth_headers
        merged = dict(get_stealth_headers(stealth_browser))
        merged.update(headers)  # explicit headers win over stealth defaults
        headers = merged

    data = kwargs.pop('data', None)
    json_data = kwargs.pop('json', None)
    params = kwargs.pop('params', None)
    cookies = kwargs.pop('cookies', None)
    timeout = kwargs.pop('timeout', 30)
    token = kwargs.pop('token', None)

    proxy_body = _build_http_proxy_payload(
        method, url, headers=headers, data=data, json_data=json_data,
        params=params, cookies=cookies,
    )

    proxy_headers = {'Content-Type': 'application/json'}
    if token:
        proxy_headers['Authorization'] = f'Bearer {token}'

    response = requests.post(
        proxy_url,
        json=proxy_body,
        headers=proxy_headers,
        timeout=timeout,
        **kwargs
    )

    resp = CFSpiderResponse(response)
    _handle_map_output(resp, url, start_time, map_output, map_file)
    return resp
Workers(自动检测,HTTP 代理模式) >>> response = cfspider.get( ... "https://httpbin.org/ip", - ... cf_proxies="https://cfspider.violetqqcom.workers.dev" + ... cf_proxies="https://my-proxy.workers.dev" ... ) >>> - >>> # 使用第二层代理(通过 Workers 连接到日本代理) + >>> # 使用 VLESS Workers(自动检测) >>> response = cfspider.get( ... "https://httpbin.org/ip", - ... cf_proxies="https://cfspider.violetqqcom.workers.dev", - ... two_proxy="us.cliproxy.io:3010:username:password" + ... cf_proxies="https://cfspider.workers.dev" ... ) """ return request("GET", url, cf_proxies=cf_proxies, uuid=uuid, diff --git a/cfspider/workers/爬楼梯workers.js b/cfspider/workers/爬楼梯workers.js new file mode 100644 index 0000000..17e9bb6 --- /dev/null +++ b/cfspider/workers/爬楼梯workers.js @@ -0,0 +1,509 @@ +/** + * 爬楼梯 Workers - CFspider 专用爬虫代理 + * + * 反检测特性: + * - 随机 User-Agent(50+ 种真实浏览器指纹) + * - 随机 Accept-Language(多国语言) + * - 完整浏览器指纹头(Sec-CH-UA, Sec-Fetch-*) + * - 自动生成合理的 Referer + * - 模拟真实浏览器 Cookie 行为 + * - 随机请求延迟(可选) + * - 动态 IP 切换 + */ + +export default { + async fetch(request, env) { + const url = new URL(request.url); + const path = url.pathname; + + if (request.method === 'OPTIONS') { + return corsResponse(); + } + + // 令牌验证 + const token = env.TOKEN || ''; + if (token) { + const auth = request.headers.get('Authorization') || url.searchParams.get('token') || ''; + if (auth !== `Bearer ${token}` && auth !== token) { + return json({ error: 'Unauthorized' }, 401); + } + } + + switch (path) { + case '/': return homePage(); + case '/proxy': return handleProxy(request, url); + case '/batch': return handleBatch(request); + case '/ip': return handleIP(request); + case '/health': return json({ status: 'ok', timestamp: Date.now() }); + default: return json({ error: 'Not Found' }, 404); + } + }, +}; + +// ============== 反检测配置 ============== + +// 50+ 真实浏览器 User-Agent +const USER_AGENTS = [ + // Chrome Windows (最新版本) + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 
Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36', + // Chrome Mac + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + // Chrome Linux + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + // Firefox Windows + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0', + // Firefox Mac + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:122.0) Gecko/20100101 Firefox/122.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14.2; rv:123.0) Gecko/20100101 Firefox/123.0', + // Firefox Linux + 'Mozilla/5.0 (X11; Linux x86_64; rv:123.0) Gecko/20100101 Firefox/123.0', + 'Mozilla/5.0 (X11; Ubuntu; 
Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0', + // Safari + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15', + // Edge + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0', + // Opera + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0', + // Brave + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Brave/122', + // Vivaldi + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Vivaldi/6.5.3206.50', + // Mobile Chrome + 'Mozilla/5.0 (Linux; Android 14; SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36', + // Mobile Safari + 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', + 'Mozilla/5.0 (iPad; CPU OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', +]; + +// 
Accept-Language 池(按地区分布) +const ACCEPT_LANGUAGES = [ + 'en-US,en;q=0.9', + 'en-GB,en;q=0.9,en-US;q=0.8', + 'en-CA,en;q=0.9,en-US;q=0.8', + 'en-AU,en;q=0.9,en-US;q=0.8', + 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7', + 'zh-TW,zh;q=0.9,en;q=0.8', + 'zh-HK,zh;q=0.9,en;q=0.8', + 'ja-JP,ja;q=0.9,en;q=0.8,en-US;q=0.7', + 'ko-KR,ko;q=0.9,en;q=0.8,en-US;q=0.7', + 'de-DE,de;q=0.9,en;q=0.8,en-US;q=0.7', + 'fr-FR,fr;q=0.9,en;q=0.8,en-US;q=0.7', + 'es-ES,es;q=0.9,en;q=0.8,en-US;q=0.7', + 'pt-BR,pt;q=0.9,en;q=0.8,en-US;q=0.7', + 'it-IT,it;q=0.9,en;q=0.8,en-US;q=0.7', + 'ru-RU,ru;q=0.9,en;q=0.8,en-US;q=0.7', + 'nl-NL,nl;q=0.9,en;q=0.8', + 'pl-PL,pl;q=0.9,en;q=0.8', + 'tr-TR,tr;q=0.9,en;q=0.8', + 'th-TH,th;q=0.9,en;q=0.8', + 'vi-VN,vi;q=0.9,en;q=0.8', + 'id-ID,id;q=0.9,en;q=0.8', + 'ar-SA,ar;q=0.9,en;q=0.8', + 'hi-IN,hi;q=0.9,en;q=0.8', +]; + +// 常见 Referer 来源 +const REFERERS = [ + 'https://www.google.com/', + 'https://www.google.com/search?q=', + 'https://www.bing.com/', + 'https://www.bing.com/search?q=', + 'https://duckduckgo.com/', + 'https://www.baidu.com/', + 'https://search.yahoo.com/', + 'https://www.facebook.com/', + 'https://twitter.com/', + 'https://www.linkedin.com/', + 'https://www.reddit.com/', + 'https://news.ycombinator.com/', + '', // 有时候没有 Referer 更自然 +]; + +// 屏幕分辨率(用于某些需要的场景) +const SCREEN_RESOLUTIONS = [ + { width: 1920, height: 1080 }, + { width: 2560, height: 1440 }, + { width: 1366, height: 768 }, + { width: 1536, height: 864 }, + { width: 1440, height: 900 }, + { width: 1680, height: 1050 }, + { width: 2560, height: 1600 }, + { width: 3840, height: 2160 }, +]; + +// 时区偏移 +const TIMEZONES = [ + 'America/New_York', + 'America/Los_Angeles', + 'America/Chicago', + 'Europe/London', + 'Europe/Paris', + 'Europe/Berlin', + 'Asia/Tokyo', + 'Asia/Shanghai', + 'Asia/Singapore', + 'Australia/Sydney', +]; + +// ============== 工具函数 ============== + +function rand(arr) { + return arr[Math.floor(Math.random() * arr.length)]; +} + +function randInt(min, max) { + return 
Math.floor(Math.random() * (max - min + 1)) + min; +} + +// 生成完整的浏览器指纹头 +function generateBrowserFingerprint(targetUrl) { + const ua = rand(USER_AGENTS); + const isChrome = ua.includes('Chrome') && !ua.includes('Edg') && !ua.includes('OPR'); + const isFirefox = ua.includes('Firefox'); + const isSafari = ua.includes('Safari') && !ua.includes('Chrome'); + const isMobile = ua.includes('Mobile') || ua.includes('Android') || ua.includes('iPhone'); + + const headers = { + 'User-Agent': ua, + 'Accept': isMobile + ? 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-Language': rand(ACCEPT_LANGUAGES), + 'Accept-Encoding': 'gzip, deflate, br', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + }; + + // Chrome/Edge/Opera 特有的 Client Hints + if (isChrome || ua.includes('Edg') || ua.includes('OPR')) { + const majorVersion = parseInt(ua.match(/Chrome\/(\d+)/)?.[1] || '122'); + const platform = ua.includes('Windows') ? 'Windows' : ua.includes('Mac') ? 'macOS' : 'Linux'; + + headers['Sec-CH-UA'] = `"Chromium";v="${majorVersion}", "Not(A:Brand";v="24", "Google Chrome";v="${majorVersion}"`; + headers['Sec-CH-UA-Mobile'] = isMobile ? '?1' : '?0'; + headers['Sec-CH-UA-Platform'] = `"${platform}"`; + headers['Sec-Fetch-Dest'] = 'document'; + headers['Sec-Fetch-Mode'] = 'navigate'; + headers['Sec-Fetch-Site'] = 'none'; + headers['Sec-Fetch-User'] = '?1'; + } + + // Firefox 特有头 + if (isFirefox) { + headers['DNT'] = Math.random() > 0.5 ? 
// Pause for a random duration between `min` and `max` milliseconds
// (both inclusive, as produced by randInt).
async function humanDelay(min = 100, max = 500) {
  const delay = randInt(min, max);
  await new Promise((resolve) => setTimeout(resolve, delay));
}

// ============== Request handling ==============

/**
 * Proxy a single request.
 *
 * GET  /proxy?url=...&method=...&headers=<json>&delay=true&raw=true
 * POST /proxy with JSON { url, method, headers, body, delay, raw }
 *
 * Unless `raw` is set, generated browser-fingerprint headers are applied
 * first and caller-supplied headers override them. With `?format=json` the
 * upstream response is wrapped in a JSON envelope; otherwise the upstream
 * body is streamed back with the upstream headers exposed as `X-Original-*`.
 */
async function handleProxy(request, requestUrl) {
  let targetUrl, method, headers, body, options;

  if (request.method === 'GET') {
    targetUrl = requestUrl.searchParams.get('url');
    method = requestUrl.searchParams.get('method') || 'GET';
    const headersParam = requestUrl.searchParams.get('headers');
    try {
      headers = headersParam ? JSON.parse(headersParam) : {};
    } catch (e) {
      // A malformed `headers` value is a client error, not a Worker crash.
      return json({ error: 'Invalid headers parameter' }, 400);
    }
    options = {
      delay: requestUrl.searchParams.get('delay') === 'true',
      noFingerprint: requestUrl.searchParams.get('raw') === 'true',
    };
  } else {
    try {
      const data = await request.json();
      targetUrl = data.url;
      method = data.method || 'GET';
      headers = data.headers || {};
      body = data.body;
      options = {
        delay: data.delay || false,
        noFingerprint: data.raw || false,
      };
    } catch (e) {
      return json({ error: 'Invalid JSON body' }, 400);
    }
  }

  if (!targetUrl) {
    return json({ error: 'Missing url parameter' }, 400);
  }

  let parsedTarget;
  try {
    parsedTarget = new URL(targetUrl);
  } catch (e) {
    return json({ error: 'Invalid URL' }, 400);
  }
  // Reject non-HTTP schemes up front instead of failing later with a 502.
  if (parsedTarget.protocol !== 'http:' && parsedTarget.protocol !== 'https:') {
    return json({ error: 'Invalid URL' }, 400);
  }

  if (headers === null || typeof headers !== 'object' || Array.isArray(headers)) {
    return json({ error: 'Invalid headers parameter' }, 400);
  }

  // Optional human-like delay.
  if (options.delay) {
    await humanDelay(200, 800);
  }

  const fetchHeaders = new Headers();

  // Generated fingerprint first (unless raw mode) ...
  if (!options.noFingerprint) {
    const fingerprint = generateBrowserFingerprint(targetUrl);
    for (const [key, value] of Object.entries(fingerprint)) {
      fetchHeaders.set(key, value);
    }
  }

  // ... then caller-supplied headers override it.
  for (const [key, value] of Object.entries(headers)) {
    fetchHeaders.set(key, value);
  }

  const upperMethod = String(method).toUpperCase();
  // fetch() throws when a GET/HEAD request carries a body, so drop it there.
  const sendBody = Boolean(body) && upperMethod !== 'GET' && upperMethod !== 'HEAD';

  const startTime = Date.now();
  try {
    const response = await fetch(targetUrl, {
      method: upperMethod,
      headers: fetchHeaders,
      body: sendBody ? (typeof body === 'string' ? body : JSON.stringify(body)) : undefined,
      redirect: 'follow',
    });

    const responseHeaders = new Headers();
    responseHeaders.set('Access-Control-Allow-Origin', '*');
    responseHeaders.set('X-Proxy-Time', `${Date.now() - startTime}ms`);
    responseHeaders.set('X-Proxy-Status', response.status.toString());

    // Forward the upstream Content-Type so clients can decode the body.
    const contentType = response.headers.get('content-type');
    if (contentType) {
      responseHeaders.set('Content-Type', contentType);
    }

    const skipped = ['content-encoding', 'content-length', 'transfer-encoding'];
    for (const [key, value] of response.headers.entries()) {
      if (!skipped.includes(key.toLowerCase())) {
        responseHeaders.set(`X-Original-${key}`, value);
      }
    }

    const format = requestUrl.searchParams.get('format');
    if (format === 'json') {
      const text = await response.text();
      return json({
        status: response.status,
        statusText: response.statusText,
        headers: Object.fromEntries(response.headers.entries()),
        body: text,
        time: Date.now() - startTime,
        fingerprint: options.noFingerprint ? 'disabled' : 'enabled',
      });
    }

    return new Response(response.body, {
      status: response.status,
      statusText: response.statusText,
      headers: responseHeaders,
    });
  } catch (error) {
    return json({ error: 'Proxy request failed', message: error.message, url: targetUrl }, 502);
  }
}

/**
 * Fetch up to 20 URLs in groups of `concurrency` (1-10, default 5).
 *
 * POST /batch with JSON { urls: [string | { url, method, headers }], delay, concurrency }
 * Each result carries the URL, the status and the first 10000 characters of
 * the body, or an error message when the fetch failed.
 */
async function handleBatch(request) {
  if (request.method !== 'POST') {
    return json({ error: 'Method not allowed, use POST' }, 405);
  }

  let urls, options;
  try {
    const data = await request.json();
    urls = data.urls;
    // Clamp to a positive integer: a negative or non-numeric step would make
    // the batching loop below never terminate or skip every URL.
    const requested = Number.parseInt(data.concurrency, 10);
    options = {
      delay: data.delay || false,
      concurrency: requested > 0 ? Math.min(requested, 10) : 5,
    };
  } catch (e) {
    return json({ error: 'Invalid JSON body' }, 400);
  }

  if (!Array.isArray(urls) || urls.length === 0) {
    return json({ error: 'Missing urls array' }, 400);
  }

  if (urls.length > 20) {
    return json({ error: 'Maximum 20 URLs per batch' }, 400);
  }

  const startTime = Date.now();

  // Run the URLs group by group; order of `results` matches `urls`.
  const results = [];
  for (let i = 0; i < urls.length; i += options.concurrency) {
    const batch = urls.slice(i, i + options.concurrency);

    if (options.delay && i > 0) {
      await humanDelay(500, 1500);
    }

    const batchResults = await Promise.allSettled(
      batch.map(async (item) => {
        const isObject = typeof item === 'object' && item !== null;
        const url = isObject ? item.url : item;
        const method = (isObject && item.method) || 'GET';
        const userHeaders = (isObject && item.headers) || {};

        const fetchHeaders = new Headers();
        const fingerprint = generateBrowserFingerprint(url);
        for (const [key, value] of Object.entries(fingerprint)) {
          fetchHeaders.set(key, value);
        }
        for (const [key, value] of Object.entries(userHeaders)) {
          fetchHeaders.set(key, value);
        }

        const response = await fetch(url, { method, headers: fetchHeaders });
        const text = await response.text();

        return {
          url,
          status: response.status,
          body: text.slice(0, 10000),
        };
      })
    );

    results.push(...batchResults);
  }

  return json({
    total: urls.length,
    time: Date.now() - startTime,
    results: results.map((r, i) => {
      if (r.status === 'fulfilled') return r.value;
      return { url: urls[i]?.url || urls[i], error: r.reason?.message || 'Failed' };
    }),
  });
}

// Report the egress IP (as seen by httpbin.org) plus the edge location
// fields exposed on `request.cf`.
async function handleIP(request) {
  try {
    const fetchHeaders = new Headers();
    const fingerprint = generateBrowserFingerprint('https://httpbin.org/ip');
    for (const [key, value] of Object.entries(fingerprint)) {
      fetchHeaders.set(key, value);
    }

    const response = await fetch('https://httpbin.org/ip', { headers: fetchHeaders });
    const data = await response.json();

    return json({
      ip: data.origin,
      edge: {
        colo: request.cf?.colo || 'unknown',
        country: request.cf?.country || 'unknown',
        city: request.cf?.city || 'unknown',
      },
      timestamp: Date.now(),
    });
  } catch (error) {
    return json({ error: 'Failed to get IP', message: error.message }, 500);
  }
}
'Access-Control-Allow-Origin': '*', + }, + }); +} + +function corsResponse() { + return new Response(null, { + headers: { + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Methods': 'GET, POST, PUT, DELETE, PATCH, OPTIONS', + 'Access-Control-Allow-Headers': '*', + 'Access-Control-Max-Age': '86400', + }, + }); +} + +function homePage() { + return new Response(` + + + + Proxy Service + + + +
+

Proxy Service

+

A lightweight HTTP proxy service powered by edge network.

+
GET /proxy?url= - Proxy a URL
+
POST /batch - Batch proxy requests
+
GET /ip - Get current edge IP
+
GET /health - Health check
+
def _get_workers_script(mode: str = 'vless') -> str:
    """Return the source code of the Workers script for *mode*.

    The script file is searched for, in order, in the package's ``workers``
    directory, the ``workers`` directory one level above the package, a
    ``workers`` directory under the current working directory, and the
    current working directory itself. If no file is found, a minimal
    embedded script is returned instead.

    Args:
        mode: ``'http'`` selects the HTTP-proxy script; any other value
            selects the VLESS script.

    Returns:
        The JavaScript source as text.
    """
    is_http = mode == 'http'
    script_name = "爬楼梯workers.js" if is_http else "破皮版workers.js"

    package_dir = Path(__file__).parent
    candidates = (
        package_dir / "workers" / script_name,
        package_dir.parent / "workers" / script_name,
        Path("workers") / script_name,
        Path(script_name),
    )

    for candidate in candidates:
        # is_file(), not exists(): a directory with that name must be skipped
        # rather than make read_text() raise.
        if candidate.is_file():
            return candidate.read_text(encoding='utf-8')

    # No file found: fall back to the embedded simplified script.
    return _FALLBACK_HTTP_SCRIPT if is_http else _FALLBACK_VLESS_SCRIPT
def _select_mode_interactive() -> str:
    """Ask on the console which Workers deployment mode to use.

    Prints a menu and reads answers until one is recognised. Accepted
    answers are ``1`` or ``vless`` for VLESS mode, ``2`` or ``http`` for
    HTTP mode, and an empty line for the default (VLESS). Answers are
    NFKC-normalised first, so full-width digits typed through a CJK input
    method are accepted too.

    Returns:
        ``'vless'`` or ``'http'``. Ctrl-C or end of input selects ``'vless'``.
    """
    import unicodedata

    print("\n" + "=" * 50)
    print("请选择 Workers 部署模式:")
    print("=" * 50)
    print("\n [1] VLESS 模式 (推荐)")
    print(" - 支持 V2Ray/Clash 等代理软件")
    print(" - 完全隐藏 Cloudflare 特征头")
    print(" - 适合需要完整代理功能的场景")
    print(" - 使用 Nginx 伪装首页")
    print("\n [2] HTTP 模式 (轻量)")
    print(" - 轻量级 HTTP 代理")
    print(" - 适合普通网页爬虫")
    print(" - 随机 User-Agent/Referer")
    print(" - 注意:会暴露 Cloudflare 特征头")
    print("\n" + "-" * 50)

    choices = {
        '': 'vless',
        '1': 'vless',
        'vless': 'vless',
        '2': 'http',
        'http': 'http',
    }
    confirmations = {
        'vless': "\n已选择: VLESS 模式\n",
        'http': "\n已选择: HTTP 模式\n",
    }

    while True:
        # Keep the try body minimal: only input() can raise these.
        try:
            raw = input("请输入选项 [1/2] (默认 1): ")
        except (KeyboardInterrupt, EOFError):
            print("\n\n已取消,使用默认 VLESS 模式\n")
            return 'vless'

        answer = unicodedata.normalize('NFKC', raw).strip().lower()
        selected = choices.get(answer)
        if selected is None:
            print("无效选项,请输入 1 或 2")
            continue

        print(confirmations[selected])
        return selected
Response(null,{status:101,webSocket:t})}return"/proxy"===n.pathname?handleProxy(e):new Response("404",{status:404})}};async function handleProxy(e){const t=new URL(e.url).searchParams.get("url");if(!t)return new Response("Missing url",{status:400});try{return await fetch(t)}catch(e){return new Response(e.message,{status:500})}}''' + +# 备用 HTTP 代理脚本(当找不到爬楼梯 Workers 时使用) +_FALLBACK_HTTP_SCRIPT = '''const UA_LIST=["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"];export default{async fetch(e,t){const n=new URL(e.url);if("/health"===n.pathname)return new Response("ok");if("/ip"===n.pathname){const e=await fetch("https://api.ipify.org?format=json");return new Response(await e.text(),{headers:{"Content-Type":"application/json"}})}if("/proxy"===n.pathname){const r=n.searchParams.get("url");if(!r)return new Response("Missing url",{status:400});const o={"User-Agent":UA_LIST[Math.floor(Math.random()*UA_LIST.length)],"Accept":"text/html,application/xhtml+xml","Accept-Language":"en-US,en;q=0.9"};try{const t=await fetch(r,{method:e.method,headers:o});return new Response(await t.text(),{headers:{"Content-Type":t.headers.get("Content-Type")||"text/html"}})}catch(e){return new Response(e.message,{status:500})}}return new Response("CFspider HTTP Proxy\\n/proxy?url=TARGET\\n/ip\\n/health")}};''' # Workers 代码(运行时加载) @@ -77,6 +128,7 @@ class WorkersManager: 自动创建和管理 Workers,当失效时自动重建。 可以直接作为 cf_proxies 参数使用。 + 支持两种模式:VLESS(完整代理)和 HTTP(轻量爬虫)。 """ def __init__( @@ -87,7 +139,8 @@ class WorkersManager: auto_recreate: bool = True, check_interval: int = 60, env_vars: Optional[dict] = None, - my_domain: Optional[str] = None + my_domain: Optional[str] = None, + mode: Optional[str] = None ): """ 初始化 Workers 管理器 @@ -105,6 +158,10 @@ class WorkersManager: - SOCKS5: SOCKS5 代理地址 示例: {"UUID": "your-uuid", "PROXYIP": "1.2.3.4"} + mode: 部署模式 + - 'vless': VLESS 模式(破皮版,完全隐藏 CF 特征,支持代理软件) + - 'http': HTTP 
模式(爬楼梯版,轻量爬虫代理,会暴露 CF 特征头) + - None: 运行时交互式选择 """ self.api_token = api_token self.account_id = account_id @@ -114,6 +171,15 @@ class WorkersManager: self.env_vars = env_vars or {} self.my_domain = my_domain + # 确定部署模式 + if mode is None: + self.mode = _select_mode_interactive() + else: + self.mode = mode.lower() + if self.mode not in ('vless', 'http'): + print(f"[CFspider] 无效模式 '{mode}',使用默认 'vless' 模式") + self.mode = 'vless' + self._url: Optional[str] = None self._custom_url: Optional[str] = None self._uuid: Optional[str] = None @@ -152,8 +218,10 @@ class WorkersManager: """创建或更新 Workers""" api_url = f"https://api.cloudflare.com/client/v4/accounts/{self.account_id}/workers/scripts/{self.worker_name}" - # 获取 Workers 脚本 - script = _get_workers_script() + # 获取 Workers 脚本(根据模式选择) + script = _get_workers_script(self.mode) + mode_name = "VLESS 破皮版" if self.mode == 'vless' else "HTTP 爬楼梯版" + print(f"[CFspider] 正在部署 {mode_name} Workers...") try: # 如果有环境变量,使用 multipart/form-data 格式 @@ -392,7 +460,13 @@ class WorkersManager: return False try: - response = requests.get(f"{self._url}/api/config", timeout=10) + # 根据模式使用不同的健康检查端点 + if self.mode == 'http': + endpoint = "/health" + else: + endpoint = "/api/config" + + response = requests.get(f"{self._url}{endpoint}", timeout=10) return response.ok except: return False @@ -480,7 +554,9 @@ def make_workers( accesskey: Optional[str] = None, two_proxy: Optional[str] = None, # 自定义域名 - my_domain: Optional[str] = None + my_domain: Optional[str] = None, + # 部署模式 + mode: Optional[str] = None ) -> WorkersManager: """ 创建 Cloudflare Workers 并返回管理器 @@ -509,6 +585,18 @@ def make_workers( accesskey: 访问密钥(破皮版用) two_proxy: 双层代理地址(格式: host:port:user:pass) my_domain: 自定义域名(如 proxy.example.com,域名需已在 Cloudflare) + mode: 部署模式(重要!) 
+ - 'vless': VLESS 模式(推荐) + * 部署破皮版 Workers + * 完全隐藏 Cloudflare 特征头 + * 支持 V2Ray/Clash 等代理软件 + * 适合需要完整代理功能的场景 + - 'http': HTTP 模式(轻量) + * 部署爬楼梯 Workers + * 轻量级 HTTP 代理 + * 适合普通网页爬虫 + * 注意:会暴露 Cloudflare 特征头(Cf-Ray、Cf-Worker 等) + - None: 运行时弹出交互式选择菜单 Returns: WorkersManager: Workers 管理器,可直接用于 cf_proxies @@ -516,47 +604,47 @@ def make_workers( Example: >>> import cfspider >>> - >>> # 基本用法 - >>> workers = cfspider.make_workers( - ... api_token="your-api-token", - ... account_id="your-account-id" - ... ) - >>> - >>> # 指定 UUID(固定 IP) + >>> # VLESS 模式(推荐,隐藏特征) >>> workers = cfspider.make_workers( ... api_token="your-api-token", ... account_id="your-account-id", + ... mode='vless' # 或省略,运行时选择 + ... ) + >>> + >>> # HTTP 模式(轻量爬虫) + >>> workers = cfspider.make_workers( + ... api_token="your-api-token", + ... account_id="your-account-id", + ... mode='http' + ... ) + >>> + >>> # 指定 UUID(VLESS 模式) + >>> workers = cfspider.make_workers( + ... api_token="your-api-token", + ... account_id="your-account-id", + ... mode='vless', ... uuid="your-custom-uuid" ... ) >>> - >>> # 使用代理 IP - >>> workers = cfspider.make_workers( - ... api_token="your-api-token", - ... account_id="your-account-id", - ... proxyip="proxyip.fxxk.dedyn.io" - ... ) - >>> - >>> # 使用完整环境变量 - >>> workers = cfspider.make_workers( - ... api_token="your-api-token", - ... account_id="your-account-id", - ... env_vars={ - ... "UUID": "your-uuid", - ... "PROXYIP": "1.2.3.4", - ... "SOCKS5": "user:pass@host:port" - ... } - ... ) - >>> >>> # 直接用于请求 >>> response = cfspider.get( ... "https://httpbin.org/ip", ... cf_proxies=workers, - ... uuid=workers.uuid + ... uuid=workers.uuid # VLESS 模式需要 ... 
) >>> >>> # 停止健康检查 >>> workers.stop() + 模式对比: + | 特性 | VLESS 模式 | HTTP 模式 | + |----------------|---------------|---------------| + | CF 特征头 | 完全隐藏 | 暴露 | + | 代理软件支持 | 是 | 否 | + | 爬虫适用 | 是 | 是 | + | 复杂度 | 需要 UUID | 简单 | + | 检测风险 | 低 | 中(可被识别)| + API Token 权限要求: - Account: Workers Scripts: Edit - Zone: Workers Routes: Edit (可选,用于自定义域名) @@ -587,7 +675,8 @@ def make_workers( auto_recreate=auto_recreate, check_interval=check_interval, env_vars=final_env_vars if final_env_vars else None, - my_domain=my_domain + my_domain=my_domain, + mode=mode ) diff --git a/pyproject.toml b/pyproject.toml index f1d7979..0633d70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "cfspider" -version = "1.8.9" +version = "1.9.0" description = "Cloudflare Workers proxy IP pool client" readme = "README.md" license = {text = "Apache-2.0"} diff --git a/workers/爬楼梯workers.js b/workers/爬楼梯workers.js new file mode 100644 index 0000000..17e9bb6 --- /dev/null +++ b/workers/爬楼梯workers.js @@ -0,0 +1,509 @@ +/** + * 爬楼梯 Workers - CFspider 专用爬虫代理 + * + * 反检测特性: + * - 随机 User-Agent(50+ 种真实浏览器指纹) + * - 随机 Accept-Language(多国语言) + * - 完整浏览器指纹头(Sec-CH-UA, Sec-Fetch-*) + * - 自动生成合理的 Referer + * - 模拟真实浏览器 Cookie 行为 + * - 随机请求延迟(可选) + * - 动态 IP 切换 + */ + +export default { + async fetch(request, env) { + const url = new URL(request.url); + const path = url.pathname; + + if (request.method === 'OPTIONS') { + return corsResponse(); + } + + // 令牌验证 + const token = env.TOKEN || ''; + if (token) { + const auth = request.headers.get('Authorization') || url.searchParams.get('token') || ''; + if (auth !== `Bearer ${token}` && auth !== token) { + return json({ error: 'Unauthorized' }, 401); + } + } + + switch (path) { + case '/': return homePage(); + case '/proxy': return handleProxy(request, url); + case '/batch': return handleBatch(request); + case '/ip': return handleIP(request); + case '/health': return json({ status: 'ok', timestamp: Date.now() 
}); + default: return json({ error: 'Not Found' }, 404); + } + }, +}; + +// ============== 反检测配置 ============== + +// 50+ 真实浏览器 User-Agent +const USER_AGENTS = [ + // Chrome Windows (最新版本) + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36', + // Chrome Mac + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + // Chrome Linux + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36', + // Firefox Windows + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0', + // Firefox Mac + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0', + 'Mozilla/5.0 
(Macintosh; Intel Mac OS X 10.15; rv:122.0) Gecko/20100101 Firefox/122.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14.2; rv:123.0) Gecko/20100101 Firefox/123.0', + // Firefox Linux + 'Mozilla/5.0 (X11; Linux x86_64; rv:123.0) Gecko/20100101 Firefox/123.0', + 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0', + // Safari + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15', + // Edge + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0', + // Opera + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/108.0.0.0', + // Brave + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Brave/122', + // Vivaldi + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Vivaldi/6.5.3206.50', + // Mobile Chrome + 'Mozilla/5.0 (Linux; Android 14; SM-S918B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36', + // Mobile Safari + 
'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', + 'Mozilla/5.0 (iPad; CPU OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', +]; + +// Accept-Language 池(按地区分布) +const ACCEPT_LANGUAGES = [ + 'en-US,en;q=0.9', + 'en-GB,en;q=0.9,en-US;q=0.8', + 'en-CA,en;q=0.9,en-US;q=0.8', + 'en-AU,en;q=0.9,en-US;q=0.8', + 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7', + 'zh-TW,zh;q=0.9,en;q=0.8', + 'zh-HK,zh;q=0.9,en;q=0.8', + 'ja-JP,ja;q=0.9,en;q=0.8,en-US;q=0.7', + 'ko-KR,ko;q=0.9,en;q=0.8,en-US;q=0.7', + 'de-DE,de;q=0.9,en;q=0.8,en-US;q=0.7', + 'fr-FR,fr;q=0.9,en;q=0.8,en-US;q=0.7', + 'es-ES,es;q=0.9,en;q=0.8,en-US;q=0.7', + 'pt-BR,pt;q=0.9,en;q=0.8,en-US;q=0.7', + 'it-IT,it;q=0.9,en;q=0.8,en-US;q=0.7', + 'ru-RU,ru;q=0.9,en;q=0.8,en-US;q=0.7', + 'nl-NL,nl;q=0.9,en;q=0.8', + 'pl-PL,pl;q=0.9,en;q=0.8', + 'tr-TR,tr;q=0.9,en;q=0.8', + 'th-TH,th;q=0.9,en;q=0.8', + 'vi-VN,vi;q=0.9,en;q=0.8', + 'id-ID,id;q=0.9,en;q=0.8', + 'ar-SA,ar;q=0.9,en;q=0.8', + 'hi-IN,hi;q=0.9,en;q=0.8', +]; + +// 常见 Referer 来源 +const REFERERS = [ + 'https://www.google.com/', + 'https://www.google.com/search?q=', + 'https://www.bing.com/', + 'https://www.bing.com/search?q=', + 'https://duckduckgo.com/', + 'https://www.baidu.com/', + 'https://search.yahoo.com/', + 'https://www.facebook.com/', + 'https://twitter.com/', + 'https://www.linkedin.com/', + 'https://www.reddit.com/', + 'https://news.ycombinator.com/', + '', // 有时候没有 Referer 更自然 +]; + +// 屏幕分辨率(用于某些需要的场景) +const SCREEN_RESOLUTIONS = [ + { width: 1920, height: 1080 }, + { width: 2560, height: 1440 }, + { width: 1366, height: 768 }, + { width: 1536, height: 864 }, + { width: 1440, height: 900 }, + { width: 1680, height: 1050 }, + { width: 2560, height: 1600 }, + { width: 3840, height: 2160 }, +]; + +// 时区偏移 +const TIMEZONES = [ + 'America/New_York', + 'America/Los_Angeles', + 'America/Chicago', + 'Europe/London', + 
'Europe/Paris', + 'Europe/Berlin', + 'Asia/Tokyo', + 'Asia/Shanghai', + 'Asia/Singapore', + 'Australia/Sydney', +]; + +// ============== 工具函数 ============== + +function rand(arr) { + return arr[Math.floor(Math.random() * arr.length)]; +} + +function randInt(min, max) { + return Math.floor(Math.random() * (max - min + 1)) + min; +} + +// 生成完整的浏览器指纹头 +function generateBrowserFingerprint(targetUrl) { + const ua = rand(USER_AGENTS); + const isChrome = ua.includes('Chrome') && !ua.includes('Edg') && !ua.includes('OPR'); + const isFirefox = ua.includes('Firefox'); + const isSafari = ua.includes('Safari') && !ua.includes('Chrome'); + const isMobile = ua.includes('Mobile') || ua.includes('Android') || ua.includes('iPhone'); + + const headers = { + 'User-Agent': ua, + 'Accept': isMobile + ? 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' + : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-Language': rand(ACCEPT_LANGUAGES), + 'Accept-Encoding': 'gzip, deflate, br', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + }; + + // Chrome/Edge/Opera 特有的 Client Hints + if (isChrome || ua.includes('Edg') || ua.includes('OPR')) { + const majorVersion = parseInt(ua.match(/Chrome\/(\d+)/)?.[1] || '122'); + const platform = ua.includes('Windows') ? 'Windows' : ua.includes('Mac') ? 'macOS' : 'Linux'; + + headers['Sec-CH-UA'] = `"Chromium";v="${majorVersion}", "Not(A:Brand";v="24", "Google Chrome";v="${majorVersion}"`; + headers['Sec-CH-UA-Mobile'] = isMobile ? '?1' : '?0'; + headers['Sec-CH-UA-Platform'] = `"${platform}"`; + headers['Sec-Fetch-Dest'] = 'document'; + headers['Sec-Fetch-Mode'] = 'navigate'; + headers['Sec-Fetch-Site'] = 'none'; + headers['Sec-Fetch-User'] = '?1'; + } + + // Firefox 特有头 + if (isFirefox) { + headers['DNT'] = Math.random() > 0.5 ? 
'1' : undefined; + headers['Sec-Fetch-Dest'] = 'document'; + headers['Sec-Fetch-Mode'] = 'navigate'; + headers['Sec-Fetch-Site'] = 'none'; + headers['Sec-Fetch-User'] = '?1'; + } + + // 随机添加 Referer(60% 概率) + if (Math.random() > 0.4) { + const referer = rand(REFERERS); + if (referer) { + // 如果是搜索引擎,加上随机搜索词 + if (referer.includes('search?q=') || referer.includes('/search?q=')) { + const domain = new URL(targetUrl).hostname; + headers['Referer'] = referer + encodeURIComponent(domain); + } else { + headers['Referer'] = referer; + } + } + } + + // 随机添加 DNT (30% 概率) + if (!headers['DNT'] && Math.random() > 0.7) { + headers['DNT'] = '1'; + } + + // 随机 Cache-Control (50% 概率) + if (Math.random() > 0.5) { + headers['Cache-Control'] = rand(['no-cache', 'max-age=0']); + if (headers['Cache-Control'] === 'no-cache') { + headers['Pragma'] = 'no-cache'; + } + } + + // 过滤掉 undefined 值 + return Object.fromEntries(Object.entries(headers).filter(([_, v]) => v !== undefined)); +} + +// 生成随机延迟(模拟人类行为) +async function humanDelay(min = 100, max = 500) { + const delay = randInt(min, max); + await new Promise(resolve => setTimeout(resolve, delay)); +} + +// ============== 请求处理 ============== + +async function handleProxy(request, requestUrl) { + let targetUrl, method, headers, body, options; + + if (request.method === 'GET') { + targetUrl = requestUrl.searchParams.get('url'); + method = requestUrl.searchParams.get('method') || 'GET'; + const headersParam = requestUrl.searchParams.get('headers'); + headers = headersParam ? 
JSON.parse(headersParam) : {}; + options = { + delay: requestUrl.searchParams.get('delay') === 'true', + noFingerprint: requestUrl.searchParams.get('raw') === 'true', + }; + } else { + try { + const data = await request.json(); + targetUrl = data.url; + method = data.method || 'GET'; + headers = data.headers || {}; + body = data.body; + options = { + delay: data.delay || false, + noFingerprint: data.raw || false, + }; + } catch (e) { + return json({ error: 'Invalid JSON body' }, 400); + } + } + + if (!targetUrl) { + return json({ error: 'Missing url parameter' }, 400); + } + + try { + new URL(targetUrl); + } catch (e) { + return json({ error: 'Invalid URL' }, 400); + } + + // 可选的人类延迟 + if (options.delay) { + await humanDelay(200, 800); + } + + // 构建请求头 + const fetchHeaders = new Headers(); + + // 生成完整的浏览器指纹(除非指定 raw 模式) + if (!options.noFingerprint) { + const fingerprint = generateBrowserFingerprint(targetUrl); + for (const [key, value] of Object.entries(fingerprint)) { + fetchHeaders.set(key, value); + } + } + + // 用户自定义请求头覆盖 + for (const [key, value] of Object.entries(headers)) { + fetchHeaders.set(key, value); + } + + const startTime = Date.now(); + try { + const response = await fetch(targetUrl, { + method: method.toUpperCase(), + headers: fetchHeaders, + body: body ? (typeof body === 'string' ? 
body : JSON.stringify(body)) : undefined, + redirect: 'follow', + }); + + const responseHeaders = new Headers(); + responseHeaders.set('Access-Control-Allow-Origin', '*'); + responseHeaders.set('X-Proxy-Time', `${Date.now() - startTime}ms`); + responseHeaders.set('X-Proxy-Status', response.status.toString()); + + for (const [key, value] of response.headers.entries()) { + if (!['content-encoding', 'content-length', 'transfer-encoding'].includes(key.toLowerCase())) { + responseHeaders.set(`X-Original-${key}`, value); + } + } + + const format = requestUrl.searchParams.get('format'); + if (format === 'json') { + const text = await response.text(); + return json({ + status: response.status, + statusText: response.statusText, + headers: Object.fromEntries(response.headers.entries()), + body: text, + time: Date.now() - startTime, + fingerprint: options.noFingerprint ? 'disabled' : 'enabled', + }); + } + + return new Response(response.body, { + status: response.status, + statusText: response.statusText, + headers: responseHeaders, + }); + } catch (error) { + return json({ error: 'Proxy request failed', message: error.message, url: targetUrl }, 502); + } +} + +async function handleBatch(request) { + if (request.method !== 'POST') { + return json({ error: 'Method not allowed, use POST' }, 405); + } + + let urls, options; + try { + const data = await request.json(); + urls = data.urls; + options = { + delay: data.delay || false, + concurrency: Math.min(data.concurrency || 5, 10), + }; + } catch (e) { + return json({ error: 'Invalid JSON body' }, 400); + } + + if (!Array.isArray(urls) || urls.length === 0) { + return json({ error: 'Missing urls array' }, 400); + } + + if (urls.length > 20) { + return json({ error: 'Maximum 20 URLs per batch' }, 400); + } + + const startTime = Date.now(); + + // 分批并发执行 + const results = []; + for (let i = 0; i < urls.length; i += options.concurrency) { + const batch = urls.slice(i, i + options.concurrency); + + if (options.delay && i > 0) { + 
await humanDelay(500, 1500); + } + + const batchResults = await Promise.allSettled( + batch.map(async (item) => { + const url = typeof item === 'string' ? item : item.url; + const method = (typeof item === 'object' && item.method) || 'GET'; + const userHeaders = (typeof item === 'object' && item.headers) || {}; + + const fetchHeaders = new Headers(); + const fingerprint = generateBrowserFingerprint(url); + for (const [key, value] of Object.entries(fingerprint)) { + fetchHeaders.set(key, value); + } + for (const [key, value] of Object.entries(userHeaders)) { + fetchHeaders.set(key, value); + } + + const response = await fetch(url, { method, headers: fetchHeaders }); + const text = await response.text(); + + return { + url, + status: response.status, + body: text.slice(0, 10000), + }; + }) + ); + + results.push(...batchResults); + } + + return json({ + total: urls.length, + time: Date.now() - startTime, + results: results.map((r, i) => { + if (r.status === 'fulfilled') return r.value; + return { url: urls[i]?.url || urls[i], error: r.reason?.message || 'Failed' }; + }), + }); +} + +async function handleIP(request) { + try { + const fetchHeaders = new Headers(); + const fingerprint = generateBrowserFingerprint('https://httpbin.org/ip'); + for (const [key, value] of Object.entries(fingerprint)) { + fetchHeaders.set(key, value); + } + + const response = await fetch('https://httpbin.org/ip', { headers: fetchHeaders }); + const data = await response.json(); + + return json({ + ip: data.origin, + edge: { + colo: request.cf?.colo || 'unknown', + country: request.cf?.country || 'unknown', + city: request.cf?.city || 'unknown', + }, + timestamp: Date.now(), + }); + } catch (error) { + return json({ error: 'Failed to get IP', message: error.message }, 500); + } +} + +// ============== 响应助手 ============== + +function json(data, status = 200) { + return new Response(JSON.stringify(data, null, 2), { + status, + headers: { + 'Content-Type': 'application/json; charset=utf-8', + 
'Access-Control-Allow-Origin': '*', + }, + }); +} + +function corsResponse() { + return new Response(null, { + headers: { + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Methods': 'GET, POST, PUT, DELETE, PATCH, OPTIONS', + 'Access-Control-Allow-Headers': '*', + 'Access-Control-Max-Age': '86400', + }, + }); +} + +function homePage() { + return new Response(` + + + + Proxy Service + + + +
+

Proxy Service

+

A lightweight HTTP proxy service powered by edge network.

+
GET /proxy?url= - Proxy a URL
+
POST /batch - Batch proxy requests
+
GET /ip - Get current edge IP
+
GET /health - Health check
+
+ +`, { headers: { 'Content-Type': 'text/html; charset=utf-8' } }); +}