mirror of
https://github.com/violettoolssite/CFspider.git
synced 2026-04-05 03:09:01 +08:00
数据处理功能更新
This commit is contained in:
7
.gitignore
vendored
7
.gitignore
vendored
@@ -19,3 +19,10 @@ test_*.py
|
||||
mirror/
|
||||
*_mirror/
|
||||
test_mirror_*/
|
||||
|
||||
#混淆脚本
|
||||
obfuscate_pages.py
|
||||
obfuscate_config.json
|
||||
|
||||
#示例文件
|
||||
examples/
|
||||
@@ -44,6 +44,12 @@ print(response.cf_colo) # 可能显示 NRT, SIN, LAX 等不同节点
|
||||
- [点击观看 B 站视频教程](https://b23.tv/1uzOf7M)
|
||||
- [点击观看 YouTube 视频教程](https://youtu.be/oPeXiIFJ9TA?si=ukXsX8iP86ZTB4LP)
|
||||
|
||||
**代码演示视频 - 快速上手**
|
||||
|
||||
观看完整的爬虫示例演示,了解如何使用 CFspider 进行数据提取和批量处理:
|
||||
|
||||
- [点击观看代码演示视频](examples/scraper_demo.mp4) - 8 行代码完成爬虫示例
|
||||
|
||||
> ⚠️ **重要声明**:本项目仅供学习研究、网络安全测试、合规数据采集等**合法用途**。使用者须遵守所在地法律法规及 Cloudflare 服务条款。**任何非法使用(包括但不限于网络攻击、侵犯隐私、规避版权保护等)均与本项目开发者无关,使用者自行承担全部法律责任。**
|
||||
|
||||
## 代理方案对比
|
||||
|
||||
46
test.py
46
test.py
@@ -1,43 +1,9 @@
|
||||
import CodeVideoRenderer
|
||||
|
||||
CodeVideoRenderer.CameraFollowCursorCV(code_string="""
|
||||
import cfspider
|
||||
|
||||
# ========== 方案一:使用有效的 Workers 地址 ==========
|
||||
# 请将下面的地址替换为你的实际 Workers 地址
|
||||
# Workers 地址格式:https://your-worker-name.your-subdomain.workers.dev
|
||||
WORKERS_URL = "https://proxy.kami666.xyz/" # 替换为你的 Workers 地址
|
||||
TOKEN = "HAIfuge27" # 替换为你在 Workers 中配置的 token
|
||||
response = cfspider.get("https://www.cfspider.com",impersonate="chrome131")
|
||||
|
||||
try:
|
||||
# 使用 Token 鉴权的请求
|
||||
res = cfspider.get(
|
||||
"https://httpbin.org/ip",
|
||||
cf_proxies=WORKERS_URL,
|
||||
token=TOKEN
|
||||
)
|
||||
|
||||
print("✅ 请求成功!")
|
||||
print(f"响应内容: {res.text}")
|
||||
print(f"节点代码: {res.cf_colo}")
|
||||
print(f"Ray ID: {res.cf_ray}")
|
||||
print(f"状态码: {res.status_code}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ 请求失败: {e}")
|
||||
print("\n可能的原因:")
|
||||
print("1. Workers 地址不正确或域名无法解析")
|
||||
print("2. Token 配置错误")
|
||||
print("3. 网络连接问题")
|
||||
print("\n解决方案:")
|
||||
print("1. 检查 Workers 地址是否正确")
|
||||
print("2. 确认 Workers 已部署并运行")
|
||||
print("3. 检查 Token 是否在 Workers 环境变量中配置")
|
||||
print("4. 尝试不使用代理测试(见下方方案二)")
|
||||
|
||||
# ========== 方案二:不使用代理测试(用于验证库是否正常)==========
|
||||
print("\n" + "="*50)
|
||||
print("测试:不使用代理直接请求")
|
||||
try:
|
||||
res = cfspider.get("https://httpbin.org/ip")
|
||||
print("✅ 直接请求成功!")
|
||||
print(f"响应内容: {res.text}")
|
||||
except Exception as e:
|
||||
print(f"❌ 直接请求也失败: {e}")
|
||||
print(response.text)
|
||||
""")
|
||||
36
workers.js
36
workers.js
@@ -1,8 +1,8 @@
|
||||
// CFspider - Cloudflare Workers 代理 IP 池 v1.7.3
|
||||
// CFspider - Cloudflare Workers 代理 IP 池 v1.8.0
|
||||
// 支持:同步/异步请求、TLS指纹模拟、浏览器自动化
|
||||
|
||||
let 反代IP = '';
|
||||
const VERSION = '1.7.3';
|
||||
const VERSION = '1.8.0';
|
||||
const START_TIME = Date.now();
|
||||
|
||||
export default {
|
||||
@@ -31,8 +31,12 @@ export default {
|
||||
return new Response(null, { headers: corsHeaders });
|
||||
}
|
||||
|
||||
// Token 验证(除了首页和 debug 页面)
|
||||
if (path !== '' && path !== '/' && path !== 'debug') {
|
||||
// Token 验证(除了首页、debug 页面和从首页发起的 API 请求)
|
||||
const referer = request.headers.get('Referer') || '';
|
||||
const isFromHomePage = referer && (referer.endsWith('/') || referer.endsWith(url.hostname + '/') || referer.includes(url.hostname + '/?'));
|
||||
const isPublicApi = (path === 'api/pool' || path === 'api/proxyip') && isFromHomePage;
|
||||
|
||||
if (path !== '' && path !== '/' && path !== 'debug' && !isPublicApi) {
|
||||
const tokenValidation = validateToken(request, env);
|
||||
if (!tokenValidation.valid) {
|
||||
return new Response(JSON.stringify({
|
||||
@@ -1247,7 +1251,7 @@ function generateCyberpunkPage(request, url, visitorIP) {
|
||||
</div>
|
||||
|
||||
<div class="code-section">
|
||||
<pre><span class="code-comment"># pip install cfspider</span>
|
||||
<pre><span class="code-comment"># pip install cfspider[extract]</span>
|
||||
<span class="code-keyword">import</span> cfspider
|
||||
|
||||
cf_proxies = <span class="code-string">"https://your-workers.dev"</span>
|
||||
@@ -1259,12 +1263,24 @@ response = cfspider.<span class="code-function">get</span>(
|
||||
)
|
||||
<span class="code-function">print</span>(response.cf_colo) <span class="code-comment"># Cloudflare node code</span>
|
||||
|
||||
<span class="code-comment"># TLS fingerprint impersonate (curl_cffi)</span>
|
||||
<span class="code-comment"># TLS fingerprint + stealth mode</span>
|
||||
response = cfspider.<span class="code-function">get</span>(
|
||||
<span class="code-string">"https://example.com"</span>,
|
||||
impersonate=<span class="code-string">"chrome131"</span>
|
||||
impersonate=<span class="code-string">"chrome131"</span>,
|
||||
stealth=<span class="code-keyword">True</span>
|
||||
)
|
||||
|
||||
<span class="code-comment"># Data extraction (CSS/XPath/JSONPath)</span>
|
||||
title = response.<span class="code-function">find</span>(<span class="code-string">"h1"</span>)
|
||||
links = response.<span class="code-function">find_all</span>(<span class="code-string">"a"</span>, attr=<span class="code-string">"href"</span>)
|
||||
data = response.<span class="code-function">pick</span>(title=<span class="code-string">"h1"</span>, links=(<span class="code-string">"a"</span>, <span class="code-string">"href"</span>))
|
||||
data.<span class="code-function">save</span>(<span class="code-string">"output.csv"</span>)
|
||||
|
||||
<span class="code-comment"># Batch requests with progress</span>
|
||||
urls = [<span class="code-string">"https://example.com/1"</span>, <span class="code-string">"https://example.com/2"</span>]
|
||||
results = cfspider.<span class="code-function">batch</span>(urls, concurrency=<span class="code-number">10</span>)
|
||||
results.<span class="code-function">save</span>(<span class="code-string">"results.json"</span>)
|
||||
|
||||
<span class="code-comment"># Async request (httpx)</span>
|
||||
response = <span class="code-keyword">await</span> cfspider.<span class="code-function">aget</span>(
|
||||
<span class="code-string">"https://httpbin.org/ip"</span>,
|
||||
@@ -1293,7 +1309,11 @@ browser.<span class="code-function">close</span>()</pre>
|
||||
|
||||
async function loadPool() {
|
||||
try {
|
||||
const resp = await fetch('/api/pool');
|
||||
// 从 URL 参数获取 token
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
const token = urlParams.get('token');
|
||||
const apiUrl = token ? \`/api/pool?token=\${token}\` : '/api/pool';
|
||||
const resp = await fetch(apiUrl);
|
||||
const data = await resp.json();
|
||||
const tbody = document.getElementById('poolBody');
|
||||
|
||||
|
||||
Reference in New Issue
Block a user