数据处理功能更新

This commit is contained in:
test01
2026-01-09 02:40:26 +08:00
parent e78d1762d8
commit 59fe4c6bea
4 changed files with 47 additions and 48 deletions

7
.gitignore vendored
View File

@@ -19,3 +19,10 @@ test_*.py
mirror/
*_mirror/
test_mirror_*/
#混淆脚本
obfuscate_pages.py
obfuscate_config.json
#示例文件
examples/

View File

@@ -44,6 +44,12 @@ print(response.cf_colo) # 可能显示 NRT, SIN, LAX 等不同节点
- [点击观看 B 站视频教程](https://b23.tv/1uzOf7M)
- [点击观看 YouTube 视频教程](https://youtu.be/oPeXiIFJ9TA?si=ukXsX8iP86ZTB4LP)
**代码演示视频 - 快速上手**
观看完整的爬虫示例演示,了解如何使用 CFspider 进行数据提取和批量处理:
- [点击观看代码演示视频](examples/scraper_demo.mp4) - 8 行代码完成爬虫示例
> ⚠️ **重要声明**:本项目仅供学习研究、网络安全测试、合规数据采集等**合法用途**。使用者须遵守所在地法律法规及 Cloudflare 服务条款。**任何非法使用(包括但不限于网络攻击、侵犯隐私、规避版权保护等)均与本项目开发者无关,使用者自行承担全部法律责任。**
## 代理方案对比

46
test.py
View File

@@ -1,43 +1,9 @@
import CodeVideoRenderer
CodeVideoRenderer.CameraFollowCursorCV(code_string="""
import cfspider
# ========== 方案一:使用有效的 Workers 地址 ==========
# 请将下面的地址替换为你的实际 Workers 地址
# Workers 地址格式:https://your-worker-name.your-subdomain.workers.dev
WORKERS_URL = "https://proxy.kami666.xyz/" # 替换为你的 Workers 地址
TOKEN = "HAIfuge27" # 替换为你在 Workers 中配置的 token
response = cfspider.get("https://www.cfspider.com",impersonate="chrome131")
try:
# 使用 Token 鉴权的请求
res = cfspider.get(
"https://httpbin.org/ip",
cf_proxies=WORKERS_URL,
token=TOKEN
)
print("✅ 请求成功!")
print(f"响应内容: {res.text}")
print(f"节点代码: {res.cf_colo}")
print(f"Ray ID: {res.cf_ray}")
print(f"状态码: {res.status_code}")
except Exception as e:
print(f"❌ 请求失败: {e}")
print("\n可能的原因:")
print("1. Workers 地址不正确或域名无法解析")
print("2. Token 配置错误")
print("3. 网络连接问题")
print("\n解决方案:")
print("1. 检查 Workers 地址是否正确")
print("2. 确认 Workers 已部署并运行")
print("3. 检查 Token 是否在 Workers 环境变量中配置")
print("4. 尝试不使用代理测试(见下方方案二)")
# ========== 方案二:不使用代理测试(用于验证库是否正常)==========
print("\n" + "="*50)
print("测试:不使用代理直接请求")
try:
res = cfspider.get("https://httpbin.org/ip")
print("✅ 直接请求成功!")
print(f"响应内容: {res.text}")
except Exception as e:
print(f"❌ 直接请求也失败: {e}")
print(response.text)
""")

View File

@@ -1,8 +1,8 @@
// CFspider - Cloudflare Workers 代理 IP 池 v1.7.3
// CFspider - Cloudflare Workers 代理 IP 池 v1.8.0
// 支持:同步/异步请求、TLS指纹模拟、浏览器自动化
let 反代IP = '';
const VERSION = '1.7.3';
const VERSION = '1.8.0';
const START_TIME = Date.now();
export default {
@@ -31,8 +31,12 @@ export default {
return new Response(null, { headers: corsHeaders });
}
// Token 验证(除了首页、debug 页面)
if (path !== '' && path !== '/' && path !== 'debug') {
// Token 验证(除了首页、debug 页面和从首页发起的 API 请求)
const referer = request.headers.get('Referer') || '';
const isFromHomePage = referer && (referer.endsWith('/') || referer.endsWith(url.hostname + '/') || referer.includes(url.hostname + '/?'));
const isPublicApi = (path === 'api/pool' || path === 'api/proxyip') && isFromHomePage;
if (path !== '' && path !== '/' && path !== 'debug' && !isPublicApi) {
const tokenValidation = validateToken(request, env);
if (!tokenValidation.valid) {
return new Response(JSON.stringify({
@@ -1247,7 +1251,7 @@ function generateCyberpunkPage(request, url, visitorIP) {
</div>
<div class="code-section">
<pre><span class="code-comment"># pip install cfspider</span>
<pre><span class="code-comment"># pip install cfspider[extract]</span>
<span class="code-keyword">import</span> cfspider
cf_proxies = <span class="code-string">"https://your-workers.dev"</span>
@@ -1259,12 +1263,24 @@ response = cfspider.<span class="code-function">get</span>(
)
<span class="code-function">print</span>(response.cf_colo) <span class="code-comment"># Cloudflare node code</span>
<span class="code-comment"># TLS fingerprint impersonate (curl_cffi)</span>
<span class="code-comment"># TLS fingerprint + stealth mode</span>
response = cfspider.<span class="code-function">get</span>(
<span class="code-string">"https://example.com"</span>,
impersonate=<span class="code-string">"chrome131"</span>
impersonate=<span class="code-string">"chrome131"</span>,
stealth=<span class="code-keyword">True</span>
)
<span class="code-comment"># Data extraction (CSS/XPath/JSONPath)</span>
title = response.<span class="code-function">find</span>(<span class="code-string">"h1"</span>)
links = response.<span class="code-function">find_all</span>(<span class="code-string">"a"</span>, attr=<span class="code-string">"href"</span>)
data = response.<span class="code-function">pick</span>(title=<span class="code-string">"h1"</span>, links=(<span class="code-string">"a"</span>, <span class="code-string">"href"</span>))
data.<span class="code-function">save</span>(<span class="code-string">"output.csv"</span>)
<span class="code-comment"># Batch requests with progress</span>
urls = [<span class="code-string">"https://example.com/1"</span>, <span class="code-string">"https://example.com/2"</span>]
results = cfspider.<span class="code-function">batch</span>(urls, concurrency=<span class="code-number">10</span>)
results.<span class="code-function">save</span>(<span class="code-string">"results.json"</span>)
<span class="code-comment"># Async request (httpx)</span>
response = <span class="code-keyword">await</span> cfspider.<span class="code-function">aget</span>(
<span class="code-string">"https://httpbin.org/ip"</span>,
@@ -1293,7 +1309,11 @@ browser.<span class="code-function">close</span>()</pre>
async function loadPool() {
try {
const resp = await fetch('/api/pool');
// 从 URL 参数获取 token
const urlParams = new URLSearchParams(window.location.search);
const token = urlParams.get('token');
const apiUrl = token ? \`/api/pool?token=\${token}\` : '/api/pool';
const resp = await fetch(apiUrl);
const data = await resp.json();
const tbody = document.getElementById('poolBody');