import requests
import time
import re
# Task-creation function
def create_task(url, proxy, client_key):
"""
创建Cloudflare防护页面的任务
:param url: 目标网址
:param proxy: 代理服务器地址
:param client_key: 客户端密钥
data = { :return: 创建任务的响应结果
"""
data #= 填您自己的密钥{
"clientKey": clientKeyclient_key,
"task": {
"type": "CloudFlareTaskS2",
"userAgent": "", # 指定内核指定chrome内核, 可以为空,如:"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
"userAgentwaitLoad": ""True,
"waitLoadwebsiteURL": False,
"websiteURL": url,
"proxy": proxy
}
}
api_url = "https://api.yescaptcha.com/createTask"
response = requests.post(api_url, json=data).json()
return response
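# Usage sketch (illustrative values only; response fields beyond 'taskId' are not assumed here):
# resp = create_task("https://example.com/", "http://user:pass@1.2.3.4:8080", "your_client_key")
# if resp.get("taskId"):
#     print("Task created:", resp["taskId"])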
# Task-result function
def get_task(task_id, client_key):
    """
    Get the execution result of a task
    :param task_id: task ID
    :param client_key: client key
    :return: response containing the task result
    """
    api_url = "https://api.yescaptcha.com/getTaskResult"
    data = {
        "clientKey": client_key,
        "taskId": task_id
    }
    response = requests.post(api_url, json=data).json()
    return response
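# Polling sketch (hypothetical task ID): get_result() below wraps this loop;
# 'processing' and 'ready' are the status values it checks for.
# result = get_task("task-id-from-create_task", "your_client_key")
# if result.get("status") == "ready":
#     print(result.get("solution"))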
# Full request flow
def get_result(url, proxy, client_key):
    """
    Create the task and poll for its result
    :param url: target URL
    :param proxy: proxy server address
    :param client_key: client key
    :return: task result
    """
    task_response = create_task(url, proxy, client_key)
    if not task_response or not task_response.get('taskId'):
        return task_response
    print("TaskID:", task_response)
    for _ in range(30):
        time.sleep(3)
        result = get_task(task_response.get('taskId'), client_key)
        if result.get('status') == 'processing':
            continue
        elif result.get('status') == 'ready':
            return result
        else:
            raise Exception(result)
    return {"status": "timeout"}
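# End-to-end sketch (placeholder URL/proxy/key): get_result() raises for any status other
# than 'processing' or 'ready', so a try/except keeps the caller alive on failures.
# try:
#     result = get_result("https://example.com/", "http://user:pass@1.2.3.4:8080", "your_client_key")
#     print(result.get("solution", {}).get("cookies"))
# except Exception as err:
#     print("Task error:", err)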
if __name__ == '__main__':
    # Your client key; log in at yescaptcha.com to get it
    client_key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
    # Fill in your own proxy; do not use a local address
    # (e.g. http://127.0.0.1:1080 or http://localhost:1080 -- only you can reach those)
proxy = "http://JN3wWChA:Dsg7ckfv@176.222.59.86:64048"
proxies = {
'http': proxy,
'https': proxy,
}
    # Target URL
    # url = "https://apkpure.com/"
    # url = "https://openai.com/6a0bdd42abee7/"
url = "https://billetterie.psg.fr/"
    # The normal situation, using a plain requests call:
    # import requests
    # response = requests.get(url, proxies=proxies)
    # print("Response status:", response.status_code)
    # print("Page title:", re.search(r"<title>(.*?)</title>", response.text)[0])
    # >>> Response status: 403
    # >>> Page title: <title>Attention Required! | Cloudflare</title>
    # Using curl_cffi.requests instead:
    # from curl_cffi import requests
    # response = requests.get(url, proxies=proxies, impersonate="chrome110")
    # print("Response status:", response.status_code)
    # print("Page title:", re.search(r"<title>(.*?)</title>", response.text)[0])
    # print("CF challenge present:", "cf_chl" in response.text)
    # >>> If curl_cffi alone got through, the strict 5-second challenge would not be enabled; here it still returns:
    # >>> Response status: 403
    # >>> Page title: <title>Just a moment...</title>
    # Create a task and request with the values returned by the API
    task_result = get_result(url, proxy, client_key)
    if not task_result.get("solution"):
        print("Task failed:", task_result)
        exit()
    # Get the result
    solution = task_result.get("solution")
    # Pull the request headers and cookies from the solution
    headers = solution.get("request_headers")
    headers.update(solution.get("headers"))
    cookies = solution.get("cookies")
    print("Headers:", headers)
    print("Cookies:", cookies)
    # Request with curl_cffi.requests
    # Install the pre-release of curl_cffi first: pip install curl_cffi --pre
    from curl_cffi import requests as curl_requests
    # Specify a matching impersonate="chrome120" here as well
    response = curl_requests.get(url, headers=headers, cookies=cookies, proxies=proxies, impersonate="chrome120")
    print("Response status:", response.status_code)
    title = re.search(r"<title>(.*?)</title>", response.text)
    print("Page title:", title[0]) if title else print("Page body:", response.text[:1000])
    print("CF challenge present:", "cf_chl" in response.text)