import requests
import re,json,random,time
from py_mini_racer import MiniRacer
from urllib import parse
from chaojiying import Chaojiying_Client
# Browser-like request headers reused by the page, script and captcha-check
# requests made through the functions in this file.
headers = {
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Referer": "http://zxgk.court.gov.cn/",
    "Accept-Language": "zh-CN,zh;q=0.9",
}
def get_cookie_first(url):
    """Run the whole flow against ``url``: cookie generation, captcha, search.

    Sequence as written:

    1. GET ``url`` with a fresh session and pull challenge data out of the
       returned HTML.
    2. Fill the ``1.js`` template with that data, evaluate it in MiniRacer
       and read the ``lqWVdQzgOVyaT`` cookie produced by its
       ``update_cookie`` function.
    3. Repeat the request with that cookie, fill ``2.js`` the same way and
       take a second cookie from its ``get_cookie`` function.
    4. Poll ``http://zxgk.court.gov.cn/zhixing/`` (refreshing the cookie
       before each attempt) until it answers with status 200.
    5. Call ``yzm`` for a captcha, then loop on ``get_sxmd`` until it
       returns ``True``.

    ``1.js`` and ``2.js`` are opened by relative path, i.e. from the current
    working directory. Nothing is returned; progress is printed.

    NOTE(review): several regex literals below are empty or end in a bare
    lazy group. They look truncated (markup possibly stripped from the
    source text) -- confirm against the original script before relying on
    this function.
    """
    cookies = {}
    session = requests.session()
    # First visit (original author's note: this one is answered with 412).
    html = session.get(url, headers=headers)
    print("第一次访问的结果 ===>",html)
    # Extract the "content" tag value.
    # NOTE(review): the pattern is an empty string, so the first match is
    # always '' -- the intended pattern appears to be missing.
    content = re.findall(r'', html.text)[0].replace('"', '')
    # Path of the external JS file referenced by the page.
    win_ts_url = re.findall('" src="(.*?)" r=\'m\'>', html.text)[0]
    # Resolve it against the page URL.
    win_ts_url = parse.urljoin(url, win_ts_url)
    # Download the JS source (plain requests.get, not the session; TLS
    # verification disabled).
    InnerJs = requests.get(win_ts_url, headers=headers, verify=False).text
    # Extract the inline $_ts script (second match is used).
    # NOTE(review): the lazy group is the last thing in the pattern, so it
    # can only capture '' -- a closing delimiter appears to be missing.
    win_ts = re.findall(r"r='m'>(.*?)", html.text)[1]
    with open('1.js', mode='r', encoding='utf-8') as f:
        code = f.read()
    # Substitute the placeholders in the template with the extracted pieces.
    js_txt = code.replace('window.win_ts', win_ts).replace('window.zhiyuan', InnerJs).replace('window.content',
                                                                                                str(content))
    ctx = MiniRacer()
    ctx.eval(js_txt)
    # The JS function returns a cookie string; keep only the value part.
    cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', ctx.call("update_cookie"))[0]
    print("第一次cookie = ", cookie)
    cookies['lqWVdQzgOVyaT'] = cookie
    session.cookies.update(cookies)
    # Second request, now carrying the generated cookie.
    response = session.get(url, headers=headers)
    print("更新412的cookie后请求的结果 ===>",response)
    ############################## steps after the 200 response ##############################
    # Extract the "content" tag value from the second page.
    # NOTE(review): empty pattern again -- always yields ''.
    second_content = re.findall(r'', response.text)[0].replace('"', '')
    # Path of the external JS file referenced by the second page.
    # NOTE(review): the compiled pattern is empty and defines no group named
    # "win_ts_url", so the .group() call in the loop below would raise
    # IndexError as written -- the pattern text appears to be missing.
    obj = re.compile(r'',
                     re.S)
    second_win_ts_url = ""
    result = obj.finditer(response.text)
    for i in result:
        second_win_ts_url = i.group("win_ts_url")
    # Resolve it against the page URL.
    second_win_ts_url = parse.urljoin(url, second_win_ts_url)
    # Download the JS source.
    second_InnerJs = requests.get(second_win_ts_url, headers=headers, verify=False).text
    # Extract the inline $_ts script (same caveat as for the first page).
    second_win_ts = re.findall(r"r='m'>(.*?)", response.text)[1]
    with open('2.js', mode='r', encoding='utf-8') as f:
        code = f.read()
    # Fill the second template; it additionally receives the first cookie.
    js_txt2 = code.replace('window.win_ts',second_win_ts).replace('window.zhiyuan111',second_InnerJs).replace('window.content',str(second_content)).replace("window.aaaaa",cookie)
    ctx2 = MiniRacer()
    ctx2.eval(js_txt2)
    # Cookie generated from the 200 page.
    new_cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', ctx2.call("get_cookie", cookie))[0] # Search_First_Page
    print("200更新cookie为: ", len(new_cookie), new_cookie)
    session.cookies.set("lqWVdQzgOVyaT", new_cookie)
    # Keep refreshing the cookie and requesting /zhixing/ until it returns
    # 200, pausing one second between attempts.
    while True:
        new_cookie = update_cookie(ctx2,session,new_cookie)
        test_headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Referer": "http://zxgk.court.gov.cn/",
            "Accept-Language": "zh-CN,zh;q=0.9"
        }
        # Note: this rebinds the ``url`` parameter to the /zhixing/ page.
        url = "http://zxgk.court.gov.cn/zhixing/"
        test_response = session.get(url=url,headers=test_headers,verify=False)
        print(test_response)
        if test_response.status_code == 200:
            break
        time.sleep(1)
    # Fetch and solve a captcha, then run the search with it.
    captcha_str,captchaId = yzm(session,ctx2,new_cookie)
    # NOTE(review): get_sxmd, as defined in this file, ends by terminating
    # the process and has no return statement, so the ``== True`` check is
    # never satisfied by a returned value.
    while True:
        new_cookie = update_cookie(ctx2, session, new_cookie)
        result = get_sxmd(session,captcha_str,captchaId,ctx2)
        if result == True:
            break
def update_cookie(ctx2, session, cookie):
    """Regenerate the ``lqWVdQzgOVyaT`` cookie and store it on the session.

    Calls the JS function ``update_cookie`` in ``ctx2`` with the current
    ``cookie`` value, extracts the new value from the cookie string it
    returns, sets it on ``session.cookies`` and returns it.

    Raises:
        IndexError: if the returned string contains no
            ``lqWVdQzgOVyaT=...; p`` fragment.
    """
    cookie_string = ctx2.call("update_cookie", cookie)
    candidates = re.findall('lqWVdQzgOVyaT=(.*?); p', cookie_string)
    refreshed = candidates[0]
    print("更新cookie为: ", len(refreshed), refreshed)
    session.cookies.set("lqWVdQzgOVyaT", refreshed)
    return refreshed
def get_captchaId():
    """Return a random 32-character identifier built from digits and letters."""
    alphabet = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    ]
    # NOTE(review): the multiplier is 61 while the table has 62 entries, so
    # the last entry ('z') is never selected. Possibly intentional (e.g. to
    # mirror the site's own script) -- confirm before changing.
    return "".join(alphabet[int(random.random() * 61)] for _ in range(32))
def yzm(session, ctx2, new_cookie):
    """Download a captcha image, have it recognised and submit it for checking.

    Args:
        session: requests session used for all HTTP calls.
        ctx2: JS context exposing ``get_bzxr_hz`` (URL builder) and, via
            ``update_cookie``, the cookie generator.
        new_cookie: current ``lqWVdQzgOVyaT`` cookie value; it is refreshed
            before the verification request.

    Returns:
        ``(captcha_str, captchaId)``. ``captcha_str`` stays ``""`` when the
        captcha request did not return 200 or an error occurred before
        recognition; it is whatever the recognition service returned under
        ``pic_str`` otherwise.

    Side effects:
        Writes the downloaded image to ``captcha.jpg`` in the current
        working directory and prints progress.
    """
    captcha_headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        "Referer": "http://zxgk.court.gov.cn/zhixing/",
        "Accept-Language": "zh-CN,zh;q=0.9"
    }
    captchaId = get_captchaId()
    print("captchaId ===>", captchaId)
    params = {
        "captchaId": captchaId,
        "random": str(random.random())
    }
    captcha_str = ""
    try:
        response_captcha = session.get("http://zxgk.court.gov.cn/zhixing/captcha.do", headers=captcha_headers, params=params)
        print("验证码的响应 ===>", response_captcha)
        # Keep a copy of whatever came back, for inspection.
        with open("captcha.jpg", "wb") as f:
            f.write(response_captcha.content)
        if response_captcha.status_code == 200:
            # NOTE(review): account credentials are hard-coded here; they
            # should be moved out of the source (config/environment).
            chaojiying = Chaojiying_Client('15985724690', 'a520520a', '923160')
            captcha_str = chaojiying.PostPic(response_captcha.content, 1005).get('pic_str')
            print("验证码的结果 ===>", captcha_str)
            # Activation step: let the JS context build the final
            # verification URL from the plain one.
            yzm_url = f"checkyzm.do?captchaId={captchaId}&pCode={captcha_str}"
            yzm_url = ctx2.call("get_bzxr_hz", "GET", yzm_url, captchaId, captcha_str, str(int(time.time() * 1000)))
            print("yzm_url ===>", yzm_url)
            # update_cookie already stores the refreshed value on the
            # session, so no second cookies.set() is needed here.
            new_cookie = update_cookie(ctx2, session, new_cookie)
            yzm_url = "http://zxgk.court.gov.cn" + yzm_url
            print("验证码的验证的url ===>", yzm_url)
            yzm_response = session.get(url=yzm_url, headers=headers, verify=False)
            print("验证码的验证 ===>", yzm_response.text)
    except Exception as exc:
        # Best-effort: report the cause and fall through to the return, but
        # no longer swallow KeyboardInterrupt/SystemExit as a bare except did.
        print("验证码error", repr(exc))
    return captcha_str, captchaId
def get_sxmd(session, captcha_str, captchaId, ctx2):
    """Post one search request, fetch one detail record, then stop the process.

    Args:
        session: requests session carrying the current cookies.
        captcha_str: recognised captcha text sent as ``pCode`` / ``j_captcha``.
        captchaId: identifier the captcha was requested with.
        ctx2: JS context whose ``get_bzxr_hz`` builds the final detail URL.

    Both responses are printed. The function does not return: it always
    ends by raising ``SystemExit``.

    NOTE(review): the search name and the detail ``id`` are fixed values in
    the body; the caller in this file compares the result with ``True``,
    which can never happen while the function exits -- confirm the intent.
    """
    # Local import so this block does not depend on a file-level change.
    import sys

    ajax_headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "http://zxgk.court.gov.cn",
        "Referer": "http://zxgk.court.gov.cn/zhixing/",
        "Accept-Language": "zh-CN,zh;q=0.9"
    }
    search_url = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
    form = {
        "pName": "张三",
        "pCardNum": "",
        "selectCourtId": "0",
        "pCode": captcha_str,
        "captchaId": captchaId,
        "searchCourtName": "全国法院(包含地方各级法院)",
        "selectCourtArrange": "1",
        "currentPage": "1"
    }
    response = session.post(url=search_url, headers=ajax_headers, data=form, verify=False)
    print(response.text)
    print(response)

    # Millisecond timestamp, used both in the query string and as an
    # argument to the JS URL builder.
    currentTime = str(int(time.time() * 1000))
    detail_path = f"newdetail?id=1582536987&j_captcha={captcha_str}&captchaId={captchaId}&_={currentTime}"
    new_url = ctx2.call("get_bzxr_hz", "GET", detail_path, captchaId, captcha_str, currentTime)
    new_url = "http://zxgk.court.gov.cn" + new_url
    print(new_url)
    response = session.get(new_url, headers=ajax_headers, verify=False)
    print("response.text =====>", response.text)
    print(response)
    # sys.exit() instead of the site-provided exit(): same SystemExit, but
    # available even when the site module is not loaded.
    sys.exit()
# Script entry: start the flow at the site root.
# NOTE(review): this call also runs when the module is imported; consider
# guarding it with `if __name__ == "__main__":` if import-safety is wanted.
get_cookie_first("http://zxgk.court.gov.cn/")