"""Scraper flow for zxgk.court.gov.cn.

The site hands out a JavaScript challenge; the ``lqWVdQzgOVyaT`` cookie is
computed by evaluating the page's script inside local JS templates
(``1.js`` / ``2.js``) with MiniRacer.  Once a working cookie is in place the
script solves a captcha and issues a search plus a detail request.
"""
import json
import os
import random
import re
import string
import time
from urllib import parse

import requests
from chaojiying import Chaojiying_Client
from py_mini_racer import MiniRacer

headers = {
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Referer": "http://zxgk.court.gov.cn/",
    "Accept-Language": "zh-CN,zh;q=0.9"
}

# Name of the anti-bot cookie that every stage recomputes.
COOKIE_NAME = "lqWVdQzgOVyaT"

# Pulls the cookie value out of the "name=value; p..." string the JS returns.
_COOKIE_VALUE_RE = re.compile('lqWVdQzgOVyaT=(.*?); p')

# NOTE(review): the patterns marked "truncated?" below look as if their HTML
# markup was lost before this file was committed (two are empty and one ends
# right after a lazy group, so it can only capture ""). They are kept exactly
# as found; restore the real patterns before relying on this script.
_CONTENT_PATTERN = r''                            # truncated? meant to match the "content" tag
_SCRIPT_SRC_PATTERN = '" src="(.*?)" r=\'m\'>'
_WIN_TS_PATTERN = r"r='m'>(.*?)"                  # truncated? meant to capture the $_ts script
_SECOND_SCRIPT_SRC_RE = re.compile(r'', re.S)     # truncated? must define group "win_ts_url"


def _extract_cookie_value(cookie_string):
    """Return the first ``lqWVdQzgOVyaT`` value found in *cookie_string*.

    Raises:
        IndexError: if the string does not contain the cookie.
    """
    return _COOKIE_VALUE_RE.findall(cookie_string)[0]


def get_cookie_first(url):
    """Run the whole flow starting from the landing page at *url*.

    Steps: fetch the challenge page, compute the first cookie with ``1.js``,
    re-request the page, compute the follow-up cookie with ``2.js``, poll the
    ``/zhixing/`` page until it answers 200, solve the captcha and finally
    call :func:`get_sxmd` until it reports success.

    Args:
        url: Landing page URL; relative script paths are resolved against it.

    Raises:
        IndexError: when one of the regular expressions finds no match.
    """
    session = requests.session()

    # First visit (expected to be answered with 412 per the original notes).
    html = session.get(url, headers=headers)
    print("第一次访问的结果 ===>", html)

    # Extract the "content" tag value.
    content = re.findall(_CONTENT_PATTERN, html.text)[0].replace('"', '')
    # Path suffix of the external JS, resolved to an absolute URL.
    win_ts_url = re.findall(_SCRIPT_SRC_PATTERN, html.text)[0]
    win_ts_url = parse.urljoin(url, win_ts_url)
    # Download the external JS.
    InnerJs = requests.get(win_ts_url, headers=headers, verify=False).text
    # Inline $_ts script.
    win_ts = re.findall(_WIN_TS_PATTERN, html.text)[1]

    with open('1.js', mode='r', encoding='utf-8') as f:
        code = f.read()
    js_txt = (
        code.replace('window.win_ts', win_ts)
        .replace('window.zhiyuan', InnerJs)
        .replace('window.content', str(content))
    )
    ctx = MiniRacer()
    ctx.eval(js_txt)
    cookie = _extract_cookie_value(ctx.call("update_cookie"))
    print("第一次cookie = ", cookie)
    session.cookies.set(COOKIE_NAME, cookie)

    response = session.get(url, headers=headers)
    print("更新412的cookie后请求的结果 ===>", response)

    # ------------------------- after the 200 response -------------------------
    # Extract the "content" tag value of the second page.
    second_content = re.findall(_CONTENT_PATTERN, response.text)[0].replace('"', '')
    # Path suffix of the external JS; the last match wins.
    second_win_ts_url = ""
    for match in _SECOND_SCRIPT_SRC_RE.finditer(response.text):
        second_win_ts_url = match.group("win_ts_url")
    second_win_ts_url = parse.urljoin(url, second_win_ts_url)
    # Download the external JS.
    second_InnerJs = requests.get(second_win_ts_url, headers=headers, verify=False).text
    # Inline $_ts script.
    second_win_ts = re.findall(_WIN_TS_PATTERN, response.text)[1]

    with open('2.js', mode='r', encoding='utf-8') as f:
        code = f.read()
    js_txt2 = (
        code.replace('window.win_ts', second_win_ts)
        .replace('window.zhiyuan111', second_InnerJs)
        .replace('window.content', str(second_content))
        .replace("window.aaaaa", cookie)
    )
    ctx2 = MiniRacer()
    ctx2.eval(js_txt2)

    # Cookie generated by the 200 page.
    new_cookie = _extract_cookie_value(ctx2.call("get_cookie", cookie))
    print("200更新cookie为: ", len(new_cookie), new_cookie)
    session.cookies.set(COOKIE_NAME, new_cookie)

    # Poll the search page, refreshing the cookie before every attempt.
    zhixing_url = "http://zxgk.court.gov.cn/zhixing/"
    while True:
        new_cookie = update_cookie(ctx2, session, new_cookie)
        test_response = session.get(url=zhixing_url, headers=headers, verify=False)
        print(test_response)
        if test_response.status_code == 200:
            break
        time.sleep(1)

    captcha_str, captchaId = yzm(session, ctx2, new_cookie)

    while True:
        new_cookie = update_cookie(ctx2, session, new_cookie)
        if get_sxmd(session, captcha_str, captchaId, ctx2):
            break


def update_cookie(ctx2, session, cookie):
    """Derive the next cookie from *cookie* and store it on *session*.

    Args:
        ctx2: MiniRacer context exposing the JS function ``update_cookie``.
        session: ``requests`` session whose cookie jar is updated.
        cookie: Current cookie value passed to the JS function.

    Returns:
        The new cookie value.
    """
    new_cookie = _extract_cookie_value(ctx2.call("update_cookie", cookie))
    print("更新cookie为: ", len(new_cookie), new_cookie)
    session.cookies.set(COOKIE_NAME, new_cookie)
    return new_cookie


def get_captchaId():
    """Return a random 32-character captcha id made of digits and ASCII letters."""
    chars = string.digits + string.ascii_uppercase + string.ascii_lowercase
    # NOTE(review): the multiplier 61 means the last character ('z') is never
    # picked. Presumably this mirrors the site's own generator, so it is kept
    # as is; confirm before changing.
    return "".join(chars[int(random.random() * 61)] for _ in range(32))


def yzm(session, ctx2, new_cookie):
    """Download the captcha, have it recognised and submit it for validation.

    Best effort: any error is reported and whatever was obtained so far is
    returned.

    Args:
        session: ``requests`` session carrying the current cookies.
        ctx2: MiniRacer context exposing ``get_bzxr_hz`` and ``update_cookie``.
        new_cookie: Current cookie value.

    Returns:
        ``(captcha_str, captchaId)``; ``captcha_str`` is ``""`` when the
        captcha could not be fetched or recognised.
    """
    captcha_headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        "Referer": "http://zxgk.court.gov.cn/zhixing/",
        "Accept-Language": "zh-CN,zh;q=0.9"
    }
    captchaId = get_captchaId()
    print("captchaId ===>", captchaId)
    params = {
        "captchaId": captchaId,
        "random": str(random.random())
    }
    captcha_str = ""
    try:
        response_captcha = session.get(
            "http://zxgk.court.gov.cn/zhixing/captcha.do",
            headers=captcha_headers,
            params=params,
        )
        print("验证码的响应 ===>", response_captcha)
        with open("captcha.jpg", "wb") as f:
            f.write(response_captcha.content)
        if response_captcha.status_code == 200:
            # SECURITY: credentials should come from the environment; the
            # literal fallbacks only keep existing setups working and should
            # be removed from the source.
            chaojiying = Chaojiying_Client(
                os.environ.get("CHAOJIYING_USER", '15985724690'),
                os.environ.get("CHAOJIYING_PASSWORD", 'a520520a'),
                os.environ.get("CHAOJIYING_SOFT_ID", '923160'),
            )
            captcha_str = chaojiying.PostPic(response_captcha.content, 1005).get('pic_str')
            print("验证码的结果 ===>", captcha_str)

            # Activation step: let the JS build the validation URL.
            yzm_url = f"checkyzm.do?captchaId={captchaId}&pCode={captcha_str}"
            yzm_url = ctx2.call(
                "get_bzxr_hz", "GET", yzm_url, captchaId, captcha_str,
                str(int(time.time() * 1000)),
            )
            print("yzm_url ===>", yzm_url)
            # update_cookie already stores the new value on the session.
            new_cookie = update_cookie(ctx2, session, new_cookie)
            yzm_url = "http://zxgk.court.gov.cn" + yzm_url
            print("验证码的验证的url ===>", yzm_url)
            yzm_response = session.get(url=yzm_url, headers=headers, verify=False)
            print("验证码的验证 ===>", yzm_response.text)
    except Exception as exc:
        # Deliberately best effort, but no longer hides Ctrl-C or the cause.
        print("验证码error", exc)
    return captcha_str, captchaId


def get_sxmd(session, captcha_str, captchaId, ctx2):
    """Post the search request, then fetch one detail record.

    Args:
        session: ``requests`` session carrying the current cookies.
        captcha_str: Recognised captcha text.
        captchaId: Captcha id the text belongs to.
        ctx2: MiniRacer context exposing ``get_bzxr_hz``.

    Returns:
        ``True`` once both requests have been issued, which ends the caller's
        retry loop after a single pass.
    """
    search_headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "http://zxgk.court.gov.cn",
        "Referer": "http://zxgk.court.gov.cn/zhixing/",
        "Accept-Language": "zh-CN,zh;q=0.9"
    }
    url = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
    data = {
        "pName": "张三",
        "pCardNum": "",
        "selectCourtId": "0",
        "pCode": captcha_str,
        "captchaId": captchaId,
        "searchCourtName": "全国法院(包含地方各级法院)",
        "selectCourtArrange": "1",
        "currentPage": "1"
    }
    response = session.post(url=url, headers=search_headers, data=data, verify=False)
    print(response.text)
    print(response)

    currentTime = str(int(time.time() * 1000))
    url = f"newdetail?id=1582536987&j_captcha={captcha_str}&captchaId={captchaId}&_={currentTime}"
    new_url = ctx2.call("get_bzxr_hz", "GET", url, captchaId, captcha_str, currentTime)
    new_url = "http://zxgk.court.gov.cn" + new_url
    print(new_url)
    response = session.get(new_url, headers=search_headers, verify=False)
    print("response.text =====>", response.text)
    print(response)
    return True


if __name__ == "__main__":
    get_cookie_first("http://zxgk.court.gov.cn/")