123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
import json
import random
import re
import time
from urllib import parse

import requests
from py_mini_racer import MiniRacer

from chaojiying import Chaojiying_Client
# Browser-like request headers shared by the page and script fetches in this
# file. The Referer is the portal's landing page.
headers = {
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Referer": "http://zxgk.court.gov.cn/",
    "Accept-Language": "zh-CN,zh;q=0.9",
}
def get_cookie_first(url):
    """Drive the whole flow: obtain the ``lqWVdQzgOVyaT`` cookie, pass the captcha, query.

    Steps, in order:

    1. Fetch ``url`` (the original author notes this first visit is answered
       with HTTP 412), pull the ``<meta content=...>`` text, the external
       script and the inline script out of the page, splice them into the
       local template ``1.js`` and evaluate it to produce a first cookie.
    2. Fetch ``url`` again with that cookie, repeat the extraction against
       the new page, splice the pieces into the local template ``2.js`` and
       evaluate it to produce the cookie used from then on.
    3. Refresh the cookie and request the ``/zhixing/`` page once per second
       until it answers 200.
    4. Solve the captcha via ``yzm`` and call ``get_sxmd`` until it reports
       success.

    Args:
        url: Landing page of the portal. Relative script paths found in the
            page are resolved against it.

    Raises:
        IndexError: If a page (or the JS output) does not contain one of the
            expected markers, because the ``re.findall(...)[n]`` lookups are
            not guarded.
        OSError: If ``1.js`` or ``2.js`` cannot be read from the current
            working directory.

    Note:
        Both retry loops are unbounded.
    """
    session = requests.session()

    # ---- Phase 1: challenge page -------------------------------------
    html = session.get(url, headers=headers)
    print("第一次访问的结果 ===>", html)
    # The capture can include the attribute's closing quote, hence the strip.
    content = re.findall(r'<meta content="(.*?) r=\"m">', html.text)[0].replace('"', '')
    # Path of the external script, resolved against the page URL.
    script_path = re.findall('" src="(.*?)" r=\'m\'>', html.text)[0]
    script_url = parse.urljoin(url, script_path)
    # Fetched outside the session, i.e. without the session's cookies.
    inner_js = requests.get(script_url, headers=headers, verify=False).text
    # Body of the second r='m' script tag (the `$_ts` bootstrap, per the
    # original comment).
    win_ts = re.findall(r"r='m'>(.*?)</script>", html.text)[1]
    with open('1.js', mode='r', encoding='utf-8') as f:
        template = f.read()
    # Replacement order is significant: later needles could also occur in
    # text spliced in by earlier replacements.
    js_txt = (
        template.replace('window.win_ts', win_ts)
        .replace('window.zhiyuan', inner_js)
        .replace('window.content', content)
    )
    ctx = MiniRacer()
    ctx.eval(js_txt)
    cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', ctx.call("update_cookie"))[0]
    print("第一次cookie = ", cookie)
    session.cookies.update({'lqWVdQzgOVyaT': cookie})

    # ---- Phase 2: page served once the first cookie is accepted ------
    response = session.get(url, headers=headers)
    print("更新412的cookie后请求的结果 ===>", response)
    second_content = re.findall(r'<meta content="(.*?) r=\"m">', response.text)[0].replace('"', '')
    script_tag = re.compile(
        r'<script type="text/javascript" charset="utf-8" src="(?P<win_ts_url>.*?)" r=\'m\'></script>',
        re.S,
    )
    # When several script tags match, the last one wins; with no match the
    # path stays empty and urljoin() below yields `url` itself.
    second_script_path = ""
    for match in script_tag.finditer(response.text):
        second_script_path = match.group("win_ts_url")
    second_script_url = parse.urljoin(url, second_script_path)
    second_inner_js = requests.get(second_script_url, headers=headers, verify=False).text
    second_win_ts = re.findall(r"r='m'>(.*?)</script>", response.text)[1]
    with open('2.js', mode='r', encoding='utf-8') as f:
        template = f.read()
    js_txt2 = (
        template.replace('window.win_ts', second_win_ts)
        .replace('window.zhiyuan111', second_inner_js)
        .replace('window.content', second_content)
        .replace("window.aaaaa", cookie)
    )
    ctx2 = MiniRacer()
    ctx2.eval(js_txt2)
    # Cookie generated from the second page.
    new_cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', ctx2.call("get_cookie", cookie))[0]
    print("200更新cookie为: ", len(new_cookie), new_cookie)
    session.cookies.set("lqWVdQzgOVyaT", new_cookie)

    # ---- Phase 3: poll the search page until it answers 200 ----------
    search_page_url = "http://zxgk.court.gov.cn/zhixing/"
    while True:
        # A fresh cookie is generated before every attempt.
        new_cookie = update_cookie(ctx2, session, new_cookie)
        test_response = session.get(url=search_page_url, headers=headers, verify=False)
        print(test_response)
        if test_response.status_code == 200:
            break
        time.sleep(1)

    # ---- Phase 4: captcha, then the actual query ---------------------
    captcha_str, captchaId = yzm(session, ctx2, new_cookie)
    while True:
        new_cookie = update_cookie(ctx2, session, new_cookie)
        if get_sxmd(session, captcha_str, captchaId, ctx2):
            break
def update_cookie(ctx2, session, cookie):
    """Generate a fresh ``lqWVdQzgOVyaT`` value and store it on the session.

    Args:
        ctx2: JS context exposing an ``update_cookie(cookie)`` function whose
            return value contains ``lqWVdQzgOVyaT=<value>; p...``.
        session: Object whose ``cookies.set(name, value)`` receives the new
            value (a ``requests`` session in this file).
        cookie: The current cookie value, passed through to the JS function.

    Returns:
        The newly generated cookie value.

    Raises:
        IndexError: If the JS output does not contain the expected
            ``lqWVdQzgOVyaT=...; p`` fragment.
    """
    cookie_line = ctx2.call("update_cookie", cookie)
    new_cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', cookie_line)[0]
    print("更新cookie为: ", len(new_cookie), new_cookie)
    session.cookies.set("lqWVdQzgOVyaT", new_cookie)
    return new_cookie
def get_captchaId():
    """Return a random 32-character identifier made of ASCII digits and letters.

    Each character costs exactly one ``random.random()`` call, so the result
    is reproducible under ``random.seed``.
    """
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    # The multiplier is 61 while the alphabet holds 62 symbols, so the index
    # tops out at 60 and the final symbol ('z') is never produced. Kept
    # unchanged on purpose - presumably it mirrors the site's own generator;
    # confirm before "fixing" it.
    return "".join(alphabet[int(random.random() * 61)] for _ in range(32))
def yzm(session, ctx2, new_cookie):
    """Download a captcha, have it recognised, and submit the answer for checking.

    The image is requested from ``/zhixing/captcha.do`` under a freshly
    generated captcha id and written to ``captcha.jpg`` in the working
    directory. When the download answers 200, the image bytes are sent to the
    Chaojiying recognition service, the ``checkyzm.do`` URL is passed through
    the JS ``get_bzxr_hz`` function, the cookie is refreshed, and the
    resulting URL is requested.

    Args:
        session: ``requests`` session carrying the current cookies.
        ctx2: JS context exposing ``get_bzxr_hz`` and ``update_cookie``.
        new_cookie: Current cookie value, used as input for the refresh. The
            refreshed value is stored on ``session`` but is not returned.

    Returns:
        ``(captcha_str, captchaId)``. ``captcha_str`` is ``""`` when the
        download did not answer 200 or failed before recognition; otherwise
        it is whatever the recognition service reported under ``pic_str``.

    Note:
        Failures are reported on stdout and otherwise ignored (best effort);
        only ``Exception`` subclasses are caught, so Ctrl-C and
        ``SystemExit`` still propagate.
    """
    captcha_headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
        "Referer": "http://zxgk.court.gov.cn/zhixing/",
        "Accept-Language": "zh-CN,zh;q=0.9",
    }
    captchaId = get_captchaId()
    print("captchaId ===>", captchaId)
    params = {
        "captchaId": captchaId,
        "random": str(random.random()),
    }
    captcha_str = ""
    try:
        response_captcha = session.get(
            "http://zxgk.court.gov.cn/zhixing/captcha.do",
            headers=captcha_headers,
            params=params,
        )
        print("验证码的响应 ===>", response_captcha)
        # The body is saved even for non-200 answers, which helps debugging.
        with open("captcha.jpg", "wb") as f:
            f.write(response_captcha.content)
        if response_captcha.status_code == 200:
            # NOTE(review): service credentials are hard-coded here; they
            # should live in configuration or the environment.
            chaojiying = Chaojiying_Client('15985724690', 'a520520a', '923160')
            captcha_str = chaojiying.PostPic(response_captcha.content, 1005).get('pic_str')
            print("验证码的结果 ===>", captcha_str)
            # "Activation" step: submit the answer so it is validated
            # server-side.
            yzm_url = f"checkyzm.do?captchaId={captchaId}&pCode={captcha_str}"
            yzm_url = ctx2.call(
                "get_bzxr_hz", "GET", yzm_url, captchaId, captcha_str,
                str(int(time.time() * 1000)),
            )
            print("yzm_url ===>", yzm_url)
            # update_cookie() already stores the new value on the session.
            new_cookie = update_cookie(ctx2, session, new_cookie)
            yzm_url = "http://zxgk.court.gov.cn" + yzm_url
            print("验证码的验证的url ===>", yzm_url)
            yzm_response = session.get(url=yzm_url, headers=headers, verify=False)
            print("验证码的验证 ===>", yzm_response.text)
    except Exception as exc:
        # Best effort: report the cause and let the caller carry on.
        print("验证码error", repr(exc))
    return captcha_str, captchaId
def get_sxmd(session, captcha_str, captchaId, ctx2):
    """Run the sample search and fetch one detail record, printing both responses.

    First POSTs the search form to ``/zhixing/searchBzxr.do`` (name ``张三``,
    all courts, page 1), then builds a ``newdetail`` URL for the fixed record
    id ``1582536987``, passes it through the JS ``get_bzxr_hz`` function and
    GETs the result.

    Args:
        session: ``requests`` session carrying the current cookies.
        captcha_str: Recognised captcha text.
        captchaId: Identifier the captcha was requested under.
        ctx2: JS context exposing ``get_bzxr_hz``.

    Returns:
        ``True`` once both requests have been issued, so the caller's retry
        loop can stop. No check of the response content is made.
    """
    # Named distinctly so the module-level `headers` is not shadowed.
    ajax_headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "X-Requested-With": "XMLHttpRequest",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Origin": "http://zxgk.court.gov.cn",
        "Referer": "http://zxgk.court.gov.cn/zhixing/",
        "Accept-Language": "zh-CN,zh;q=0.9",
    }
    search_url = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
    form = {
        "pName": "张三",
        "pCardNum": "",
        "selectCourtId": "0",
        "pCode": captcha_str,
        "captchaId": captchaId,
        "searchCourtName": "全国法院(包含地方各级法院)",
        "selectCourtArrange": "1",
        "currentPage": "1",
    }
    response = session.post(url=search_url, headers=ajax_headers, data=form, verify=False)
    print(response.text)
    print(response)

    # The same millisecond timestamp goes into the query string and into the
    # JS call.
    currentTime = str(int(time.time() * 1000))
    detail_path = f"newdetail?id=1582536987&j_captcha={captcha_str}&captchaId={captchaId}&_={currentTime}"
    new_url = ctx2.call("get_bzxr_hz", "GET", detail_path, captchaId, captcha_str, currentTime)
    new_url = "http://zxgk.court.gov.cn" + new_url
    print(new_url)
    response = session.get(new_url, headers=ajax_headers, verify=False)
    print("response.text =====>", response.text)
    print(response)
    # Report success to the caller; the interactive-only exit() helper is not
    # meant for use in programs.
    return True
# Script entry: run the complete flow against the portal's landing page.
get_cookie_first("http://zxgk.court.gov.cn/")
|