import requests
import re,json,random,time
from py_mini_racer import MiniRacer
from urllib import parse
import ddddocr
# 用来获取cookie
class Get_Cookie():
    """Obtain and refresh the ``lqWVdQzgOVyaT`` cookie for zxgk.court.gov.cn.

    The flow implemented here has two steps:

    1. ``first_cookie`` requests the start page, expects HTTP 412, fills the
       ``1.js`` template with data scraped from that response, evaluates it in
       a throw-away ``MiniRacer`` context and stores the cookie returned by
       the template's ``get_cookie`` function.
    2. ``second_cookie`` requests the start page again, expects HTTP 200,
       fills the ``2.js`` template, evaluates it in the long-lived
       ``self.ctx200`` context and stores the cookie it returns.

    ``update_cookie`` can afterwards ask ``self.ctx200`` for a fresh cookie.
    The templates ``1.js`` and ``2.js`` are opened relative to the current
    working directory.
    """

    # Name of the cookie that is written into the session's cookie jar.
    COOKIE_NAME = "lqWVdQzgOVyaT"
    # Pulls the cookie value out of the string returned by the JS
    # ``get_cookie`` function.
    COOKIE_PATTERN = 'lqWVdQzgOVyaT=(.*?); path=/;'
    # Captures the ``src`` of the script tag that carries ``r='m'``.
    SCRIPT_SRC_PATTERN = '" src="(.*?)" r=\'m\'>'
    # NOTE(review): this pattern is an empty string in the file as received,
    # so ``re.findall`` only yields empty matches and the scraped "content"
    # is always "".  The real expression looks like it was lost (possibly by
    # markup stripping) - restore it from a known-good copy.
    CONTENT_PATTERN = r''
    # NOTE(review): the lazy group has nothing after it, so it always
    # captures "".  A terminator appears to be missing - confirm against a
    # known-good copy before relying on the extracted ``$_ts`` code.
    INLINE_TS_PATTERN = r"r='m'>(.*?)"
    # NOTE(review): also empty as received.  It defines no ``win_ts_url``
    # group, so ``second_cookie`` raises IndexError ("no such group") at
    # ``group("win_ts_url")`` until the pattern is restored.
    SECOND_SCRIPT_SRC_PATTERN = r''

    # Default timeout in seconds for every HTTP request; ``__init__`` may
    # override it per instance.  Without a timeout ``requests`` waits forever.
    timeout = 15

    def __init__(self, timeout=15):
        """Create the HTTP session and the persistent JS context.

        Args:
            timeout: Seconds passed as ``timeout`` to every ``requests`` call
                made by this object.  ``None`` disables the timeout.
        """
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Referer": "http://zxgk.court.gov.cn/",
            "Accept-Language": "zh-CN,zh;q=0.9"
        }
        self.timeout = timeout
        self.session = requests.session()
        self.initURL = "http://zxgk.court.gov.cn/"
        # Kept for the lifetime of the object: second_cookie() loads the JS
        # into it and update_cookie() calls back into the same context.
        self.ctx200 = MiniRacer()

    def _fetch(self, url, **kwargs):
        """GET *url* through the shared session with the standard headers and timeout."""
        return self.session.get(url, headers=self.headers, timeout=self.timeout, **kwargs)

    @staticmethod
    def _render_template(path, substitutions):
        """Read the JS template at *path* and apply *substitutions* in order.

        Each ``(placeholder, value)`` pair is applied with ``str.replace`` one
        after the other, so a later pair also rewrites text that an earlier
        pair inserted; the order of the pairs therefore matters.
        """
        with open(path, mode='r', encoding='utf-8') as f:
            code = f.read()
        for placeholder, value in substitutions:
            code = code.replace(placeholder, value)
        return code

    def _parse_cookie(self, cookie_string):
        """Return the cookie value contained in *cookie_string*.

        Raises:
            IndexError: if *cookie_string* holds no ``lqWVdQzgOVyaT=...; path=/;``
                entry.
        """
        found = re.findall(self.COOKIE_PATTERN, cookie_string)
        if not found:
            raise IndexError(
                "get_cookie() result contains no %s cookie: %r" % (self.COOKIE_NAME, cookie_string)
            )
        return found[0]

    def _store_cookie(self, value):
        """Remember *value* as the current cookie and put it into the session jar."""
        self.CurrentCookie = value
        self.session.cookies.update({self.COOKIE_NAME: value})

    def first_cookie(self):
        """Run the first step: turn the HTTP 412 response into the first cookie.

        Raises:
            SystemExit: with status 1 if the start page does not answer 412.
            IndexError: if one of the scraping patterns does not match.
        """
        response = self._fetch(self.initURL)
        if response.status_code != 412:
            print("第一次RS的cookie刷新 出错")
            # Non-zero status so that callers and shells can see the failure.
            raise SystemExit(1)
        print("第一次RS的cookie刷新", response)

        # Value substituted for ``window.content`` in the template.
        content = re.findall(self.CONTENT_PATTERN, response.text)[0].replace('"', '')
        # Relative path of the external script.
        win_ts_url = re.findall(self.SCRIPT_SRC_PATTERN, response.text)[0]
        # Inline script code: the second ``r='m'>`` match is used.
        win_ts = re.findall(self.INLINE_TS_PATTERN, response.text)[1]
        win_ts_url = parse.urljoin(self.initURL, win_ts_url)
        print("RS 412的JS的URL ===>", win_ts_url)

        first_InnerJs = self._fetch(win_ts_url, verify=False).text
        JSText = self._render_template('1.js', [
            ('win_ts', win_ts),
            ('first_InnerJs', first_InnerJs),
            ('window.content', str(content)),
        ])

        # The first step uses its own context, separate from self.ctx200.
        ctx = MiniRacer()
        ctx.eval(JSText)
        FirstCookie = self._parse_cookie(ctx.call("get_cookie"))
        print("FirstCookie ===>", FirstCookie)
        self._store_cookie(FirstCookie)
        print("RS的第一次cookie更新完毕")

    def second_cookie(self):
        """Run the second step: verify the first cookie and derive the next one.

        Raises:
            SystemExit: with status 1 if the start page does not answer 200.
            IndexError: if one of the scraping patterns does not match or
                lacks the ``win_ts_url`` group.
        """
        # A 200 response shows that the cookie from first_cookie() was accepted.
        response = self._fetch(self.initURL)
        if response.status_code != 200:
            print(self.session.cookies)
            print("第一次的200获取失败,退出程序 ===>",response)
            raise SystemExit(1)
        print("获得了第一次的200")
        print(response)

        # Value substituted for ``window.content`` in the template.
        second_content = re.findall(self.CONTENT_PATTERN, response.text)[0].replace('"', '')

        # Relative path of the external script; the last match wins.
        second_win_ts_url = ""
        for match in re.finditer(self.SECOND_SCRIPT_SRC_PATTERN, response.text, re.S):
            second_win_ts_url = match.group("win_ts_url")
        second_win_ts_url = parse.urljoin(self.initURL, second_win_ts_url)
        print("RS 200的JS的URL ===>", second_win_ts_url)

        second_InnerJs = self._fetch(second_win_ts_url, verify=False).text
        # Inline script code: the second ``r='m'>`` match is used.
        second_win_ts = re.findall(self.INLINE_TS_PATTERN, response.text)[1]
        JSText2 = self._render_template('2.js', [
            ('second_win_ts', second_win_ts),
            ('second_InnerJs', second_InnerJs),
            ('window.content', str(second_content)),
            ("CurrentCookie", self.CurrentCookie),
        ])

        self.ctx200.eval(JSText2)
        self._store_cookie(self._parse_cookie(self.ctx200.call("get_cookie")))
        print("self.CurrentCookie ===>", self.CurrentCookie)
        print("RS的第二次cookie更新完毕(200页面的cookie)")

    def update_cookie(self):
        """Ask the persistent JS context for a new cookie and store it.

        The current cookie is passed to the JS ``get_cookie`` function.

        Raises:
            IndexError: if the JS result contains no cookie entry; the stored
                cookie is left unchanged in that case.
        """
        raw = self.ctx200.call("get_cookie", self.CurrentCookie)
        self._store_cookie(self._parse_cookie(raw))
        print("self.CurrentCookie ===>", self.CurrentCookie)
        print("============================更新cookie完毕============================")

    def test_cookie(self):
        """Request the ``/zhixing/`` page with the current session and print the response."""
        URL = "http://zxgk.court.gov.cn/zhixing/"
        test_response = self._fetch(URL)
        print(test_response)

    def main(self):
        """Run both cookie steps and return ``(session, ctx200)`` for further use."""
        self.first_cookie()
        self.second_cookie()
        return self.session, self.ctx200
if __name__ == '__main__':
    # Script entry point: build the helper and run both cookie steps once.
    cookie_getter = Get_Cookie()
    cookie_getter.main()