"""Bootstrap the ``lqWVdQzgOVyaT`` cookie for http://zxgk.court.gov.cn/.

The site answers the first request with HTTP 412 and a JavaScript challenge
(called "RS" in this script's log messages).  The challenge is evaluated in an
embedded JS engine (``py_mini_racer``) using the local templates ``1.js`` and
``2.js``, and the resulting cookie value is stored on a ``requests`` session.
"""
import json
import random
import re
import sys
import time
from urllib import parse

import ddddocr
import requests
from py_mini_racer import MiniRacer


class CookieExtractionError(IndexError):
    """Raised when an expected fragment is missing from a response or JS result.

    Subclasses ``IndexError`` because the bare ``re.findall(...)[n]`` lookups
    this replaces raised ``IndexError``; existing handlers keep working.
    """


# Used to obtain the cookie
class Get_Cookie():
    """Drive the two-step cookie challenge and keep the cookie on a session.

    Typical use is ``session, ctx = Get_Cookie().main()``.

    Attributes:
        headers: Request headers sent with every HTTP call.
        session: The ``requests`` session that receives the cookie.
        initURL: Landing page that serves the challenge.
        ctx200: JS context that holds the state of the second (HTTP 200)
            challenge; ``update_cookie`` calls back into it.
        CurrentCookie: Most recent cookie value, or ``None`` before
            ``first_cookie`` has run.
        timeout: Per-request timeout in seconds (``None`` disables it).
    """

    # Extracts the cookie value from the string returned by the JS
    # ``get_cookie`` function.
    COOKIE_RE = re.compile(r'lqWVdQzgOVyaT=(.*?); path=/;')

    # Script ``src`` on the 412 page (pattern unchanged from the original).
    FIRST_SCRIPT_SRC_RE = re.compile('" src="(.*?)" r=\'m\'>')

    # NOTE(review): the next three patterns were empty / unterminated in the
    # reviewed source (the markup inside them appears to have been stripped),
    # which made the lookups return '' or raise "no such group".  They are
    # reconstructed from FIRST_SCRIPT_SRC_RE and the surrounding comments;
    # confirm them against a live response before relying on them.
    META_CONTENT_RE = re.compile(r'<meta\s+content="([^"]*)"')
    SECOND_SCRIPT_SRC_RE = re.compile(
        r'''src="(?P<win_ts_url>[^"]*)" r='m'>''', re.S)
    TS_SCRIPT_RE = re.compile(r"r='m'>(.*?)</script>")

    def __init__(self, timeout=30):
        """Create the HTTP session and the JS context.

        Args:
            timeout: Seconds to wait for each HTTP request.  Without a
                timeout ``requests`` may block forever on a stalled server.
        """
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Referer": "http://zxgk.court.gov.cn/",
            "Accept-Language": "zh-CN,zh;q=0.9"
        }
        self.session = requests.Session()
        self.initURL = "http://zxgk.court.gov.cn/"
        self.ctx200 = MiniRacer()
        self.timeout = timeout
        # Set by first_cookie(); initialised here so that calling the later
        # steps out of order fails with a clear message.
        self.CurrentCookie = None

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _nth_match(pattern, text, index, what):
        """Return match number ``index`` of ``pattern`` in ``text``.

        Raises:
            CookieExtractionError: if there are not enough matches.
        """
        found = re.findall(pattern, text)
        if len(found) <= index:
            raise CookieExtractionError(
                f"could not find {what}: expected at least {index + 1} "
                f"match(es), got {len(found)}")
        return found[index]

    def _get(self, url, **kwargs):
        """GET ``url`` on the shared session with the default headers."""
        return self.session.get(
            url, headers=self.headers, timeout=self.timeout, **kwargs)

    def _parse_cookie(self, raw):
        """Pull the cookie value out of the JS ``get_cookie`` result."""
        return self._nth_match(self.COOKIE_RE, raw, 0, "cookie value in JS result")

    def _store_cookie(self, value):
        """Remember ``value`` and install it on the session."""
        self.CurrentCookie = value
        cookies = {"lqWVdQzgOVyaT": self.CurrentCookie}
        self.session.cookies.update(cookies)

    def _require_cookie(self):
        """Fail early when no cookie has been produced yet."""
        if self.CurrentCookie is None:
            raise RuntimeError(
                "no cookie available yet; call first_cookie() first")

    # ------------------------------------------------------------------
    # public steps
    # ------------------------------------------------------------------
    def first_cookie(self):
        """Solve the HTTP 412 challenge and store the first cookie.

        Reads the template ``1.js`` from the current working directory; the
        template must define a ``get_cookie`` function and contain the
        placeholders ``win_ts``, ``first_InnerJs`` and ``window.content``.

        Raises:
            SystemExit: if the landing page does not answer with 412.
            CookieExtractionError: if the page or the JS result does not
                contain the expected fragments.
        """
        url = self.initURL
        response = self._get(url)
        if response.status_code == 412:
            print("第一次RS的cookie刷新", response)
        else:
            print("第一次RS的cookie刷新 出错")
            # Non-zero status: this is a failure, and sys.exit does not
            # depend on the interactive-only ``exit`` helper.
            sys.exit(1)

        # Extract the <meta> content value; any stray quotes are dropped.
        content = self._nth_match(
            self.META_CONTENT_RE, response.text, 0, "meta content on 412 page"
        ).replace('"', '')
        # Path of the external challenge script.
        win_ts_url = self._nth_match(
            self.FIRST_SCRIPT_SRC_RE, response.text, 0, "script src on 412 page")
        # Inline $_ts script: the second r='m' script on the page.
        win_ts = self._nth_match(
            self.TS_SCRIPT_RE, response.text, 1, "inline $_ts script on 412 page")

        # Build the absolute script URL.
        win_ts_url = parse.urljoin(self.initURL, win_ts_url)
        print("RS 412的JS的URL ===>", win_ts_url)

        # Download the external script.
        # NOTE(review): verify=False disables TLS verification if the script
        # URL turns out to be https; kept from the original.
        first_InnerJs = self._get(win_ts_url, verify=False).text

        with open('1.js', mode='r', encoding='utf-8') as f:
            code = f.read()
        # Replacements run in sequence, so a later placeholder name occurring
        # inside already-injected text would be substituted as well.
        JSText = (code.replace('win_ts', win_ts)
                      .replace('first_InnerJs', first_InnerJs)
                      .replace('window.content', str(content)))

        ctx = MiniRacer()
        ctx.eval(JSText)
        FirstCookie = self._parse_cookie(ctx.call("get_cookie"))
        print("FirstCookie ===>", FirstCookie)

        self._store_cookie(FirstCookie)
        print("RS的第一次cookie更新完毕")

    def second_cookie(self):
        """Verify the first cookie (expects HTTP 200) and compute the next one.

        Reads the template ``2.js`` from the current working directory; the
        template must define ``get_cookie`` and contain the placeholders
        ``second_win_ts``, ``second_InnerJs``, ``window.content`` and
        ``CurrentCookie``.  The script is evaluated in ``self.ctx200`` so that
        ``update_cookie`` can call it again later.

        Raises:
            RuntimeError: if ``first_cookie`` has not produced a cookie.
            SystemExit: if the landing page does not answer with 200.
            CookieExtractionError: if the page or the JS result does not
                contain the expected fragments.
        """
        self._require_cookie()

        # Check that the first cookie is accepted.
        url = self.initURL
        response = self._get(url)
        if response.status_code == 200:
            print("获得了第一次的200")
        else:
            print(self.session.cookies)
            print("第一次的200获取失败,退出程序 ===>", response)
            sys.exit(1)
        print(response)

        # Extract the <meta> content value; any stray quotes are dropped.
        second_content = self._nth_match(
            self.META_CONTENT_RE, response.text, 0, "meta content on 200 page"
        ).replace('"', '')

        # Path of the external challenge script; as before, the last match
        # on the page wins.
        candidates = [
            m.group("win_ts_url")
            for m in self.SECOND_SCRIPT_SRC_RE.finditer(response.text)
        ]
        if not candidates:
            # Previously this fell through with an empty path, which made
            # urljoin return the landing page and evaluated HTML as JS.
            raise CookieExtractionError(
                "could not find script src on 200 page")
        second_win_ts_url = candidates[-1]

        # Build the absolute script URL.
        second_win_ts_url = parse.urljoin(self.initURL, second_win_ts_url)
        print("RS 200的JS的URL ===>", second_win_ts_url)

        # Download the external script.
        # NOTE(review): verify=False kept from the original (see first_cookie).
        second_InnerJs = self._get(second_win_ts_url, verify=False).text

        # Inline $_ts script: the second r='m' script on the page.
        second_win_ts = self._nth_match(
            self.TS_SCRIPT_RE, response.text, 1, "inline $_ts script on 200 page")

        with open('2.js', mode='r', encoding='utf-8') as f:
            code = f.read()
        # Sequential replacement, same caveat as in first_cookie.
        JSText2 = (code.replace('second_win_ts', second_win_ts)
                       .replace('second_InnerJs', second_InnerJs)
                       .replace('window.content', str(second_content))
                       .replace("CurrentCookie", self.CurrentCookie))

        self.ctx200.eval(JSText2)
        new_cookie = self._parse_cookie(self.ctx200.call("get_cookie"))
        self.CurrentCookie = new_cookie
        print("self.CurrentCookie ===>", self.CurrentCookie)

        self._store_cookie(new_cookie)
        print("RS的第二次cookie更新完毕(200页面的cookie)")

    def update_cookie(self):
        """Refresh the cookie by calling ``get_cookie`` in ``self.ctx200``.

        Passes the current cookie value to the JS function and stores the
        value it returns.  Requires ``second_cookie`` to have loaded the
        script into ``self.ctx200``.

        Raises:
            RuntimeError: if no cookie has been produced yet.
            CookieExtractionError: if the JS result has no cookie value.
        """
        self._require_cookie()
        refreshed = self._parse_cookie(
            self.ctx200.call("get_cookie", self.CurrentCookie))
        self.CurrentCookie = refreshed
        print("self.CurrentCookie ===>", self.CurrentCookie)
        self._store_cookie(refreshed)
        print("============================更新cookie完毕============================")

    def test_cookie(self):
        """Request the ``/zhixing/`` page with the session and print the response."""
        URL = "http://zxgk.court.gov.cn/zhixing/"
        test_response = self._get(URL)
        print(test_response)

    def main(self):
        """Run both challenge steps.

        Returns:
            A ``(session, ctx200)`` tuple: the ``requests`` session carrying
            the cookie and the JS context needed by ``update_cookie``.
        """
        self.first_cookie()
        self.second_cookie()
        return self.session, self.ctx200


if __name__ == '__main__':
    Get_Cookie().main()