123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
- import requests
- import re,json,random,time
- from py_mini_racer import MiniRacer
- from urllib import parse
- import ddddocr
# Used to obtain the cookie
class Get_Cookie():
    """Obtain and refresh the ``lqWVdQzgOVyaT`` cookie for zxgk.court.gov.cn.

    The flow implemented here has two steps against ``self.initURL``:

    1. ``first_cookie``: the first request is expected to answer HTTP 412.
       Data scraped from that page is spliced into the local template
       ``1.js`` and evaluated in a fresh MiniRacer context; its
       ``get_cookie`` function yields the first cookie.
    2. ``second_cookie``: with that cookie set, the request is expected to
       answer HTTP 200. The same scraping is repeated with template ``2.js``
       in the long-lived ``self.ctx200`` context, which ``update_cookie``
       can later call again to refresh the cookie.

    Both templates are opened relative to the current working directory.
    """

    # Name of the cookie that the JS ``get_cookie`` function produces.
    COOKIE_NAME = "lqWVdQzgOVyaT"

    def __init__(self, timeout=30):
        """Create the HTTP session and the JS context used for the 200 page.

        Args:
            timeout: Seconds passed as ``timeout=`` to every HTTP request so
                a stalled connection cannot block forever.
        """
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Referer": "http://zxgk.court.gov.cn/",
            "Accept-Language": "zh-CN,zh;q=0.9"
        }
        self.session = requests.session()
        self.initURL = "http://zxgk.court.gov.cn/"
        self.ctx200 = MiniRacer()
        self.timeout = timeout
        # Set by first_cookie() and replaced by second_cookie()/update_cookie().
        self.CurrentCookie = None

    @staticmethod
    def _nth_match(pattern, text, what, index=0):
        """Return match number ``index`` of ``pattern`` in ``text``.

        Raises:
            IndexError: if there are not enough matches. This is the same
                exception type as plain ``re.findall(...)[index]``, but the
                message names what was being looked for.
        """
        found = re.findall(pattern, text)
        if len(found) <= index:
            raise IndexError(
                "could not find %s in the response: needed match #%d, found %d"
                % (what, index + 1, len(found))
            )
        return found[index]

    @staticmethod
    def _fill_template(template, substitutions):
        """Replace every placeholder in ``template`` in a single pass.

        Chained ``str.replace`` calls re-scan text inserted by earlier calls,
        so a placeholder name occurring inside an injected value would be
        replaced too. Substituting in one pass only touches the template text.

        Args:
            template: Template source text.
            substitutions: Mapping of placeholder -> replacement value.

        Returns:
            The template with all placeholders replaced.
        """
        if not substitutions:
            return template
        # Longest first so a placeholder that prefixes another cannot win.
        ordered = sorted(substitutions, key=len, reverse=True)
        matcher = re.compile("|".join(re.escape(key) for key in ordered))
        # A function replacement keeps backslashes in the values literal.
        return matcher.sub(lambda m: str(substitutions[m.group(0)]), template)

    @classmethod
    def _extract_cookie(cls, set_cookie_text):
        """Pull the cookie value out of the string returned by ``get_cookie``.

        Raises:
            IndexError: if the text does not contain the expected cookie.
        """
        return cls._nth_match(
            re.escape(cls.COOKIE_NAME) + '=(.*?); path=/;',
            set_cookie_text,
            "cookie " + cls.COOKIE_NAME,
        )

    def _store_cookie(self, set_cookie_text, label):
        """Record the new cookie value, print it, and put it on the session."""
        self.CurrentCookie = self._extract_cookie(set_cookie_text)
        print(label, self.CurrentCookie)
        self.session.cookies.update({self.COOKIE_NAME: self.CurrentCookie})

    def first_cookie(self):
        """Run the first step: expect HTTP 412 and compute the first cookie.

        Raises:
            SystemExit: with status 1 when the response is not HTTP 412.
            IndexError: when the page lacks the expected tags, or the JS
                result lacks the cookie.
        """
        url = self.initURL
        response = self.session.get(url, headers=self.headers, timeout=self.timeout)
        if response.status_code == 412:
            print("第一次RS的cookie刷新", response)
        else:
            print("第一次RS的cookie刷新 出错")
            # Non-zero status so callers and shells can see the failure.
            raise SystemExit(1)
        # Value of the <meta content=...> tag.
        content = self._nth_match(
            r'<meta content="(.*?) r=\"m">', response.text, "meta content tag"
        ).replace('"', '')
        # Path of the external JS file.
        win_ts_url = self._nth_match(
            '" src="(.*?)" r=\'m\'>', response.text, "external script src"
        )
        # Inline $_ts script: the second r='m' script body on the page.
        win_ts = self._nth_match(
            r"r='m'>(.*?)</script>", response.text, "inline $_ts script", index=1
        )
        # Build the absolute JS URL.
        win_ts_url = parse.urljoin(self.initURL, win_ts_url)
        print("RS 412的JS的URL ===>", win_ts_url)
        # Download the external JS.
        first_InnerJs = self.session.get(
            win_ts_url, headers=self.headers, verify=False, timeout=self.timeout
        ).text
        with open('1.js', mode='r', encoding='utf-8') as f:
            code = f.read()
        JSText = self._fill_template(code, {
            'win_ts': win_ts,
            'first_InnerJs': first_InnerJs,
            'window.content': str(content),
        })
        # The 412 step uses its own throwaway JS context.
        ctx = MiniRacer()
        ctx.eval(JSText)
        self._store_cookie(ctx.call("get_cookie"), "FirstCookie ===>")
        print("RS的第一次cookie更新完毕")

    def second_cookie(self):
        """Run the second step: expect HTTP 200 and compute the next cookie.

        Raises:
            SystemExit: with status 1 when the response is not HTTP 200.
            IndexError: when the page lacks the expected tags, or the JS
                result lacks the cookie.
        """
        # Check that the cookie from the first step is accepted.
        url = self.initURL
        response = self.session.get(url, headers=self.headers, timeout=self.timeout)
        if response.status_code == 200:
            print("获得了第一次的200")
        else:
            print(self.session.cookies)
            print("第一次的200获取失败,退出程序 ===>",response)
            raise SystemExit(1)
        print(response)
        # Value of the <meta content=...> tag.
        second_content = self._nth_match(
            r'<meta content="(.*?) r=\"m">', response.text, "meta content tag"
        ).replace('"', '')
        # Path of the external JS file; the last matching tag wins.
        obj = re.compile(r'<script type="text/javascript" charset="utf-8" src="(?P<win_ts_url>.*?)" r=\'m\'></script>',
                         re.S)
        script_paths = [m.group("win_ts_url") for m in obj.finditer(response.text)]
        if not script_paths:
            # Without this check an empty path would resolve to the home page
            # and its HTML would be evaluated as if it were the script.
            raise IndexError("could not find external script src in the response")
        second_win_ts_url = script_paths[-1]
        # Build the absolute JS URL.
        second_win_ts_url = parse.urljoin(self.initURL, second_win_ts_url)
        print("RS 200的JS的URL ===>", second_win_ts_url)
        # Download the external JS.
        second_InnerJs = self.session.get(
            second_win_ts_url, headers=self.headers, verify=False, timeout=self.timeout
        ).text
        # Inline $_ts script: the second r='m' script body on the page.
        second_win_ts = self._nth_match(
            r"r='m'>(.*?)</script>", response.text, "inline $_ts script", index=1
        )
        with open('2.js', mode='r', encoding='utf-8') as f:
            code = f.read()
        JSText2 = self._fill_template(code, {
            'second_win_ts': second_win_ts,
            'second_InnerJs': second_InnerJs,
            'window.content': str(second_content),
            "CurrentCookie": self.CurrentCookie,
        })
        # Evaluated in the persistent context so update_cookie() can reuse it.
        self.ctx200.eval(JSText2)
        self._store_cookie(self.ctx200.call("get_cookie"), "self.CurrentCookie ===>")
        print("RS的第二次cookie更新完毕(200页面的cookie)")

    def update_cookie(self):
        """Refresh the cookie by calling ``get_cookie`` with the current one.

        Requires ``second_cookie`` to have loaded the script into
        ``self.ctx200`` first.
        """
        refreshed = self.ctx200.call("get_cookie", self.CurrentCookie)
        self._store_cookie(refreshed, "self.CurrentCookie ===>")
        print("============================更新cookie完毕============================")

    def test_cookie(self):
        """Request the /zhixing/ page with the session and print the response."""
        URL = "http://zxgk.court.gov.cn/zhixing/"
        test_response = self.session.get(url=URL, headers=self.headers, timeout=self.timeout)
        print(test_response)

    def main(self):
        """Run both cookie steps.

        Returns:
            Tuple ``(session, ctx200)``: the session carrying the cookie and
            the JS context that ``update_cookie`` calls.
        """
        self.first_cookie()
        self.second_cookie()
        return self.session, self.ctx200
# Script entry point: build the helper and run the full cookie flow once.
if __name__ == '__main__':
    cookie_getter = Get_Cookie()
    cookie_getter.main()
|