# RS.py
import json
import random
import re
import sys
import time
from urllib import parse

import ddddocr
import requests
from py_mini_racer import MiniRacer
# Used to obtain the cookie
  7. class Get_Cookie():
  8. def __init__(self):
  9. self.headers = {
  10. "Connection": "keep-alive",
  11. "Pragma": "no-cache",
  12. "Cache-Control": "no-cache",
  13. "Upgrade-Insecure-Requests": "1",
  14. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  15. "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
  16. "Referer": "http://zxgk.court.gov.cn/",
  17. "Accept-Language": "zh-CN,zh;q=0.9"
  18. }
  19. self.session = requests.session()
  20. self.initURL = "http://zxgk.court.gov.cn/"
  21. self.ctx200 = MiniRacer()
  22. def first_cookie(self):
  23. url = self.initURL
  24. response = self.session.get(url, headers=self.headers)
  25. if response.status_code == 412:
  26. print("第一次RS的cookie刷新", response)
  27. else:
  28. print("第一次RS的cookie刷新 出错")
  29. exit()
  30. # 拿出content标签
  31. content = re.findall(r'<meta content="(.*?) r=\"m">', response.text)[0].replace('"', '')
  32. # 获取JS路径后缀
  33. win_ts_url = re.findall('" src="(.*?)" r=\'m\'>', response.text)[0]
  34. # 匹配出$_ts
  35. win_ts = re.findall(r"r='m'>(.*?)</script>", response.text)[1]
  36. # 拼接url
  37. win_ts_url = parse.urljoin(self.initURL, win_ts_url)
  38. print("RS 412的JS的URL ===>", win_ts_url)
  39. # 提取出来js
  40. first_InnerJs = self.session.get(win_ts_url, headers=self.headers, verify=False).text
  41. with open('1.js', mode='r', encoding='utf-8') as f:
  42. code = f.read()
  43. JSText = code.replace('win_ts', win_ts).replace('first_InnerJs', first_InnerJs).replace('window.content',
  44. str(content))
  45. ctx = MiniRacer()
  46. ctx.eval(JSText)
  47. FirstCookie = re.findall('lqWVdQzgOVyaT=(.*?); path=/;', ctx.call("get_cookie"))[0]
  48. print("FirstCookie ===>", FirstCookie)
  49. self.CurrentCookie = FirstCookie
  50. cookies = {"lqWVdQzgOVyaT": self.CurrentCookie}
  51. self.session.cookies.update(cookies)
  52. print("RS的第一次cookie更新完毕")
  53. def second_cookie(self):
  54. # 验证第一次的cookie是否有效
  55. url = self.initURL
  56. response = self.session.get(url, headers=self.headers)
  57. if response.status_code == 200:
  58. print("获得了第一次的200")
  59. else:
  60. print(self.session.cookies)
  61. print("第一次的200获取失败,退出程序 ===>",response)
  62. exit()
  63. print(response)
  64. # print(response.text)
  65. # 拿出content标签
  66. second_content = re.findall(r'<meta content="(.*?) r=\"m">', response.text)[0].replace('"', '')
  67. # 获取JS路径后缀
  68. obj = re.compile(r'<script type="text/javascript" charset="utf-8" src="(?P<win_ts_url>.*?)" r=\'m\'></script>',
  69. re.S)
  70. second_win_ts_url = ""
  71. result = obj.finditer(response.text)
  72. for i in result:
  73. second_win_ts_url = i.group("win_ts_url")
  74. # 拼接JS的URL
  75. second_win_ts_url = parse.urljoin(self.initURL, second_win_ts_url)
  76. print("RS 200的JS的URL ===>", second_win_ts_url)
  77. # 提取JS
  78. second_InnerJs = self.session.get(second_win_ts_url, headers=self.headers, verify=False).text
  79. # 匹配出$_ts
  80. second_win_ts = re.findall(r"r='m'>(.*?)</script>", response.text)[1]
  81. with open('2.js', mode='r', encoding='utf-8') as f:
  82. code = f.read()
  83. JSText2 = code.replace('second_win_ts', second_win_ts).replace('second_InnerJs', second_InnerJs).replace(
  84. 'window.content', str(second_content)).replace("CurrentCookie", self.CurrentCookie)
  85. self.ctx200.eval(JSText2)
  86. self.CurrentCookie = re.findall('lqWVdQzgOVyaT=(.*?); path=/;', self.ctx200.call("get_cookie"))[0]
  87. print("self.CurrentCookie ===>", self.CurrentCookie)
  88. cookies = {"lqWVdQzgOVyaT": self.CurrentCookie}
  89. self.session.cookies.update(cookies)
  90. print("RS的第二次cookie更新完毕(200页面的cookie)")
  91. def update_cookie(self):
  92. self.CurrentCookie = re.findall('lqWVdQzgOVyaT=(.*?); path=/;', self.ctx200.call("get_cookie",self.CurrentCookie))[0]
  93. print("self.CurrentCookie ===>", self.CurrentCookie)
  94. cookies = {"lqWVdQzgOVyaT": self.CurrentCookie}
  95. self.session.cookies.update(cookies)
  96. print("============================更新cookie完毕============================")
  97. def test_cookie(self):
  98. URL = "http://zxgk.court.gov.cn/zhixing/"
  99. test_response = self.session.get(url=URL,headers=self.headers)
  100. print(test_response)
  101. # print(test_response.text)
  102. def main(self):
  103. self.first_cookie()
  104. self.second_cookie()
  105. #self.update_cookie()
  106. # self.test_cookie()
  107. return self.session, self.ctx200
  108. if __name__ == '__main__':
  109. Get_Cookie().main()