# Kaiting.py

# from src.plugins.CreditImportExport.CustomsImportExport.CustomsImportExportDetail import *
import json
import logging
import random
import re
import time
from urllib import parse

import ddddocr
import requests

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
  8. class Ctu_cookie:
  9. def __init__(self):
  10. self.headers = {
  11. "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
  12. "Accept-Language":"zh-CN,zh;q=0.9",
  13. "Cache-Control": "no-cache",
  14. "Connection": "keep-alive",
  15. "Pragma": "no-cache",
  16. "Upgrade-Insecure-Requests": "1",
  17. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
  18. }
  19. self.headers_api = {
  20. "Content-Type": "application/json",
  21. "Accept": "application/json"
  22. }
  23. self.index_url = "http://zxgk.court.gov.cn/zhixing/"
  24. self.url_search = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
  25. self.session = requests.session()
  26. self.cookie =None
  27. self.second_content=None
  28. self.second_innerjs=None
  29. self.second_win_ts =None
  30. def initial(self, options, proxy_queue, index):
  31. logging.debug(f"{self.__class__.__name__}_{proxy_queue}_{index}")
  32. self._options = options
  33. def get_html_202(self, url):
  34. self.session = requests.session()
  35. html = self.session.get(url, headers=self.headers)
  36. content = re.findall(r'<meta content="(.*?) r=\"m">', html.text)[0].replace('"', '')
  37. win_ts_url = re.findall('" src="(.*?)" r=\'m\'>', html.text)[0]
  38. win_ts_url = parse.urljoin(url, win_ts_url)
  39. innerjs = requests.get(win_ts_url, headers=self.headers, verify=False).text
  40. win_ts = re.findall(r"r='m'>(.*?)</script>", html.text, re.S)[1]
  41. return content, innerjs, win_ts
  42. def get_html_200(self, response):
  43. second_content = re.findall(r'<meta content="(.*?) r=\"m">', response.text)[0].replace('"', '')
  44. second_win_ts_url = 'http://zxgk.court.gov.cn/U52nf4AkCaDm/fYlbxzjRpgxD.11afee1.js'
  45. second_innerjs = self.session.get(second_win_ts_url, headers=self.headers, verify=False).text
  46. second_win_ts = re.findall(r"r='m'>(.*?)</script>", response.text)[1]
  47. return second_content, second_innerjs, second_win_ts
  48. def update(self):
  49. """
  50. #刷新cookie
  51. """
  52. data_update = {
  53. 'content': self.second_content,
  54. 'innerjs': self.second_innerjs,
  55. 'win_ts': self.second_win_ts,
  56. 'cookie': self.cookie,
  57. }
  58. result = requests.post('http://127.0.0.1:8006/rs_update_cookie', json=data_update,headers=self.headers_api)
  59. if result.status_code == 200:
  60. self.cookie = json.loads(result.text)['cookie']
  61. self.session.cookies.update({'lqWVdQzgOVyaT':self.cookie})
  62. logging.info(f"更新cookie_length:{len(self.cookie)},cookie:{self.cookie}")
  63. else:
  64. logging.error(f"更新cookie失败,状态码:{result.status_code}")
  65. def get_init_cookie(self, index_url):
  66. """
  67. # 初始化cookie
  68. """
  69. #202
  70. for ret in range(5):
  71. try:
  72. content,innerjs,win_ts = self.get_html_202(index_url)
  73. data = {
  74. 'content': content,
  75. 'innerjs':innerjs,
  76. 'win_ts':win_ts,
  77. }
  78. result = requests.post('http://127.0.0.1:8006/rs_202', json=data, headers=self.headers_api)
  79. self.cookie = json.loads(result.text)['cookie']
  80. self.session.cookies.update({'lqWVdQzgOVyaT':self.cookie})
  81. response = self.session.get(index_url, headers=self.headers)
  82. logging.info(f"412-200状态码:{response.status_code},第一次cookie:{len(self.cookie)},{self.cookie}")
  83. #200
  84. self.second_content,self.second_innerjs,self.second_win_ts = self.get_html_200(response)
  85. break
  86. except Exception as e:
  87. logging.error(f"请求首页报错,{e}")
  88. def get_captchaId(self):
  89. chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
  90. 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  91. 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
  92. 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
  93. 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  94. 'x', 'y', 'z']
  95. nums = ""
  96. for i in range(32):
  97. ids = int(random.random() * 61)
  98. nums += chars[ids]
  99. return nums
  100. def get_img(self):
  101. """
  102. 验证码部分
  103. """
  104. while True:
  105. self.update()
  106. captchaId = self.get_captchaId()
  107. params = {
  108. "captchaId": captchaId,
  109. "random": str(random.random())
  110. }
  111. try:
  112. response = self.session.get("http://zxgk.court.gov.cn/zhixing/captcha.do", headers=self.headers, params=params)
  113. print("验证码状态码:", response.status_code)
  114. if response.status_code == 200:
  115. ocr = ddddocr.DdddOcr()
  116. yzm_code = ocr.classification(response.content)
  117. return yzm_code,captchaId
  118. except Exception as e:
  119. logging.error(f"请求验证码失败,{e}")
  120. def request_(self):
  121. """
  122. 业务请求
  123. """
  124. for ret in range(6):
  125. yzm_code, captchaId = self.get_img()
  126. self.update()
  127. try:
  128. data = {
  129. "pName": "张三",
  130. "pCardNum": "",
  131. "selectCourtId": "0",
  132. "pCode": yzm_code,
  133. "captchaId": captchaId,
  134. "searchCourtName": "全国法院(包含地方各级法院)",
  135. "selectCourtArrange": "1",
  136. "currentPage": "1"
  137. }
  138. response = self.session.post(self.url_search, headers=self.headers, data=data, timeout=10)
  139. if response.status_code == 200:
  140. return response
  141. elif response.status_code == 502:
  142. print(f"请求搜索页状态码不正确,{response.status_code}")
  143. elif response.status_code == 500:
  144. time.sleep(3)
  145. print(f"对方服务器出错,延时3s,{response.status_code}")
  146. else:
  147. self.get_init_cookie("http://zxgk.court.gov.cn/zhixing/")
  148. print(f"该cookie已失效,重新走一遍412-200流程,{response.status_code}")
  149. except Exception as e:
  150. logging.error(f"请求搜索页报错,{e}")
  151. def get_html(self):
  152. """
  153. 开始拿数据
  154. """
  155. self.get_init_cookie(self.index_url)
  156. for j in range(1, 20):
  157. res = self.request_()
  158. print(f"请求数据{j}页状态码:{res.status_code}")
  159. print(res.text, "\n")
  160. def process(self,condition):
  161. self.get_html()
  162. if __name__ == '__main__':
  163. ctu_cookie = Ctu_cookie()
  164. ctu_cookie.process({})