# shixin.py (7.2 KB)
  1. import json
  2. import time
  3. from urllib import parse
  4. import ddddocr
  5. import requests
  6. from rs_zxgk.shixin.RS import Get_Cookie
  7. from rs_zxgk.shixin.Captcha import Init_Captcha
  8. import re
  class Get_Info:
      """Crawl list and detail records from zxgk.court.gov.cn.

      The crawler takes its HTTP session, a ``ctx200`` object and proxy
      settings from ``Get_Cookie().main()``, recognises the site captcha with
      ddddocr, pages through the list endpoint for every keyword in
      ``self.names`` and appends each detail response as one line of
      ``test.txt``.

      NOTE(review): ``ctx200`` is only used through ``ctx200.call(name, ...)``,
      whose return value is appended to the site origin to form a request URL.
      Presumably it is a JS execution context that builds signed URL paths --
      confirm against ``rs_zxgk.shixin.RS.Get_Cookie``.
      """

      # Seconds to wait before every list-page request.
      REQUEST_DELAY = 0.5
      # How many times a failed list page is re-requested before it is skipped.
      MAX_LIST_RETRIES = 3

      def __init__(self):
          """Create the session, OCR engine and output file used by the crawl.

          The output file ``test.txt`` is opened (and truncated) here and is
          closed by :meth:`main`.
          """
          self.Get_Cookie = Get_Cookie()
          self.session, self.ctx200, self.proxy = self.Get_Cookie.main()
          self.updateCookie = self.Get_Cookie.update_cookie
          self.initURL = "http://zxgk.court.gov.cn/shixin"
          self.CaptchaCheckURL = "http://zxgk.court.gov.cn/shixin/checkyzm.do"
          self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
          self.DetailURL = "http://zxgk.court.gov.cn/zhixing/newdetail"
          self.headers = {
              "Connection": "keep-alive",
              "Pragma": "no-cache",
              "Cache-Control": "no-cache",
              "Accept": "application/json, text/javascript, */*; q=0.01",
              "X-Requested-With": "XMLHttpRequest",
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
              "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
              "Origin": "http://zxgk.court.gov.cn",
              "Referer": "http://zxgk.court.gov.cn/zhixing/",
              "Accept-Language": "zh-CN,zh;q=0.9"
          }
          # Captcha state; filled in by init_captcha() once a captcha image
          # has been downloaded and recognised.
          self.CaptchaURL = None
          self.captchaId = None
          self.MyRandom = None
          self.pCode = None
          self.ocr = ddddocr.DdddOcr()
          # Page count reported by the most recent successful list request.
          self.totalPage = 0
          # Maps record id -> caseCode for the list page waiting to be detailed.
          self.Id_CaseCode = {}
          self.names = ["小明", "小白", "张三", "王五", "张伟"]
          self.file = open("test.txt", mode="w", encoding="utf-8")

      def init_page(self):
          """Request the landing page, refreshing the cookie before and after.

          On HTTP 412 a message is printed and the second refresh is skipped.
          """
          self.updateCookie()
          response = self.session.get(url=self.initURL, headers=self.headers, proxies=self.proxy)
          if response.status_code == 412:
              print("cookie失效,重新获取ts")
              return
          self.updateCookie()

      def init_captcha(self):
          """Download a new captcha image and store its OCR result.

          The image is also written to ``captcha.jpg``. The captcha attributes
          (``CaptchaURL``, ``captchaId``, ``MyRandom``, ``pCode``) are only
          replaced after the download succeeded, so ``captchaId`` and
          ``pCode`` always describe the same captcha.

          Returns:
              True when a captcha was downloaded and recognised, False when
              the server answered HTTP 412.
          """
          captcha_url, captcha_id, my_random = Init_Captcha().main()
          captcha_headers = {
              "Connection": "keep-alive",
              "Pragma": "no-cache",
              "Cache-Control": "no-cache",
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
              "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
              "Referer": "http://zxgk.court.gov.cn/zhixing/",
              "Accept-Language": "zh-CN,zh;q=0.9"
          }
          response = self.session.get(url=captcha_url, headers=captcha_headers, proxies=self.proxy)
          if response.status_code == 412:
              print("cookie失效,重新获取ts")
              return False
          with open("captcha.jpg", 'wb') as f:
              f.write(response.content)
          self.CaptchaURL, self.captchaId, self.MyRandom = captcha_url, captcha_id, my_random
          self.pCode = self.ocr.classification(response.content)
          self.updateCookie()
          return True

      def check_Captcha(self):
          """Ask the server whether the stored captcha answer is accepted.

          Returns:
              True when the first line of the response body is ``"1"``;
              False otherwise, including when no captcha has been recognised
              yet (``captchaId`` or ``pCode`` is still None).

          Raises:
              SystemExit: with code 0 when the server answers HTTP 412.
          """
          self.updateCookie()
          if self.captchaId is None or self.pCode is None:
              # No captcha was ever downloaded successfully; building the URL
              # below would fail with "str + None".
              return False
          url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode + ""
          checkURL = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
          checkURL = "http://zxgk.court.gov.cn" + checkURL
          response = self.session.get(url=checkURL, headers=self.headers, proxies=self.proxy)
          if response.status_code == 412:
              print("cookie失效,重新获取ts")
              # Same effect as the builtin exit(0), without relying on `site`.
              raise SystemExit(0)
          return response.text.split("\n")[0] == "1"

      def check_yzm(self):
          """Fetch and verify captchas until the server accepts one.

          The loop has no upper bound; the cookie is refreshed after every
          attempt.
          """
          while True:
              self.init_captcha()
              verified = self.check_Captcha()
              self.updateCookie()
              if verified:
                  break
              print("验证失败,正在进行重新请求验证码并验证")

      def get_list(self, pname, currentPage):
          """Request one list page and collect its ``id -> caseCode`` pairs.

          Args:
              pname: keyword sent as the ``pName`` form field.
              currentPage: 1-based page number to request.

          Returns:
              True when the page was parsed; ``self.totalPage`` and
              ``self.Id_CaseCode`` are updated in that case. False when the
              server answered HTTP 412, or when the body could not be parsed
              (a new captcha is then obtained through :meth:`check_yzm`).
          """
          url = "http://zxgk.court.gov.cn"
          url = url + self.ctx200.call("get_list", "POST", "searchSX.do")
          data = {
              "pName": pname,
              "pCardNum": "",
              "pProvince": "0",
              "pCode": self.pCode,
              "captchaId": self.captchaId,
              "currentPage": str(currentPage)
          }
          response = self.session.post(url=url, headers=self.headers, data=data, proxies=self.proxy)
          if response.status_code == 412:
              print("cookie失效,重新获取ts")
              return False
          try:
              page = json.loads(response.text)[0]
              total = page.get("totalPage")
              entries = {item["id"]: item["caseCode"] for item in page.get("result")}
          except (ValueError, LookupError, TypeError, AttributeError):
              # Not the expected JSON shape. NOTE(review): presumably the
              # captcha was rejected or has expired -- confirm with the API.
              self.check_yzm()
              return False
          # Commit only after the whole page parsed, so a half-read page never
          # leaks into the pending detail requests.
          self.totalPage = total
          self.Id_CaseCode.update(entries)
          self.updateCookie()
          return True

      @staticmethod
      def _extract_detail_id(text):
          """Return the ``id`` field of a JSON object body, or None if absent."""
          try:
              payload = json.loads(text)
          except ValueError:
              return None
          if not isinstance(payload, dict):
              return None
          return payload.get("id")

      def get_detail(self):
          """Fetch the detail record of every pending id and write it out.

          Each response body is printed and appended to the output file as one
          line. When a response carries no ``id`` (including bodies that are
          not a JSON object) a new captcha is fetched and verified before the
          next request. The pending map is emptied afterwards.

          Raises:
              SystemExit: with code 0 when the server answers HTTP 412.
          """
          for case_id in self.Id_CaseCode:
              caseCode = parse.quote(self.Id_CaseCode[case_id])
              checkURL = (
                  "disDetailNew?id=" + str(case_id)
                  + "&caseCode=" + caseCode
                  + "&pCode=" + self.pCode
                  + "&captchaId=" + self.captchaId
              )
              path = self.ctx200.call("get_detail", "GET", checkURL, self.captchaId, self.pCode, str(case_id), caseCode)
              response = self.session.get(url="http://zxgk.court.gov.cn" + path, headers=self.headers, proxies=self.proxy)
              if response.status_code == 412:
                  raise SystemExit(0)
              if self._extract_detail_id(response.text) is None:
                  self.init_captcha()
                  self.check_Captcha()
              print("resp.text =>", response.text)
              self.file.write(response.text)
              self.file.write("\n")
              self.updateCookie()
          self.Id_CaseCode = {}

      def test(self):
          """Placeholder; does nothing."""
          pass

      def _total_pages(self):
          """Return ``self.totalPage`` as an int, or 0 when it is not numeric."""
          try:
              return int(self.totalPage)
          except (TypeError, ValueError):
              return 0

      def _crawl_keyword(self, name):
          """Walk every list page of one keyword and fetch its detail records."""
          index = 1
          failures = 0
          while True:
              time.sleep(self.REQUEST_DELAY)
              if self.get_list(name, index):
                  failures = 0
                  self.get_detail()
              else:
                  failures += 1
                  if failures <= self.MAX_LIST_RETRIES:
                      # Ask for the same page again instead of skipping it.
                      continue
                  print(f"第 {index} 页请求连续失败,已跳过")
                  failures = 0
              # ">=" also ends the loop when the site reports zero pages.
              if index >= self._total_pages():
                  break
              index += 1

      def main(self):
          """Run the whole crawl for every keyword in ``self.names``.

          The output file is closed when the crawl ends, whether it finished
          normally or was interrupted by an exception or ``SystemExit``.
          """
          try:
              self.init_page()
              self.check_yzm()
              for name in self.names:
                  self.file.write(f"关键字 =======> {name}")
                  self.file.write("\n")
                  self._crawl_keyword(name)
                  self.totalPage = 0
          finally:
              self.file.close()
  if __name__ == "__main__":
      # Script entry point: build the crawler, then run the full crawl.
      crawler = Get_Info()
      crawler.main()