# zhixing.py (6.5 KB)
import json
import re
import sys
import time

import ddddocr

from Captcha import Init_Captcha
from RS import Get_Cookie
  7. class Get_Info:
  8. def __init__(self):
  9. self.Get_Cookie = Get_Cookie()
  10. self.session, self.ctx200 = self.Get_Cookie.main()
  11. self.updateCookie = self.Get_Cookie.update_cookie
  12. self.initURL = "http://zxgk.court.gov.cn/zhixing"
  13. self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
  14. self.pCode = None
  15. self.ocr = ddddocr.DdddOcr()
  16. self.headers = {
  17. "Connection": "keep-alive",
  18. "Pragma": "no-cache",
  19. "Cache-Control": "no-cache",
  20. "Accept": "application/json, text/javascript, */*; q=0.01",
  21. "X-Requested-With": "XMLHttpRequest",
  22. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  23. "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
  24. "Origin": "http://zxgk.court.gov.cn",
  25. "Referer": "http://zxgk.court.gov.cn/zhixing/",
  26. "Accept-Language": "zh-CN,zh;q=0.9"
  27. }
  28. self.id = []
  29. self.file = open("detail.txt","w",encoding="utf-8")
  30. self.obj_id = re.compile(r'"id":(?P<id>.*?),"jsonObject"',re.S)
  31. def init_page(self):
  32. init_page_response = self.session.get(url=self.initURL,headers=self.headers)
  33. print("init_page_response ===>",init_page_response)
  34. if init_page_response.status_code == 412:
  35. print("cookie失效,重新获取ts")
  36. exit(0)
  37. # print("init_page_response.text ===>",init_page_response.text)
  38. self.updateCookie()
  39. def init_captcha(self):
  40. self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
  41. Captcha_headers = {
  42. "Connection": "keep-alive",
  43. "Pragma": "no-cache",
  44. "Cache-Control": "no-cache",
  45. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  46. "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
  47. "Referer": "http://zxgk.court.gov.cn/zhixing/",
  48. "Accept-Language": "zh-CN,zh;q=0.9"
  49. }
  50. CaptchaResponse = self.session.get(url=self.CaptchaURL,headers=Captcha_headers)
  51. print("请求验证码的响应结果 ===>",CaptchaResponse)
  52. if CaptchaResponse.status_code == 412:
  53. print("cookie失效,重新获取ts")
  54. exit(0)
  55. with open("captcha.jpg", 'wb') as f:
  56. f.write(CaptchaResponse.content)
  57. with open("captcha.jpg", 'rb') as f:
  58. image = f.read()
  59. self.pCode = self.ocr.classification(image)
  60. print("验证码的结果 ===>",self.pCode)
  61. self.updateCookie()
  62. def check_Captcha(self):
  63. url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode + ""
  64. checkURL = self.ctx200.call("check_yzm","GET",url,self.captchaId,self.pCode)
  65. checkURL = "http://zxgk.court.gov.cn" + checkURL
  66. print("验证码验证的URL ===>",checkURL)
  67. response = self.session.get(url=checkURL,headers=self.headers)
  68. self.updateCookie()
  69. if response.status_code == 412:
  70. print("cookie失效,重新获取ts")
  71. exit(0)
  72. if response.text.split("\n")[0] == "1":
  73. return True
  74. return False
  75. def check_yzm(self):
  76. while True:
  77. self.init_captcha()
  78. ret = self.check_Captcha()
  79. self.updateCookie()
  80. if ret:
  81. break
  82. else:
  83. print("验证失败,正在进行重新请求验证码并验证")
  84. print("验证码的验证通过")
  85. def Get_List(self, pName, currentPage):
  86. List_data = {
  87. "pName": pName,
  88. "pCardNum": "",
  89. "selectCourtId": "0",
  90. "pCode": self.pCode,
  91. "captchaId": self.captchaId,
  92. "searchCourtName": "全国法院(包含地方各级法院)",
  93. "selectCourtArrange": "1",
  94. "currentPage": currentPage
  95. }
  96. listURL = self.ctx200.call("get_list","POST","searchBzxr.do")
  97. listURL = "http://zxgk.court.gov.cn" + listURL
  98. print("列表页的URL ===>",listURL)
  99. list_response = self.session.post(url=listURL, headers=self.headers, data=List_data)
  100. self.updateCookie()
  101. if list_response.status_code == 412:
  102. print("cookie失效,重新获取ts")
  103. exit(0)
  104. print("列表页的响应 ===>",list_response)
  105. print(list_response.text)
  106. try:
  107. result = self.obj_id.finditer(list_response.text)
  108. for i in result:
  109. self.id.append(i.group("id"))
  110. print("当前列表页的ID存储完毕")
  111. return True
  112. except:
  113. print("需要更新一下验证码")
  114. self.check_yzm()
  115. return False
  116. def Get_Detail(self, i):
  117. #self.pCode = ""
  118. currentTime = str(time.time() * 1000)
  119. DetailURL = "newdetail?id=" + str(self.id[i]) + "&j_captcha=" + self.pCode + "&captchaId=" + self.captchaId
  120. currentURL = self.ctx200.call("get_bzxr_hz","GET",DetailURL,self.captchaId,self.pCode,currentTime,str(self.id[i]))
  121. currentURL = "http://zxgk.court.gov.cn" + currentURL
  122. print("currentURL ===>", currentURL)
  123. detail_response = self.session.get(url=currentURL,headers=self.headers)
  124. self.updateCookie()
  125. if detail_response.status_code == 412:
  126. print("cookie失效,重新获取ts")
  127. exit(0)
  128. id = json.loads(detail_response.text).get("id")
  129. if id != None:
  130. self.file.write(detail_response.content.decode())
  131. self.file.write("\n")
  132. print(f"写入id为 {i} 的内容")
  133. print(detail_response.text)
  134. return True
  135. return False
  136. def main(self):
  137. self.init_page()
  138. self.check_yzm()
  139. index = 1
  140. while True:
  141. if self.Get_List("张三",index):
  142. index += 1
  143. if len(self.id) == 0:
  144. break
  145. for i in range(0,len(self.id)):
  146. if self.Get_Detail(i) == False:
  147. print("获取id=" + str(i) + "的详情的时候出了问题")
  148. self.id = []
  149. if index == len(self.id) + 1:
  150. break
  151. self.file.close()
  152. if __name__ == '__main__':
  153. Get_Info().main()