# zhongben.py
import json
import re
import sys
import time

import ddddocr
import requests

from Captcha import Init_Captcha
from RS import Get_Cookie
  9. class Get_Info:
  10. def __init__(self):
  11. self.Get_Cookie = Get_Cookie()
  12. self.session, self.ctx200 = self.Get_Cookie.main()
  13. self.updateCookie = self.Get_Cookie.update_cookie
  14. self.initURL = "http://zxgk.court.gov.cn/zhongben/"
  15. self.DetailURL = "http://zxgk.court.gov.cn/zhongben/searchZbDetail"
  16. self.ocr = ddddocr.DdddOcr()
  17. self.headers = {
  18. "Connection": "keep-alive",
  19. "Pragma": "no-cache",
  20. "Cache-Control": "no-cache",
  21. "Accept": "*/*",
  22. "X-Requested-With": "XMLHttpRequest",
  23. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  24. "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
  25. "Origin": "http://zxgk.court.gov.cn",
  26. "Referer": "http://zxgk.court.gov.cn/zhongben/",
  27. "Accept-Language": "zh-CN,zh;q=0.9"
  28. }
  29. self.id = []
  30. self.file = open("detail.txt",mode="w",encoding="utf-8")
  31. self.names = ["张三", "李四", "王五"]
  32. self.totalPage = 2
  33. def init_page(self):
  34. init_page_response = self.session.get(url=self.initURL, headers=self.headers)
  35. print("init_page_response ===>", init_page_response)
  36. if init_page_response.status_code == 412:
  37. print("cookie失效,重新获取ts")
  38. exit(0)
  39. self.updateCookie()
  40. def init_captcha(self):
  41. self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
  42. Captcha_headers = {
  43. "Connection": "keep-alive",
  44. "Pragma": "no-cache",
  45. "Cache-Control": "no-cache",
  46. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  47. "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
  48. "Referer": "http://zxgk.court.gov.cn/zhongben/",
  49. "Accept-Language": "zh-CN,zh;q=0.9"
  50. }
  51. print("self.CaptchaURL ===>",self.CaptchaURL)
  52. CaptchaResponse = self.session.get(url=self.CaptchaURL, headers=Captcha_headers)
  53. print("请求验证码的响应结果 ===>", CaptchaResponse)
  54. if CaptchaResponse.status_code == 412:
  55. print("cookie失效,重新获取ts")
  56. exit(0)
  57. with open("captcha.jpg", 'wb') as f:
  58. f.write(CaptchaResponse.content)
  59. with open("captcha.jpg", 'rb') as f:
  60. image = f.read()
  61. self.pCode = self.ocr.classification(image)
  62. print("验证码的结果 ===>", self.pCode)
  63. self.updateCookie()
  64. def check_Captcha(self):
  65. url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode
  66. checkURL = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
  67. checkURL = "http://zxgk.court.gov.cn" + checkURL
  68. print("验证验证码的URL ===>",checkURL)
  69. response = self.session.get(url=checkURL, headers=self.headers)
  70. if response.status_code == 412:
  71. print("cookie失效,重新获取ts")
  72. exit(0)
  73. if response.text.split("\n")[0] == "1":
  74. return True
  75. else:
  76. print("验证码验证失败,退出程序")
  77. return False
  78. def check_yzm(self):
  79. while True:
  80. self.init_captcha()
  81. ret = self.check_Captcha()
  82. self.updateCookie()
  83. if ret:
  84. break
  85. else:
  86. print("验证失败,正在进行重新请求验证码并验证")
  87. print("验证码的验证通过")
  88. def Get_List(self, name, currentPage):
  89. List_data = {
  90. "pName": name,
  91. "pCardNum": "",
  92. "selectCourtId": "0",
  93. "pCode": self.pCode,
  94. "captchaId": self.captchaId,
  95. "searchCourtName": "全国法院(包含地方各级法院)",
  96. "selectCourtArrange": "1",
  97. "currentPage": currentPage
  98. }
  99. ListURL = self.ctx200.call("get_list","POST","search.do")
  100. self.ListURL = "http://zxgk.court.gov.cn" + ListURL
  101. print("获取列表页的URL ===>",self.ListURL)
  102. list_response = self.session.post(url=self.ListURL, headers=self.headers, data=List_data)
  103. if list_response.status_code == 412:
  104. print("cookie失效,重新获取ts")
  105. exit(0)
  106. self.updateCookie()
  107. try:
  108. self.id = re.findall(r'"id":(.*?),"', list_response.text)
  109. print("获取id列表完成")
  110. self.totalPage = int(re.findall(r'"totalPage":(.*?),"',list_response.text)[0])
  111. return True
  112. except:
  113. print("获取self.id失败 重新刷新验证码",list_response.text)
  114. self.check_yzm()
  115. return False
  116. def get_detail(self, id):
  117. #self.captchaId = ""
  118. searchURL = "searchZbDetail?id=" + id + "&j_captcha=" + self.pCode + "&captchaId=" + self.captchaId
  119. DetailURL = self.ctx200.call("get_detail","GET",searchURL,self.captchaId,self.pCode,id)
  120. self.DetailURL = "http://zxgk.court.gov.cn" + DetailURL
  121. print("获取详情的URL ===>",self.DetailURL)
  122. response = self.session.get(url=self.DetailURL,headers=self.headers)
  123. if response.status_code == 412:
  124. print("cookie失效,重新获取ts")
  125. exit(0)
  126. print("详情页 ===>",response.text)
  127. try:
  128. id = json.loads(response.text).get("id")
  129. print("详情id =>",id)
  130. self.file.write(response.text)
  131. self.file.write("\n")
  132. #返回True表示当前的详情写入正确(有数据)
  133. self.updateCookie()
  134. return True
  135. except:
  136. print("验证码出现错误")
  137. self.check_yzm()
  138. return False
  139. def main(self):
  140. self.init_page()
  141. self.check_yzm()
  142. for name in self.names:
  143. self.file.write(f"关键字 =======> {name}")
  144. self.file.write("\n")
  145. # 表示某某的第几页数据
  146. index = 1
  147. while True:
  148. time.sleep(0.5)
  149. if self.Get_List(name, index):
  150. id = 0
  151. while True:
  152. #返回True说明正确
  153. # if len(self.id) == 0:
  154. # break
  155. if id == len(self.id):
  156. break
  157. if self.get_detail(self.id[id]):
  158. id += 1
  159. if index == self.totalPage:
  160. break
  161. index += 1
  162. self.id = []
  163. print("index =>", index)
  164. print(f"关键字为 {name} 搜索完毕")
  165. self.totalPage = 0
  166. self.file.close()
  167. if __name__ == '__main__':
  168. Get_Info().main()