"""Query the search endpoints under http://zxgk.court.gov.cn/zhongben/.

For every keyword in ``Get_Info.names`` the script walks the paginated
list endpoint, fetches the detail record of every id found and appends the
raw detail responses to ``detail.txt``.
"""
import json
import re
import sys
import time

import ddddocr
import requests

from RS import Get_Cookie
from Captcha import Init_Captcha


class Get_Info:
    """Drive the list/detail queries, solving the captcha whenever needed.

    Cookie handling and URL generation are delegated to ``RS.Get_Cookie``;
    captcha parameters come from ``Captcha.Init_Captcha``.
    NOTE(review): ``self.ctx200`` looks like a JavaScript execution context
    that returns the final request path -- confirm against ``RS``.
    """

    _BASE_URL = "http://zxgk.court.gov.cn"
    _USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
    )
    # Patterns used to pull the record ids and the page count out of the
    # raw list response text.
    _ID_PATTERN = re.compile(r'"id":(.*?),"')
    _TOTAL_PAGE_PATTERN = re.compile(r'"totalPage":(.*?),"')

    def __init__(self):
        """Create the session, the OCR engine and open ``detail.txt``."""
        self.Get_Cookie = Get_Cookie()
        self.session, self.ctx200 = self.Get_Cookie.main()
        self.updateCookie = self.Get_Cookie.update_cookie
        self.initURL = "http://zxgk.court.gov.cn/zhongben/"
        self.DetailURL = "http://zxgk.court.gov.cn/zhongben/searchZbDetail"
        self.ocr = ddddocr.DdddOcr()
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Accept": "*/*",
            "X-Requested-With": "XMLHttpRequest",
            "User-Agent": self._USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Origin": "http://zxgk.court.gov.cn",
            "Referer": "http://zxgk.court.gov.cn/zhongben/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        # Ids extracted from the most recent list page.
        self.id = []
        # Closed by main(); callers that bypass main() must close it themselves.
        self.file = open("detail.txt", mode="w", encoding="utf-8")
        self.names = ["张三", "李四", "王五"]
        self.totalPage = 2

    @staticmethod
    def _abort_if_cookie_expired(response):
        """Terminate the process when the server answers with HTTP 412.

        The script treats a 412 status as an invalid cookie that cannot be
        recovered from inside this run, so it exits with a non-zero status.

        Raises:
            SystemExit: if ``response.status_code`` is 412.
        """
        if response.status_code == 412:
            print("cookie失效,重新获取ts")
            sys.exit(1)

    def init_page(self):
        """Request the landing page and refresh the cookie afterwards."""
        init_page_response = self.session.get(url=self.initURL, headers=self.headers)
        print("init_page_response ===>", init_page_response)
        self._abort_if_cookie_expired(init_page_response)
        self.updateCookie()

    def init_captcha(self):
        """Download a new captcha image and store its OCR result.

        Sets ``self.CaptchaURL``, ``self.captchaId``, ``self.MyRandom`` and
        ``self.pCode``. The image is also saved as ``captcha.jpg``.
        """
        self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
        captcha_headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "User-Agent": self._USER_AGENT,
            "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
            "Referer": "http://zxgk.court.gov.cn/zhongben/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        print("self.CaptchaURL ===>", self.CaptchaURL)
        captcha_response = self.session.get(url=self.CaptchaURL, headers=captcha_headers)
        print("请求验证码的响应结果 ===>", captcha_response)
        self._abort_if_cookie_expired(captcha_response)

        image = captcha_response.content
        # Keep a copy on disk for inspection; OCR works on the in-memory
        # bytes, so the file does not have to be read back.
        with open("captcha.jpg", "wb") as f:
            f.write(image)
        self.pCode = self.ocr.classification(image)
        print("验证码的结果 ===>", self.pCode)
        self.updateCookie()

    def check_Captcha(self):
        """Submit the current captcha answer for verification.

        Returns:
            True when the first line of the response body is ``"1"``,
            otherwise False.
        """
        url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode
        check_path = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
        check_url = self._BASE_URL + check_path
        print("验证验证码的URL ===>", check_url)
        response = self.session.get(url=check_url, headers=self.headers)
        self._abort_if_cookie_expired(response)
        if response.text.split("\n")[0] == "1":
            return True
        print("验证码验证失败,退出程序")
        return False

    def check_yzm(self):
        """Fetch and verify captchas until one is accepted."""
        while True:
            self.init_captcha()
            accepted = self.check_Captcha()
            self.updateCookie()
            if accepted:
                break
            print("验证失败,正在进行重新请求验证码并验证")
        print("验证码的验证通过")

    def Get_List(self, name, currentPage):
        """Fetch one page of search results for ``name``.

        On success ``self.id`` holds the ids found on the page and
        ``self.totalPage`` the reported page count. When the page count
        cannot be parsed, the captcha is refreshed so the caller can retry.

        Args:
            name: Keyword sent as ``pName``.
            currentPage: Page number to request.

        Returns:
            True if the response was parsed, False if it must be retried.
        """
        list_data = {
            "pName": name,
            "pCardNum": "",
            "selectCourtId": "0",
            "pCode": self.pCode,
            "captchaId": self.captchaId,
            "searchCourtName": "全国法院(包含地方各级法院)",
            "selectCourtArrange": "1",
            "currentPage": currentPage,
        }
        list_path = self.ctx200.call("get_list", "POST", "search.do")
        self.ListURL = self._BASE_URL + list_path
        print("获取列表页的URL ===>", self.ListURL)
        list_response = self.session.post(url=self.ListURL, headers=self.headers, data=list_data)
        self._abort_if_cookie_expired(list_response)
        self.updateCookie()

        body = list_response.text
        self.id = self._ID_PATTERN.findall(body)
        print("获取id列表完成")
        try:
            # IndexError: no "totalPage" field; ValueError: not an integer.
            self.totalPage = int(self._TOTAL_PAGE_PATTERN.findall(body)[0])
        except (IndexError, ValueError):
            print("获取self.id失败 重新刷新验证码", body)
            self.check_yzm()
            return False
        return True

    def get_detail(self, id):
        """Fetch one detail record and append the raw response to the file.

        Args:
            id: Record id as extracted from the list response (a string).

        Returns:
            True if the response was a JSON object and was written, False if
            it could not be parsed (the captcha is refreshed in that case).
        """
        search_url = (
            "searchZbDetail?id=" + id
            + "&j_captcha=" + self.pCode
            + "&captchaId=" + self.captchaId
        )
        detail_path = self.ctx200.call(
            "get_detail", "GET", search_url, self.captchaId, self.pCode, id
        )
        self.DetailURL = self._BASE_URL + detail_path
        print("获取详情的URL ===>", self.DetailURL)
        response = self.session.get(url=self.DetailURL, headers=self.headers)
        self._abort_if_cookie_expired(response)
        print("详情页 ===>", response.text)

        try:
            # ValueError covers json.JSONDecodeError; AttributeError covers
            # valid JSON that is not an object (no .get()).
            detail_id = json.loads(response.text).get("id")
        except (ValueError, AttributeError):
            print("验证码出现错误")
            self.check_yzm()
            return False

        print("详情id =>", detail_id)
        self.file.write(response.text)
        self.file.write("\n")
        self.updateCookie()
        # True tells the caller this record was written and it may advance.
        return True

    def main(self):
        """Run the full crawl for every keyword and close the output file."""
        try:
            self.init_page()
            self.check_yzm()
            for name in self.names:
                self.file.write(f"关键字 =======> {name}")
                self.file.write("\n")
                # Page number currently being fetched for this keyword.
                index = 1
                while True:
                    time.sleep(0.5)
                    if not self.Get_List(name, index):
                        # Captcha was refreshed; retry the same page.
                        continue
                    position = 0
                    while position < len(self.id):
                        # Advance only after the record was written; a failed
                        # fetch is retried with the refreshed captcha.
                        if self.get_detail(self.id[position]):
                            position += 1
                    # ">=" rather than "==": a keyword without results reports
                    # fewer pages than the page just fetched, and an equality
                    # test would then never end the loop.
                    if index >= self.totalPage:
                        break
                    index += 1
                    self.id = []
                    print("index =>", index)
                print(f"关键字为 {name} 搜索完毕")
                self.totalPage = 0
        finally:
            # Close the file even when a request fails or the process exits.
            self.file.close()


if __name__ == '__main__':
    Get_Info().main()