"""Search zxgk.court.gov.cn by name and dump the detail responses to a file."""
import json
import re
import sys
import time
from urllib import parse

import ddddocr
import requests

from RS import Get_Cookie
from Captcha import Init_Captcha


class Get_Info:
    """Drive the search -> list -> detail flow against zxgk.court.gov.cn.

    The session and the ``ctx200`` object come from ``RS.Get_Cookie``;
    ``ctx200.call(...)`` is used to turn a relative request path into the
    path that is actually requested (presumably a JS runtime that signs the
    URL -- confirm against ``RS``). Every detail response body is appended
    to ``test.txt``, one per line.
    """

    BASE_URL = "http://zxgk.court.gov.cn"
    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
    # Consecutive failures of one list page tolerated before the keyword is
    # abandoned; keeps a permanently failing page from looping forever.
    MAX_LIST_RETRIES = 3

    def __init__(self):
        """Create the session, the OCR engine and open the output file."""
        self.Get_Cookie = Get_Cookie()
        self.session, self.ctx200 = self.Get_Cookie.main()
        self.updateCookie = self.Get_Cookie.update_cookie
        self.initURL = "http://zxgk.court.gov.cn/shixin"
        self.CaptchaCheckURL = "http://zxgk.court.gov.cn/shixin/checkyzm.do"
        self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
        self.DetailURL = "http://zxgk.court.gov.cn/zhixing/newdetail"
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "X-Requested-With": "XMLHttpRequest",
            "User-Agent": self.USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Origin": "http://zxgk.court.gov.cn",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        self.pCode = None  # OCR result for the current captcha image
        self.ocr = ddddocr.DdddOcr()
        self.totalPage = 0  # page count reported by the last list response
        self.Id_CaseCode = {}  # id -> caseCode rows waiting for get_detail()
        self.names = ["小明", "小白", "张三", "王五", "张伟"]
        # Closed by main(), including when main() exits through an error.
        self.file = open("test.txt", mode="w", encoding="utf-8")

    def _exit_if_cookie_expired(self, response):
        """Terminate the process when the server answers HTTP 412.

        The exit status stays 0, as in the original code, so existing
        wrappers observe the same result.
        """
        if response.status_code == 412:
            print("cookie失效,重新获取ts")
            sys.exit(0)

    @staticmethod
    def _extract_detail_id(body):
        """Return the ``id`` field of a JSON object body, or None.

        None is also returned when ``body`` is not valid JSON or does not
        decode to an object, so callers can treat every malformed response
        the same way as a response without an id.
        """
        try:
            payload = json.loads(body)
        except ValueError:
            return None
        if not isinstance(payload, dict):
            return None
        return payload.get("id")

    def init_page(self):
        """Request the landing page, then refresh the cookie."""
        init_page_response = self.session.get(url=self.initURL, headers=self.headers)
        print("init_page_response ===>", init_page_response)
        self._exit_if_cookie_expired(init_page_response)
        self.updateCookie()

    def init_captcha(self):
        """Download a new captcha image and store its OCR result in pCode.

        Also saves the image to ``captcha.jpg`` and refreshes the cookie.
        """
        self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
        captcha_headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "User-Agent": self.USER_AGENT,
            "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        captcha_response = self.session.get(url=self.CaptchaURL, headers=captcha_headers)
        self._exit_if_cookie_expired(captcha_response)
        with open("captcha.jpg", "wb") as f:
            f.write(captcha_response.content)
        self.pCode = self.ocr.classification(captcha_response.content)
        self.updateCookie()

    def check_Captcha(self):
        """Submit the current captcha answer.

        Returns:
            True when the first line of the response body is ``"1"``,
            otherwise False.
        """
        url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode + ""
        check_path = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
        response = self.session.get(url=self.BASE_URL + check_path, headers=self.headers)
        self._exit_if_cookie_expired(response)
        return response.text.split("\n")[0] == "1"

    def check_yzm(self):
        """Fetch and submit captchas until one is accepted.

        NOTE(review): this loop has no upper bound; it only ends when a
        captcha is accepted or the process exits on HTTP 412.
        """
        while True:
            self.init_captcha()
            accepted = self.check_Captcha()
            self.updateCookie()
            if accepted:
                break
            print("验证失败,正在进行重新请求验证码并验证")
        print("验证码的验证通过")

    def get_list(self, pname, currentPage):
        """Request one page of search results for ``pname``.

        On success the id -> caseCode pairs of the page are merged into
        ``self.Id_CaseCode`` and ``self.totalPage`` is updated. Neither
        attribute is touched when the response cannot be parsed; in that
        case the captcha is refreshed instead.

        Args:
            pname: Name to search for.
            currentPage: 1-based page number.

        Returns:
            True when the page was parsed, False otherwise.
        """
        url = self.BASE_URL + self.ctx200.call("get_list", "POST", "searchSX.do")
        print("请求列表页的URL ===>", url)
        data = {
            "pName": pname,
            "pCardNum": "",
            "pProvince": "0",
            "pCode": self.pCode,
            "captchaId": self.captchaId,
            "currentPage": str(currentPage),
        }
        response = self.session.post(url=url, headers=self.headers, data=data)
        print("response =>", response.text)
        self._exit_if_cookie_expired(response)
        try:
            # Parse once and build the results locally so that a malformed
            # row cannot leave half a page behind in self.Id_CaseCode.
            page = json.loads(response.text)[0]
            total_page = int(page.get("totalPage") or 0)
            cases = {row["id"]: row["caseCode"] for row in page.get("result")}
        except (ValueError, LookupError, TypeError, AttributeError) as err:
            print("列表页解析失败 ===>", repr(err))
            print("出现了error,进行刷新验证码操作")
            self.check_yzm()
            return False
        self.totalPage = total_page
        self.Id_CaseCode.update(cases)
        print("self.Id_CaseCode ===>", self.Id_CaseCode)
        self.updateCookie()
        return True

    def get_detail(self):
        """Fetch the detail record of every collected id and write it out.

        Each response body is written to the output file followed by a
        newline, including bodies without an ``id``; such a body also
        triggers a captcha refresh before the next request.
        ``self.Id_CaseCode`` is emptied at the end.
        """
        for case_id, case_code in self.Id_CaseCode.items():
            print("当前的id对应的caseCode ===>", case_code)
            quoted_code = parse.quote(case_code)
            path = (
                "disDetailNew?id=" + str(case_id)
                + "&caseCode=" + quoted_code
                + "&pCode=" + self.pCode
                + "&captchaId=" + self.captchaId
            )
            detail_path = self.ctx200.call(
                "get_detail", "GET", path, self.captchaId, self.pCode, str(case_id), quoted_code
            )
            detail_url = self.BASE_URL + detail_path
            print("当前请求的详情页 ===>", detail_url)
            response = self.session.get(url=detail_url, headers=self.headers)
            self._exit_if_cookie_expired(response)
            detail_id = self._extract_detail_id(response.text)
            print("id =>", detail_id)
            if detail_id is None:
                print("进行刷新验证码操作")
                # Loop until a captcha is accepted, so the next request does
                # not go out with an answer that was never verified.
                self.check_yzm()
            print("resp.text =>", response.text)
            self.file.write(response.text)
            self.file.write("\n")
            self.updateCookie()
        self.Id_CaseCode = {}

    def test(self):
        """Placeholder; does nothing."""
        pass

    def _crawl_keyword(self, name):
        """Walk every result page of ``name`` and fetch the details."""
        index = 1
        failures = 0
        while True:
            time.sleep(0.5)
            if not self.get_list(name, index):
                # Retry the same page instead of skipping it, but give up
                # after MAX_LIST_RETRIES consecutive failures.
                failures += 1
                if failures >= self.MAX_LIST_RETRIES:
                    print(f"关键字 {name} 第 {index} 页连续失败 {failures} 次,放弃该关键字")
                    break
                continue
            failures = 0
            self.get_detail()
            # ">=" also ends the loop when the site reports zero pages.
            if index >= self.totalPage:
                break
            index += 1
            print("index =>", index)

    def main(self):
        """Run the whole crawl for every name in ``self.names``.

        The output file is closed on every exit path, including errors and
        the HTTP 412 process exit.
        """
        try:
            self.init_page()
            self.check_yzm()
            for name in self.names:
                self.file.write(f"关键字 =======> {name}")
                self.file.write("\n")
                self._crawl_keyword(name)
                print(f"关键字为 {name} 搜索完毕")
                self.totalPage = 0
        finally:
            self.file.close()


if __name__ == '__main__':
    Get_Info().main()