123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- import json
- import time
- import ddddocr
- from RS import Get_Cookie
- from Captcha import Init_Captcha
- import re
class Get_Info:
    """Scraper for the enforcement-record search on zxgk.court.gov.cn.

    The workflow implemented by :meth:`main` is: load the landing page,
    solve and verify a captcha, page through the search results collecting
    record ids, then fetch each record's detail JSON and append it to
    ``detail.txt`` (one JSON document per line).

    Cookie handling and URL signing are delegated to the project-local
    ``Get_Cookie`` helper (``self.session``, ``self.ctx200`` and
    ``self.updateCookie``); their exact semantics are not visible here.
    """

    # Scheme + host that is prepended to every path returned by ctx200.call().
    _HOST = "http://zxgk.court.gov.cn"

    def __init__(self):
        """Create the HTTP session, OCR engine and output file."""
        self.Get_Cookie = Get_Cookie()
        self.session, self.ctx200 = self.Get_Cookie.main()
        self.updateCookie = self.Get_Cookie.update_cookie
        self.initURL = "http://zxgk.court.gov.cn/zhixing"
        self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
        # Captcha state; populated by init_captcha().
        self.pCode = None
        self.captchaId = None
        self.ocr = ddddocr.DdddOcr()
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "X-Requested-With": "XMLHttpRequest",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Origin": "http://zxgk.court.gov.cn",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        # Record ids collected from the list page currently being processed.
        self.id = []
        self.obj_id = re.compile(r'"id":(?P<id>.*?),"jsonObject"', re.S)
        # Opened last so that a failure earlier in __init__ cannot leak the
        # handle; main() closes it in a ``finally`` clause.
        self.file = open("detail.txt", "w", encoding="utf-8")

    @staticmethod
    def _exit_if_cookie_expired(response):
        """Terminate the program when *response* has HTTP status 412.

        This scraper treats a 412 as an invalidated cookie.  ``SystemExit``
        is raised directly (with the same status 0 the original code used)
        instead of calling the ``site`` helper ``exit()``, which is intended
        for the interactive interpreter and may be absent.

        Raises:
            SystemExit: if ``response.status_code`` is 412.
        """
        if response.status_code == 412:
            print("cookie失效,重新获取ts")
            raise SystemExit(0)

    def init_page(self):
        """Request the landing page, then refresh the cookie."""
        init_page_response = self.session.get(url=self.initURL, headers=self.headers)
        print("init_page_response ===>", init_page_response)
        self._exit_if_cookie_expired(init_page_response)
        self.updateCookie()

    def init_captcha(self):
        """Download a fresh captcha image and OCR it into ``self.pCode``.

        The image is also saved to ``captcha.jpg`` in the working directory.
        """
        self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
        captcha_headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        captcha_response = self.session.get(url=self.CaptchaURL, headers=captcha_headers)
        print("请求验证码的响应结果 ===>", captcha_response)
        self._exit_if_cookie_expired(captcha_response)

        image = captcha_response.content
        # Keep the on-disk copy, but feed the OCR the bytes we already hold
        # rather than reading the file straight back.
        with open("captcha.jpg", "wb") as f:
            f.write(image)
        self.pCode = self.ocr.classification(image)
        print("验证码的结果 ===>", self.pCode)
        self.updateCookie()

    def check_Captcha(self):
        """Submit the current captcha answer for verification.

        Returns:
            bool: True when the first line of the response body is ``"1"``.
        """
        url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode + ""
        # ctx200.call presumably returns the signed request path -- its
        # implementation lives in the cookie helper; verify there.
        checkURL = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
        checkURL = self._HOST + checkURL
        print("验证码验证的URL ===>", checkURL)
        response = self.session.get(url=checkURL, headers=self.headers)
        # Cookie is refreshed before the status check, as in the original flow.
        self.updateCookie()
        self._exit_if_cookie_expired(response)
        return response.text.split("\n")[0] == "1"

    def check_yzm(self):
        """Fetch and verify captchas repeatedly until one is accepted."""
        while True:
            self.init_captcha()
            ret = self.check_Captcha()
            self.updateCookie()
            if ret:
                break
            print("验证失败,正在进行重新请求验证码并验证")
        print("验证码的验证通过")

    def Get_List(self, pName, currentPage):
        """Fetch one page of search results and collect its record ids.

        Args:
            pName: Name to search for.
            currentPage: Page number to request.

        Returns:
            bool: True when the page was parsed (ids, possibly none, were
            appended to ``self.id``); False when parsing failed and the
            captcha was re-verified.
        """
        list_data = {
            "pName": pName,
            "pCardNum": "",
            "selectCourtId": "0",
            "pCode": self.pCode,
            "captchaId": self.captchaId,
            "searchCourtName": "全国法院(包含地方各级法院)",
            "selectCourtArrange": "1",
            "currentPage": currentPage,
        }
        listURL = self.ctx200.call("get_list", "POST", "searchBzxr.do")
        listURL = self._HOST + listURL
        print("列表页的URL ===>", listURL)
        list_response = self.session.post(url=listURL, headers=self.headers, data=list_data)
        self.updateCookie()
        self._exit_if_cookie_expired(list_response)
        print("列表页的响应 ===>", list_response)
        print(list_response.text)
        try:
            page_ids = [m.group("id") for m in self.obj_id.finditer(list_response.text)]
        except Exception:
            # Best-effort recovery kept from the original, but no longer a
            # bare ``except`` so Ctrl-C and SystemExit propagate.
            print("需要更新一下验证码")
            self.check_yzm()
            return False
        self.id.extend(page_ids)
        print("当前列表页的ID存储完毕")
        return True

    def Get_Detail(self, i):
        """Fetch the detail record for ``self.id[i]`` and append it to the file.

        Args:
            i: Index into ``self.id``.

        Returns:
            bool: True when the response was a JSON object with a non-null
            ``"id"`` and was written to the output file; False otherwise
            (including when the body is not valid JSON).
        """
        record_id = str(self.id[i])
        currentTime = str(time.time() * 1000)
        DetailURL = "newdetail?id=" + record_id + "&j_captcha=" + self.pCode + "&captchaId=" + self.captchaId
        currentURL = self.ctx200.call(
            "get_bzxr_hz", "GET", DetailURL, self.captchaId, self.pCode, currentTime, record_id
        )
        currentURL = self._HOST + currentURL
        print("currentURL ===>", currentURL)
        detail_response = self.session.get(url=currentURL, headers=self.headers)
        self.updateCookie()
        self._exit_if_cookie_expired(detail_response)

        try:
            payload = json.loads(detail_response.text)
        except ValueError:
            # Non-JSON body (json.JSONDecodeError is a ValueError): report
            # failure to the caller instead of crashing the whole crawl.
            return False
        if not isinstance(payload, dict) or payload.get("id") is None:
            return False

        self.file.write(detail_response.content.decode())
        self.file.write("\n")
        print(f"写入id为 {i} 的内容")
        print(detail_response.text)
        return True

    def main(self, pName="张三"):
        """Run the full crawl for *pName* and close the output file.

        Args:
            pName: Name to search for.  Defaults to the value that was
                previously hard-coded.
        """
        try:
            self.init_page()
            self.check_yzm()
            index = 1
            while True:
                if self.Get_List(pName, index):
                    index += 1
                if not self.id:
                    break
                for i in range(len(self.id)):
                    if not self.Get_Detail(i):
                        print("获取id=" + str(i) + "的详情的时候出了问题")
                self.id = []
                # Original test was ``index == len(self.id) + 1`` evaluated
                # right after the reset above, i.e. ``index == 1``: stop if
                # the first page never advanced.
                if index == 1:
                    break
        finally:
            # Always release the output file, even on errors or SystemExit.
            self.file.close()
# Script entry point: build the scraper and run the full crawl.
if __name__ == "__main__":
    scraper = Get_Info()
    scraper.main()
|