123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181 |
- import json
- import time
- from urllib import parse
- import ddddocr
- import requests
- from RS import Get_Cookie
- from Captcha import Init_Captcha
- import re
class Get_Info:
    """Crawl search-list and detail pages from zxgk.court.gov.cn.

    The HTTP session and the JS context (``ctx200``) come from
    ``RS.Get_Cookie``; captcha URLs come from ``Captcha.Init_Captcha`` and
    the captcha image is recognised with ``ddddocr``. Raw detail responses
    are appended, one per line, to the output file.

    Args:
        names: Keywords to search for. Defaults to the built-in sample list.
        output_path: File that receives the raw detail responses. It is
            opened (and truncated) in the constructor and closed by
            :meth:`main`.
    """

    BASE_URL = "http://zxgk.court.gov.cn"
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
    )
    # Seconds to wait before every list-page request in main().
    page_delay = 0.5

    def __init__(self, names=None, output_path="test.txt"):
        self.Get_Cookie = Get_Cookie()
        self.session, self.ctx200 = self.Get_Cookie.main()
        self.updateCookie = self.Get_Cookie.update_cookie
        self.initURL = "http://zxgk.court.gov.cn/shixin"
        self.CaptchaCheckURL = "http://zxgk.court.gov.cn/shixin/checkyzm.do"
        self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
        self.DetailURL = "http://zxgk.court.gov.cn/zhixing/newdetail"
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "X-Requested-With": "XMLHttpRequest",
            "User-Agent": self.USER_AGENT,
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Origin": "http://zxgk.court.gov.cn",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        # Captcha state; filled in by init_captcha().
        self.CaptchaURL = None
        self.captchaId = None
        self.MyRandom = None
        self.pCode = None
        self.ocr = ddddocr.DdddOcr()
        self.totalPage = 0
        # Maps record id -> caseCode for the list page fetched last.
        self.Id_CaseCode = {}
        if names is None:
            self.names = ["小明", "小白", "张三", "王五", "张伟"]
        else:
            self.names = list(names)
        # Opened last so that a failure above does not leave a handle behind.
        self.file = open(output_path, mode="w", encoding="utf-8")

    @staticmethod
    def _exit_if_cookie_expired(response):
        """Terminate the process when the server answers HTTP 412.

        Raises:
            SystemExit: with code 0, exactly like the previous ``exit(0)``,
                but without relying on the ``site``-provided ``exit`` helper.
        """
        if response.status_code == 412:
            print("cookie失效,重新获取ts")
            raise SystemExit(0)

    @staticmethod
    def _extract_detail_id(text):
        """Return the ``id`` field of a JSON object body, or None.

        None is returned when *text* is not valid JSON, is not a JSON
        object, or has no ``id`` key.
        """
        try:
            payload = json.loads(text)
        except ValueError:
            return None
        if isinstance(payload, dict):
            return payload.get("id")
        return None

    def _total_pages(self):
        """Return ``self.totalPage`` as an int, treating bad values as 0."""
        try:
            return int(self.totalPage or 0)
        except (TypeError, ValueError):
            return 0

    def init_page(self):
        """Request the landing page once and refresh the cookie."""
        init_page_response = self.session.get(url=self.initURL, headers=self.headers)
        print("init_page_response ===>", init_page_response)
        self._exit_if_cookie_expired(init_page_response)
        self.updateCookie()

    def init_captcha(self):
        """Fetch a new captcha image, OCR it and store the result.

        Sets ``CaptchaURL``, ``captchaId``, ``MyRandom`` and ``pCode``, and
        overwrites ``captcha.jpg`` in the working directory with the image.
        """
        self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
        captcha_headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "User-Agent": self.USER_AGENT,
            "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        captcha_response = self.session.get(url=self.CaptchaURL, headers=captcha_headers)
        self._exit_if_cookie_expired(captcha_response)
        image_bytes = captcha_response.content
        with open("captcha.jpg", "wb") as image_file:
            image_file.write(image_bytes)
        self.pCode = self.ocr.classification(image_bytes)
        self.updateCookie()

    def check_Captcha(self):
        """Submit the current captcha answer.

        Returns:
            bool: True when the first line of the response body is ``"1"``.
        """
        url = f"checkyzm.do?captchaId={self.captchaId}&pCode={self.pCode}"
        # ctx200.call is implemented outside this file; it is expected to
        # return the request path that is appended to BASE_URL.
        check_path = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
        response = self.session.get(url=self.BASE_URL + check_path, headers=self.headers)
        self._exit_if_cookie_expired(response)
        return response.text.split("\n")[0] == "1"

    def check_yzm(self):
        """Keep requesting and submitting captchas until one is accepted."""
        while True:
            self.init_captcha()
            accepted = self.check_Captcha()
            self.updateCookie()
            if accepted:
                break
            print("验证失败,正在进行重新请求验证码并验证")
        print("验证码的验证通过")

    def get_list(self, pname, currentPage):
        """Fetch one page of search results for *pname*.

        On success ``totalPage`` is updated and every ``id -> caseCode``
        pair of the page is merged into ``Id_CaseCode``. When the body
        cannot be interpreted, nothing is stored and the captcha is
        re-validated via :meth:`check_yzm`.

        Args:
            pname: Keyword sent as ``pName``.
            currentPage: 1-based page number, sent as a string.

        Returns:
            bool: True when the page was parsed, False otherwise.
        """
        url = self.BASE_URL + self.ctx200.call("get_list", "POST", "searchSX.do")
        print("请求列表页的URL ===>", url)
        data = {
            "pName": pname,
            "pCardNum": "",
            "pProvince": "0",
            "pCode": self.pCode,
            "captchaId": self.captchaId,
            "currentPage": str(currentPage),
        }
        response = self.session.post(url=url, headers=self.headers, data=data)
        print("response =>", response.text)
        self._exit_if_cookie_expired(response)
        try:
            # Parse once; only malformed or unexpectedly shaped payloads are
            # treated as a failed page. SystemExit/KeyboardInterrupt propagate.
            page = json.loads(response.text)[0]
            total_page = page.get("totalPage")
            entries = {item["id"]: item["caseCode"] for item in page.get("result")}
        except (ValueError, LookupError, TypeError, AttributeError):
            print(response.content.decode(errors="replace"))
            print("出现了error,进行刷新验证码操作")
            self.check_yzm()
            return False
        self.totalPage = total_page
        self.Id_CaseCode.update(entries)
        print("self.Id_CaseCode ===>", self.Id_CaseCode)
        self.updateCookie()
        return True

    def get_detail(self):
        """Fetch the detail record for every entry in ``Id_CaseCode``.

        Each raw response body is written to the output file followed by a
        newline, including bodies without an ``id``. When a body has no
        ``id`` the captcha is refreshed once before continuing.
        ``Id_CaseCode`` is emptied afterwards.
        """
        for record_id, case_code in self.Id_CaseCode.items():
            print("当前的id对应的caseCode ===>", case_code)
            quoted_case_code = parse.quote(case_code)
            check_url = (
                "disDetailNew?id=" + str(record_id)
                + "&caseCode=" + quoted_case_code
                + "&pCode=" + self.pCode
                + "&captchaId=" + self.captchaId
            )
            detail_path = self.ctx200.call(
                "get_detail", "GET", check_url,
                self.captchaId, self.pCode, str(record_id), quoted_case_code,
            )
            detail_url = self.BASE_URL + detail_path
            print("当前请求的详情页 ===>", detail_url)
            response = self.session.get(url=detail_url, headers=self.headers)
            self._exit_if_cookie_expired(response)
            detail_id = self._extract_detail_id(response.text)
            print("id =>", detail_id)
            if detail_id is None:
                print(response.content.decode(errors="replace"))
                print("进行刷新验证码操作")
                self.init_captcha()
                self.check_Captcha()
            print("resp.text =>", response.text)
            self.file.write(response.text)
            self.file.write("\n")
            self.updateCookie()
        self.Id_CaseCode = {}

    def test(self):
        """Placeholder; intentionally does nothing."""
        pass

    def main(self):
        """Run the crawl for every keyword in ``names``.

        For each keyword the list pages are walked from page 1; a page that
        fails to parse is retried (``get_list`` has already re-validated the
        captcha by then). The output file is always closed on the way out,
        including when the run is aborted by ``SystemExit``.
        """
        try:
            self.init_page()
            self.check_yzm()
            for name in self.names:
                index = 1
                self.file.write(f"关键字 =======> {name}")
                self.file.write("\n")
                while True:
                    time.sleep(self.page_delay)
                    if not self.get_list(name, index):
                        continue
                    self.get_detail()
                    # ">=" rather than "==": the server may report zero (or
                    # no) pages, which must end the loop instead of paging
                    # forever.
                    if index >= self._total_pages():
                        break
                    index += 1
                    print("index =>", index)
                print(f"关键字为 {name} 搜索完毕")
                self.totalPage = 0
        finally:
            self.file.close()
# Script entry point: build the crawler and run the whole crawl.
if __name__ == '__main__':
    crawler = Get_Info()
    crawler.main()
|