|
@@ -0,0 +1,178 @@
|
|
|
+import json
|
|
|
+import time
|
|
|
+from urllib import parse
|
|
|
+import ddddocr
|
|
|
+import requests
|
|
|
+from rs_zxgk.shixin.RS import Get_Cookie
|
|
|
+from rs_zxgk.shixin.Captcha import Init_Captcha
|
|
|
+import re
|
|
|
+
|
|
|
+
|
|
|
class Get_Info:
    """Crawl search-result lists and detail records from zxgk.court.gov.cn.

    For every keyword in ``self.names`` the crawler pages through the search
    list, requests the detail record of every hit and appends the raw
    response text to ``test.txt``.

    Collaborators (defined outside this file):

    * ``Get_Cookie().main()`` returns ``(session, ctx200, proxy)``.
      ``session`` is used like a ``requests.Session``; ``ctx200`` exposes
      ``call(name, *args)`` and returns a URL path that is appended to
      ``BASE_URL`` -- presumably a JS runtime that signs request paths;
      confirm in ``rs_zxgk.shixin.RS``.
    * ``Get_Cookie().update_cookie`` is invoked around every request; its
      exact effect is not visible from this file.
    * ``Init_Captcha().main()`` returns ``(captcha_url, captcha_id, random)``.
    """

    # Scheme + host that every signed path returned by ``ctx200`` is appended to.
    BASE_URL = "http://zxgk.court.gov.cn"
    # Seconds to wait before each list-page request (override to tune pacing).
    PAGE_DELAY = 0.5

    def __init__(self):
        """Create the session, static request metadata and the output file.

        Note: ``test.txt`` is opened (and truncated) here; ``main`` is
        responsible for closing it.
        """
        self.Get_Cookie = Get_Cookie()
        self.session, self.ctx200, self.proxy = self.Get_Cookie.main()
        self.updateCookie = self.Get_Cookie.update_cookie
        self.initURL = "http://zxgk.court.gov.cn/shixin"
        self.CaptchaCheckURL = "http://zxgk.court.gov.cn/shixin/checkyzm.do"
        self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
        self.DetailURL = "http://zxgk.court.gov.cn/zhixing/newdetail"
        self.headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "X-Requested-With": "XMLHttpRequest",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "Origin": "http://zxgk.court.gov.cn",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        # Captcha state; populated by init_captcha().
        self.CaptchaURL = None
        self.captchaId = None
        self.MyRandom = None
        self.pCode = None
        self.ocr = ddddocr.DdddOcr()
        # Page count reported by the most recent successful list request.
        self.totalPage = 0
        # Maps record id -> caseCode for the list page currently being processed.
        self.Id_CaseCode = {}
        self.names = ["小明", "小白", "张三", "王五", "张伟"]
        self.file = open("test.txt", mode="w", encoding="utf-8")

    def init_page(self):
        """Request the landing page once, refreshing the cookie around it.

        On HTTP 412 a message is printed and the trailing cookie refresh is
        skipped.
        """
        self.updateCookie()
        landing = self.session.get(url=self.initURL, headers=self.headers, proxies=self.proxy)
        if landing.status_code == 412:
            print("cookie失效,重新获取ts")
            return
        self.updateCookie()

    def init_captcha(self):
        """Fetch a new captcha image and store its OCR result in ``self.pCode``.

        ``self.CaptchaURL``, ``self.captchaId`` and ``self.MyRandom`` are
        always replaced. On HTTP 412 a message is printed and ``self.pCode``
        keeps its previous value. The image bytes are also written to
        ``captcha.jpg`` in the working directory.
        """
        self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
        captcha_headers = {
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
            "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
            "Referer": "http://zxgk.court.gov.cn/zhixing/",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        image = self.session.get(url=self.CaptchaURL, headers=captcha_headers, proxies=self.proxy)
        if image.status_code == 412:
            print("cookie失效,重新获取ts")
            return
        # Keep a copy of the last captcha on disk for manual inspection.
        with open("captcha.jpg", "wb") as f:
            f.write(image.content)
        self.pCode = self.ocr.classification(image.content)
        self.updateCookie()

    def check_Captcha(self):
        """Submit the current captcha answer for verification.

        Returns:
            bool: True when the first line of the response body is ``"1"``.

        Raises:
            SystemExit: with status 0 when the server answers HTTP 412.
        """
        self.updateCookie()
        url = f"checkyzm.do?captchaId={self.captchaId}&pCode={self.pCode}"
        signed_path = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
        response = self.session.get(
            url=self.BASE_URL + signed_path, headers=self.headers, proxies=self.proxy
        )
        if response.status_code == 412:
            print("cookie失效,重新获取ts")
            # Same exit status as before, without relying on the
            # site-injected interactive ``exit`` helper.
            raise SystemExit(0)
        return response.text.split("\n")[0] == "1"

    def check_yzm(self):
        """Fetch and verify captchas until one is accepted.

        NOTE(review): there is no retry cap or back-off; the loop only ends
        on success or when ``check_Captcha`` exits the process on HTTP 412.
        """
        while True:
            self.init_captcha()
            verified = self.check_Captcha()
            self.updateCookie()
            if verified:
                break
            print("验证失败,正在进行重新请求验证码并验证")

    def get_list(self, pname, currentPage):
        """Request one page of search results for ``pname``.

        On success ``self.totalPage`` is updated (coerced to ``int``) and the
        page's ``id -> caseCode`` pairs are merged into ``self.Id_CaseCode``.

        Args:
            pname: Keyword sent as ``pName``.
            currentPage: 1-based page number to request.

        Returns:
            bool: True when the page was parsed; False on HTTP 412 or when
            the body is not the expected JSON (in which case the captcha is
            refreshed through ``check_yzm`` before returning).
        """
        url = self.BASE_URL + self.ctx200.call("get_list", "POST", "searchSX.do")
        data = {
            "pName": pname,
            "pCardNum": "",
            "pProvince": "0",
            "pCode": self.pCode,
            "captchaId": self.captchaId,
            "currentPage": str(currentPage),
        }
        response = self.session.post(url=url, headers=self.headers, data=data, proxies=self.proxy)
        if response.status_code == 412:
            print("cookie失效,重新获取ts")
            return False
        try:
            # Parse once; the payload is expected to be a one-element list.
            page = json.loads(response.text)[0]
            total_page = int(page.get("totalPage") or 0)
            id_to_case = {row["id"]: row["caseCode"] for row in page.get("result")}
        except (ValueError, LookupError, TypeError, AttributeError):
            # An unexpected body is treated as a rejected captcha: refresh it
            # and let the caller retry the same page.
            self.check_yzm()
            return False
        self.totalPage = total_page
        self.Id_CaseCode.update(id_to_case)
        self.updateCookie()
        return True

    def get_detail(self):
        """Fetch and record the detail response for every pending list hit.

        Every response body (including ones that failed to yield an ``id``)
        is printed and appended to ``self.file`` followed by a newline.
        ``self.Id_CaseCode`` is emptied afterwards.

        Raises:
            SystemExit: with status 0 when the server answers HTTP 412.
        """
        for record_id, raw_case_code in self.Id_CaseCode.items():
            case_code = parse.quote(raw_case_code)
            path = (
                f"disDetailNew?id={record_id}&caseCode={case_code}"
                f"&pCode={self.pCode}&captchaId={self.captchaId}"
            )
            signed_path = self.ctx200.call(
                "get_detail", "GET", path, self.captchaId, self.pCode, str(record_id), case_code
            )
            response = self.session.get(
                url=self.BASE_URL + signed_path, headers=self.headers, proxies=self.proxy
            )
            if response.status_code == 412:
                raise SystemExit(0)
            try:
                detail_id = json.loads(response.text).get("id")
            except (ValueError, AttributeError):
                # Non-JSON or non-object body: handle like a missing id
                # instead of aborting the whole crawl.
                detail_id = None
            if detail_id is None:
                # Refresh until a captcha is actually accepted, matching the
                # recovery path used by get_list.
                self.check_yzm()
            print("resp.text =>", response.text)
            self.file.write(response.text)
            self.file.write("\n")
            self.updateCookie()
        self.Id_CaseCode = {}

    def test(self):
        """Placeholder; intentionally does nothing."""
        pass

    def main(self):
        """Run the full crawl for every keyword and close the output file.

        The output file is closed even when the crawl aborts with an
        exception or ``SystemExit``.

        NOTE(review): a page whose ``get_list`` call fails is retried
        without a cap, as before.
        """
        try:
            self.init_page()
            self.check_yzm()
            for name in self.names:
                index = 1
                self.file.write(f"关键字 =======> {name}")
                self.file.write("\n")
                while True:
                    time.sleep(self.PAGE_DELAY)
                    if not self.get_list(name, index):
                        continue
                    self.get_detail()
                    # ``>=`` (not ``==``) so a keyword with zero pages, or a
                    # shrinking page count, cannot loop forever.
                    if index >= (self.totalPage or 0):
                        break
                    index += 1
                self.totalPage = 0
        finally:
            self.file.close()
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: build the crawler, then run the whole crawl.
    crawler = Get_Info()
    crawler.main()
|