2 gadi atpakaļ · aa337ad729
--- a/测试/shixin/1.js
+++ b/测试/shixin/1.js
--- a/测试/shixin/2.js
+++ b/测试/shixin/2.js
--- a/测试/shixin/Captcha.py
+++ b/测试/shixin/Captcha.py
@@ -0,0 +1,24 @@
 
				+import random
			
 
				+
			
 
				+class Init_Captcha:
			
 
				+    def __init__(self):
			
 
				+        self.init_URL = "http://zxgk.court.gov.cn/shixin/captchaNew.do"
			
 
				+
			
 
				+    def get_captchaId(self):
			
 
				+        chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
			
 
				+                 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
			
 
				+                 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
			
 
				+                 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
			
 
				+                 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
			
 
				+                 'x', 'y', 'z']
			
 
				+        nums = ""
			
 
				+        for i in range(32):
			
 
				+            ids = int(random.random() * 61)
			
 
				+            nums += chars[ids]
			
 
				+        return nums
			
 
				+
			
 
				+    #返回一个验证码的URL
			
 
				+    def main(self):
			
 
				+        captchaId = self.get_captchaId()
			
 
				+        MyRandom = str(random.random())
			
 
				+        return self.init_URL + "?captchaId=" + captchaId + "&random=" + MyRandom, captchaId, MyRandom
			
--- a/测试/shixin/RS.py
+++ b/测试/shixin/RS.py
--- a/测试/shixin/captcha.jpg
+++ b/测试/shixin/captcha.jpg
--- a/测试/shixin/detail.txt
+++ b/测试/shixin/detail.txt
--- a/测试/shixin/shixin.py
+++ b/测试/shixin/shixin.py
@@ -0,0 +1,178 @@
 
				+import json
			
 
				+import time
			
 
				+from urllib import parse
			
 
				+import ddddocr
			
 
				+import requests
			
 
				+from rs_zxgk.shixin.RS import Get_Cookie
			
 
				+from rs_zxgk.shixin.Captcha import Init_Captcha
			
 
				+import re
			
 
				+
			
 
				+
			
 
				+class Get_Info:
			
 
				+    def __init__(self):
			
 
				+        self.Get_Cookie = Get_Cookie()
			
 
				+        self.session, self.ctx200, self.proxy = self.Get_Cookie.main()
			
 
				+        self.updateCookie = self.Get_Cookie.update_cookie
			
 
				+        self.initURL = "http://zxgk.court.gov.cn/shixin"
			
 
				+        self.CaptchaCheckURL = "http://zxgk.court.gov.cn/shixin/checkyzm.do"
			
 
				+        self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
			
 
				+        self.DetailURL = "http://zxgk.court.gov.cn/zhixing/newdetail"
			
 
				+        self.headers = {
			
 
				+            "Connection": "keep-alive",
			
 
				+            "Pragma": "no-cache",
			
 
				+            "Cache-Control": "no-cache",
			
 
				+            "Accept": "application/json, text/javascript, */*; q=0.01",
			
 
				+            "X-Requested-With": "XMLHttpRequest",
			
 
				+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
			
 
				+            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
			
 
				+            "Origin": "http://zxgk.court.gov.cn",
			
 
				+            "Referer": "http://zxgk.court.gov.cn/zhixing/",
			
 
				+            "Accept-Language": "zh-CN,zh;q=0.9"
			
 
				+        }
			
 
				+        self.pCode = None
			
 
				+        self.ocr = ddddocr.DdddOcr()
			
 
				+        self.totalPage = 0
			
 
				+        self.Id_CaseCode = {}
			
 
				+        self.names = ["小明", "小白", "张三", "王五", "张伟"]
			
 
				+        self.file = open("test.txt", mode="w", encoding="utf-8")
			
 
				+
			
 
				+    def init_page(self):
			
 
				+        self.updateCookie()
			
 
				+        init_page_response = self.session.get(url=self.initURL, headers=self.headers, proxies=self.proxy)
			
 
				+        #print("init_page_response ===>", init_page_response)
			
 
				+        if init_page_response.status_code == 412:
			
 
				+            print("cookie失效,重新获取ts")
			
 
				+            return
			
 
				+        self.updateCookie()
			
 
				+
			
 
				+    def init_captcha(self):
			
 
				+        self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
			
 
				+        # print("获取验证码的URL ===>",self.CaptchaURL)
			
 
				+        Captcha_headers = {
			
 
				+            "Connection": "keep-alive",
			
 
				+            "Pragma": "no-cache",
			
 
				+            "Cache-Control": "no-cache",
			
 
				+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
			
 
				+            "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
			
 
				+            "Referer": "http://zxgk.court.gov.cn/zhixing/",
			
 
				+            "Accept-Language": "zh-CN,zh;q=0.9"
			
 
				+        }
			
 
				+        CaptchaResponse = self.session.get(url=self.CaptchaURL, headers=Captcha_headers, proxies=self.proxy)
			
 
				+        if CaptchaResponse.status_code == 412:
			
 
				+            print("cookie失效,重新获取ts")
			
 
				+            return
			
 
				+        # print("请求验证码的响应结果 ===>",CaptchaResponse)
			
 
				+        with open("captcha.jpg", 'wb') as f:
			
 
				+            f.write(CaptchaResponse.content)
			
 
				+        self.pCode = self.ocr.classification(CaptchaResponse.content)
			
 
				+        # print("验证码的结果 ===>",self.pCode)
			
 
				+        self.updateCookie()
			
 
				+
			
 
				+    def check_Captcha(self):
			
 
				+        self.updateCookie()
			
 
				+        url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode + ""
			
 
				+        checkURL = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
			
 
				+        checkURL = "http://zxgk.court.gov.cn" + checkURL
			
 
				+        #  print("验证码验证的URL ===>",checkURL)
			
 
				+        response = self.session.get(url=checkURL, headers=self.headers, proxies=self.proxy)
			
 
				+        if response.status_code == 412:
			
 
				+            print("cookie失效,重新获取ts")
			
 
				+            exit(0)
			
 
				+        if response.text.split("\n")[0] == "1":
			
 
				+            return True
			
 
				+        return False
			
 
				+
			
 
				+    def check_yzm(self):
			
 
				+        while True:
			
 
				+            self.init_captcha()
			
 
				+            ret = self.check_Captcha()
			
 
				+            self.updateCookie()
			
 
				+            if ret == True:
			
 
				+                break
			
 
				+            else:
			
 
				+                print("验证失败,正在进行重新请求验证码并验证")
			
 
				+        #print("验证码的验证通过")
			
 
				+
			
 
				+    def get_list(self, pname, currentPage):
			
 
				+        # print("执行了get_list")
			
 
				+        url = "http://zxgk.court.gov.cn"
			
 
				+        url = url + self.ctx200.call("get_list", "POST", "searchSX.do")
			
 
				+        #print("请求列表页的URL ===>", url)
			
 
				+        data = {
			
 
				+            "pName": pname,
			
 
				+            "pCardNum": "",
			
 
				+            "pProvince": "0",
			
 
				+            "pCode": self.pCode,
			
 
				+            "captchaId": self.captchaId,
			
 
				+            "currentPage": str(currentPage)
			
 
				+        }
			
 
				+        response = self.session.post(url=url, headers=self.headers, data=data, proxies=self.proxy)
			
 
				+        #print("response =>", response.text)
			
 
				+        if response.status_code == 412:
			
 
				+            print("cookie失效,重新获取ts")
			
 
				+            return
			
 
				+        try:
			
 
				+            result = json.loads(response.text)[0].get("result")
			
 
				+            self.totalPage = json.loads(response.text)[0].get("totalPage")
			
 
				+            for ret in result:
			
 
				+                self.Id_CaseCode[ret["id"]] = ret["caseCode"]
			
 
				+            #print("self.Id_CaseCode ===>", self.Id_CaseCode)
			
 
				+            self.updateCookie()
			
 
				+            return True
			
 
				+        except:
			
 
				+            # print(response.content.decode())
			
 
				+            # print("出现了error,进行刷新验证码操作")
			
 
				+            self.check_yzm()
			
 
				+            return False
			
 
				+
			
 
				+    def get_detail(self):
			
 
				+        for id in self.Id_CaseCode:
			
 
				+            #("当前的id对应的caseCode ===>", self.Id_CaseCode[id])
			
 
				+            caseCode = parse.quote(self.Id_CaseCode[id])
			
 
				+            checkURL = "disDetailNew?id=" + str(
			
 
				+                id) + "&caseCode=" + caseCode + "&pCode=" + self.pCode + "&captchaId=" + self.captchaId
			
 
				+            ret = self.ctx200.call("get_detail", "GET", checkURL, self.captchaId, self.pCode, str(id), caseCode)
			
 
				+            ret = "http://zxgk.court.gov.cn" + ret
			
 
				+            #print("当前请求的详情页 ===>", ret)
			
 
				+            response = self.session.get(url=ret, headers=self.headers, proxies=self.proxy)
			
 
				+            if response.status_code == 412:
			
 
				+                #print("cookie失效,重新获取ts")
			
 
				+                exit(0)
			
 
				+            id = json.loads(response.text).get("id")
			
 
				+            #print("id =>", id)
			
 
				+            if id == None:
			
 
				+                #print(response.content.decode())
			
 
				+                #print("进行刷新验证码操作")
			
 
				+                self.init_captcha()
			
 
				+                self.check_Captcha()
			
 
				+            print("resp.text =>", response.text)
			
 
				+            self.file.write(response.text)
			
 
				+            self.file.write("\n")
			
 
				+            self.updateCookie()
			
 
				+        self.Id_CaseCode = {}
			
 
				+
			
 
				+    def test(self):
			
 
				+        pass
			
 
				+
			
 
				+    def main(self):
			
 
				+        self.init_page()
			
 
				+        self.check_yzm()
			
 
				+        for name in self.names:
			
 
				+            index = 1
			
 
				+            self.file.write(f"关键字 =======> {name}")
			
 
				+            self.file.write("\n")
			
 
				+            while True:
			
 
				+                time.sleep(0.5)
			
 
				+                if self.get_list(name, index):
			
 
				+                    self.get_detail()
			
 
				+                    if index == self.totalPage:
			
 
				+                        break
			
 
				+                    index += 1
			
 
				+            #print("index =>", index)
			
 
				+            #print(f"关键字为 {name} 搜索完毕")
			
 
				+            self.totalPage = 0
			
 
				+        self.file.close()
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    Get_Info().main()
			
--- a/测试/shixin/test.txt
+++ b/测试/shixin/test.txt
--- a/测试/shixin/test/captcha.jpg
+++ b/测试/shixin/test/captcha.jpg
--- a/测试/shixin/test/python多线程.py
+++ b/测试/shixin/test/python多线程.py
@@ -0,0 +1,40 @@
 
				+import queue, requests
			
 
				+from threading import Thread
			
 
				+from rs_zxgk.shixin.shixin import Get_Info
			
 
				+
			
 
				+def spider(data9):
			
 
				+
			
 
				+    data9["obj"].main()
			
 
				+    #  所有的操作    CURD   请求都写到这个函数中   data9 是一个任务   就相当于一个url
			
 
				+    # 获取代理什么的,请求,全部写到这里面
			
 
				+    # TODO requests 里面 timeout 要写,否则线程会卡死
			
 
				+
			
 
				+    # url = 'https://www.baidu.com/'
			
 
				+    # headers = {
			
 
				+    #     'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36',
			
 
				+    # }
			
 
				+    # # requests  必须加timeout 参数,要不然会线程卡死
			
 
				+    # res = requests.get(url=url, headers=headers, timeout=10)
			
 
				+    # print('请求次数为', data9['req'], '开始时间为', data9['s'], '结束时间为', int(time.time()), '结果为： ', res.status_code)
			
 
				+
			
 
				+
			
 
				+q = queue.Queue(20)
			
 
				+if __name__ == "__main__":
			
 
				+
			
 
				+    class Work(Thread):
			
 
				+        def run(self):
			
 
				+            while True:
			
 
				+                spider(q.get())
			
 
				+
			
 
				+
			
 
				+    for i in range(10):  # 控制线程数,相当于开10个线程
			
 
				+        Work().start()
			
 
				+
			
 
				+    import time
			
 
				+    info = Get_Info()
			
 
				+    s = int(time.time())
			
 
				+    try:
			
 
				+        for conn in range(1, 100000):
			
 
				+            q.put({'req': conn, 's': s, "obj": info}, timeout=None)
			
 
				+    except Exception as e:
			
 
				+        pass
			
--- a/测试/shixin/test/test.txt
+++ b/测试/shixin/test/test.txt