Pārlūkot izejas kodu

瑞数的cookie测试文件提交

ywb 1 gadu atpakaļ
vecāks
revīzija
aa337ad729

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 2007 - 0
测试/shixin/1.js


Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 7080 - 0
测试/shixin/2.js


+ 24 - 0
测试/shixin/Captcha.py

@@ -0,0 +1,24 @@
+import random
+
+class Init_Captcha:
+    def __init__(self):
+        self.init_URL = "http://zxgk.court.gov.cn/shixin/captchaNew.do"
+
+    def get_captchaId(self):
+        chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
+                 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+                 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
+                 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
+                 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+                 'x', 'y', 'z']
+        nums = ""
+        for i in range(32):
+            ids = int(random.random() * 61)
+            nums += chars[ids]
+        return nums
+
+    #返回一个验证码的URL
+    def main(self):
+        captchaId = self.get_captchaId()
+        MyRandom = str(random.random())
+        return self.init_URL + "?captchaId=" + captchaId + "&random=" + MyRandom, captchaId, MyRandom

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 9225 - 0
测试/shixin/RS.py


BIN
测试/shixin/captcha.jpg


+ 0 - 0
测试/shixin/detail.txt


+ 178 - 0
测试/shixin/shixin.py

@@ -0,0 +1,178 @@
+import json
+import time
+from urllib import parse
+import ddddocr
+import requests
+from rs_zxgk.shixin.RS import Get_Cookie
+from rs_zxgk.shixin.Captcha import Init_Captcha
+import re
+
+
+class Get_Info:
+    def __init__(self):
+        self.Get_Cookie = Get_Cookie()
+        self.session, self.ctx200, self.proxy = self.Get_Cookie.main()
+        self.updateCookie = self.Get_Cookie.update_cookie
+        self.initURL = "http://zxgk.court.gov.cn/shixin"
+        self.CaptchaCheckURL = "http://zxgk.court.gov.cn/shixin/checkyzm.do"
+        self.ListURL = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
+        self.DetailURL = "http://zxgk.court.gov.cn/zhixing/newdetail"
+        self.headers = {
+            "Connection": "keep-alive",
+            "Pragma": "no-cache",
+            "Cache-Control": "no-cache",
+            "Accept": "application/json, text/javascript, */*; q=0.01",
+            "X-Requested-With": "XMLHttpRequest",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
+            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+            "Origin": "http://zxgk.court.gov.cn",
+            "Referer": "http://zxgk.court.gov.cn/zhixing/",
+            "Accept-Language": "zh-CN,zh;q=0.9"
+        }
+        self.pCode = None
+        self.ocr = ddddocr.DdddOcr()
+        self.totalPage = 0
+        self.Id_CaseCode = {}
+        self.names = ["小明", "小白", "张三", "王五", "张伟"]
+        self.file = open("test.txt", mode="w", encoding="utf-8")
+
+    def init_page(self):
+        self.updateCookie()
+        init_page_response = self.session.get(url=self.initURL, headers=self.headers, proxies=self.proxy)
+        #print("init_page_response ===>", init_page_response)
+        if init_page_response.status_code == 412:
+            print("cookie失效,重新获取ts")
+            return
+        self.updateCookie()
+
+    def init_captcha(self):
+        self.CaptchaURL, self.captchaId, self.MyRandom = Init_Captcha().main()
+        # print("获取验证码的URL ===>",self.CaptchaURL)
+        Captcha_headers = {
+            "Connection": "keep-alive",
+            "Pragma": "no-cache",
+            "Cache-Control": "no-cache",
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
+            "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
+            "Referer": "http://zxgk.court.gov.cn/zhixing/",
+            "Accept-Language": "zh-CN,zh;q=0.9"
+        }
+        CaptchaResponse = self.session.get(url=self.CaptchaURL, headers=Captcha_headers, proxies=self.proxy)
+        if CaptchaResponse.status_code == 412:
+            print("cookie失效,重新获取ts")
+            return
+        # print("请求验证码的响应结果 ===>",CaptchaResponse)
+        with open("captcha.jpg", 'wb') as f:
+            f.write(CaptchaResponse.content)
+        self.pCode = self.ocr.classification(CaptchaResponse.content)
+        # print("验证码的结果 ===>",self.pCode)
+        self.updateCookie()
+
+    def check_Captcha(self):
+        self.updateCookie()
+        url = "checkyzm.do?captchaId=" + self.captchaId + "&pCode=" + self.pCode + ""
+        checkURL = self.ctx200.call("check_yzm", "GET", url, self.captchaId, self.pCode)
+        checkURL = "http://zxgk.court.gov.cn" + checkURL
+        #  print("验证码验证的URL ===>",checkURL)
+        response = self.session.get(url=checkURL, headers=self.headers, proxies=self.proxy)
+        if response.status_code == 412:
+            print("cookie失效,重新获取ts")
+            exit(0)
+        if response.text.split("\n")[0] == "1":
+            return True
+        return False
+
+    def check_yzm(self):
+        while True:
+            self.init_captcha()
+            ret = self.check_Captcha()
+            self.updateCookie()
+            if ret == True:
+                break
+            else:
+                print("验证失败,正在进行重新请求验证码并验证")
+        #print("验证码的验证通过")
+
+    def get_list(self, pname, currentPage):
+        # print("执行了get_list")
+        url = "http://zxgk.court.gov.cn"
+        url = url + self.ctx200.call("get_list", "POST", "searchSX.do")
+        #print("请求列表页的URL ===>", url)
+        data = {
+            "pName": pname,
+            "pCardNum": "",
+            "pProvince": "0",
+            "pCode": self.pCode,
+            "captchaId": self.captchaId,
+            "currentPage": str(currentPage)
+        }
+        response = self.session.post(url=url, headers=self.headers, data=data, proxies=self.proxy)
+        #print("response =>", response.text)
+        if response.status_code == 412:
+            print("cookie失效,重新获取ts")
+            return
+        try:
+            result = json.loads(response.text)[0].get("result")
+            self.totalPage = json.loads(response.text)[0].get("totalPage")
+            for ret in result:
+                self.Id_CaseCode[ret["id"]] = ret["caseCode"]
+            #print("self.Id_CaseCode ===>", self.Id_CaseCode)
+            self.updateCookie()
+            return True
+        except:
+            # print(response.content.decode())
+            # print("出现了error,进行刷新验证码操作")
+            self.check_yzm()
+            return False
+
+    def get_detail(self):
+        for id in self.Id_CaseCode:
+            #("当前的id对应的caseCode ===>", self.Id_CaseCode[id])
+            caseCode = parse.quote(self.Id_CaseCode[id])
+            checkURL = "disDetailNew?id=" + str(
+                id) + "&caseCode=" + caseCode + "&pCode=" + self.pCode + "&captchaId=" + self.captchaId
+            ret = self.ctx200.call("get_detail", "GET", checkURL, self.captchaId, self.pCode, str(id), caseCode)
+            ret = "http://zxgk.court.gov.cn" + ret
+            #print("当前请求的详情页 ===>", ret)
+            response = self.session.get(url=ret, headers=self.headers, proxies=self.proxy)
+            if response.status_code == 412:
+                #print("cookie失效,重新获取ts")
+                exit(0)
+            id = json.loads(response.text).get("id")
+            #print("id =>", id)
+            if id == None:
+                #print(response.content.decode())
+                #print("进行刷新验证码操作")
+                self.init_captcha()
+                self.check_Captcha()
+            print("resp.text =>", response.text)
+            self.file.write(response.text)
+            self.file.write("\n")
+            self.updateCookie()
+        self.Id_CaseCode = {}
+
+    def test(self):
+        pass
+
+    def main(self):
+        self.init_page()
+        self.check_yzm()
+        for name in self.names:
+            index = 1
+            self.file.write(f"关键字 =======> {name}")
+            self.file.write("\n")
+            while True:
+                time.sleep(0.5)
+                if self.get_list(name, index):
+                    self.get_detail()
+                    if index == self.totalPage:
+                        break
+                    index += 1
+            #print("index =>", index)
+            #print(f"关键字为 {name} 搜索完毕")
+            self.totalPage = 0
+        self.file.close()
+
+
+if __name__ == '__main__':
+    Get_Info().main()

Failā izmaiņas netiks attēlotas, jo tās ir par lielu
+ 722 - 0
测试/shixin/test.txt


BIN
测试/shixin/test/captcha.jpg


+ 40 - 0
测试/shixin/test/python多线程.py

@@ -0,0 +1,40 @@
+import queue, requests
+from threading import Thread
+from rs_zxgk.shixin.shixin import Get_Info
+
+def spider(data9):
+
+    data9["obj"].main()
+    #  所有的操作    CURD   请求都写到这个函数中   data9 是一个任务   就相当于一个url
+    # 获取代理什么的,请求,全部写到这里面
+    # TODO requests 里面 timeout 要写,否则线程会卡死
+
+    # url = 'https://www.baidu.com/'
+    # headers = {
+    #     'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36',
+    # }
+    # # requests  必须加timeout 参数,要不然会线程卡死
+    # res = requests.get(url=url, headers=headers, timeout=10)
+    # print('请求次数为', data9['req'], '开始时间为', data9['s'], '结束时间为', int(time.time()), '结果为: ', res.status_code)
+
+
+q = queue.Queue(20)
+if __name__ == "__main__":
+
+    class Work(Thread):
+        def run(self):
+            while True:
+                spider(q.get())
+
+
+    for i in range(10):  # 控制线程数,相当于开10个线程
+        Work().start()
+
+    import time
+    info = Get_Info()
+    s = int(time.time())
+    try:
+        for conn in range(1, 100000):
+            q.put({'req': conn, 's': s, "obj": info}, timeout=None)
+    except Exception as e:
+        pass

+ 0 - 0
测试/shixin/test/test.txt