# Source file: 412-200.py
import json
import random
import re
import sys
import time
from urllib import parse

import requests
from py_mini_racer import MiniRacer

from chaojiying import Chaojiying_Client
  6. headers = {
  7. "Connection": "keep-alive",
  8. "Pragma": "no-cache",
  9. "Cache-Control": "no-cache",
  10. "Upgrade-Insecure-Requests": "1",
  11. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  12. "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
  13. "Referer": "http://zxgk.court.gov.cn/",
  14. "Accept-Language": "zh-CN,zh;q=0.9"
  15. }
  16. def get_cookie_first(url):
  17. cookies = {}
  18. session = requests.session()
  19. # 第一次访问412
  20. html = session.get(url, headers=headers)
  21. print("第一次访问的结果 ===>",html)
  22. # 拿出content标签
  23. content = re.findall(r'<meta content="(.*?) r=\"m">', html.text)[0].replace('"', '')
  24. # 获取JS路径后缀
  25. win_ts_url = re.findall('" src="(.*?)" r=\'m\'>', html.text)[0]
  26. # 拼接url
  27. win_ts_url = parse.urljoin(url, win_ts_url)
  28. #提取出来js
  29. InnerJs = requests.get(win_ts_url, headers=headers, verify=False).text
  30. # 匹配出$_ts
  31. win_ts = re.findall(r"r='m'>(.*?)</script>", html.text)[1]
  32. with open('1.js', mode='r', encoding='utf-8') as f:
  33. code = f.read()
  34. js_txt = code.replace('window.win_ts', win_ts).replace('window.zhiyuan', InnerJs).replace('window.content',
  35. str(content))
  36. ctx = MiniRacer()
  37. ctx.eval(js_txt)
  38. cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', ctx.call("update_cookie"))[0]
  39. print("第一次cookie = ", cookie)
  40. cookies['lqWVdQzgOVyaT'] = cookie
  41. session.cookies.update(cookies)
  42. response = session.get(url, headers=headers)
  43. print("更新412的cookie后请求的结果 ===>",response)
  44. ##############################################200后的操作########################################
  45. # 拿出content标签
  46. second_content = re.findall(r'<meta content="(.*?) r=\"m">', response.text)[0].replace('"', '')
  47. # 获取JS路径后缀
  48. obj = re.compile(r'<script type="text/javascript" charset="utf-8" src="(?P<win_ts_url>.*?)" r=\'m\'></script>',
  49. re.S)
  50. second_win_ts_url = ""
  51. result = obj.finditer(response.text)
  52. for i in result:
  53. second_win_ts_url = i.group("win_ts_url")
  54. # 拼接url
  55. second_win_ts_url = parse.urljoin(url, second_win_ts_url)
  56. # 提取出来js
  57. second_InnerJs = requests.get(second_win_ts_url, headers=headers, verify=False).text
  58. # 匹配出$_ts
  59. second_win_ts = re.findall(r"r='m'>(.*?)</script>", response.text)[1]
  60. with open('2.js', mode='r', encoding='utf-8') as f:
  61. code = f.read()
  62. js_txt2 = code.replace('window.win_ts',second_win_ts).replace('window.zhiyuan111',second_InnerJs).replace('window.content',str(second_content)).replace("window.aaaaa",cookie)
  63. ctx2 = MiniRacer()
  64. ctx2.eval(js_txt2)
  65. #200页面生成的cookie
  66. new_cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', ctx2.call("get_cookie", cookie))[0] # Search_First_Page
  67. print("200更新cookie为: ", len(new_cookie), new_cookie)
  68. session.cookies.set("lqWVdQzgOVyaT", new_cookie)
  69. while True:
  70. new_cookie = update_cookie(ctx2,session,new_cookie)
  71. test_headers = {
  72. "Connection": "keep-alive",
  73. "Pragma": "no-cache",
  74. "Cache-Control": "no-cache",
  75. "Upgrade-Insecure-Requests": "1",
  76. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  77. "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
  78. "Referer": "http://zxgk.court.gov.cn/",
  79. "Accept-Language": "zh-CN,zh;q=0.9"
  80. }
  81. url = "http://zxgk.court.gov.cn/zhixing/"
  82. test_response = session.get(url=url,headers=test_headers,verify=False)
  83. print(test_response)
  84. if test_response.status_code == 200:
  85. break
  86. time.sleep(1)
  87. captcha_str,captchaId = yzm(session,ctx2,new_cookie)
  88. while True:
  89. new_cookie = update_cookie(ctx2, session, new_cookie)
  90. result = get_sxmd(session,captcha_str,captchaId,ctx2)
  91. if result == True:
  92. break
  93. def update_cookie(ctx2,session,cookie):
  94. new_cookie = re.findall('lqWVdQzgOVyaT=(.*?); p', ctx2.call("update_cookie", cookie))[0]
  95. print("更新cookie为: ", len(new_cookie), new_cookie)
  96. session.cookies.set("lqWVdQzgOVyaT", new_cookie)
  97. return new_cookie
  98. def get_captchaId():
  99. chars = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
  100. 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  101. 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
  102. 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
  103. 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
  104. 'x', 'y', 'z']
  105. nums = ""
  106. for i in range(32):
  107. ids = int(random.random() * 61)
  108. nums += chars[ids]
  109. return nums
  110. def yzm(session,ctx2,new_cookie):
  111. captcha_headers = {
  112. "Connection": "keep-alive",
  113. "Pragma": "no-cache",
  114. "Cache-Control": "no-cache",
  115. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  116. "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
  117. "Referer": "http://zxgk.court.gov.cn/zhixing/",
  118. "Accept-Language": "zh-CN,zh;q=0.9"
  119. }
  120. captchaId = get_captchaId()
  121. print("captchaId ===>",captchaId)
  122. params = {
  123. "captchaId": captchaId,
  124. "random": str(random.random())
  125. }
  126. try:
  127. captcha_str = ""
  128. response_captcha = session.get("http://zxgk.court.gov.cn/zhixing/captcha.do",headers=captcha_headers,params=params)
  129. print("验证码的响应 ===>",response_captcha)
  130. with open("captcha.jpg","wb") as f:
  131. f.write(response_captcha.content)
  132. if response_captcha.status_code == 200:
  133. chaojiying = Chaojiying_Client('15985724690', 'a520520a', '923160')
  134. captcha_str = chaojiying.PostPic(response_captcha.content, 1005).get('pic_str')
  135. print("验证码的结果 ===>",captcha_str)
  136. #激活操作
  137. yzm_url = f"checkyzm.do?captchaId={captchaId}&pCode={captcha_str}"
  138. yzm_url = ctx2.call("get_bzxr_hz","GET",yzm_url,captchaId,captcha_str,str(int(time.time() * 1000)))
  139. print("yzm_url ===>",yzm_url)
  140. new_cookie = update_cookie(ctx2,session,new_cookie)
  141. session.cookies.set("lqWVdQzgOVyaT", new_cookie)
  142. yzm_url = "http://zxgk.court.gov.cn" + yzm_url
  143. print("验证码的验证的url ===>",yzm_url)
  144. yzm_response = session.get(url=yzm_url,headers=headers,verify=False)
  145. print("验证码的验证 ===>",yzm_response.text)
  146. except:
  147. print("验证码error")
  148. return captcha_str, captchaId
  149. def get_sxmd(session,captcha_str,captchaId,ctx2):
  150. headers = {
  151. "Connection": "keep-alive",
  152. "Pragma": "no-cache",
  153. "Cache-Control": "no-cache",
  154. "Accept": "application/json, text/javascript, */*; q=0.01",
  155. "X-Requested-With": "XMLHttpRequest",
  156. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
  157. "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
  158. "Origin": "http://zxgk.court.gov.cn",
  159. "Referer": "http://zxgk.court.gov.cn/zhixing/",
  160. "Accept-Language": "zh-CN,zh;q=0.9"
  161. }
  162. url = "http://zxgk.court.gov.cn/zhixing/searchBzxr.do"
  163. data = {
  164. "pName": "张三",
  165. "pCardNum": "",
  166. "selectCourtId": "0",
  167. "pCode": captcha_str,
  168. "captchaId": captchaId,
  169. "searchCourtName": "全国法院(包含地方各级法院)",
  170. "selectCourtArrange": "1",
  171. "currentPage": "1"
  172. }
  173. response = session.post(url=url, headers=headers, data=data, verify=False)
  174. print(response.text)
  175. print(response)
  176. currentTime = str(int(time.time() * 1000))
  177. url = f"newdetail?id=1582536987&j_captcha={captcha_str}&captchaId={captchaId}&_={currentTime}"
  178. new_url = ctx2.call("get_bzxr_hz", "GET", url, captchaId, captcha_str, currentTime)
  179. new_url = "http://zxgk.court.gov.cn" + new_url
  180. print(new_url)
  181. response = session.get(new_url, headers=headers, verify=False)
  182. print("response.text =====>",response.text)
  183. print(response)
  184. exit()
  185. get_cookie_first("http://zxgk.court.gov.cn/")