Kaynağa Gözat

feat: 数据聚合结束时增加延时

许家凯 2 yıl önce
ebeveyn
işleme
8b7d810a31
2 değiştirilmiş dosya ile 11 ekleme ve 137 silme
  1. +9 -135
      spider/cpa_agg.py
  2. +2 -2
      utils/xxl_queue.py

+ 9 - 135
spider/cpa_agg.py

@@ -134,10 +134,9 @@ def data_transform(data: list):
 
     log.info('output data: {}'.format(li))
 
-    if li is None or len(li) == 0:
-        return li
+    if li is not None and len(li) > 0:
+        insert_many(li, holo_keys, HOLO_TABLE_NAME, holo_client)
 
-    insert_many(li, holo_keys, HOLO_TABLE_NAME, holo_client)
     del_num = 0
     try:
         del_col.insert_many(data, ordered=False)
@@ -216,138 +215,6 @@ def main(max_round: int = 2, interval_of_sed: int = 300):
     pass
 
 
-# tmp_data = {
-#     "_id": "20221214_0000b94de6aa5fba1f4daa0f2c353815_winhc",
-#     "base_info": {
-#         "cate_first": "租赁和商务服务业",
-#         "cate_second": "商务服务业",
-#         "cate_third": "旅行社及相关服务",
-#         "city": "衢州市",
-#         "company_id": "0000b94de6aa5fba1f4daa0f2c353815",
-#         "company_name": "龙游县文化旅游发展有限公司",
-#         "company_org_type": "有限责任公司(非自然人投资或控股的法人独资)",
-#         "county": "龙游县",
-#         "credit_code": "91330825573984254D",
-#         "org_number": "573984254",
-#         "province": "浙江省",
-#         "reg_number": "330825000024620"
-#     },
-#     "competitor_product_name": "winhc",
-#     "latest_date": {
-#         "严重违法": None,
-#         "公示催告": None,
-#         "历史变更": "2022-04-15 00:00:00",
-#         "双随机抽查": None,
-#         "司法拍卖": None,
-#         "土地公示": "2022-06-14 00:00:00",
-#         "基本信息": "2021-12-17 00:00:00",
-#         "失信信息": None,
-#         "开庭公告": "2022-10-24 09:00:00",
-#         "抽查检查": None,
-#         "招投标": None,
-#         "欠税公告": None,
-#         "法院公告": None,
-#         "环保处罚": None,
-#         "税收违法": None,
-#         "立案信息": "2019-08-21 00:00:00",
-#         "终本案件": None,
-#         "经营异常": None,
-#         "行政处罚": None,
-#         "行政许可": "2022-08-17 00:00:00",
-#         "被执行人": None,
-#         "裁判文书": "2020-05-29 00:00:00",
-#         "诉前调解": None,
-#         "询价评估": None,
-#         "购地信息": "2022-06-27 00:00:00",
-#         "送达公告": None,
-#         "限制消费": None
-#     },
-#     "spider_date": "2022-12-14 10:09:43",
-#     "summary": {
-#         "主要成员": 2,
-#         "企业年报": 9,
-#         "历史主要成员": 1,
-#         "历史变更": 32,
-#         "历史对外投资": 0,
-#         "历史股东信息": 0,
-#         "商标": 97,
-#         "土地公示": 11,
-#         "对外投资": 21,
-#         "股东信息": 1,
-#         "行政许可": 10,
-#         "裁判文书": 5,
-#         "购地信息": 12
-#     }
-# }
-#
-# tmp_data_2 = {
-#     "_id": "20221215_0000b94de6aa5fba1f4daa0f2c353815_winhc",
-#     "base_info": {
-#         "cate_first": "租赁和商务服务业",
-#         "cate_second": "商务服务业",
-#         "cate_third": "旅行社及相关服务",
-#         "city": "衢州市",
-#         "company_id": "0000b94de6aa5fba1f4daa0f2c353815",
-#         "company_name": "龙游县文化旅游发展有限公司",
-#         "company_org_type": "有限责任公司(非自然人投资或控股的法人独资)",
-#         "county": "龙游县",
-#         "credit_code": "91330825573984254D",
-#         "org_number": "573984254",
-#         "province": "浙江省",
-#         "reg_number": "330825000024620"
-#     },
-#     "competitor_product_name": "qcc",
-#     "latest_date": {
-#         "严重违法": None,
-#         "公示催告": None,
-#         "双随机抽查": None,
-#         "司法拍卖": None,
-#         "失信信息": None,
-#         "抽查检查": None,
-#         "招投标": None,
-#         "欠税公告": None,
-#         "法院公告": None,
-#         "环保处罚": None,
-#         "税收违法": None,
-#         "终本案件": None,
-#         "经营异常": None,
-#         "行政处罚": None,
-#         "被执行人": None,
-#         "诉前调解": None,
-#         "询价评估": None,
-#         "送达公告": None,
-#         "限制消费": None
-#     },
-#     "spider_date": "2022-12-14 10:09:43",
-#     "summary": {
-#         "主要成员": 0,
-#         "企业年报": 0,
-#         "历史主要成员": 0,
-#         "历史变更": 0,
-#         "历史对外投资": 0,
-#         "历史股东信息": 0,
-#         "商标": 0,
-#         "土地公示": 0,
-#         "对外投资": 0,
-#         "股东信息": 0,
-#         "行政许可": 0,
-#         "裁判文书": 0,
-#         "购地信息": None
-#     }
-# }
-
-# def test():
-#     ds = get_ds()
-#     for i in col.find({"_id": {"$regex": "^" + ds}}).batch_size(200):
-#         print(map_2_json_str(i))
-#         break
-#         pass
-#
-#     data_transform([tmp_data,tmp_data_2])
-#
-#     pass
-
-
 if __name__ == '__main__':
     # test()
     #
@@ -358,4 +225,11 @@ if __name__ == '__main__':
     args = parser.parse_args()
 
     main(max_round=args.max_round, interval_of_sed=args.interval_of_sed)
+
+    while not q.empty():
+        log.info(f"遍历未结束,队列剩余:{q.qsize()}")
+        time.sleep(300)
+        pass
+
+    log.info(f"遍历完成,队列剩余:{q.qsize()}")
     pass

+ 2 - 2
utils/xxl_queue.py

@@ -114,9 +114,9 @@ class xxl_queue:
         elif self.data[index][0] == key:
             self.data[index][1].append(obj)
         else:
+            _key, _obj_list = self.data[index]
+            del self.index_data[_key]
             if self.overwrite_handle is not None:
-                _key, _obj_list = self.data[index]
-                del self.index_data[_key]
                 self.overwrite_handle(_key, _obj_list)
                 pass