Jelajahi Sumber

feat: 写出holo调整过滤

许家凯 2 tahun lalu
induk
melakukan
7159fef2be
1 file diubah dengan 146 penambahan dan 6 penghapusan
  1. 146 6
      spider/cpa_agg.py

+ 146 - 6
spider/cpa_agg.py

@@ -87,10 +87,9 @@ def data_transform(data: list):
 
         summary_max, summary_max_p_name = get_max_data(data, "$.summary." + i)
         latest_date_max, latest_date_max_p_name = get_max_data(data, "$.latest_date." + i)
-        if (latest_date_max is None or latest_date_max == '') and (summary_max is None or summary_max == 0):
-            # print('这个维度为空...', i, )
-            continue
-            pass
+
+        winhc_dim_num = json_path(winhc_data, '$.summary.' + i)
+
         if latest_date_max is not None and date_part.match(latest_date_max):
             latest_date_max = datetime_format_transform(latest_date_max, '%Y年%m月%d日', "%Y-%m-%d %H:%M:%S")
             pass
@@ -102,12 +101,20 @@ def data_transform(data: list):
         if winhc_dim_date is not None and date_part.match(winhc_dim_date):
             winhc_dim_date = datetime_format_transform(winhc_dim_date, '%Y年%m月%d日', "%Y-%m-%d %H:%M:%S")
             pass
+
+        if (latest_date_max is None or latest_date_max == '') and (
+                summary_max is None or summary_max == 0) and winhc_dim_date is None and (
+                winhc_dim_num is None or winhc_dim_num == 0):
+            # print('这个维度为空...', i, )
+            continue
+            pass
+
         other_data = {
             "id": tmp_json['company_id'] + "_" + ds + "_" + i,
             "dim_name": i,
             "dim_max_num": summary_max,
             "dim_max_num_business_name": summary_max_p_name,
-            "winhc_dim_num": json_path(winhc_data, '$.summary.' + i),
+            "winhc_dim_num": winhc_dim_num,
             "dim_max_date": latest_date_max,
             "dim_max_date_business_name": latest_date_max_p_name,
             "winhc_dim_date": winhc_dim_date,
@@ -165,9 +172,142 @@ def main(max_round: int = 2, interval_of_sed: int = 300):
     pass
 
 
+# tmp_data = {
+#     "_id": "20221214_0000b94de6aa5fba1f4daa0f2c353815_winhc",
+#     "base_info": {
+#         "cate_first": "租赁和商务服务业",
+#         "cate_second": "商务服务业",
+#         "cate_third": "旅行社及相关服务",
+#         "city": "衢州市",
+#         "company_id": "0000b94de6aa5fba1f4daa0f2c353815",
+#         "company_name": "龙游县文化旅游发展有限公司",
+#         "company_org_type": "有限责任公司(非自然人投资或控股的法人独资)",
+#         "county": "龙游县",
+#         "credit_code": "91330825573984254D",
+#         "org_number": "573984254",
+#         "province": "浙江省",
+#         "reg_number": "330825000024620"
+#     },
+#     "competitor_product_name": "winhc",
+#     "latest_date": {
+#         "严重违法": None,
+#         "公示催告": None,
+#         "历史变更": "2022-04-15 00:00:00",
+#         "双随机抽查": None,
+#         "司法拍卖": None,
+#         "土地公示": "2022-06-14 00:00:00",
+#         "基本信息": "2021-12-17 00:00:00",
+#         "失信信息": None,
+#         "开庭公告": "2022-10-24 09:00:00",
+#         "抽查检查": None,
+#         "招投标": None,
+#         "欠税公告": None,
+#         "法院公告": None,
+#         "环保处罚": None,
+#         "税收违法": None,
+#         "立案信息": "2019-08-21 00:00:00",
+#         "终本案件": None,
+#         "经营异常": None,
+#         "行政处罚": None,
+#         "行政许可": "2022-08-17 00:00:00",
+#         "被执行人": None,
+#         "裁判文书": "2020-05-29 00:00:00",
+#         "诉前调解": None,
+#         "询价评估": None,
+#         "购地信息": "2022-06-27 00:00:00",
+#         "送达公告": None,
+#         "限制消费": None
+#     },
+#     "spider_date": "2022-12-14 10:09:43",
+#     "summary": {
+#         "主要成员": 2,
+#         "企业年报": 9,
+#         "历史主要成员": 1,
+#         "历史变更": 32,
+#         "历史对外投资": 0,
+#         "历史股东信息": 0,
+#         "商标": 97,
+#         "土地公示": 11,
+#         "对外投资": 21,
+#         "股东信息": 1,
+#         "行政许可": 10,
+#         "裁判文书": 5,
+#         "购地信息": 12
+#     }
+# }
+#
+# tmp_data_2 = {
+#     "_id": "20221215_0000b94de6aa5fba1f4daa0f2c353815_winhc",
+#     "base_info": {
+#         "cate_first": "租赁和商务服务业",
+#         "cate_second": "商务服务业",
+#         "cate_third": "旅行社及相关服务",
+#         "city": "衢州市",
+#         "company_id": "0000b94de6aa5fba1f4daa0f2c353815",
+#         "company_name": "龙游县文化旅游发展有限公司",
+#         "company_org_type": "有限责任公司(非自然人投资或控股的法人独资)",
+#         "county": "龙游县",
+#         "credit_code": "91330825573984254D",
+#         "org_number": "573984254",
+#         "province": "浙江省",
+#         "reg_number": "330825000024620"
+#     },
+#     "competitor_product_name": "qcc",
+#     "latest_date": {
+#         "严重违法": None,
+#         "公示催告": None,
+#         "双随机抽查": None,
+#         "司法拍卖": None,
+#         "失信信息": None,
+#         "抽查检查": None,
+#         "招投标": None,
+#         "欠税公告": None,
+#         "法院公告": None,
+#         "环保处罚": None,
+#         "税收违法": None,
+#         "终本案件": None,
+#         "经营异常": None,
+#         "行政处罚": None,
+#         "被执行人": None,
+#         "诉前调解": None,
+#         "询价评估": None,
+#         "送达公告": None,
+#         "限制消费": None
+#     },
+#     "spider_date": "2022-12-14 10:09:43",
+#     "summary": {
+#         "主要成员": 0,
+#         "企业年报": 0,
+#         "历史主要成员": 0,
+#         "历史变更": 0,
+#         "历史对外投资": 0,
+#         "历史股东信息": 0,
+#         "商标": 0,
+#         "土地公示": 0,
+#         "对外投资": 0,
+#         "股东信息": 0,
+#         "行政许可": 0,
+#         "裁判文书": 0,
+#         "购地信息": None
+#     }
+# }
+
+# def test():
+#     ds = get_ds()
+#     for i in col.find({"_id": {"$regex": "^" + ds}}).batch_size(200):
+#         print(map_2_json_str(i))
+#         break
+#         pass
+#
+#     data_transform([tmp_data,tmp_data_2])
+#
+#     pass
+
+
 if __name__ == '__main__':
+    # test()
+    #
     log.info(f"input args: {sys.argv}")
-
     parser = argparse.ArgumentParser()
     parser.add_argument("-m", "--max-round", type=int, default=2, help='最大迭代轮次')
     parser.add_argument("-i", "--interval_of_sed", type=int, default=300, help='每轮间隔时间(秒)')