|
@@ -87,10 +87,9 @@ def data_transform(data: list):
|
|
|
|
|
|
summary_max, summary_max_p_name = get_max_data(data, "$.summary." + i)
|
|
|
latest_date_max, latest_date_max_p_name = get_max_data(data, "$.latest_date." + i)
|
|
|
- if (latest_date_max is None or latest_date_max == '') and (summary_max is None or summary_max == 0):
|
|
|
- # print('这个维度为空...', i, )
|
|
|
- continue
|
|
|
- pass
|
|
|
+
|
|
|
+ winhc_dim_num = json_path(winhc_data, '$.summary.' + i)
|
|
|
+
|
|
|
if latest_date_max is not None and date_part.match(latest_date_max):
|
|
|
latest_date_max = datetime_format_transform(latest_date_max, '%Y年%m月%d日', "%Y-%m-%d %H:%M:%S")
|
|
|
pass
|
|
@@ -102,12 +101,20 @@ def data_transform(data: list):
|
|
|
if winhc_dim_date is not None and date_part.match(winhc_dim_date):
|
|
|
winhc_dim_date = datetime_format_transform(winhc_dim_date, '%Y年%m月%d日', "%Y-%m-%d %H:%M:%S")
|
|
|
pass
|
|
|
+
|
|
|
+ if (latest_date_max is None or latest_date_max == '') and (
|
|
|
+ summary_max is None or summary_max == 0) and winhc_dim_date is None and (
|
|
|
+ winhc_dim_num is None or winhc_dim_num == 0):
|
|
|
+ # print('这个维度为空...', i, )
|
|
|
+ continue
|
|
|
+ pass
|
|
|
+
|
|
|
other_data = {
|
|
|
"id": tmp_json['company_id'] + "_" + ds + "_" + i,
|
|
|
"dim_name": i,
|
|
|
"dim_max_num": summary_max,
|
|
|
"dim_max_num_business_name": summary_max_p_name,
|
|
|
- "winhc_dim_num": json_path(winhc_data, '$.summary.' + i),
|
|
|
+ "winhc_dim_num": winhc_dim_num,
|
|
|
"dim_max_date": latest_date_max,
|
|
|
"dim_max_date_business_name": latest_date_max_p_name,
|
|
|
"winhc_dim_date": winhc_dim_date,
|
|
@@ -165,9 +172,142 @@ def main(max_round: int = 2, interval_of_sed: int = 300):
|
|
|
pass
|
|
|
|
|
|
|
|
|
+# tmp_data = {
|
|
|
+# "_id": "20221214_0000b94de6aa5fba1f4daa0f2c353815_winhc",
|
|
|
+# "base_info": {
|
|
|
+# "cate_first": "租赁和商务服务业",
|
|
|
+# "cate_second": "商务服务业",
|
|
|
+# "cate_third": "旅行社及相关服务",
|
|
|
+# "city": "衢州市",
|
|
|
+# "company_id": "0000b94de6aa5fba1f4daa0f2c353815",
|
|
|
+# "company_name": "龙游县文化旅游发展有限公司",
|
|
|
+# "company_org_type": "有限责任公司(非自然人投资或控股的法人独资)",
|
|
|
+# "county": "龙游县",
|
|
|
+# "credit_code": "91330825573984254D",
|
|
|
+# "org_number": "573984254",
|
|
|
+# "province": "浙江省",
|
|
|
+# "reg_number": "330825000024620"
|
|
|
+# },
|
|
|
+# "competitor_product_name": "winhc",
|
|
|
+# "latest_date": {
|
|
|
+# "严重违法": None,
|
|
|
+# "公示催告": None,
|
|
|
+# "历史变更": "2022-04-15 00:00:00",
|
|
|
+# "双随机抽查": None,
|
|
|
+# "司法拍卖": None,
|
|
|
+# "土地公示": "2022-06-14 00:00:00",
|
|
|
+# "基本信息": "2021-12-17 00:00:00",
|
|
|
+# "失信信息": None,
|
|
|
+# "开庭公告": "2022-10-24 09:00:00",
|
|
|
+# "抽查检查": None,
|
|
|
+# "招投标": None,
|
|
|
+# "欠税公告": None,
|
|
|
+# "法院公告": None,
|
|
|
+# "环保处罚": None,
|
|
|
+# "税收违法": None,
|
|
|
+# "立案信息": "2019-08-21 00:00:00",
|
|
|
+# "终本案件": None,
|
|
|
+# "经营异常": None,
|
|
|
+# "行政处罚": None,
|
|
|
+# "行政许可": "2022-08-17 00:00:00",
|
|
|
+# "被执行人": None,
|
|
|
+# "裁判文书": "2020-05-29 00:00:00",
|
|
|
+# "诉前调解": None,
|
|
|
+# "询价评估": None,
|
|
|
+# "购地信息": "2022-06-27 00:00:00",
|
|
|
+# "送达公告": None,
|
|
|
+# "限制消费": None
|
|
|
+# },
|
|
|
+# "spider_date": "2022-12-14 10:09:43",
|
|
|
+# "summary": {
|
|
|
+# "主要成员": 2,
|
|
|
+# "企业年报": 9,
|
|
|
+# "历史主要成员": 1,
|
|
|
+# "历史变更": 32,
|
|
|
+# "历史对外投资": 0,
|
|
|
+# "历史股东信息": 0,
|
|
|
+# "商标": 97,
|
|
|
+# "土地公示": 11,
|
|
|
+# "对外投资": 21,
|
|
|
+# "股东信息": 1,
|
|
|
+# "行政许可": 10,
|
|
|
+# "裁判文书": 5,
|
|
|
+# "购地信息": 12
|
|
|
+# }
|
|
|
+# }
|
|
|
+#
|
|
|
+# tmp_data_2 = {
|
|
|
+# "_id": "20221215_0000b94de6aa5fba1f4daa0f2c353815_winhc",
|
|
|
+# "base_info": {
|
|
|
+# "cate_first": "租赁和商务服务业",
|
|
|
+# "cate_second": "商务服务业",
|
|
|
+# "cate_third": "旅行社及相关服务",
|
|
|
+# "city": "衢州市",
|
|
|
+# "company_id": "0000b94de6aa5fba1f4daa0f2c353815",
|
|
|
+# "company_name": "龙游县文化旅游发展有限公司",
|
|
|
+# "company_org_type": "有限责任公司(非自然人投资或控股的法人独资)",
|
|
|
+# "county": "龙游县",
|
|
|
+# "credit_code": "91330825573984254D",
|
|
|
+# "org_number": "573984254",
|
|
|
+# "province": "浙江省",
|
|
|
+# "reg_number": "330825000024620"
|
|
|
+# },
|
|
|
+# "competitor_product_name": "qcc",
|
|
|
+# "latest_date": {
|
|
|
+# "严重违法": None,
|
|
|
+# "公示催告": None,
|
|
|
+# "双随机抽查": None,
|
|
|
+# "司法拍卖": None,
|
|
|
+# "失信信息": None,
|
|
|
+# "抽查检查": None,
|
|
|
+# "招投标": None,
|
|
|
+# "欠税公告": None,
|
|
|
+# "法院公告": None,
|
|
|
+# "环保处罚": None,
|
|
|
+# "税收违法": None,
|
|
|
+# "终本案件": None,
|
|
|
+# "经营异常": None,
|
|
|
+# "行政处罚": None,
|
|
|
+# "被执行人": None,
|
|
|
+# "诉前调解": None,
|
|
|
+# "询价评估": None,
|
|
|
+# "送达公告": None,
|
|
|
+# "限制消费": None
|
|
|
+# },
|
|
|
+# "spider_date": "2022-12-14 10:09:43",
|
|
|
+# "summary": {
|
|
|
+# "主要成员": 0,
|
|
|
+# "企业年报": 0,
|
|
|
+# "历史主要成员": 0,
|
|
|
+# "历史变更": 0,
|
|
|
+# "历史对外投资": 0,
|
|
|
+# "历史股东信息": 0,
|
|
|
+# "商标": 0,
|
|
|
+# "土地公示": 0,
|
|
|
+# "对外投资": 0,
|
|
|
+# "股东信息": 0,
|
|
|
+# "行政许可": 0,
|
|
|
+# "裁判文书": 0,
|
|
|
+# "购地信息": None
|
|
|
+# }
|
|
|
+# }
|
|
|
+
|
|
|
+# def test():
|
|
|
+# ds = get_ds()
|
|
|
+# for i in col.find({"_id": {"$regex": "^" + ds}}).batch_size(200):
|
|
|
+# print(map_2_json_str(i))
|
|
|
+# break
|
|
|
+# pass
|
|
|
+#
|
|
|
+# data_transform([tmp_data,tmp_data_2])
|
|
|
+#
|
|
|
+# pass
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
|
|
|
+ # test()
|
|
|
+ #
|
|
|
log.info(f"input args: {sys.argv}")
|
|
|
-
|
|
|
parser = argparse.ArgumentParser()
|
|
|
parser.add_argument("-m", "--max-round", type=int, default=2, help='最大迭代轮次')
|
|
|
parser.add_argument("-i", "--interval_of_sed", type=int, default=300, help='每轮间隔时间(秒)')
|