浏览代码

feat: winhc的摘要查询

- winhc摘要查询
- 维度字段配置
许家凯 2 年之前
父节点
当前提交
4009ed1e52
共有 3 个文件被更改，包括 558 次插入和 6 次删除
  1. 72 0
      handle/search_winhc_summary.py
  2. 1 1
      spider/Demo.py
  3. 485 5
      utils/dim_name_mapping.py

+ 72 - 0
handle/search_winhc_summary.py

@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+# @Time : 2022/11/29 15:11
+# @Author : XuJiakai
+# @File : search_winhc_summary
+# @Software: PyCharm
+
+
+from sdk import get_es_sdk
+from utils import map_2_json_str
+
+# Module-level Elasticsearch client obtained from the project SDK. What the "new"
+# argument selects is decided inside sdk.get_es_sdk, which is not visible here.
+es_sdk = get_es_sdk("new")
+
+# Display item names that _summary_format() removes from its aggregated result.
+skip_item = ['司法拍卖', '经营异常', '企业年报', '产品信息', '历史变更', '抽查检查', '软件著作权', '作品著作权', '失信信息', '双随机抽查', '融资信息', '股东信息', '行政许可',
+             '专利', '行政处罚', '主要成员', '商标', '被执行人', '终本案件', '限制消费', '询价评估']
+
+
+def _summary_format(summary: map):
+    from utils.dim_name_mapping import get_summary_map
+
+    result_data = {}
+    dim_name = get_summary_map()
+
+    for i in summary:
+        if i not in dim_name:
+            continue
+        info = dim_name[i]
+        item_name = info['item_name']
+        if item_name in result_data:
+            result_data[item_name] = result_data[item_name] + summary[i]
+            pass
+        else:
+            result_data[item_name] = summary[i]
+
+    for i in skip_item:
+        if i in result_data:
+            del result_data[i]
+
+    print(result_data.keys())
+
+    print(map_2_json_str(result_data))
+
+    return result_data
+    pass
+
+
+def search_summary(company_id: str):
+    dsl = {
+        "size": 1000,
+        "query": {
+            "term": {
+                "_id": {
+                    "value": company_id
+                }
+            }
+        }
+    }
+    res = es_sdk.query(index='ng_rt_summary_company', doc_type='_doc', dsl=dsl)
+
+    res_map = {}
+
+    for i in res:
+        m = i['summary']
+        for j in m:
+            res_map[j] = m[j]
+
+    print(map_2_json_str(res_map))
+    return _summary_format(res_map)
+
+
+if __name__ == '__main__':
+    # Manual run against a hard-coded company id; search_summary() issues an ES query.
+    search_summary(company_id='64406e27d43838e78aae09d8096ef7ff')
+    pass

+ 1 - 1
spider/Demo.py

@@ -21,7 +21,7 @@ input_args = {
 output_args = {
     "_id": "",  # concat_ws('_',company_id,competitor_product_name,spider_date)
     "base_info": input_args,
-    "competitor_product_name": "X查查",
+    "competitor_product_name": "qcc",
     "summary": {
         "股东信息": 1
     },

+ 485 - 5
utils/dim_name_mapping.py

@@ -20,11 +20,383 @@ _map_class = {
 _history_item_key = ['经营异常', '严重违法', '被执行人', '失信信息', '终本案件', '限制高消费', '法院公告', '行政处罚', '动产抵押', '土地抵押', '开庭公告',
                      '股权出质-质权人', '股权出质', '股权冻结', ]
 
-_map_latest_date = {
-    "历史变更": {"latest_field": "", "latest_name": ""}
-
+# Per-item configuration, keyed by display item name. Fields read by the
+# registration loop that follows this dict in the module:
+#   dim_name            - comma-separated names; each one, suffixed with "_del_0"
+#                         (or "_del_1" for the history variant), becomes a lookup key
+#                         when no explicit summary key is configured. Empty means
+#                         nothing is registered from dim_name.
+#   summary_key         - optional comma-separated lookup keys; when non-empty they
+#                         are used instead of the "<dim_name>_del_0" keys.
+#   history_summary_key - optional comma-separated lookup keys for the history variant.
+#   history             - when truthy and history_summary_key is empty, the
+#                         "<dim_name>_del_1" keys are registered for the history variant.
+#   sort_field          - not read anywhere in the visible code.
+_winhc_dim_map = {
+    "股东信息": {
+        "dim_name": "company_holder",
+        "summary_key": "",
+        "history_summary_key": "",
+        "history": True,
+        "sort_field": None
+    },
+    "主要成员": {
+        "dim_name": "company_staff",
+        "history": True,
+        "sort_field": None
+    },
+    "对外投资": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    # Disabled entry, kept commented out:
+    # "总公司": {
+    #     "dim_name": "",
+    #     "sort_field": None
+    # },
+    "分支机构": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "历史变更": {
+        "dim_name": "company_change",
+        "summary_key": "", "sort_field": ""
+    },
+    "企业年报": {
+        "dim_name": "company_annual_report",
+        "summary_key": "", "sort_field": ""
+    },
+    "司法案件": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "失信信息": {
+        "dim_name": "company_dishonest_info",
+        "summary_key": "", "sort_field": ""
+    },
+    "被执行人": {
+        "dim_name": "company_zxr",
+        "summary_key": "", "sort_field": ""
+    },
+    "限制消费": {
+        "dim_name": "company_zxr_restrict",
+        "summary_key": "", "sort_field": ""
+    },
+    "终本案件": {
+        "dim_name": "company_zxr_final_case",
+        "summary_key": "", "sort_field": ""
+    },
+    "送达公告": {
+        "dim_name": "company_send_announcement",
+        "summary_key": "", "sort_field": ""
+    },
+    "裁判文书": {
+        "dim_name": "wenshu_detail_v2",
+        "summary_key": "", "sort_field": ""
+    },
+    "开庭公告": {
+        "dim_name": "company_court_open_announcement",
+        "summary_key": "", "sort_field": ""
+    },
+    "立案信息": {
+        "dim_name": "company_court_register",
+        "summary_key": "", "sort_field": ""
+    },
+    "法院公告": {
+        "dim_name": "company_court_announcement",
+        "summary_key": "", "sort_field": ""
+    },
+    "股权冻结": {
+        "dim_name": "company_judicial_assistance",
+        "summary_key": "", "sort_field": ""
+    },
+    "破产重整": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "司法拍卖": {
+        "dim_name": "auction_tracking",
+        "summary_key": "", "sort_field": ""
+    },
+    "询价评估": {
+        "dim_name": "zxr_evaluate,zxr_evaluate_results",
+        "summary_key": "", "sort_field": ""
+    },
+    "限制出境": {
+        "dim_name": "restrictions_on_exit",
+        "summary_key": "", "sort_field": ""
+    },
+    "诉前调解": {
+        "dim_name": "litigation_mediation",
+        "summary_key": "", "sort_field": ""
+    },
+    "破产债权": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "行政处罚": {
+        "dim_name": "company_punishment_info,company_punishment_info_creditchina",
+        "summary_key": "", "sort_field": ""
+    },
+    "经营异常": {
+        "dim_name": "company_abnormal_info",
+        "summary_key": "", "sort_field": ""
+    },
+    "股权出质": {  # todo
+        "dim_name": "company_equity_info",
+        "summary_key": "", "sort_field": ""
+    },
+    "减资记录": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "土地转让-出让人": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "土地抵押-抵押人": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "动产抵押": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "环保处罚": {
+        "dim_name": "company_env_punishment",
+        "summary_key": "", "sort_field": ""
+    },
+    "税收违法": {
+        "dim_name": "company_tax_contravention",
+        "summary_key": "", "sort_field": ""
+    },
+    "欠税公告": {
+        "dim_name": "company_own_tax",
+        "summary_key": "", "sort_field": ""
+    },
+    "公示催告": {
+        "dim_name": "company_public_announcement",
+        "summary_key": "", "sort_field": ""
+    },
+    "严重违法": {
+        "dim_name": "company_illegal_info",
+        "summary_key": "", "sort_field": ""
+    },
+    "简易注销": {
+        "dim_name": "company_brief_cancel_announcement",
+        "summary_key": "", "sort_field": ""
+    },
+    "解散清算": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "股权质押": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "股权质押-质押人": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "知识产权出质": {
+        "dim_name": "company_ipr_pledge",
+        "summary_key": "", "sort_field": ""
+    },
+    "注销备案": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "招投标": {
+        "dim_name": "company_bid",
+        "summary_key": "", "sort_field": ""
+    },
+    "招聘信息": {
+        "dim_name": "company_employment",
+        "summary_key": "", "sort_field": ""
+    },
+    "特殊资质": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "进出口信用": {  # todo
+        "dim_name": "company_customs_credit",
+        "summary_key": "", "sort_field": ""
+    },
+    "行政许可": {
+        "dim_name": "company_license,company_license_entpub,company_license_creditchina",
+        "summary_key": "", "sort_field": ""
+    },
+    "土地公示": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "购地信息": {
+        "dim_name": "company_land_announcement",
+        "summary_key": "", "sort_field": ""
+    },
+    "土地转让-受让人": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "土地抵押-抵押权人": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "信用评级": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "税务信用": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "融资信息": {
+        "dim_name": "company_finance",
+        "summary_key": "", "sort_field": ""
+    },
+    "债券信息": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "股权出质-质权人": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "产品信息": {
+        "dim_name": "company_app_info",
+        "summary_key": "", "sort_field": ""
+    },
+    "主要客户": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "供应商": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "抽查检查": {
+        "dim_name": "company_check_info",
+        "summary_key": "", "sort_field": ""
+    },
+    "产权交易": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "双随机抽查": {
+        "dim_name": "company_double_random_check_info",
+        "summary_key": "", "sort_field": ""
+    },
+    "商标": {
+        "dim_name": "company_tm",
+        "summary_key": "", "sort_field": ""
+    },
+    "专利": {
+        "dim_name": "company_patent",
+        "summary_key": "", "sort_field": ""
+    },
+    "软件著作权": {
+        "dim_name": "company_copyright_reg",
+        "summary_key": "", "sort_field": ""
+    },
+    "作品著作权": {
+        "dim_name": "company_copyright_works",
+        "summary_key": "", "sort_field": ""
+    },
+    "网站域名": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "建筑资质": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "建筑人员": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "建筑项目": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "应收账款融资": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "DCM注册额度": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "债券融资": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "境外债券融资": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "银行借款": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "授信额度": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "租赁融资": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "信托融资": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+    "其他融资": {
+        "dim_name": "",
+        "summary_key": "", "sort_field": ""
+    },
+}
 
+_summary_dim_2_name = {}
+list = ['行政处罚', '企业年报', '历史变更', '主要成员', '股东信息']
+
+
+_item_2_class = {}
+for i in _map_class:
+    for j in _map_class[i]:
+        _item_2_class[j] = i
+    pass
+
+for i in _winhc_dim_map:
+    # if i not in list:
+    #     continue
+
+    if 'summary_key' in _winhc_dim_map[i] and _winhc_dim_map[i]['summary_key'] != '':
+        summary_key = _winhc_dim_map[i]['summary_key']
+        for j in summary_key.split(','):
+            _summary_dim_2_name[j] = _winhc_dim_map[i].copy()
+            _summary_dim_2_name[j]['item_name'] = i
+            _summary_dim_2_name[j]['class'] = _item_2_class[i]
+        pass
+    else:
+        dim_name = _winhc_dim_map[i]['dim_name']
+        if dim_name is None or dim_name == '':
+            continue
+        for j in dim_name.split(','):
+            _summary_dim_2_name[j + "_del_0"] = _winhc_dim_map[i].copy()
+            _summary_dim_2_name[j + "_del_0"]['item_name'] = i
+            _summary_dim_2_name[j + "_del_0"]['class'] = _item_2_class[i]
+            pass
+
+    if 'history_summary_key' in _winhc_dim_map[i] and _winhc_dim_map[i]['history_summary_key'] != '':
+        history_summary_key = _winhc_dim_map[i]['history_summary_key']
+        for j in history_summary_key.split(','):
+            _summary_dim_2_name[j] = _winhc_dim_map[i].copy()
+            _summary_dim_2_name[j]['item_name'] = '历史' + i
+            _summary_dim_2_name[j]['class'] = '历史信息'
+        pass
+    elif 'history' in _winhc_dim_map[i] and _winhc_dim_map[i]['history']:
+        dim_name = _winhc_dim_map[i]['dim_name']
+        if dim_name is None or dim_name == '':
+            continue
+        for j in dim_name.split(','):
+            _summary_dim_2_name[j + "_del_1"] = _winhc_dim_map[i].copy()
+            _summary_dim_2_name[j + "_del_1"]['item_name'] = '历史' + i
+            _summary_dim_2_name[j + "_del_1"]['class'] = '历史信息'
+            pass
+    pass
+
+
+def get_summary_map():
+    """Return the module-level ``_summary_dim_2_name`` lookup dict (the same object, not a copy)."""
+    return _summary_dim_2_name
+
+
 _map = {
     "accounts_receivablae": "应收账款融资", "auction_tracking": "司法拍卖", "auction_tracking_explode": "司法拍卖-explode",
     "auction_tracking_v9": "司法拍卖", "bank_loan": "银行借款", "bankruptcy_judgment_document": "破产文书",
@@ -142,12 +514,120 @@ _map = {
 def key_2_name(key: str):
     end_index = len(key)
     while True:
-        if key in _map:
-            return key, _map[key], end_index
+        if key in _summary_dim_2_name:
+            return key, _summary_dim_2_name[key], end_index
         end_index = key.rfind('_')
         key = key[:end_index]
     pass
 
 
+res = """
+ng_rt_summary_company_staff_20220217
+ng_rt_summary_company_annual_report_20220425
+ng_rt_summary_company_holder_20220221
+ng_rt_summary_company_change_20220407
+ng_rt_summary_company_license_creditchina_20220621
+ng_rt_summary_company_abnormal_info_20220124
+ng_rt_summary_finance_info_v3_20220426
+ng_rt_summary_company_license_20220224
+ng_rt_summary_private_enterprise_20220426
+ng_rt_summary_company_double_random_check_info_20220227
+ng_rt_summary_company_check_info_20220224
+ng_rt_summary_company_tm_20211223
+ng_rt_summary_company_brief_cancel_announcement_20220224
+ng_rt_summary_company_court_register_20220105
+ng_rt_summary_company_court_open_announcement_20220105
+ng_rt_summary_wenshu_detail_v2_20220614
+ng_rt_summary_general_taxpayer_20220518
+ng_rt_summary_company_tm_20220914
+ng_rt_summary_increase_registered_capital_info_20220519
+ng_rt_summary_company_punishment_info_20220523
+ng_rt_summary_company_zxr_20211223
+ng_rt_summary_company_punishment_info_creditchina_20220621
+ng_rt_summary_company_tax_20220818
+ng_rt_summary_company_liquidating_info_20220223
+ng_rt_summary_company_icp_20211223
+ng_rt_summary_online_retailers_20220329
+ng_rt_summary_company_illegal_info_20220125
+ng_rt_summary_company_patent_20220920
+ng_rt_summary_company_branch_20221019
+ng_rt_summary_company_zxr_restrict_20211223
+ng_rt_summary_company_license_entpub_20220621
+ng_rt_summary_company_zxr_final_case_20211223
+ng_rt_summary_cancellation_announcement_20220310
+ng_rt_summary_company_dishonest_info_20211223
+ng_rt_summary_company_court_announcement_20220105
+ng_rt_summary_reduction_registered_capital_info_20220519
+ng_rt_summary_company_copyright_reg_20220915
+ng_rt_summary_company_own_tax_20220621
+ng_rt_summary_company_send_announcement_20220105
+ng_rt_summary_company_judicial_assistance_20220221
+ng_rt_summary_company_equity_info_20220127
+ng_rt_summary_company_bid_new_20220512
+ng_rt_summary_construction_person_20220508
+ng_rt_summary_company_land_announcement_20220621
+ng_rt_summary_construction_qualification_20220508
+ng_rt_summary_company_copyright_works_20220915
+ng_rt_summary_company_land_publicity_20220621
+ng_rt_summary_company_env_punishment_20220621
+ng_rt_summary_high_tech_enterprise_20220222
+ng_rt_summary_company_mortgage_info_20220605
+ng_rt_summary_construction_project_20220508
+ng_rt_summary_bankruptcy_open_case_20220905
+ng_rt_summary_litigation_mediation_20220105
+ng_rt_summary_company_app_info_20211223
+ng_rt_summary_company_holder_sponsor_20220628
+ng_rt_summary_company_tax_contravention_20220818
+ng_rt_summary_auction_tracking_20220719
+ng_rt_summary_company_finance_20211223
+ng_rt_summary_company_land_mortgage_20220621
+ng_rt_summary_property_rights_transaction_20220105
+ng_rt_summary_company_public_announcement_20220621
+ng_rt_summary_zxr_evaluate_20211223
+ng_rt_summary_company_land_transfer_20220621
+ng_rt_summary_zxr_evaluate_results_20211223
+ng_rt_summary_company_patent_20220912
+ng_rt_summary_restrictions_on_exit_20220118
+ng_rt_summary_company_equity_pledge_holder_20220913
+ng_rt_summary_company_equity_pledge_20220913
+
+
+"""
+
+# One-off helper script, currently disabled: the guard string is '1__main__',
+# which does not equal '__main__', so this block does not run when the file is
+# executed as a script. When enabled it prints, for each index name listed in
+# `res`, the `_map` values and keys whose key contains the extracted name.
+if __name__ == '1__main__':
+
+    from utils.base_utils import to_list
+    import re
+
+    # Captures the part between the "ng_rt_summary_" prefix and the trailing "_<digits>".
+    part = re.compile("^ng_rt_summary_(.*?)_[0-9]+$")
+
+    li = to_list(res)
+    for i in li:
+        rr = part.findall(i)
+        # Raises IndexError if the line does not match the pattern above.
+        name = rr[0]
+        tag_set = set()
+        tag_set_j = set()
+        for j in _map:
+            # Substring match, so one name can hit several `_map` keys.
+            if name in j:
+                tag_set.add(_map[j])
+                tag_set_j.add(j)
+                pass
+            pass
+        print(name, tag_set, tag_set_j)
+        print()
+        pass
+    # out_map = ""
+    # for i in _map_class:
+    #     tm = _map_class[i]
+    #     for j in tm:
+    #         out_map += '"' + j + '": {\n        "dim_name": "",\n        "summary_key": "","sort_field": ""\n    },\n'
+    #         pass
+    # print(out_map)
+
+    pass
+
 if __name__ == '__main__':
+    # Print the generated lookup table for manual inspection.
+    from utils import map_2_json_str
+
+    print(map_2_json_str(_summary_dim_2_name))
     pass