# -*- coding: utf-8 -*-
# @Time : 2020/12/10 10:50
# @Author : XuJiakai
# @File : WinhcEsV8Sdk
# @Software: PyCharm
"""Lookup helpers for the ``winhc-company-v9`` Elasticsearch company index."""
import json
import logging
import re

from elasticsearch.exceptions import NotFoundError
from zhconv import convert

from sdk.WinhcElasticSearchSDK import WinhcElasticSearchSDK

# Import-time logging setup, kept exactly as the module has always done it.
# NOTE(review): the root-level ``logging.info`` call implicitly configures the
# root logger when it has no handlers; ``fmt`` is not attached to any handler
# in this file -- confirm whether another module relies on either.
logging.info("Begin")
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
fmt = logging.Formatter('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')


def _dsl_to_json(dsl):
    """Serialize a query dict to a compact JSON string.

    Going through ``json.dumps`` escapes quotes and backslashes inside the
    values, which plain ``%`` interpolation into a JSON template does not.
    """
    return json.dumps(dsl, ensure_ascii=False, separators=(',', ':'))


class winhc_es_company_v8_sdk(WinhcElasticSearchSDK):
    """Company queries against the ``winhc-company-v9`` index."""

    def __init__(self, es_name='new'):
        """Initialise the parent SDK for *es_name* and set index constants."""
        super().__init__(es_name)
        # Matches every character that is not a CJK ideograph in
        # U+4E00..U+9FA5, an ASCII digit or an ASCII letter.
        self.pattern = re.compile(r'[^\u4e00-\u9fa50-9a-zA-Z]')
        self.index = "winhc-company-v9"
        self.doc_type = 'company'

    def get_doc_by_company_id(self, company_id):
        """Fetch one company document by its id.

        :param company_id: document id in the company index.
        :return: the document ``_source`` with ``company_id`` added to it,
            or ``None`` when Elasticsearch raises ``NotFoundError``.
        """
        try:
            hit = self.es.get(index=self.index, doc_type=self.doc_type, id=company_id)
        except NotFoundError:
            return None
        res = hit['_source']
        res['company_id'] = company_id
        return res

    def exists_company_id(self, company_id):
        """Return the parent SDK's ``exists`` result for *company_id*."""
        return super().exists(self.index, self.doc_type, company_id)

    def query_by_company(self, company_name):
        """Search by exact cleaned company name (current or historical).

        :param company_name: company name; it is normalised with
            :meth:`clean_up` before being matched.
        :return: whatever the parent SDK's ``query`` returns.
            NOTE(review): the original docstring promised at most 20 records,
            but this DSL sets no ``size`` -- confirm the limit in the parent.
        """
        cname = self.clean_up(company_name)
        name_terms = [
            {"term": {"cname.value.keyword": {"value": cname}}},
            {"term": {"history_name.value.keyword": {"value": cname}}},
        ]
        dsl = {"query": {"bool": {"must": [{"bool": {"should": name_terms}}]}}}
        return self.__get_doc_by_dsl(_dsl_to_json(dsl))

    def query_by_credit_code_reg_number(self, credit_code, reg_number):
        """Search by credit code and/or registration number.

        Documents whose ``deleted`` field equals 9 are excluded; a document
        has to match at least one of the supplied identifiers.

        :param credit_code: credit code, or ``None`` / blank to skip it.
        :param reg_number: registration number, or ``None`` / blank to skip it.
        :return: whatever the parent SDK's ``query`` returns.
            NOTE(review): the original docstring promised at most 20 records,
            but this DSL sets no ``size`` -- confirm the limit in the parent.
        :raises AttributeError: when both identifiers are ``None`` or blank.
        """
        id_terms = []
        if credit_code is not None and credit_code.strip() != '':
            id_terms.append({"term": {"credit_code": {"value": credit_code}}})
        if reg_number is not None and reg_number.strip() != '':
            id_terms.append({"term": {"reg_number": {"value": reg_number}}})
        if not id_terms:
            # Exception type kept as AttributeError for existing callers.
            raise AttributeError("reg_number or credit_code is empty !")

        not_deleted = {
            "bool": {
                "filter": [
                    {"bool": {"must_not": [{"term": {"deleted": {"value": 9}}}]}}
                ]
            }
        }
        dsl = {
            "query": {
                "bool": {
                    "must": [not_deleted, {"bool": {"should": id_terms}}]
                }
            }
        }
        # Serialised from a dict so that quotes or backslashes in the
        # identifiers cannot break or rewrite the query JSON.
        return self.__get_doc_by_dsl(_dsl_to_json(dsl))

    def clean_up(self, name):
        """Return *name* with every character matched by ``self.pattern`` removed."""
        return self.pattern.sub("", name)

    def __get_doc_by_dsl(self, dsl):
        """Run *dsl* through the parent SDK's ``query`` on this index.

        ``super().query`` is called explicitly because this class defines its
        own ``query`` with a different signature.
        """
        return super().query(index=self.index, doc_type=self.doc_type, dsl=dsl)

    def query(self, company_name):
        """Search by company name, trying several spellings of it.

        The raw name, its cleaned form, its ``zh-cn`` conversion and the
        cleaned conversion are each matched against the current and
        historical name fields; only documents with ``deleted`` equal to
        ``"0"`` are returned.

        :param company_name: company name as entered by the caller.
        :return: whatever the parent SDK's ``query`` returns; the DSL asks
            for at most 20 hits and only the ``cname.show`` source field.
        """
        logger.info("es查询:%s", company_name)

        def get_term(n, is_clean=False):
            # NOTE(review): no call below passes is_clean=True, so the
            # "value" sub-field branch is currently unused.
            k = "value" if is_clean else 'show.keyword'
            return [
                {"term": {"cname." + k: {"value": n}}},
                {"term": {"cname.simplified_chinese.keyword": {"value": n}}},
                {"term": {"history_name." + k: {"value": n}}},
                {"term": {"history_name.simplified_chinese.keyword": {"value": n}}},
            ]

        all_term = get_term(company_name)

        cname = self.clean_up(company_name)
        if company_name != cname:
            all_term.extend(get_term(cname))

        simplified_chinese = convert(company_name, 'zh-cn')
        if company_name != simplified_chinese:
            all_term.extend(get_term(simplified_chinese))

        simplified_cname = self.clean_up(simplified_chinese)
        # NOTE(review): this compares against the raw name only, so the
        # cleaned form can be added a second time when the conversion
        # changed nothing. Left as-is to keep the generated query unchanged.
        if company_name != simplified_cname:
            all_term.extend(get_term(simplified_cname))

        dsl = {
            "size": 20,
            "_source": ["cname.show"],
            "query": {
                "bool": {
                    "must": [
                        {"bool": {"should": all_term}},
                        {"term": {"deleted": {"value": "0"}}},
                    ]
                }
            },
        }
        return self.__get_doc_by_dsl(dsl)


if __name__ == '__main__':
    # Manual smoke check against the 'new' cluster.
    sdk = winhc_es_company_v8_sdk('new')
    res = sdk.query('成安建筑营造有限公司')
    print(res)