123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- # -*- coding: utf-8 -*-
- # @Time : 2020/12/10 10:50
- # @Author : XuJiakai
- # @File : WinhcEsV8Sdk
- # @Software: PyCharm
- import json
- import logging
- import re
- from elasticsearch.exceptions import NotFoundError
- from zhconv import convert
- from sdk.WinhcElasticSearchSDK import WinhcElasticSearchSDK
# Import-time side effect kept on purpose: a module-level ``logging.info``
# call configures the root logger with a default handler when it has none
# (see the stdlib ``logging`` docs). The "Begin" record itself is normally
# dropped because the root logger defaults to WARNING.
logging.info("Begin")

# Formatter with timestamp, source path/line, level and message. It is not
# attached to any handler in this part of the file.
fmt = logging.Formatter('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')

# Module logger; INFO and above are passed on to whatever handlers exist.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class winhc_es_company_v8_sdk(WinhcElasticSearchSDK):
    """Company-index lookups built on top of ``WinhcElasticSearchSDK``.

    Every query targets ``self.index`` / ``self.doc_type``; the actual search
    and existence checks are delegated to the parent class.

    NOTE(review): the class name says "v8" while the index is
    ``winhc-company-v9`` - confirm this is intended.
    """

    def __init__(self, es_name='new'):
        """Initialise the parent SDK and the index settings.

        :param es_name: forwarded unchanged to ``WinhcElasticSearchSDK``.
        """
        super().__init__(es_name)
        # Matches every character that is NOT in U+4E00..U+9FA5, 0-9, a-z or
        # A-Z; used by clean_up() to strip punctuation, spaces, etc.
        self.pattern = re.compile(r'[^\u4e00-\u9fa50-9a-zA-Z]')
        self.index = "winhc-company-v9"
        self.doc_type = 'company'

    @staticmethod
    def _dump_dsl(dsl):
        """Serialise a DSL dict to a compact JSON string.

        ``json.dumps`` escapes quotes and backslashes in the values, so
        caller-supplied text can never break the JSON structure. Compact
        separators and ``ensure_ascii=False`` keep the output identical to
        the previously hand-written strings for ordinary inputs.
        """
        return json.dumps(dsl, ensure_ascii=False, separators=(',', ':'))

    def get_doc_by_company_id(self, company_id):
        """Fetch a single document by its id.

        :param company_id: document id to look up.
        :return: the document ``_source`` with ``company_id`` added to it, or
            ``None`` when the client raises ``NotFoundError``.
        """
        try:
            hit = self.es.get(index=self.index, doc_type=self.doc_type, id=company_id)
        except NotFoundError:
            return None
        doc = hit['_source']
        doc['company_id'] = company_id
        return doc

    def exists_company_id(self, company_id):
        """Return the parent's ``exists`` result for ``company_id``.

        :param company_id: document id to check.
        """
        return super().exists(self.index, self.doc_type, company_id)

    def query_by_company(self, company_name):
        """Search by cleaned company name (current or historical).

        The name is passed through :meth:`clean_up` and matched with ``term``
        queries on ``cname.value.keyword`` and ``history_name.value.keyword``.

        :param company_name: company name to look for.
        :return: whatever the parent ``query`` returns. NOTE(review): the
            original docstring promised at most 20 rows, but this DSL sets no
            ``size``; confirm the limit in ``WinhcElasticSearchSDK.query``.
        """
        cname = self.clean_up(company_name)
        dsl = {"query": {"bool": {"must": [{"bool": {"should": [
            {"term": {"cname.value.keyword": {"value": cname}}},
            {"term": {"history_name.value.keyword": {"value": cname}}},
        ]}}]}}}
        return self.__get_doc_by_dsl(self._dump_dsl(dsl))

    def query_by_credit_code_reg_number(self, credit_code, reg_number):
        """Search by credit code and/or registration number.

        Documents whose ``deleted`` field equals 9 are excluded. Each
        non-blank argument adds one ``term`` clause; the clauses are combined
        with ``should``.

        :param credit_code: credit code, or ``None`` / blank to skip it.
        :param reg_number: registration number, or ``None`` / blank to skip it.
        :return: whatever the parent ``query`` returns. NOTE(review): the
            original docstring promised at most 20 rows, but this DSL sets no
            ``size``; confirm the limit in ``WinhcElasticSearchSDK.query``.
        :raises AttributeError: when both arguments are ``None`` or blank.
        """
        should = []
        for field, value in (("credit_code", credit_code), ("reg_number", reg_number)):
            # Blank check uses the stripped text, but the value is sent as given.
            if value is not None and value.strip() != '':
                should.append({"term": {field: {"value": value}}})
        if not should:
            raise AttributeError("reg_number or credit_code is empty !")

        dsl = {"query": {"bool": {"must": [
            {"bool": {"filter": [
                {"bool": {"must_not": [{"term": {"deleted": {"value": 9}}}]}},
            ]}},
            {"bool": {"should": should}},
        ]}}}
        # Serialised with json.dumps rather than "%s" formatting so quotes or
        # backslashes in the inputs cannot corrupt the request body.
        return self.__get_doc_by_dsl(self._dump_dsl(dsl))

    def clean_up(self, name):
        """Return ``name`` with every character matched by ``self.pattern`` removed."""
        return self.pattern.sub("", name)

    def __get_doc_by_dsl(self, dsl):
        """Run ``dsl`` (JSON string or dict) against the company index.

        ``super().query`` is required here: this class overrides ``query``
        with a different signature, so ``self.query`` would recurse into it.
        """
        return super().query(index=self.index, doc_type=self.doc_type, dsl=dsl)

    def query(self, company_name):
        """Search by company name, trying several spellings of it.

        Variants tried, in order and without repeats: the name as given, the
        cleaned name, the ``zh-cn`` conversion of the name, and the cleaned
        conversion. Each variant is matched against the current and
        historical name fields; only documents with ``deleted`` equal to
        ``"0"`` qualify.

        :param company_name: company name to look for.
        :return: whatever the parent ``query`` returns; the DSL requests at
            most 20 hits and only the ``cname.show`` source field.
        """
        logger.info("es查询:%s", company_name)

        def get_term(n, is_clean=False):
            # NOTE(review): is_clean is never passed as True in this class, so
            # the "value" sub-field branch is currently unused - confirm intent.
            k = "value" if is_clean else 'show.keyword'
            fields = (
                "cname." + k,
                "cname.simplified_chinese.keyword",
                "history_name." + k,
                "history_name.simplified_chinese.keyword",
            )
            return [{"term": {field: {"value": n}}} for field in fields]

        simplified_chinese = convert(company_name, 'zh-cn')
        variants = [
            company_name,
            self.clean_up(company_name),
            simplified_chinese,
            self.clean_up(simplified_chinese),
        ]
        all_term = []
        # dict.fromkeys() drops repeated variants while keeping their order,
        # so no term clause is sent twice.
        for variant in dict.fromkeys(variants):
            all_term.extend(get_term(variant))

        dsl = {
            "size": 20,
            "_source": ["cname.show"],
            "query": {
                "bool": {
                    "must": [
                        {"bool": {"should": all_term}},
                        # NOTE(review): "0" is a string here, while
                        # query_by_credit_code_reg_number compares `deleted`
                        # with the integer 9 - confirm the field mapping.
                        {"term": {"deleted": {"value": "0"}}},
                    ]
                }
            }
        }
        return self.__get_doc_by_dsl(dsl)
if __name__ == '__main__':
    # Manual smoke test: run one name search against the 'new' cluster and
    # print the raw result. query_by_company() can be tried the same way.
    client = winhc_es_company_v8_sdk('new')
    print(client.query('成安建筑营造有限公司'))
|