# WinhcEsCompanyV8Sdk.py
# -*- coding: utf-8 -*-
# @Time : 2020/12/10 10:50
# @Author : XuJiakai
# @File : WinhcEsV8Sdk
# @Software: PyCharm
import json
import logging
import re

from elasticsearch.exceptions import NotFoundError
from zhconv import convert

from sdk.WinhcElasticSearchSDK import WinhcElasticSearchSDK
  12. logging.info("Begin")
  13. logger = logging.getLogger(__name__)
  14. logger.setLevel(logging.INFO)
  15. fmt = logging.Formatter('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
  16. class winhc_es_company_v8_sdk(WinhcElasticSearchSDK):
  17. def __init__(self, es_name='new'):
  18. super().__init__(es_name)
  19. self.pattern = re.compile('[^\\u4e00-\\u9fa50-9a-zA-Z]')
  20. self.index = "winhc-company-v9"
  21. self.doc_type = 'company'
  22. def get_doc_by_company_id(self, company_id):
  23. try:
  24. res = self.es.get(index=self.index, doc_type=self.doc_type, id=company_id)['_source']
  25. res['company_id'] = company_id
  26. return res
  27. except NotFoundError:
  28. return None
  29. def exists_company_id(self, company_id):
  30. return super().exists(self.index, self.doc_type, company_id)
  31. def query_by_company(self, company_name):
  32. """
  33. :param company_name:
  34. :return: 最多反回20条数据
  35. """
  36. cname = self.clean_up(company_name)
  37. dsl = """{"query":{"bool":{"must":[{"bool":{"should":[{"term":{"cname.value.keyword":{"value":"%s"}}},{"term":{"history_name.value.keyword":{"value":"%s"}}}]}}]}}}""" % (
  38. cname, cname)
  39. return self.__get_doc_by_dsl(dsl)
  40. def query_by_credit_code_reg_number(self, credit_code, reg_number):
  41. """
  42. :param company_name:
  43. :return: 最多反回20条数据
  44. """
  45. dsl_pre = """{"query":{"bool":{"must":[{"bool":{"filter":[{"bool":{"must_not":[{"term":{"deleted":{"value":9}}}]}}]}},{"bool":{"should":["""
  46. dsl_post = """]}}]}}}"""
  47. dsl = []
  48. if credit_code is not None and credit_code.strip() != '':
  49. dsl.append("""{"term":{"credit_code":{"value":"%s"}}}""" % credit_code)
  50. pass
  51. if reg_number is not None and reg_number.strip() != '':
  52. dsl.append("""{"term":{"reg_number":{"value":"%s"}}}""" % reg_number)
  53. pass
  54. if len(dsl) == 0:
  55. raise AttributeError("reg_number or credit_code is empty !")
  56. dsl = dsl_pre + ','.join(dsl) + dsl_post
  57. return self.__get_doc_by_dsl(dsl)
  58. def clean_up(self, name):
  59. return self.pattern.sub("", name)
  60. def __get_doc_by_dsl(self, dsl):
  61. res = super().query(index=self.index, doc_type=self.doc_type, dsl=dsl)
  62. return res
  63. def query(self, company_name):
  64. """
  65. :param company_name:
  66. :return: 最多反回20条数据
  67. """
  68. logger.info("es查询:%s" % (company_name))
  69. def get_term(n, is_clean=False):
  70. if is_clean:
  71. k = "value"
  72. else:
  73. k = 'show.keyword'
  74. all_term = [{
  75. "term": {
  76. "cname." + k: {
  77. "value": n
  78. }
  79. }
  80. }, {
  81. "term": {
  82. "cname.simplified_chinese.keyword": {
  83. "value": n
  84. }
  85. }
  86. }, {
  87. "term": {
  88. "history_name." + k: {
  89. "value": n
  90. }
  91. }
  92. }, {
  93. "term": {
  94. "history_name.simplified_chinese.keyword": {
  95. "value": n
  96. }
  97. }
  98. }]
  99. return all_term
  100. all_term = get_term(company_name)
  101. cname = self.clean_up(company_name)
  102. if company_name != cname:
  103. all_term.extend(get_term(cname))
  104. simplified_chinese = convert(company_name, 'zh-cn')
  105. if company_name != simplified_chinese:
  106. all_term.extend(get_term(simplified_chinese))
  107. simplified_cname = self.clean_up(simplified_chinese)
  108. if company_name != simplified_cname:
  109. all_term.extend(get_term(simplified_cname))
  110. dsl = {
  111. "size": 20,
  112. "_source": [
  113. "cname.show"
  114. ],
  115. "query": {
  116. "bool": {
  117. "must": [
  118. {
  119. "bool": {
  120. "should": all_term
  121. }
  122. },
  123. {
  124. "term": {
  125. "deleted": {
  126. "value": "0"
  127. }
  128. }
  129. }
  130. ]
  131. }
  132. }
  133. }
  134. # print(json.dumps(dsl))
  135. return self.__get_doc_by_dsl(dsl)
  136. if __name__ == '__main__':
  137. sdk = winhc_es_company_v8_sdk('new')
  138. res = sdk.query('成安建筑营造有限公司')
  139. print(res)
  140. # print(sdk.query_by_company('香港澳德利集团有限公司'))
  141. pass