# search_winhc_latest_date.py (3.3 KB)
  1. # -*- coding: utf-8 -*-
  2. # @Time : 2022/12/1 9:13
  3. # @Author : XuJiakai
  4. # @File : search_winhc_latest_date
  5. # @Software: PyCharm
  6. from sdk import get_es_sdk
  7. from utils import map_2_json_str
  8. from utils.dim_name_mapping import get_latest_date_map
  9. from sdk.WinhcAllClient import get_all_client
  10. from utils.base_utils import tuple_max
  11. from utils.datetime_utils import datetime_format_transform
  12. import re
  13. from log import get_log
  14. log = get_log("winhc_latest_date")
  15. all_client = get_all_client()
  16. hbase_client = all_client.get_hbase_client()
  17. es_sdk = get_es_sdk("new")
  18. old_es_sdk = get_es_sdk("old")
  19. old_es_index = ['wenshu_detail2']
  20. date_part = re.compile('\\d{4}年\\d{2}月\\d{2}日')
  21. def get_latest_date(index: str, company_id_f: str, company_id: str, latest_date_f: str):
  22. dsl = {
  23. "size": 1,
  24. "_source": [latest_date_f],
  25. "query": {
  26. "bool": {
  27. "must": [
  28. {
  29. "term": {
  30. company_id_f: {
  31. "value": company_id
  32. }
  33. }
  34. },
  35. {
  36. "term": {
  37. "deleted": {
  38. "value": 0
  39. }
  40. }
  41. }
  42. ]
  43. }
  44. }
  45. , "sort": [
  46. {
  47. latest_date_f: {
  48. "order": "desc"
  49. }
  50. }
  51. ]
  52. }
  53. if index in old_es_index:
  54. res = old_es_sdk.query(index=index, doc_type=None, dsl=dsl)
  55. pass
  56. else:
  57. res = es_sdk.query(index=index, doc_type='_doc', dsl=dsl)
  58. if len(res) == 0 or latest_date_f not in res[0]:
  59. return None
  60. latest_date = res[0][latest_date_f]
  61. # if date_part.match(latest_date)
  62. if date_part.match(latest_date):
  63. latest_date = datetime_format_transform(latest_date, '%Y年%m月%d日', "%Y-%m-%d %H:%M:%S")
  64. pass
  65. return latest_date
  66. pass
  67. latest_date_map = get_latest_date_map()
  68. def search_latest_date(company_id: str):
  69. result_data = {}
  70. for i in latest_date_map:
  71. str = latest_date_map[i]
  72. max_date = None
  73. for j in str.split(','):
  74. tmp_str = j.split(':')
  75. index = tmp_str[0]
  76. company_id_f = tmp_str[1]
  77. latest_date_f = tmp_str[2]
  78. tmp_date = get_latest_date(index=index, company_id_f=company_id_f, company_id=company_id,
  79. latest_date_f=latest_date_f)
  80. max_date = tuple_max(max_date, tmp_date)
  81. pass
  82. result_data[i] = max_date
  83. pass
  84. tmp_res = hbase_client.get_record('ng_rt_company', company_id)
  85. if tmp_res is not None and 'APPROVED_TIME' in tmp_res:
  86. result_data['基本信息'] = tmp_res['APPROVED_TIME']
  87. log.info('fetch hbase data: {}'.format(tmp_res))
  88. else:
  89. result_data['基本信息'] = None
  90. return result_data
  91. pass
  92. if __name__ == '__main__':
  93. # d = get_latest_date(index='winhc_index_rt_company_punishment_info', company_id_f='company_id',
  94. # company_id='059f83641cc4df8b9577cb1e2d89939e', latest_date_f='decision_date')
  95. # print(d)
  96. d = search_latest_date(company_id='6e13b126ee0c5fcd8fe454693ab4bbda')
  97. print(map_2_json_str(d))
  98. pass