parseData_YG.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: Deepcold
@file: parseData.py
@time: 2019/8/14 15:55
"""
  8. import datetime
  9. import json
  10. from bin.common.esQuery import es_query_bg_name
  11. from bin.common.parse_name_is_company import parse_name
  12. from bin.utils.json_dump_date import JsonCustomEncoder
  13. def parse_data_yg(db_config, each, push_date, save_items):
  14. table_name = db_config.table_name
  15. query_words = db_config[table_name].QUERY_WORDS # 查询关键字
  16. summary_fields = db_config[table_name].SUMMARY_FIELDS
  17. event_type = db_config.event_type # 事件类型
  18. event_subtype = db_config.event_sub_type # 子分类
  19. event_date = each[query_words["event_date"]] # 事件发生日期
  20. _id = each["_id"]
  21. try:
  22. event_company = each[query_words["event_company"]] # 事件企业
  23. # 解析被告是企业不是个人
  24. if event_company:
  25. event_company_list = parse_name(event_company)
  26. else:
  27. event_company_list = []
  28. except Exception as e:
  29. event_company_list = []
  30. print(e)
  31. source_content = each # 原数据
  32. # 构建摘要信息
  33. summary_information = {}
  34. for word in summary_fields:
  35. summary_information[db_config[table_name].ALL_FIELDS[word]] = each[word]
  36. source_content = json.dumps(source_content, cls=JsonCustomEncoder, ensure_ascii=False)
  37. summary_information = json.dumps(summary_information, cls=JsonCustomEncoder, ensure_ascii=False)
  38. # print(summary_information)
  39. for event_company in event_company_list:
  40. item = {}
  41. # 封装成字典
  42. item["event_type"] = event_type
  43. item["event_subtype"] = event_subtype
  44. item["event_desc"] = summary_information
  45. item["event_company"] = event_company
  46. item["bg_name"] = each[query_words["bg_name"]]
  47. if len(item["bg_name"]) >= 99:
  48. item["bg_name"] = item["bg_name"][:99]
  49. item["event_date"] = event_date
  50. item["push_date"] = push_date
  51. item["source_platform"] = table_name
  52. item["source_id"] = _id
  53. item["source_content"] = source_content
  54. # print(item["push_date"]-datetime.datetime.date(datetime.datetime.strptime(item["event_date"], '%Y-%m-%d')))
  55. # if item["push_date"]-datetime.datetime.date(datetime.datetime.strptime(item["event_date"], '%Y-%m-%d')) <= datetime.timedelta(7):
  56. # print(item)
  57. # save_items.append(item)
  58. print(item)
  59. save_items.append(item)