12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- @author: Deepcold
- @file: parseData.py
- @time: 2019/8/14 15:55
- """
- import datetime
- import json
- from bin.common.esQuery import es_query_bg_name
- from bin.common.parse_name_is_company import parse_name
- from bin.utils.json_dump_date import JsonCustomEncoder
def parse_data(db_config, each, push_date, save_items):
    """Build event items from one source record and append them to *save_items*.

    For every company name extracted from the record's "event_company" field,
    the function queries Elasticsearch via ``es_query_bg_name``. Hits are kept
    only when ``parse_name`` returns a truthy value for their
    ``_source["yishen_yg"]`` field. If at least one hit survives, one item
    dict is printed and appended to *save_items*.

    Args:
        db_config: Configuration object. It is read by attribute
            (``table_name``, ``event_type``, ``event_sub_type``) and indexed
            by table name to obtain an object exposing ``QUERY_WORDS``,
            ``SUMMARY_FIELDS`` and ``ALL_FIELDS``.
        each: The source record; indexed by field name and must contain
            ``"_id"``.
        push_date: Value stored unchanged under ``"push_date"`` in each item.
        save_items: List that receives the generated items (mutated in place).

    Returns:
        None. Results are delivered through *save_items*.
    """
    table_name = db_config.table_name
    table_config = db_config[table_name]
    query_words = table_config.QUERY_WORDS  # maps logical names to record fields
    summary_fields = table_config.SUMMARY_FIELDS
    event_type = db_config.event_type
    event_subtype = db_config.event_sub_type
    event_date = each[query_words["event_date"]]  # date the event occurred
    source_id = each["_id"]

    # Extract company names from the record; any failure here is reported on
    # stdout and treated as "no companies" rather than aborting the record.
    try:
        raw_company = each[query_words["event_company"]]
        company_names = parse_name(raw_company) if raw_company else []
    except Exception as exc:
        company_names = []
        print(exc)

    # Build the summary: selected record fields keyed by the label that
    # ALL_FIELDS assigns to each field.
    summary = {}
    for field in summary_fields:
        value = each[field]
        label = table_config.ALL_FIELDS[field]
        summary[label] = value

    source_content = json.dumps(each, cls=JsonCustomEncoder, ensure_ascii=False)
    summary_information = json.dumps(summary, cls=JsonCustomEncoder, ensure_ascii=False)

    for company in company_names:
        # Skip companies that have no matching documents in Elasticsearch.
        es_content = es_query_bg_name(company)
        if not es_content:
            continue

        # Keep only hits whose "yishen_yg" value yields a truthy parse_name result.
        matching_hits = [
            hit for hit in es_content
            if parse_name(hit["_source"]["yishen_yg"])
        ]
        if not matching_hits:
            continue

        item = {
            "event_type": event_type,
            "event_subtype": event_subtype,
            "event_desc": summary_information,
            "event_company": company,
            "event_date": event_date,
            "push_date": push_date,
            "source_platform": table_name,
            "source_id": source_id,
            "source_content": source_content,
            "hasBgCase": matching_hits,
        }
        # NOTE: a filter that only kept items whose event_date lay within
        # 7 days of push_date used to be applied here; it is disabled, so
        # every item is saved.
        print(item)
        save_items.append(item)
|