#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Build push-event items from a source record and its matching ES cases.

@author: Deepcold
@file: parseData.py
@time: 2019/8/14 15:55
"""
import datetime
import json

from bin.common.esQuery import es_query_bg_name
from bin.common.parse_name_is_company import parse_name
from bin.utils.json_dump_date import JsonCustomEncoder


def _company_plaintiff_hits(es_hits):
    """Return the hits whose ``_source["yishen_yg"]`` passes ``parse_name``."""
    kept = []
    for hit in es_hits:
        # presumably the first-instance plaintiff name -- TODO confirm
        plaintiff_name = hit["_source"]["yishen_yg"]
        # Skip empty names before calling parse_name, mirroring the guard
        # applied to the event-company field in parse_data.
        if plaintiff_name and parse_name(plaintiff_name):
            kept.append(hit)
    return kept


def parse_data(db_config, each, push_date, save_items):
    """Append one event item to *save_items* per company with matching ES hits.

    The record's event-company field is split into company names with
    ``parse_name``. Each company is looked up with ``es_query_bg_name``; the
    returned hits are filtered down to those whose ``_source["yishen_yg"]``
    is non-empty and passes ``parse_name``. When at least one hit remains,
    an item dict is printed and appended to *save_items*.

    Args:
        db_config: Configuration object exposing ``table_name``,
            ``event_type`` and ``event_sub_type``, and, under
            ``db_config[table_name]``, ``QUERY_WORDS``, ``SUMMARY_FIELDS``
            and ``ALL_FIELDS``.
        each: Source record. It is indexed by ``"_id"``, by the field named
            in ``QUERY_WORDS["event_date"]`` and by every entry of
            ``SUMMARY_FIELDS``, and is serialized whole into
            ``source_content``.
        push_date: Value stored unchanged under ``"push_date"``.
        save_items: Collection the built items are appended to (mutated in
            place).

    Returns:
        None. Results are delivered through *save_items*.

    Raises:
        KeyError: For dict-like inputs, when *each* lacks ``"_id"``, the
            event-date field or a summary field, or when an ES hit lacks
            ``_source`` / ``yishen_yg``. A failure while reading or parsing
            the event-company field is caught instead (see below).
    """
    table_name = db_config.table_name
    table_config = db_config[table_name]  # looked up once, reused below
    query_words = table_config.QUERY_WORDS  # query keywords
    summary_fields = table_config.SUMMARY_FIELDS
    event_type = db_config.event_type  # event type
    event_subtype = db_config.event_sub_type  # event sub-category
    event_date = each[query_words["event_date"]]  # date the event occurred
    source_id = each["_id"]

    # Deliberately best-effort: any failure while reading or parsing the
    # event-company field is printed and the record yields no items.
    try:
        raw_company = each[query_words["event_company"]]  # event company
        # Keep only defendants that are companies rather than individuals.
        companies = parse_name(raw_company) if raw_company else []
    except Exception as e:
        companies = []
        print(e)

    # Build the summary: each summary field's value keyed by the name that
    # ALL_FIELDS maps the field to.
    summary = {
        table_config.ALL_FIELDS[field]: each[field] for field in summary_fields
    }
    # Serialize the original record and the summary.
    source_content = json.dumps(each, cls=JsonCustomEncoder, ensure_ascii=False)
    summary_information = json.dumps(
        summary, cls=JsonCustomEncoder, ensure_ascii=False
    )

    for company in companies:
        # Check whether the company exists in ES.
        es_hits = es_query_bg_name(company)
        if not es_hits:
            continue

        matching_hits = _company_plaintiff_hits(es_hits)
        if not matching_hits:
            continue

        # NOTE: a former filter that only saved items whose event_date was
        # within 7 days of push_date is disabled; every match is saved.
        item = {
            "event_type": event_type,
            "event_subtype": event_subtype,
            "event_desc": summary_information,
            "event_company": company,
            "event_date": event_date,
            "push_date": push_date,
            "source_platform": table_name,
            "source_id": source_id,
            "source_content": source_content,
            "hasBgCase": matching_hits,
        }
        print(item)
        save_items.append(item)