#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: Deepcold
@file: parseData.py
@time: 2019/8/14 15:55
"""
import datetime
import json

from bin.common.esQuery import es_query_bg_name
from bin.common.parse_name_is_company import parse_name
from bin.utils.json_dump_date import JsonCustomEncoder

# Longest bg_name stored on an item; longer values are cut to this length.
# NOTE(review): presumably mirrors a column width in the target store - confirm.
_BG_NAME_MAX_LEN = 99


def parse_data_yg(db_config, each, push_date, save_items):
    """Build one event item per parsed company of a record and collect them.

    Every item is printed and appended to ``save_items``; nothing is returned.

    Args:
        db_config: Configuration object exposing ``table_name``,
            ``event_type`` and ``event_sub_type`` attributes, and indexable
            by table name to an object with ``QUERY_WORDS``,
            ``SUMMARY_FIELDS`` and ``ALL_FIELDS``.
        each: The source record (a mapping). It must contain ``"_id"``, the
            field named by ``QUERY_WORDS["event_date"]`` and every field in
            ``SUMMARY_FIELDS``.
        push_date: Value stored unchanged under ``"push_date"``.
        save_items: List that receives the generated item dicts (mutated in
            place).

    Raises:
        KeyError: If a required field is missing from ``each`` or from the
            configuration mappings. A missing or unparsable event-company
            field is *not* an error; the record then yields no items.
    """
    table_name = db_config.table_name
    table_config = db_config[table_name]
    query_words = table_config.QUERY_WORDS  # logical name -> record field
    summary_fields = table_config.SUMMARY_FIELDS
    event_type = db_config.event_type
    event_subtype = db_config.event_sub_type
    event_date = each[query_words["event_date"]]
    _id = each["_id"]

    # Best effort: any failure while reading or parsing the company field
    # means "no companies" for this record instead of aborting the caller.
    try:
        raw_company = each[query_words["event_company"]]
        # Original note: keep defendants that are companies, not individuals
        # (done by parse_name, whose implementation is not visible here).
        event_company_list = parse_name(raw_company) if raw_company else []
    except Exception as e:
        event_company_list = []
        print(e)

    # Summary: selected record fields keyed by their ALL_FIELDS label.
    summary_information = {
        table_config.ALL_FIELDS[word]: each[word] for word in summary_fields
    }
    source_content = json.dumps(each, cls=JsonCustomEncoder, ensure_ascii=False)
    summary_information = json.dumps(
        summary_information, cls=JsonCustomEncoder, ensure_ascii=False
    )

    if not event_company_list:
        return

    # bg_name is identical for every company of this record, so resolve and
    # truncate it once. A None value is passed through instead of crashing
    # on len(None).
    bg_name = each[query_words["bg_name"]]
    if bg_name is not None and len(bg_name) > _BG_NAME_MAX_LEN:
        bg_name = bg_name[:_BG_NAME_MAX_LEN]

    # NOTE: an earlier revision only kept events whose event_date lay within
    # 7 days of push_date; that filter is disabled and every item is saved.
    for event_company in event_company_list:
        item = {
            "event_type": event_type,
            "event_subtype": event_subtype,
            "event_desc": summary_information,
            "event_company": event_company,
            "bg_name": bg_name,
            "event_date": event_date,
            "push_date": push_date,
            "source_platform": table_name,
            "source_id": _id,
            "source_content": source_content,
        }
        print(item)
        save_items.append(item)