1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- @author: Deepcold
- @file: parseData.py
- @time: 2019/8/14 15:55
- """
- import datetime
- import json
- from bin.common.esQuery import es_query_bg_name
- from bin.common.parse_name_is_company import parse_name
- from bin.utils.json_dump_date import JsonCustomEncoder
def parse_data_yg(db_config, each, push_date, save_items):
    """Turn one source record into event items, one per parsed company.

    The record's event-company field is passed to ``parse_name``; for every
    company it returns, a dict describing the event is appended to
    ``save_items``. Nothing is appended when the field is empty, when
    ``parse_name`` yields nothing, or when extracting the companies fails.

    Args:
        db_config: Configuration object. Read as ``db_config.table_name``,
            ``db_config.event_type``, ``db_config.event_sub_type`` and
            ``db_config[table_name]`` (which must expose ``QUERY_WORDS``,
            ``SUMMARY_FIELDS`` and ``ALL_FIELDS``).
        each: The source record (mapping). Must contain ``"_id"``, the
            event-date field and every field listed in ``SUMMARY_FIELDS``.
        push_date: Value stored unchanged under ``"push_date"``.
        save_items: List that receives the generated item dicts (mutated
            in place).

    Returns:
        None. Results are delivered through ``save_items``.

    Raises:
        KeyError: If a required field is missing from ``each`` or from the
            configured field mappings.
    """
    table_name = db_config.table_name
    table_config = db_config[table_name]
    query_words = table_config.QUERY_WORDS  # logical name -> record field name
    summary_fields = table_config.SUMMARY_FIELDS
    event_type = db_config.event_type
    event_subtype = db_config.event_sub_type  # sub-category of the event
    event_date = each[query_words["event_date"]]  # date the event occurred
    source_id = each["_id"]

    # Best effort: a record whose company field is missing or cannot be
    # parsed produces no items instead of aborting the caller's batch.
    try:
        event_company = each[query_words["event_company"]]
        # Per the original note, parse_name keeps parties that are companies
        # rather than individuals.
        event_company_list = parse_name(event_company) if event_company else []
    except Exception as e:
        event_company_list = []
        print(e)

    # Build the summary: selected record fields keyed by their ALL_FIELDS label.
    summary_information = {
        table_config.ALL_FIELDS[word]: each[word] for word in summary_fields
    }
    source_content = json.dumps(each, cls=JsonCustomEncoder, ensure_ascii=False)
    summary_information = json.dumps(
        summary_information, cls=JsonCustomEncoder, ensure_ascii=False
    )

    if not event_company_list:
        return

    # bg_name is identical for every company of this record, so resolve and
    # truncate it once. A None value is passed through instead of crashing
    # on len().
    max_bg_name_len = 99  # NOTE(review): presumably a storage column limit - confirm
    bg_name = each[query_words["bg_name"]]
    if bg_name is not None and len(bg_name) > max_bg_name_len:
        bg_name = bg_name[:max_bg_name_len]

    # NOTE: an earlier, disabled variant only kept items whose event_date was
    # within 7 days of push_date; every item is currently kept.
    for event_company in event_company_list:
        item = {
            "event_type": event_type,
            "event_subtype": event_subtype,
            "event_desc": summary_information,
            "event_company": event_company,
            "bg_name": bg_name,
            "event_date": event_date,
            "push_date": push_date,
            "source_platform": table_name,
            "source_id": source_id,
            "source_content": source_content,
        }
        print(item)
        save_items.append(item)
|