Initialize daily opportunities

deepCold · 4 years ago
Parent commit: 66b0721346

+ 92 - 0
bin/DailyEvent.py

@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: DailyEvent.py
+@time: 2019/6/19 10:40
+"""
+import datetime
+from pprint import pprint
+
+from bin.utils.covert_dict_object import dict_to_object
+from .common.handleDate import get_date
+from .db.getData.fromEs import GetDataFromEs
+from .db.getData.fromMongo import GetDataFromMongo
+from .db.getData.fromMysql import GetDataFromMysql
+from .db.saveDate.saveEventChance import saveEventChance
+
+
+class DailyEvent(object):
+    def __init__(self, config):
+        # read in the per-table start dates
+        self.start_date = get_date()
+        self.push_date = datetime.date.today()
+        self.config = config
+        self.save_event_chance = saveEventChance(config)
+
+    def run(self):
+        # walk the event types configured under ALL_EVENT_TYPE
+        for _event_type, _content in self.config.ALL_EVENT_TYPE.items():
+            if "DEFENDANT" in _event_type:
+                party = "DEFENDANT"
+            else:
+                party = "PLAINTIFF"
+            event_type = self.config.ALL_TYPE[_event_type]  # event type label
+            for _event_sub_type, _table in _content.items():
+                event_sub_type = self.config.ALL_TYPE[_event_sub_type]  # event sub-type label
+                for table_name, value in _table.items():
+                    db_config = {}
+                    db = value.DB
+                    if "QUERY_CONDITION" in value.keys():
+                        query_condition = value.QUERY_CONDITION
+                    else:
+                        query_condition = None
+                    if "SAVE_TABLE" in value.keys():
+                        save_table = value.SAVE_TABLE
+                    else:
+                        save_table = None
+                    event_config = self.config.DATABASE[db]
+                    db_type = event_config.DB_TYPE  # database type
+                    db_info = event_config.DB_INFO  # connection info
+                    table = event_config[table_name]  # table config for this source
+                    # pack everything into a single dict
+                    db_config["event_type"] = event_type
+                    db_config["event_sub_type"] = event_sub_type
+                    db_config["query_condition"] = query_condition
+                    db_config["db_type"] = db_type
+                    db_config["db_info"] = db_info
+                    db_config[table_name] = table
+                    db_config["table_name"] = table_name
+                    db_config["party"] = party
+                    db_config["save_table"] = save_table
+
+                    db_config = dict_to_object(db_config)
+                    sql_data = self.get_data(db_config)
+                    self.save_event_chance.save(sql_data, db_config)
+        pass
+
+    def get_data(self, db_config):
+        """
+        :param db_config: per-table config object assembled in run()
+        :return: list of parsed items ready for saveEventChance.save()
+        """
+        db_type = db_config.db_type
+
+        if db_type == "mysql":
+            mysql = GetDataFromMysql(db_config, self.push_date)
+            sql_data = mysql.get_data(self.start_date)
+            pass
+
+        elif db_type == "mongodb":
+            mongo = GetDataFromMongo(db_config, self.push_date)
+            sql_data = mongo.get_data(self.start_date)
+            pass
+
+        elif db_type == "es":
+            es = GetDataFromEs(db_config, self.push_date)
+            sql_data = es.get_data(self.start_date)
+            pass
+        else:
+            sql_data = []
+
+        return sql_data
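
For reference, run() walks a three-level nesting under ALL_EVENT_TYPE (event type → sub-type → table entry). The shape below is a minimal sketch assembled from keys that properties/config_dev.yaml in this commit actually defines (values abbreviated):

```python
# Minimal sketch of the nesting DailyEvent.run() iterates over; keys are taken
# from properties/config_dev.yaml, values are abbreviated.
ALL_EVENT_TYPE = {
    "DEFENDANT_PROPERTY_CLUE": {            # _event_type
        "LAND_MORTGAGE": {                  # _event_sub_type
            "land_mortgage_info": {         # table_name -> value
                "DB": "BIG_DATA",           # which DATABASE entry to read from
            },
        },
    },
    "PLAINTIFF_ADDITIONAL_WENSHU": {
        "TERMINATE_EXECUTION": {
            "wenshu_detail1": {
                "DB": "ES",
                "QUERY_CONDITION": {},      # optional; placeholders filled per run
                "SAVE_TABLE": "yg_wenshu",  # optional; only present in the prd config
            },
        },
    },
}
```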

+ 0 - 0
bin/__init__.py


+ 7 - 0
bin/common/__init__.py

@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: __init__.py
+@time: 2019/8/5 14:51
+"""

+ 33 - 0
bin/common/esQuery.py

@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: esQuery.py
+@time: 2019/8/5 14:59
+"""
+from elasticsearch import helpers, Elasticsearch
+from manage import CONFIG
+
+
+def es_query(query, index, doc_type):
+    es_client = Elasticsearch(
+        [CONFIG.ES_INFO.HOST], http_auth=(CONFIG.ES_INFO.USER, CONFIG.ES_INFO.PASSWD), port=CONFIG.ES_INFO.PORT,
+        use_ssl=False, timeout=40)
+
+    es_result = helpers.scan(client=es_client, query=query, scroll='10h',
+                             index=index, doc_type=doc_type, timeout='1m')
+
+    return es_result
+
+
+def es_query_bg_name(event_company):
+    query = {
+        "query": {"bool": {
+            "must": [
+                {"match_phrase": {"yishen_bg": event_company}},
+                {"term": {"end_susong_result": {"value": "胜"}}}],
+            "must_not": [
+                {"term": {"zhixing_result": {"value": "1"}}}]}}
+    }
+    query_content = es_query(query, "case_connect", "case_connect_type")
+    return query_content
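
helpers.scan returns a generator of hits rather than a list; the modules under bin/db/getData consume it roughly as sketched below (company name invented, and this assumes the ES cluster configured in ES_INFO is reachable):

```python
# Consumption pattern used by GetDataFromEs.get_data(): hits stream in lazily,
# each carrying an _id and a _source body.
from bin.common.esQuery import es_query_bg_name

for hit in es_query_bg_name("某某建设工程有限公司"):   # invented name
    doc = hit["_source"]
    print(hit["_id"], doc.get("yishen_yg"), doc.get("case_no"))
```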

+ 33 - 0
bin/common/handleDate.py

@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: handleDate.py
+@time: 2019/8/5 14:51
+"""
+import os
+from ruamel import yaml
+
+from config import SOURCE_ROOT
+
+file_path = os.path.join(SOURCE_ROOT, "properties", "start_date.yaml")
+
+
+def get_date():
+    # read in the stored start dates
+    with open(file_path, encoding='utf-8') as f:
+        content = yaml.load(f.read(), Loader=yaml.Loader)
+    return content
+
+
+def set_date(name, new_date):
+    # update the stored date for the given key
+    content = get_date()
+    with open(file_path, "w+", encoding='utf-8') as f:
+        content[name] = new_date
+        yaml.dump(content, f, default_flow_style=False,
+                  indent=2, allow_unicode=True, Dumper=yaml.RoundTripDumper)
+    return content
+
+# if __name__ == '__main__':
+#     set_date("wenshu_detail1", "2019-08-50")
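
The keys in properties/start_date.yaml (added later in this commit) are the concatenation event_type + event_sub_type + table_name. A small usage sketch, assuming the repository root is on PYTHONPATH:

```python
# Watermark helpers: get_date() returns the whole mapping, set_date() rewrites
# one key. The key below is copied from properties/start_date.yaml.
from bin.common.handleDate import get_date, set_date

dates = get_date()
print(dates["被告有财产线索土地抵押land_mortgage_info"])   # e.g. '2019-12-06'

# after a successful pull, advance the watermark to the push date
set_date("被告有财产线索土地抵押land_mortgage_info", "2019-12-11")
```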

+ 37 - 0
bin/common/parse_name_is_company.py

@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: parse_name_is_company.py
+@time: 2019/6/26 11:29
+"""
+import re
+
+from manage import CONFIG
+
+"""
+plaintiff / appellant name filtering
+"""
+
+def parse_name(name):
+    name_list = name.split("\n")
+    new_name_list = []
+    for name in name_list:
+        # rule: keep companies, not individuals; drop government bodies and PASS_COMPANY entries
+        # names of five characters or fewer are filtered out
+        if len(name) <= 5:
+            pass
+        # if "电力公司" in name:
+        #     pass
+        elif name in CONFIG.PASS_COMPANY:
+            pass
+        else:
+            if "公司" not in name:
+                if re.findall('^.*[厂店社]$|^.*[学银超][校行市]$', name):
+                    new_name_list.append(name)
+            else:
+                new_name_list.append(name)
+    if len(new_name_list) <= 4:
+        return new_name_list
+    else:
+        return []
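
A quick illustration of the filtering (the names below are invented; parse_name reads PASS_COMPANY from manage.CONFIG, so this assumes the project config loads):

```python
# Personal names (<= 5 characters) and non-company bodies are dropped;
# names containing 公司, or matching the shop/school/bank suffix pattern, are kept.
from bin.common.parse_name_is_company import parse_name

raw = "张三\n北京某某建筑工程有限公司\n某某市人民政府"
print(parse_name(raw))   # ['北京某某建筑工程有限公司']
```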

+ 7 - 0
bin/db/__init__.py

@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: __init__.py
+@time: 2019/8/3 14:41
+"""

+ 7 - 0
bin/db/getData/__init__.py

@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: __init__.py
+@time: 2019/8/3 14:48
+"""

+ 54 - 0
bin/db/getData/fromEs.py

@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: fromEs.py
+@time: 2019/8/3 14:49
+"""
+import datetime
+import json
+from bin.common.esQuery import es_query
+from bin.common.handleDate import set_date
+from bin.db.parseDate.parseData import parse_data
+from bin.db.parseDate.parseData_YG import parse_data_yg
+
+
+class GetDataFromEs(object):
+    def __init__(self, db_config, push_date):
+        self.db_config = db_config
+        self.save_items = []
+        self.push_date = push_date
+        self.table_name = self.db_config.table_name
+        self.query_condition = self.db_config.query_condition
+
+    def get_data(self, start_date):
+        # look up this table's config
+        print("正在读取表" + self.table_name)
+        table_info = self.db_config[self.table_name]
+        date = start_date[self.db_config.event_type + self.db_config.event_sub_type + self.table_name]
+        end_date = datetime.datetime.strftime(self.push_date, "%Y-%m-%d")
+        fields = [x for x in table_info.ALL_FIELDS.keys()]
+        es_type = table_info.TYPE
+        query_condition = json.dumps(self.query_condition)
+        query_condition = query_condition.replace("start", date).replace("end", end_date)
+
+        # build the query from the configured condition (e.g. cases whose judge_result contains "终结执行程序")
+        query = {
+            "_source": fields,
+            "query": json.loads(query_condition)}
+        print(query)
+        query_data = es_query(query, self.table_name, es_type)
+        if query_data:
+            # advance the stored start date to the push date (note: helpers.scan returns a generator, so this branch always runs)
+            set_date(self.db_config.event_type + self.db_config.event_sub_type + self.table_name, datetime.datetime.strftime(self.push_date, "%Y-%m-%d"))
+        # walk the hits
+        for each in query_data:
+            _id = each["_id"]  # case id
+            each = each["_source"]  # ES document body
+            each["_id"] = _id
+            party = self.db_config.party
+            if party == "PLAINTIFF":
+                parse_data_yg(self.db_config, each, self.push_date, self.save_items)
+            else:
+                parse_data(self.db_config, each, self.push_date, self.save_items)
+        return self.save_items
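
The QUERY_CONDITION entries in the config use the literal placeholders "start" and "end"; get_data() serializes the condition and swaps them for dates, as in this standalone sketch:

```python
import json

# Condition copied from DEFENDANT_ADDITIONAL_WENSHU in properties/config_dev.yaml.
query_condition = {"bool": {"must": [
    {"range": {"import_time": {"gte": "start", "lt": "end"}}},
    {"match_phrase": {"judge_result": "终结执行程序"}}]}}

# the same substitution get_data() performs on the serialized condition
text = json.dumps(query_condition).replace("start", "2019-12-10").replace("end", "2019-12-11")
print(json.loads(text)["bool"]["must"][0])
# {'range': {'import_time': {'gte': '2019-12-10', 'lt': '2019-12-11'}}}
```

Because this is a plain string replace, any other occurrence of the substrings "start" or "end" inside the serialized condition would be rewritten as well.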

+ 50 - 0
bin/db/getData/fromMongo.py

@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: fromMongo.py
+@time: 2019/8/3 14:49
+"""
+import datetime
+from pymongo import MongoClient
+from bin.common.handleDate import set_date
+from bin.db.parseDate.parseData import parse_data
+
+
+class GetDataFromMongo(object):
+    def __init__(self, db_config, push_date):
+        self.db_config = db_config
+        self.db_info = self.db_config.db_info
+        # connect to MongoDB
+        self.client = MongoClient(self.db_info.HOST, replicaSet=self.db_info.REPLICASET)
+        self.client[self.db_info.DBNAME].authenticate(self.db_info.USER, self.db_info.PASSWD)
+
+        self.save_items = []
+        self.push_date = push_date
+        self.table_name = self.db_config.table_name
+
+    def get_data(self, start_date):
+        # look up this table's config
+        print("正在读取表" + self.table_name)
+        table_info = self.db_config[self.table_name]
+        query_words = table_info.QUERY_WORDS  # query field names
+        date = start_date[self.db_config.event_type + self.db_config.event_sub_type + self.table_name]
+        end_date = datetime.datetime.strftime(self.push_date, "%Y-%m-%d")  # end of the query window
+        db = self.client[self.db_info.DBNAME][self.table_name]
+        limit_key = {}
+        for temp in table_info.ALL_FIELDS.keys():
+            limit_key[temp] = 1
+        query = {query_words["start_date"]: {"$gt": date, "$lt": end_date}}
+        query_data = db.find(query, limit_key, no_cursor_timeout=True)
+
+        if query_data:
+            # advance the stored start date to the push date (note: a pymongo cursor is truthy even when it has no results)
+            set_date(self.db_config.event_type + self.db_config.event_sub_type + self.table_name,
+                     datetime.datetime.strftime(self.push_date, "%Y-%m-%d"))
+        # parse the documents
+        for each in query_data:
+            _id = str(each["_id"])
+            each["_id"] = str(each["_id"])
+            parse_data(self.db_config, each, self.push_date, self.save_items)
+
+        return self.save_items

+ 54 - 0
bin/db/getData/fromMysql.py

@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: fromMysql.py
+@time: 2019/8/3 14:49
+"""
+import datetime
+import pymysql
+from bin.common.handleDate import set_date
+from bin.db.parseDate.parseData import parse_data
+
+
+class GetDataFromMysql(object):
+    def __init__(self, db_config, push_date):
+        # connect to the MySQL database
+        self.db_config = db_config
+        db_info = self.db_config.db_info
+        connect = pymysql.connect(
+            host=db_info.HOST, user=db_info.USER, passwd=db_info.PASSWD,
+            db=db_info.DBNAME, charset='utf8', use_unicode=True)
+        self.cursor = connect.cursor()
+        self.save_items = []
+        self.push_date = push_date
+        self.table_name = self.db_config.table_name
+
+    def get_data(self, start_date):
+        # look up this table's config
+        print("正在读取表" + self.table_name)
+        table_info = self.db_config[self.table_name]
+        query_words = table_info.QUERY_WORDS  # query field names
+        date = start_date[self.db_config.event_type + self.db_config.event_sub_type + self.table_name]
+        date = "'" + date + "'"  # start of the query window (quoted for SQL)
+
+        end_date = "'" + datetime.datetime.strftime(self.push_date, "%Y-%m-%d") + "'"  # end of the query window (quoted for SQL)
+        fields = ",".join(table_info.ALL_FIELDS.keys())
+        try:
+            self.cursor.execute("SELECT %s FROM %s where %s >= %s and %s < %s" % (
+                fields, self.table_name, query_words["start_date"], date, query_words["start_date"], end_date))
+            query_data = self.cursor.fetchall()
+            print("读取完毕")
+        except Exception as e:
+            query_data = []
+            print(e)
+        if query_data:
+            # rows came back; advance the stored start date to the push date
+            set_date(self.db_config.event_type + self.db_config.event_sub_type + self.table_name, datetime.datetime.strftime(self.push_date, "%Y-%m-%d"))
+        for each in query_data:
+            each = list(each)
+            each = dict(zip(table_info.ALL_FIELDS.keys(), each))
+            each["_id"] = each["id"]
+            parse_data(self.db_config, each, self.push_date, self.save_items)
+
+        return self.save_items
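
With the land_mortgage_info entry from properties/config_dev.yaml, the statement built above comes out roughly as in this sketch (field list abbreviated; dates are examples taken from start_date.yaml and the push date):

```python
# Sketch of the %-interpolated statement GetDataFromMysql.get_data() executes.
fields = ",".join(["id", "url", "land_addr", "mortgagor_name", "mortgage_price"])
sql = "SELECT %s FROM %s where %s >= %s and %s < %s" % (
    fields, "land_mortgage_info", "spider_time", "'2019-12-06'", "spider_time", "'2019-12-10'")
print(sql)
# SELECT id,url,land_addr,mortgagor_name,mortgage_price FROM land_mortgage_info
#   where spider_time >= '2019-12-06' and spider_time < '2019-12-10'
```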

+ 1 - 0
bin/db/parseDate/__init__.py

@@ -0,0 +1 @@
+{"_source": ["title", "yg_name", "bg_name", "case_type", "case_reason", "case_amt", "court_name", "judge_date", "case_no", "url"], "query": {"bool": {"must": [{"range": {"import_time": {"gte": "2019-08-27", "lt": "2019-09-02"}}}, {"match_phrase": {"judge_result": "终结本次执行"}}]}}}

+ 76 - 0
bin/db/parseDate/parseData.py

@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: parseData.py
+@time: 2019/8/14 15:55
+"""
+import datetime
+import json
+from bin.common.esQuery import es_query_bg_name
+from bin.common.parse_name_is_company import parse_name
+from bin.utils.json_dump_date import JsonCustomEncoder
+
+
+def parse_data(db_config, each, push_date, save_items):
+    table_name = db_config.table_name
+    query_words = db_config[table_name].QUERY_WORDS  # query field names
+    summary_fields = db_config[table_name].SUMMARY_FIELDS
+    event_type = db_config.event_type  # event type label
+    event_subtype = db_config.event_sub_type  # event sub-type label
+    event_date = each[query_words["event_date"]]  # date the event occurred
+    _id = each["_id"]
+
+    try:
+        event_company = each[query_words["event_company"]]  # company involved in the event
+        # keep only defendants that are companies, not individuals
+        if event_company:
+            event_company_list = parse_name(event_company)
+        else:
+            event_company_list = []
+    except Exception as e:
+        event_company_list = []
+        print(e)
+
+    source_content = each  # raw source record
+    # build the summary description
+    summary_information = {}
+    for word in summary_fields:
+        summary_information[db_config[table_name].ALL_FIELDS[word]] = each[word]
+    source_content = json.dumps(source_content, cls=JsonCustomEncoder, ensure_ascii=False)
+    summary_information = json.dumps(summary_information, cls=JsonCustomEncoder, ensure_ascii=False)
+
+    for event_company in event_company_list:
+        # check whether this company appears as a defendant in ES
+        es_content = es_query_bg_name(event_company)
+        # if it exists in ES
+        if es_content:
+            new_content = []
+            for temp in es_content:
+                source = temp["_source"]
+                yg_name = source["yishen_yg"]
+                if parse_name(yg_name):
+                    new_content.append(temp)
+            item = {}
+            if new_content:
+                # pack into the item dict
+                item["event_type"] = event_type
+                item["event_subtype"] = event_subtype
+                item["event_desc"] = summary_information
+                item["event_company"] = event_company
+                item["event_date"] = event_date
+                item["push_date"] = push_date
+                item["source_platform"] = table_name
+                item["source_id"] = _id
+                item["source_content"] = source_content
+                item["hasBgCase"] = new_content
+                # if type(item["event_date"]) is str:
+                #     if item["push_date"] - datetime.datetime.date(
+                #             datetime.datetime.strptime(item["event_date"], '%Y-%m-%d')) <= datetime.timedelta(7):
+                #         # print(item)
+                #         save_items.append(item)
+                # elif type(item["event_date"]) is datetime.date:
+                #     if item["push_date"] -item["event_date"] <= datetime.timedelta(7):
+                #         save_items.append(item)
+                print(item)
+                save_items.append(item)

+ 65 - 0
bin/db/parseDate/parseData_YG.py

@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: parseData_YG.py
+@time: 2019/8/14 15:55
+"""
+import datetime
+import json
+from bin.common.esQuery import es_query_bg_name
+from bin.common.parse_name_is_company import parse_name
+from bin.utils.json_dump_date import JsonCustomEncoder
+
+
+def parse_data_yg(db_config, each, push_date, save_items):
+    table_name = db_config.table_name
+    query_words = db_config[table_name].QUERY_WORDS  # query field names
+    summary_fields = db_config[table_name].SUMMARY_FIELDS
+    event_type = db_config.event_type  # event type label
+    event_subtype = db_config.event_sub_type  # event sub-type label
+    event_date = each[query_words["event_date"]]  # date the event occurred
+    _id = each["_id"]
+
+    try:
+        event_company = each[query_words["event_company"]]  # company involved in the event
+        # keep only parties that are companies, not individuals
+        if event_company:
+            event_company_list = parse_name(event_company)
+        else:
+            event_company_list = []
+    except Exception as e:
+        event_company_list = []
+        print(e)
+
+    source_content = each  # raw source record
+    # build the summary description
+    summary_information = {}
+    for word in summary_fields:
+        summary_information[db_config[table_name].ALL_FIELDS[word]] = each[word]
+    source_content = json.dumps(source_content, cls=JsonCustomEncoder, ensure_ascii=False)
+    summary_information = json.dumps(summary_information, cls=JsonCustomEncoder, ensure_ascii=False)
+    # print(summary_information)
+
+    for event_company in event_company_list:
+        item = {}
+        # pack into the item dict
+        item["event_type"] = event_type
+        item["event_subtype"] = event_subtype
+        item["event_desc"] = summary_information
+        item["event_company"] = event_company
+        item["bg_name"] = each[query_words["bg_name"]]
+        if len(item["bg_name"]) >= 99:
+            item["bg_name"] = item["bg_name"][:99]
+        item["event_date"] = event_date
+        item["push_date"] = push_date
+        item["source_platform"] = table_name
+        item["source_id"] = _id
+        item["source_content"] = source_content
+
+        # print(item["push_date"]-datetime.datetime.date(datetime.datetime.strptime(item["event_date"], '%Y-%m-%d')))
+        # if item["push_date"]-datetime.datetime.date(datetime.datetime.strptime(item["event_date"], '%Y-%m-%d')) <= datetime.timedelta(7):
+        #     print(item)
+        #     save_items.append(item)
+        print(item)
+        save_items.append(item)

+ 1 - 0
bin/db/saveDate/__init__.py

@@ -0,0 +1 @@
+{"_source": ["party1", "party2", "bltn_type_name", "province", "publish_page", "bltn_no", "content", "court_name", "create_date", "publish_date"], "query": {"bool": {"must": [{"range": {"create_date": {"gte": "2019-09-09", "lt": "2019-09-10"}}}, {"wildcard": {"bltn_type_name": {"value": "*开庭公告*"}}}]}}}

+ 123 - 0
bin/db/saveDate/saveEventChance.py

@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: saveEventChance.py
+@time: 2019/8/5 9:20
+"""
+import pymysql
+from bin.common.parse_name_is_company import parse_name
+
+
+class saveEventChance(object):
+    def __init__(self, config):
+        # target database for daily events
+        save_db = config.SAVE_DB_INFO
+        self.save_db_connect = pymysql.connect(
+            host=save_db.HOST, user=save_db.USER, passwd=save_db.PASSWD, db=save_db.DBNAME,
+            charset='utf8', use_unicode=True)
+        self.save_db_cursor = self.save_db_connect.cursor()
+
+    def save(self, save_items, db_config):
+        save_table = db_config.save_table
+        for items in save_items:
+            print(items)
+            if "被告" in items["event_type"]:
+                items["event_flag"] = "1"
+                # check for an existing row first
+                try:
+                    self.save_db_cursor.execute(
+                        "SELECT id FROM daily_event WHERE EVENT_COMPANY=%s and SOURCE_PLATFORM=%s and SOURCE_ID = %s",
+                        (items["event_company"], items["source_platform"], items["source_id"]))
+                    has_value = self.save_db_cursor.fetchone()
+                except Exception as e:
+                    has_value = None
+                    print(e)
+                if has_value:
+                    pass
+                else:
+                    try:
+                        self.save_db_cursor.execute(
+                            "INSERT INTO daily_event(EVENT_TYPE, EVENT_SUBTYPE,EVENT_COMPANY, EVENT_DESC, EVENT_DATE, PUSH_DATE, STATUS,SOURCE_PLATFORM,SOURCE_ID,SOURCE_CONTENT,EVENT_FLAG) VALUES (%s,%s,%s, %s, %s, %s, %s, %s,%s,%s,%s)",
+                            (items["event_type"], items["event_subtype"], items["event_company"], items["event_desc"],
+                             items["event_date"], items["push_date"], 0, items["source_platform"], items["source_id"],
+                             items["source_content"], items["event_flag"]))
+                        self.save_db_connect.commit()
+                    except Exception as e:
+                        print(e)
+
+                    event_id = self.save_db_cursor.lastrowid
+                    # parse the matched cases
+                    for temp in items["hasBgCase"]:
+                        source = temp["_source"]
+                        yg_name = source["yishen_yg"]
+                        if len(yg_name) >= 99:
+                            yg_name = yg_name[:99]
+                        # split the plaintiff field and keep only company names
+                        new_yg_name_list = parse_name(yg_name)
+                        for new_yg_name in new_yg_name_list:
+                            case_id = temp["_id"]
+                            bg_name = items["event_company"]
+                            case_no = source["case_no"]
+                            court = source["court_name"]
+                            judge_date = source["judge_date"]
+                            case_reason = source["case_reason"]
+                            case_amt = source["case_amt"]
+                            status = 0
+
+                            # check for an existing row first
+                            try:
+                                self.save_db_cursor.execute(
+                                    "SELECT id FROM event_chance WHERE EVENT_ID=%s and YG_NAME=%s and BG_NAME=%s and CASE_NO = %s",
+                                    (event_id, new_yg_name, bg_name, case_no))
+                                has_value = self.save_db_cursor.fetchone()
+                            except Exception as e:
+                                has_value = None
+                                print(e)
+                            if has_value:
+                                pass
+                            else:
+                                # save to the event_chance table
+                                try:
+                                    self.save_db_cursor.execute(
+                                        "INSERT INTO event_chance(EVENT_ID,YG_NAME,BG_NAME,CASE_ID,CASE_NO,COURT,JUDGE_DATE,CASE_REASON,CASE_AMT,STATUS) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
+                                        (event_id, new_yg_name, bg_name, case_id, case_no, court, judge_date,
+                                         case_reason, case_amt, status))
+                                    self.save_db_connect.commit()
+                                except Exception as e:
+                                    print(e)
+            elif "原告" in items["event_type"]:
+                items["event_flag"] = "0"
+                # check for an existing row first
+                try:
+                    self.save_db_cursor.execute(
+                        "SELECT id FROM yg_daily_event WHERE EVENT_COMPANY=%s and SOURCE_PLATFORM=%s and SOURCE_ID = %s",
+                        (items["event_company"], items["source_platform"], items["source_id"]))
+                    has_value = self.save_db_cursor.fetchone()
+                except Exception as e:
+                    has_value = None
+                    print(e)
+                if has_value:
+                    pass
+                else:
+                    sql = "INSERT INTO yg_daily_event"
+                    sql = sql + "(EVENT_TYPE, EVENT_SUBTYPE,EVENT_COMPANY, BG_NAME,EVENT_DESC, EVENT_DATE, PUSH_DATE, STATUS,SOURCE_PLATFORM,SOURCE_ID,SOURCE_CONTENT,EVENT_FLAG) VALUES (%s,%s,%s, %s,%s, %s, %s, %s, %s,%s,%s,%s)"
+                    data = (
+                        items["event_type"],
+                        items["event_subtype"],
+                        items["event_company"],
+                        items["bg_name"],
+                        items["event_desc"],
+                        items["event_date"],
+                        items["push_date"],
+                        0,
+                        items["source_platform"],
+                        items["source_id"],
+                        items["source_content"],
+                        items["event_flag"])
+
+                    try:
+                        self.save_db_cursor.execute(sql, data)
+                        self.save_db_connect.commit()
+                    except Exception as e:
+                        print(e)

+ 7 - 0
bin/log/__init__.py

@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: __init__.py
+@time: 2019/8/16 17:33
+"""

+ 7 - 0
bin/utils/__init__.py

@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: __init__.py
+@time: 2019/8/19 15:29
+"""

+ 21 - 0
bin/utils/covert_dict_object.py

@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: covert_dict_object.py
+@time: 2019/8/20 9:49
+"""
+
+
+class Dict(dict):
+    __setattr__ = dict.__setitem__
+    __getattr__ = dict.__getitem__
+
+
+def dict_to_object(dict_obj):
+    if not isinstance(dict_obj, dict):
+        return dict_obj
+    inst = Dict()
+    for k, v in dict_obj.items():
+        inst[k] = dict_to_object(v)
+    return inst
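
This helper is what makes the attribute-style access used throughout the project (CONFIG.ES_INFO.HOST, db_config.db_info, …) work. A minimal usage sketch:

```python
# Nested dicts become attribute-accessible; only dict values are converted,
# so dicts nested inside lists stay plain dicts.
from bin.utils.covert_dict_object import dict_to_object

cfg = dict_to_object({"DB_INFO": {"HOST": "localhost", "PORT": 3306}})
print(cfg.DB_INFO.HOST, cfg.DB_INFO.PORT)   # localhost 3306
```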

+ 21 - 0
bin/utils/json_dump_date.py

@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: json_dump_date.py
+@time: 2019/8/21 8:51
+"""
+
+from datetime import date
+import json
+from datetime import datetime
+
+
+class JsonCustomEncoder(json.JSONEncoder):
+    def default(self, value):
+        if isinstance(value, datetime):
+            return value.strftime('%Y-%m-%d %H:%M:%S')
+        elif isinstance(value, date):
+            return value.strftime('%Y-%m-%d')
+        else:
+            return json.JSONEncoder.default(self, value)
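
parseData.py and parseData_YG.py pass this encoder to json.dumps so that DATE/DATETIME values coming out of MySQL and MongoDB serialize cleanly. A small sketch:

```python
import datetime
import json

from bin.utils.json_dump_date import JsonCustomEncoder

record = {"judge_date": datetime.date(2019, 12, 10),
          "import_time": datetime.datetime(2019, 12, 10, 8, 30)}
print(json.dumps(record, cls=JsonCustomEncoder, ensure_ascii=False))
# {"judge_date": "2019-12-10", "import_time": "2019-12-10 08:30:00"}
```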

+ 45 - 0
bin/utils/read_properties.py

@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: read_properties.py
+@time: 2019/8/16 18:33
+"""
+
+
+class Properties(object):
+
+    def __init__(self, fileName):
+        self.fileName = fileName
+        self.properties = {}
+
+    def __getDict(self, strName, dictName, value):
+
+        if (strName.find('.') > 0):
+            k = strName.split('.')[0]
+            dictName.setdefault(k, {})
+            return self.__getDict(strName[len(k) + 1:], dictName[k], value)
+        else:
+            dictName[strName] = value
+            return
+
+    def getProperties(self):
+        try:
+            pro_file = open(self.fileName, 'r')
+            for line in pro_file.readlines():
+                line = line.strip().replace('\n', '')
+                if line.find("#") != -1:
+                    line = line[0:line.find('#')]
+                if line.find('=') > 0:
+                    strs = line.split('=')
+                    strs[1] = line[len(strs[0]) + 1:]
+                    self.__getDict(strs[0].strip(), self.properties, strs[1].strip())
+        except Exception as e:
+            raise e
+        else:
+            pro_file.close()
+        return self.properties
+
+
+if __name__ == '__main__':
+    print(Properties("application-dev.properties").getProperties())

+ 19 - 0
bin/utils/read_yaml.py

@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: read_yaml.py
+@time: 2019/8/19 15:45
+"""
+import os
+import yaml
+from bin.utils.covert_dict_object import dict_to_object
+from config import SOURCE_ROOT
+
+
+def read_yaml(file_name):
+    yaml_path = os.path.join(SOURCE_ROOT, "properties", file_name)
+    with open(yaml_path, encoding="utf8") as f:
+        config_text = yaml.load(f.read(), Loader=yaml.FullLoader)
+    config = dict_to_object(config_text)
+    return config
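
Combined with dict_to_object, the YAML config is loaded once in manage.py and then navigated by attribute. A sketch using keys that properties/config_dev.yaml defines (assumes the repository root is the working directory):

```python
from bin.utils.read_yaml import read_yaml

CONFIG = read_yaml("config_dev.yaml")
print(CONFIG.DATABASE.BIG_DATA.DB_TYPE)   # mysql
print(CONFIG.ALL_TYPE.LAND_MORTGAGE)      # 土地抵押
print(CONFIG.SAVE_DB_INFO.PORT)           # 3306
```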

+ 24 - 0
bin/utils/transferSql.py

@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: transferSql.py
+@time: 2019/9/3 11:39
+"""
+
+
+def transfer(content):
+    if content is None:
+        return None
+    else:
+        string = ""
+        for c in content:
+            if c == '"':
+                string += '\\\"'
+            elif c == "'":
+                string += "\\\'"
+            elif c == "\\":
+                string += "\\\\"
+            else:
+                string += c
+        return string
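
transfer() is a small escaping helper (not referenced elsewhere in this commit); it backslash-escapes quotes and backslashes before text is embedded in SQL strings:

```python
from bin.utils.transferSql import transfer

print(transfer('判决书载明:"被告未履行"'))   # 判决书载明:\"被告未履行\"
print(transfer(None))                        # None
```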

+ 15 - 0
config.py

@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: config.py
+@time: 2019/8/20 11:16
+"""
+import os
+SOURCE_ROOT = os.path.abspath(os.path.dirname(__file__))
+
+CONFIG_DICT = {
+    "develop": "config_dev.yaml",
+    "product": "config_prd.yaml",
+    "testing": "config_test.yaml"}
+

+ 55 - 0
config.yaml

@@ -0,0 +1,55 @@
+api:
+  address: "http://localhost:8000"  # API 地址
+mongo:
+  host: localhost  # MongoDB host
+  port: 27017  # MongoDB port
+  db: crawlab_test  # MongoDB database
+  username: ""  # MongoDB username
+  password: ""  # MongoDB password
+  authSource: "admin"  # MongoDB auth source database
+redis:
+  address: localhost  # Redis host
+  password: ""  # Redis password
+  database: 1  # Redis database
+  port: 6379  # Redis port
+log:
+  level: info  # Log Level
+  path: "/var/logs/crawlab"  # Log Path
+  isDeletePeriodically: "N"  # 是否定期删除日志(默认为否)
+  deleteFrequency: "@hourly"  # 删除日志频率
+server:
+  host: 0.0.0.0  # 后端监听地址
+  port: 8000  # 后端监听端口
+  master: "Y"  # 是否为主节点
+  secret: "crawlab"  # JWT secret
+  register:
+    # mac地址 或者 ip地址,如果是ip,则需要手动指定IP
+    type: "mac"
+    ip: ""
+  lang: # 安装语言环境, Y 为安装,N 为不安装
+    python: "Y"
+    node: "N"
+    java: "N"
+    dotnet: "N"
+spider:
+  path: "/app/spiders"  # 爬虫路径
+task:
+  workers: 4  # 同时运行任务的执行器数量
+other:
+  tmppath: "/tmp"  # 临时文件目录
+version: 0.4.7  # 版本号
+setting:
+  allowRegister: "N"  # 是否允许注册
+  enableTutorial: "N"  # 是否弃用教程
+  runOnMaster: "Y"  # 是否在主节点上运行任务
+  demoSpiders: "N"  # 是否加载demo爬虫
+  checkScrapy: "Y"  # 是否自动检查Scrapy
+notification: # 消息通知
+  mail: # 邮箱通知
+    server: '' # SMTP 服务器地址
+    port: ''  # SMTP 服务器端口
+    senderEmail: ''  # 发送者邮箱
+    senderIdentity: ''  # 发送者身份
+    smtp:  # SMTP
+      user: ''  # SMTP 用户名
+      password: ''  # SMTP 密码

+ 113 - 0
deal_bidder.py

@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: deal_bidder.py
+@time: 2019/7/4 9:57
+"""
+import re
+from bson import ObjectId
+from pymongo import MongoClient
+
+
+class BidInfo(object):
+    def __init__(self, date):
+        self.client = MongoClient(['dds-uf6ff5dfd9aef3641601-pub.mongodb.rds.aliyuncs.com:3717',
+                                   'dds-uf6ff5dfd9aef3642555-pub.mongodb.rds.aliyuncs.com:3717'],
+                                  replicaSet='mgset-6501997')
+        # authenticate the itslaw user for this connection
+        self.client["itslaw"].authenticate('itslaw', 'itslaw_168')
+        self.db_new = self.client["itslaw"].WinBidInfo
+        # self.db_new = self.client["itslaw"].WinBidInfo_copy1
+        # cursor over documents crawled on or after the given date
+        self.response = self.db_new.find({"spider_time": {"$gte": date}})
+        self.new_item = {}
+
+    def parse_data(self):
+        # iterate over the cursor
+        for index, content in enumerate(self.response):
+            print(index)
+            if "new_bidder" in content.keys():
+                pass
+            else:
+                bidder = content["bidder"]
+                code = content["code"]
+                _id = str(content["_id"])
+                new_bidder = self.parse_bidder(bidder, code)
+                new_values = {"$set": {"new_bidder": new_bidder}}
+                self.db_new.update_one(content, new_values)
+
+    @staticmethod
+    def parse_bidder(bidder, code):
+        # map fullwidth (Chinese) punctuation and digits to ASCII
+        punctuation_en = u',。、!?:;【】()%#@&1234567890'
+        punctuation_zh = u',..!?:;[]()%#@&1234567890'
+        punctuation = str.maketrans(punctuation_en, punctuation_zh)
+        bidder = bidder.translate(punctuation)
+
+        # normalize the various winning-bidder labels to 中标单位
+        bidder = re.sub(r"中标供应商名称|中标人名称|中标单位名称|中标商名称|中标人|供应商|成交供应商名称|"
+                        "成交人名称|成交人|中标商|中标\(成交\)供应商名称|成交单位|中标企业|中标人为",
+                        "中标单位", bidder)
+
+        if "废标" in bidder:
+            return ""
+        else:
+            text_list = re.split(r"[,.!?;\\*—/]", bidder)
+            company = []
+            for text in text_list:
+                if 22 >= len(text) >= 6 and text.endswith("公司"):
+                    if ":" in text:
+                        temp = re.findall("中标单位为*:*([\u4E00-\u9FA5()]{4,20}?公司)", text)
+                        company.extend(temp)
+                    else:
+                        if "中标" in text or "候选" in text:
+                            pass
+                        else:
+                            city_list = ["河北", "石家庄", "张家口", "承德", "唐山", "秦皇岛", "廊坊", "保定", "沧州", "衡水", "邢台", "邯郸",
+                                         "山西", "太原", "大同", "朔州", "忻州", "阳泉", "晋中", "吕梁", "长治", "临汾", "晋城", "运城", "内蒙古",
+                                         "呼和浩特", "呼伦贝尔", "通辽", "赤峰", "巴彦淖尔", "乌兰察布", "包头", "鄂尔多斯", "乌海", "黑龙江", "哈尔滨",
+                                         "黑河", "伊春", "齐齐哈尔", "鹤岗", "佳木斯", "双鸭山", "绥化", "大庆", "七台河", "鸡西", "牡丹江", "吉林",
+                                         "长春", "白城", "松原", "吉林", "四平", "辽源", "白山", "通化", "辽宁", "沈阳", "铁岭", "阜新", "抚顺",
+                                         "朝阳", "本溪", "辽阳", "鞍山", "盘锦", "锦州", "葫芦岛", "营口", "丹东", "大连", "江苏", "南京", "连云港",
+                                         "徐州", "宿迁", "淮安", "盐城", "泰州", "扬州", "镇江", "南通", "常州", "无锡", "苏州", "浙江", "杭州",
+                                         "湖州", "嘉兴", "绍兴", "舟山", "宁波", "金华", "衢州", "台州", "丽水", "温州", "安徽", "合肥", "淮北",
+                                         "亳州", "宿州", "蚌埠", "阜阳", "淮南", "滁州", "六安", "马鞍山", "芜湖", "宣城", "铜陵", "池州", "安庆",
+                                         "黄山", "福建", "福州", "宁德", "南平", "三明", "莆田", "龙岩", "泉州", "漳州", "厦门", "江西", "南昌",
+                                         "九江", "景德镇", "上饶", "鹰潭", "抚州", "新余", "宜春", "萍乡", "吉安", "赣州", "山东", "济南", "德州",
+                                         "滨州", "东营", "烟台", "威海", "淄博", "潍坊", "聊城", "泰安", "莱芜", "青岛", "日照", "济宁", "菏泽",
+                                         "临沂", "枣庄", "河南", "郑州", "安阳", "鹤壁", "濮阳", "新乡", "焦作", "三门峡", "开封", "洛阳", "商丘",
+                                         "许昌", "平顶山", "周口", "漯河", "南阳", "驻马店", "信阳", "湖北", "武汉", "十堰", "襄樊", "随州", "荆门",
+                                         "孝感", "宜昌", "黄冈", "鄂州", "荆州", "黄石", "咸宁", "湖南", "长沙", "岳阳", "张家界", "常德", "益阳",
+                                         "湘潭", "株洲", "娄底", "怀化", "邵阳", "衡阳", "永州", "郴州", "广东", "广州", "韶关", "梅州", "河源",
+                                         "清远", "潮州", "揭阳", "汕头", "肇庆", "惠州", "佛山", "东莞", "云浮", "汕尾", "江门", "中山", "深圳",
+                                         "珠海", "阳江", "茂名", "湛江", "广西", "南宁", "桂林", "河池", "贺州", "柳州", "百色", "来宾", "梧州",
+                                         "贵港", "玉林", "崇左", "钦州", "防城港", "海", "海南", "海口", "三亚", "三沙", "儋州", "四川", "成都",
+                                         "广元", "巴中", "绵阳", "德阳", "达州", "南充", "遂宁", "广安", "资阳", "眉山", "雅安", "内江", "乐山",
+                                         "自贡", "泸州", "宜宾", "攀枝花", "贵州", "贵阳", "遵义", "六盘水", "安顺", "铜仁", "毕节", "云南", "昆明",
+                                         "昭通", "丽江", "曲靖", "保山", "玉溪", "临沧", "普洱", "西藏", "拉萨", "日喀则", "昌都", "林芝", "山南",
+                                         "那曲", "陕西", "西安", "榆林", "延安", "铜川", "渭南", "宝鸡", "咸阳", "商洛", "汉中", "安康", "甘肃",
+                                         "兰州", "嘉峪关", "酒泉", "张掖", "金昌", "武威", "白银", "庆阳", "平凉", "定西", "天水", "陇南", "青海",
+                                         "西宁", "海东", "宁夏", "银川", "石嘴山", "吴忠", "中卫", "固原", "新疆", "乌鲁木齐", "克拉玛依", "吐鲁番",
+                                         "哈密"]
+                            for city in city_list:
+                                if text.startswith(city):
+                                    company.append(text)
+                                    break
+                elif len(text) <= 5:
+                    pass
+                elif text.isdigit():
+                    pass
+                elif "公司" in text:
+                    temp = re.findall("中标单位:*([\u4E00-\u9FA5()]{4,20}?公司)", text)
+                    company.extend(temp)
+                else:
+                    pass
+            # drop duplicates
+            company = '\n'.join(set(company))
+            return company
+
+
+if __name__ == '__main__':
+    bid_info = BidInfo("2019-09-12")
+    bid_info.parse_data()
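
parse_bidder is a @staticmethod, so it can be exercised without opening the MongoDB connection; an illustration with an invented announcement string:

```python
# Illustration only: the label 中标人 is normalized to 中标单位 and the
# company name (invented here) is extracted.
from deal_bidder import BidInfo

print(BidInfo.parse_bidder("中标人:某某建设工程有限公司", ""))
# 某某建设工程有限公司
```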

+ 24 - 0
manage.py

@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@author: Deepcold
+@file: manage.py
+@time: 2019/8/19 9:45
+"""
+import datetime
+from config import CONFIG_DICT
+from bin.utils.read_yaml import read_yaml
+from deal_bidder import BidInfo
+
+config_name = CONFIG_DICT.get("product")  # choose which config file to load
+# config_name = CONFIG_DICT.get("develop")  # choose which config file to load
+CONFIG = read_yaml(config_name)
+
+if __name__ == '__main__':
+    from bin.DailyEvent import DailyEvent
+
+    bid_info = BidInfo(datetime.datetime.strftime(datetime.date.today(), "%Y-%m-%d"))
+    # bid_info = BidInfo("2019-08-24")
+    bid_info.parse_data()
+    app = DailyEvent(CONFIG)
+    app.run()

+ 321 - 0
properties/config_dev.yaml

@@ -0,0 +1,321 @@
+# -------------------------------------------------------------------
+# ---------------------- Development environment configuration ----------------------
+# -------------------------------------------------------------------
+DATABASE:
+  BIG_DATA:
+    DB_TYPE: mysql
+    DB_INFO:
+      HOST: rm-uf61r3m23ba1p5z3dfo.mysql.rds.aliyuncs.com
+      DBNAME: prism1
+      USER: wenshu
+      PASSWD: wenshu_168
+      # 土地抵押
+    land_mortgage_info:
+      ALL_FIELDS: # 事件描述所用到的字段,及中文含义
+        id: ID
+        url: url
+        land_addr: 土地位置
+        land_acreage: 土地面积(公顷)
+        mortgagor_name: 土地抵押人名称
+        land_purpose: 土地用途
+        use_type: 抵押土地用途
+        assessment_price: 评估金额(万元)
+        mortgage_price: 抵押金额(万元)
+        start_date: 抵押开始日期
+        end_date: 抵押结束日期
+        source_platform: 数据来源
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "mortgagor_name", "mortgage_price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: start_date
+        event_company: mortgagor_name
+      # --------------------------------------------------------
+      # 地块公示
+    land_notice_info:
+      # 事件描述所用到的字段
+      ALL_FIELDS:
+        id: ID
+        notice_title: 公示标题
+        url: url
+        land_addr: 土地位置
+        land_acreage: 土地面积(公顷)
+        land_purpose: 土地用途
+        sell_year: 出让年限
+        price: 成交价格(万元)
+        assignee_unit: 受让单位
+        publicity_date: 公示期
+        release_date: 发布日期
+        source_platform: 数据来源
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "assignee_unit", "price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: release_date
+        event_company: assignee_unit
+      # --------------------------------------------------------
+      # 土地成交
+    land_purchase_info:
+      ALL_FIELDS:
+        id: ID
+        pro_name: 项目名称
+        url: url
+        region_name: 行政区
+        land_addr: 土地位置
+        land_acreage: 土地面积(公顷)
+        land_user: 土地使用权人
+        land_purpose: 土地用途
+        deal_mode: 供地方式
+        price: 成交价格(万元)
+        Sign_date: 合同签订日期
+        deal_date: 约定交地日期
+        source_platform: 数据来源
+      # 查询事件,事件发生日期,事件企业
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "land_user", "price"]
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: Sign_date
+        event_company: land_user
+      # --------------------------------------------------------
+      # 土地转让
+    land_transfer_info:
+      ALL_FIELDS:
+        id: ID
+        land_addr: 土地位置
+        url: url
+        original_user: 原土地使用权人
+        now_user: 现土地使用权人
+        land_acreage: 土地面积(公顷)
+        use_type: 土地使用权类型
+        mode_transfer: 转让方式
+        land_purpose: 土地用途
+        price: 转让价格(万元)
+        deal_date: 成交日期
+        source_platform: 数据来源
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "now_user", "price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: deal_date
+        event_company: now_user
+      # --------------------------------------------------------
+      # 矿产
+    win_bid_mineral:
+      ALL_FIELDS:
+        id: ID
+        bid_code: 文号
+        pro_url: url
+        pro_addr: 矿产位置
+        bidder_name: 中标人
+        business_type: 矿权类型
+        pro_name: 项目名称
+        bid_price: 中标价格(万元)
+        transfer_time: 出让时间
+        release_time: 发布日期
+      SUMMARY_FIELDS:
+        ["pro_addr", "business_type", "bidder_name", "bid_price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: release_time
+        event_company: bidder_name
+      # ---------------------------------------------------Es-------------------------------------------------------------
+
+  ES:
+    DB_TYPE: es
+    # 数据库连接信息
+    DB_INFO:
+      HOST: es-cn-0pp0r32zf000ipovd.elasticsearch.aliyuncs.com
+      USER: elastic
+      PASSWD: elastic_168
+      PORT: 9200
+
+      # --------------------------------------------------------
+      # es执行成功案件
+    wenshu_detail1:
+      TYPE: wenshu_detail_type
+      ALL_FIELDS:
+        title: 标题
+        yg_name: 原告
+        bg_name: 被告
+        case_type: 案件类型
+        case_reason: 案由
+        case_amt: 案件标的
+        court_name: 法院名称
+        judge_date: 判决日期
+        case_no: 案号
+        url: url
+      SUMMARY_FIELDS: ["title", "case_no"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        event_date: judge_date
+        event_company: yg_name
+        bg_name: bg_name
+
+      # 被告开庭公告
+    court_notice:
+      TYPE: court_notice_type
+      ALL_FIELDS:
+        party1: 原告
+        party2: 被告
+        bltn_type_name: 公告类型名称
+        province: 省份
+        publish_page: 刊登版面
+        bltn_no: 公告号
+        content: 内容
+        court_name: 法院名称
+        create_date: 创建日期
+        publish_date: 生成日期
+      SUMMARY_FIELDS: ["bltn_type_name", "publish_date"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        event_date: publish_date
+        event_company: party1
+        bg_name: party2
+
+  MONGO:
+    DB_TYPE: mongodb
+    # 数据库连接信息
+    DB_INFO:
+      HOST: ["dds-uf6ff5dfd9aef3641601-pub.mongodb.rds.aliyuncs.com:3717","dds-uf6ff5dfd9aef3642555-pub.mongodb.rds.aliyuncs.com:3717"]
+      REPLICASET: mgset-6501997
+      DBNAME: itslaw
+      USER: itslaw
+      PASSWD: itslaw_168
+    WinBidInfo:
+      EVENT_TYPE: 被告有财产线索
+      EVENT_SUBTYPE: 中标项目  # 中标数据
+      ALL_FIELDS:
+        _id: ID
+        title: 标题
+        url: url
+        new_bidder: 中标人
+        price: 中标价(万元)
+        province: 省份
+        bid_date: 中标日期
+      SUMMARY_FIELDS: ["new_bidder", "price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: bid_date
+        event_company: new_bidder
+
+# 保存数据库
+SAVE_DB_INFO:
+  HOST: "47.100.20.161"
+  DBNAME: test1
+  USER: test
+  PORT: 3306
+  PASSWD: test952
+# es查询数据库
+ES_INFO:
+  HOST: es-cn-0pp0r32zf000ipovd.public.elasticsearch.aliyuncs.com
+  USER: elastic
+  PASSWD: elastic_168
+  PORT: 9200
+
+PASS_COMPANY: "[
+        阳光财产保险股份有限公司,
+        中国太平洋财产保险股份有限公司,
+        中国平安财产保险股份有限公司,
+        中华联合财产保险股份有限公司,
+        中国人民财产保险股份有限公司,
+        中国人寿财产保险股份有限公司,
+        中国大地财产保险股份有限公司,
+        中国人寿保险股份有限公司,
+        中国电信股份有限公司,
+        中国联合网络通信有限公司,
+        中国邮政集团公司,
+        中国工商银行股份有限公司,
+        中国联合网络通信有限公司,
+        中国人民财险保险股份有限公司,
+        中国人民财产保险股份有限公司廊坊市分公司,
+        国网山东省电力公司]"
+
+# 事件类型映射
+ALL_TYPE:
+  DEFENDANT_PROPERTY_CLUE: 被告有财产线索
+  DEFENDANT_ADDITIONAL_WENSHU: 被告有新增文书
+  DEFENDANT_ADDITIONAL_NOTICE: 被告有新增公告
+  PLAINTIFF_ADDITIONAL_WENSHU: 原告有新增文书
+  PLAINTIFF_ADDITIONAL_NOTICE: 原告有新增公告
+  LAND_MORTGAGE: 土地抵押
+  LAND_NOTICE: 土地公示
+  LAND_PURCHASE: 土地出让
+  LAND_TRANSFER: 土地转让
+  MINING_TRANSFER: 矿业权出让
+  WIN_BID_PROJECT: 中标项目
+  SUCCESSFUL_EXECUTION: 执行成功文书
+  COURT_SESSION: 开庭公告
+  BANKRUPT_NOTICE: 破产公告
+  TERMINATE_EXECUTION: 终结本次执行
+
+# 事件类型
+ALL_EVENT_TYPE:
+  DEFENDANT_PROPERTY_CLUE:
+    LAND_MORTGAGE: # 土地抵押
+      land_mortgage_info:
+        DB: BIG_DATA
+    LAND_NOTICE:
+      land_notice_info:
+        DB: BIG_DATA
+    LAND_PURCHASE:
+      land_purchase_info:
+        DB: BIG_DATA
+    LAND_TRANSFER:
+      land_transfer_info:
+        DB: BIG_DATA
+    MINING_TRANSFER:
+      win_bid_mineral:
+        DB: BIG_DATA
+    WIN_BID_PROJECT:
+      WinBidInfo:
+        DB: MONGO
+  DEFENDANT_ADDITIONAL_WENSHU:
+    SUCCESSFUL_EXECUTION:
+      wenshu_detail1:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"import_time": {"gte": "start", "lt": "end"}}},
+          {"match_phrase": {"judge_result": "终结执行程序"}}]}}
+  DEFENDANT_ADDITIONAL_NOTICE:
+    COURT_SESSION:
+      court_notice:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"create_date": {"gte": "start", "lt": "end"}}},
+          {"bool": {"should": [
+          {"wildcard": {"bltn_type_name": {"value": "*诉状*"}}},
+          {"wildcard": {"bltn_type_name": {"value": "*开庭*"}}}]}}]}}
+    BANKRUPT_NOTICE:
+      court_notice:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"create_date": {"gte": "start", "lt": "end"}}},
+          {"wildcard": {"bltn_type_name": {"value": "*破产*"}}}]}}
+
+  PLAINTIFF_ADDITIONAL_WENSHU:
+    TERMINATE_EXECUTION:
+      wenshu_detail1:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"import_time": {"gte": "start", "lt": "end"}}},
+          {"match_phrase": {"judge_result": "终结本次执行"}}
+          ]}}
+  PLAINTIFF_ADDITIONAL_NOTICE:
+    COURT_SESSION:
+      court_notice:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"create_date": {"gte": "start", "lt": "end"}}},
+          {"wildcard": {"bltn_type_name": {"value": "*开庭公告*"}}}]}}
+

+ 323 - 0
properties/config_prd.yaml

@@ -0,0 +1,323 @@
+# -------------------------------------------------------------------
+# ---------------------- Production environment configuration ----------------------
+# -------------------------------------------------------------------
+DATABASE:
+  BIG_DATA:
+    DB_TYPE: mysql
+    DB_INFO:
+      HOST: rm-uf61r3m23ba1p5z3d.mysql.rds.aliyuncs.com
+      DBNAME: prism1
+      USER: wenshu
+      PASSWD: wenshu_168
+      # 土地抵押
+    land_mortgage_info:
+      ALL_FIELDS: # 事件描述所用到的字段,及中文含义
+        id: ID
+        url: url
+        land_addr: 土地位置
+        land_acreage: 土地面积(公顷)
+        mortgagor_name: 土地抵押人名称
+        land_purpose: 土地用途
+        use_type: 抵押土地用途
+        assessment_price: 评估金额(万元)
+        mortgage_price: 抵押金额(万元)
+        start_date: 抵押开始日期
+        end_date: 抵押结束日期
+        source_platform: 数据来源
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "mortgagor_name", "mortgage_price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: start_date
+        event_company: mortgagor_name
+      # --------------------------------------------------------
+      # 地块公示
+    land_notice_info:
+      # 事件描述所用到的字段
+      ALL_FIELDS:
+        id: ID
+        notice_title: 公示标题
+        url: url
+        land_addr: 土地位置
+        land_acreage: 土地面积(公顷)
+        land_purpose: 土地用途
+        sell_year: 出让年限
+        price: 成交价格(万元)
+        assignee_unit: 受让单位
+        publicity_date: 公示期
+        release_date: 发布日期
+        source_platform: 数据来源
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "assignee_unit", "price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: release_date
+        event_company: assignee_unit
+      # --------------------------------------------------------
+      # 土地成交
+    land_purchase_info:
+      ALL_FIELDS:
+        id: ID
+        pro_name: 项目名称
+        url: url
+        region_name: 行政区
+        land_addr: 土地位置
+        land_acreage: 土地面积(公顷)
+        land_user: 土地使用权人
+        land_purpose: 土地用途
+        deal_mode: 供地方式
+        price: 成交价格(万元)
+        Sign_date: 合同签订日期
+        deal_date: 约定交地日期
+        source_platform: 数据来源
+      # 查询事件,事件发生日期,事件企业
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "land_user", "price"]
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: Sign_date
+        event_company: land_user
+      # --------------------------------------------------------
+      # 土地转让
+    land_transfer_info:
+      ALL_FIELDS:
+        id: ID
+        land_addr: 土地位置
+        url: url
+        original_user: 原土地使用权人
+        now_user: 现土地使用权人
+        land_acreage: 土地面积(公顷)
+        use_type: 土地使用权类型
+        mode_transfer: 转让方式
+        land_purpose: 土地用途
+        price: 转让价格(万元)
+        deal_date: 成交日期
+        source_platform: 数据来源
+      SUMMARY_FIELDS:
+        ["land_addr", "land_acreage", "now_user", "price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: deal_date
+        event_company: now_user
+      # --------------------------------------------------------
+      # 矿产
+    win_bid_mineral:
+      ALL_FIELDS:
+        id: ID
+        bid_code: 文号
+        pro_url: url
+        pro_addr: 矿产位置
+        bidder_name: 中标人
+        business_type: 矿权类型
+        pro_name: 项目名称
+        bid_price: 中标价格(万元)
+        transfer_time: 出让时间
+        release_time: 发布日期
+      SUMMARY_FIELDS:
+        ["pro_addr", "business_type", "bidder_name", "bid_price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: release_time
+        event_company: bidder_name
+      # ---------------------------------------------------Es-------------------------------------------------------------
+
+  ES:
+    DB_TYPE: es
+    # 数据库连接信息
+    DB_INFO:
+      HOST: es-cn-0pp0r32zf000ipovd.elasticsearch.aliyuncs.com
+      USER: elastic
+      PASSWD: elastic_168
+      PORT: 9200
+
+      # --------------------------------------------------------
+      # es执行成功案件
+    wenshu_detail1:
+      TYPE: wenshu_detail_type
+      ALL_FIELDS:
+        title: 标题
+        yg_name: 原告
+        bg_name: 被告
+        case_type: 案件类型
+        case_reason: 案由
+        case_amt: 案件标的
+        court_name: 法院名称
+        judge_date: 判决日期
+        case_no: 案号
+        url: url
+      SUMMARY_FIELDS: ["title", "case_no"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        event_date: judge_date
+        event_company: yg_name
+        bg_name: bg_name
+
+      # 被告开庭公告
+    court_notice:
+      TYPE: court_notice_type
+      ALL_FIELDS:
+        party1: 原告
+        party2: 被告
+        bltn_type_name: 公告类型名称
+        province: 省份
+        publish_page: 刊登版面
+        bltn_no: 公告号
+        content: 内容
+        court_name: 法院名称
+        create_date: 创建日期
+        publish_date: 生成日期
+      SUMMARY_FIELDS: ["bltn_type_name", "publish_date"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        event_date: publish_date
+        event_company: party1
+        bg_name: party2
+
+  MONGO:
+    DB_TYPE: mongodb
+    # 数据库连接信息
+    DB_INFO:
+      HOST: ["dds-uf6ff5dfd9aef3641601-pub.mongodb.rds.aliyuncs.com:3717","dds-uf6ff5dfd9aef3642555-pub.mongodb.rds.aliyuncs.com:3717"]
+      REPLICASET: mgset-6501997
+      DBNAME: itslaw
+      USER: itslaw
+      PASSWD: itslaw_168
+    WinBidInfo:
+      EVENT_TYPE: 被告有财产线索
+      EVENT_SUBTYPE: 中标项目  # 中标数据
+      ALL_FIELDS:
+        _id: ID
+        title: 标题
+        url: url
+        new_bidder: 中标人
+        price: 中标价(万元)
+        province: 省份
+        bid_date: 中标日期
+      SUMMARY_FIELDS: ["new_bidder", "price"]
+      # 查询事件,事件发生日期,事件企业
+      QUERY_WORDS:
+        start_date: spider_time
+        event_date: bid_date
+        event_company: new_bidder
+
+# 保存数据库
+SAVE_DB_INFO:
+  HOST: rm-uf61r3m23ba1p5z3d.mysql.rds.aliyuncs.com
+  DBNAME: prism1
+  USER: wenshu
+  PORT: 3306
+  PASSWD: wenshu_168
+# es查询数据库
+ES_INFO:
+  HOST: es-cn-0pp0r32zf000ipovd.elasticsearch.aliyuncs.com
+  USER: elastic
+  PASSWD: elastic_168
+  PORT: 9200
+
+PASS_COMPANY: "[
+        阳光财产保险股份有限公司,
+        中国太平洋财产保险股份有限公司,
+        中国平安财产保险股份有限公司,
+        中华联合财产保险股份有限公司,
+        中国人民财产保险股份有限公司,
+        中国人寿财产保险股份有限公司,
+        中国大地财产保险股份有限公司,
+        中国人寿保险股份有限公司,
+        中国电信股份有限公司,
+        中国联合网络通信有限公司,
+        中国邮政集团公司,
+        中国工商银行股份有限公司,
+        中国联合网络通信有限公司,
+        中国人民财险保险股份有限公司,
+        中国人民财产保险股份有限公司廊坊市分公司,
+        国网山东省电力公司]"
+
+# 事件类型映射
+ALL_TYPE:
+  DEFENDANT_PROPERTY_CLUE: 被告有财产线索
+  DEFENDANT_ADDITIONAL_WENSHU: 被告有新增文书
+  DEFENDANT_ADDITIONAL_NOTICE: 被告有新增公告
+  PLAINTIFF_ADDITIONAL_WENSHU: 原告有新增文书
+  PLAINTIFF_ADDITIONAL_NOTICE: 原告有新增公告
+  LAND_MORTGAGE: 土地抵押
+  LAND_NOTICE: 土地公示
+  LAND_PURCHASE: 土地出让
+  LAND_TRANSFER: 土地转让
+  MINING_TRANSFER: 矿业权出让
+  WIN_BID_PROJECT: 中标项目
+  SUCCESSFUL_EXECUTION: 执行成功文书
+  COURT_SESSION: 开庭公告
+  BANKRUPT_NOTICE: 破产公告
+  TERMINATE_EXECUTION: 终结本次执行
+
+# 事件类型
+ALL_EVENT_TYPE:
+  DEFENDANT_PROPERTY_CLUE:
+    LAND_MORTGAGE: # 土地抵押
+      land_mortgage_info:
+        DB: BIG_DATA
+    LAND_NOTICE:
+      land_notice_info:
+        DB: BIG_DATA
+    LAND_PURCHASE:
+      land_purchase_info:
+        DB: BIG_DATA
+    LAND_TRANSFER:
+      land_transfer_info:
+        DB: BIG_DATA
+    MINING_TRANSFER:
+      win_bid_mineral:
+        DB: BIG_DATA
+    WIN_BID_PROJECT:
+      WinBidInfo:
+        DB: MONGO
+  DEFENDANT_ADDITIONAL_WENSHU:
+    SUCCESSFUL_EXECUTION:
+      wenshu_detail1:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"import_time": {"gte": "start", "lt": "end"}}},
+          {"match_phrase": {"judge_result": "终结执行程序"}}]}}
+  DEFENDANT_ADDITIONAL_NOTICE:
+    COURT_SESSION:
+      court_notice:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"create_date": {"gte": "start", "lt": "end"}}},
+          {"bool": {"should": [
+          {"wildcard": {"bltn_type_name": {"value": "*诉状*"}}},
+          {"wildcard": {"bltn_type_name": {"value": "*开庭*"}}}]}}]}}
+    BANKRUPT_NOTICE:
+      court_notice:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"create_date": {"gte": "start", "lt": "end"}}},
+          {"wildcard": {"bltn_type_name": {"value": "*破产*"}}}]}}
+
+  PLAINTIFF_ADDITIONAL_WENSHU:
+    TERMINATE_EXECUTION:
+      wenshu_detail1:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"import_time": {"gte": "start", "lt": "end"}}},
+          {"match_phrase": {"judge_result": "终结本次执行"}}
+          ]}}
+        SAVE_TABLE: yg_wenshu
+  PLAINTIFF_ADDITIONAL_NOTICE:
+    COURT_SESSION:
+      court_notice:
+        DB: ES
+        QUERY_CONDITION: {
+          "bool": {"must": [
+          {"range": {"create_date": {"gte": "start", "lt": "end"}}},
+          {"wildcard": {"bltn_type_name": {"value": "*开庭公告*"}}}]}}
+        SAVE_TABLE: yg_notice
+

+ 11 - 0
properties/start_date.yaml

@@ -0,0 +1,11 @@
+被告有新增公告破产公告court_notice: '2019-12-10'
+被告有财产线索土地公示land_notice_info: '2019-12-09'
+被告有新增文书执行成功文书wenshu_detail1: '2019-12-10'
+被告有财产线索矿业权出让win_bid_mineral: '2019-12-09'
+原告有新增文书终结本次执行wenshu_detail1: '2019-12-10'
+被告有财产线索土地抵押land_mortgage_info: '2019-12-06'
+被告有财产线索中标项目WinBidInfo: '2019-12-10'
+原告有新增公告开庭公告court_notice: '2019-12-10'
+被告有财产线索土地出让land_purchase_info: '2019-12-10'
+被告有新增公告开庭公告court_notice: '2019-12-10'
+被告有财产线索土地转让land_transfer_info: '2019-12-07'

+ 7 - 0
requiments.txt

@@ -0,0 +1,7 @@
+elasticsearch==5.5.3
+pymongo==3.9.0
+PyMySQL==0.9.3
+PyYAML==5.1.2
+ruamel.yaml==0.16.5
+ruamel.yaml.clib==0.1.2
+urllib3==1.25.3

+ 2 - 0
run.sh

@@ -0,0 +1,2 @@
+#!/bin/bash
+/home/lxh_spider/.virtualenvs/daily_event/bin/python /home/lxh_spider/DailyEvent/manage.py