فهرست منبع

风险数据维度修正

lyb 3 سال پیش
والد
کامیت
30e39d13e8

+ 23 - 8
src/main/scala/com/winhc/bigdata/spark/ng/jobs/args_company_job.scala

@@ -176,6 +176,8 @@ object args_company_job {
 
     , args_company_job("bankruptcy_open_case", Seq("applicant", "respondent", "public_date")
       , rowkey_udf = "md5(cleanup(concat_ws('',applicant,respondent,split_date(cast(public_date as String)))))"
+      , id_user_defined_rowkey = true
+
       , is_super_filter = false
       , where = "is_json_str(applicant_info) and is_json_str(respondent_info) "
       , explode_args = Seq(
@@ -186,12 +188,14 @@ object args_company_job {
     )
 
     , args_company_job("bankruptcy_open_announcement", Seq("title", "case_no")
-      , rowkey_udf = "md5(cleanup(concat_ws('',title,case_no)))"
+      , rowkey_udf = "concat_ws('_', main_id, md5(cleanup(concat_ws('',title,case_no))) )"
+
       , is_super_filter = false
     )
 
     , args_company_job("bankruptcy_judgment_document", Seq("title", "case_no")
-      , rowkey_udf = "md5(cleanup(concat_ws('',title,case_no)))"
+      , rowkey_udf = "concat_ws('_', main_id, md5(cleanup(concat_ws('',title,case_no))) )"
+
       , is_super_filter = false
     )
 
@@ -205,30 +209,41 @@ object args_company_job {
       )
     )
 
-    , args_company_job("company_punishment_info", Seq("punish_number")
-      , rowkey_udf = "md5(cleanup(concat_ws('',punish_number)))"
+    , args_company_job("company_punishment_info", Seq("company_name", "punish_number")
+      , rowkey_udf = "md5(cleanup(concat_ws('',company_name, punish_number)))"
       , is_super_filter = false
     )
 
-    , args_company_job("company_punishment_info_creditchina", Seq("punish_number")
-      , rowkey_udf = "md5(cleanup(concat_ws('',punish_number)))"
+    , args_company_job("company_punishment_info_creditchina", Seq("company_name", "punish_number")
+      , rowkey_udf = "md5(cleanup(concat_ws('',company_name, punish_number)))"
       , is_super_filter = false
     )
 
     , args_company_job("company_brief_cancel_announcement", Seq("company_id")
       , rowkey_udf = "md5(cleanup(concat_ws('',company_id)))"
+      , id_user_defined_rowkey = true
       , is_super_filter = false
     )
 
     , args_company_job("company_brief_cancel_announcement_objection", Seq("main_id", "objection_date", "objection_content")
-      , rowkey_udf = "md5(cleanup(concat_ws('',main_id, split_date(cast(objection_date as String)), objection_content)))"
+      , rowkey_udf = "concat_ws('_', main_id, md5(cleanup(concat_ws('',main_id, split_date(cast(objection_date as String)), objection_content))) )"
       , is_super_filter = false
     )
 
     , args_company_job("company_brief_cancel_announcement_result", Seq("main_id", "announcement_apply_date", "brief_cancel_result")
-      , rowkey_udf = "md5(cleanup(concat_ws('',main_id, split_date(cast(announcement_apply_date as String)), brief_cancel_result)))"
+      , rowkey_udf = "concat_ws('_', main_id, md5(cleanup(concat_ws('',main_id, split_date(cast(announcement_apply_date as String)), brief_cancel_result))) )"
       , is_super_filter = false
     )
+
+    , args_company_job("company_lawsuit", Seq("case_no", "title")
+      , rowkey_udf = "md5(cleanup(concat_ws('', case_no, title)))"
+      , is_super_filter = false
+      , where = "is_json_str(litigant_info) and is_json_str(defendant_info) and is_json_str(plaintiff_info)"
+      , explode_args = Seq(
+        explode_args("plaintiff_info", "$.litigant_id", "plaintiff_info_id_explode")
+        , explode_args("defendant_info", "$.litigant_id", "defendant_info_id_explode")
+      )
+    )
   )
 
   def get_args_company_job(tn: String): args_company_job = {

+ 5 - 2
src/main/scala/com/winhc/bigdata/spark/ng/utils/export_company_index_2_es.scala

@@ -238,13 +238,16 @@ object export_company_index_2_es {
     , export_2_es_args("bankruptcy_open_case"
       , "rowkey,case_no,case_type,agency_court,applicant,applicant_info,respondent,respondent_info,public_date,deleted".split(","))
     , export_2_es_args("auction_tracking"
-      , "rowkey,auction_items_id,company_info,case_no,auction_title,initial_price,start_time,apply_count,url,deleted".split(","))
+      , "rowkey,auction_items_id,company_info,case_no,auction_title,initial_price,start_time,apply_count,url,deleted".split(",")
+      , handles = Seq(field_handle(field_name = "initial_price", handle = "round(initial_price, 2)")))
     , export_2_es_args("company_punishment_info"
-      , "rowkey,company_id,company_name,person_name,pid,type,department_name,decision_date,publish_date,deleted".split(","))
+      , "rowkey,company_id,punish_number,company_name,person_name,pid,type,department_name,decision_date,publish_date,deleted".split(","))
     , export_2_es_args("company_punishment_info_creditchina"
       , "rowkey,company_id,company_name,punish_number,punish_name,person_name,pid,decision_date,status,deleted".split(","))
     , export_2_es_args("company_brief_cancel_announcement"
       , "rowkey,company_id,deleted".split(","))
+    , export_2_es_args("company_lawsuit"
+      , "rowkey,case_id,doc_id,case_no,doc_type,case_type,case_reason_level2,case_reason_level3,case_reason_level4,case_reason,case_reason_levelnum,case_stage,case_amt,court_name,court_province,court_city,court_level,judge_date,judge_year,judge_result,title,spider_date,update_date,plaintiff_info,defendant_info,litigant_info,all_lawyer,pub_date,pub_year,deleted".split(","))
   )