|
@@ -0,0 +1,587 @@
|
|
|
+package com.winhc.bigdata.spark.ng.judicial
|
|
|
+
|
|
|
+import com.winhc.bigdata.spark.udf._
|
|
|
+import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
|
|
|
+import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
|
|
|
+import org.apache.commons.lang3.StringUtils
|
|
|
+import org.apache.spark.sql.SparkSession
|
|
|
+
|
|
|
+import scala.collection.mutable
|
|
|
+
|
|
|
/**
 * Aggregation of judicial cases, new version (iteration).
 *
 * Describes how one source table is projected onto the columns of the
 * judicial-case pre-processing table.
 *
 * @param tableName name of the source table (without the `ads_` / `inc_ads_` prefix)
 * @param rowkey    name of the column used to de-duplicate rows of the source table
 * @param cols_map  target column name -> SQL expression evaluated against the source table;
 *                  target columns missing from this map are selected under their own name
 * @author π
 * @date 2021/8/12 16:46
 */
case class args_case_v2(tableName: String = "",
                        rowkey: String = "rowkey",
                        cols_map: Map[String, String] = Map.empty)

/** Static per-table configuration plus lookup helpers for [[args_case_v2]]. */
object args_case_v2 {

  /** Source table name -> flag string. */
  val tn_mapping = Map[String, String](
    "company_lawsuit" -> "0",
    "company_court_open_announcement" -> "1",
    "company_court_announcement" -> "2",
    "company_dishonest_info" -> "3",
    "company_send_announcement" -> "4",
    "company_zxr_restrict" -> "5",
    "company_zxr_final_case" -> "6",
    "company_zxr" -> "7",
    "company_court_register" -> "8"
  )

  /** One configuration entry per supported source table. */
  val tab_args = Seq(
    // judgment documents (amounts in units of 10,000 yuan)
    args_case_v2(
      tableName = "wenshu_detail_v2",
      cols_map = Map[String, String](
        "flag" -> "0",
        "case_stage" -> "case_stage(case_no)",
        "yg_name" -> "plaintiff_info",
        "bg_name" -> "defendant_info",
        "date" -> "judge_date",
        "detail_id" -> "rowkey",
        "case_amt" -> "case_amt",
        "judge_amt" -> "judge_amt",
        "exec_amt" -> "null",
        "data" -> "map('date',judge_date,'party_title',party_title)",
        "all_name" -> "litigant_info",
        "detail_info" -> "to_json(named_struct('flag', '0', 'date',judge_date, 'detail_id', rowkey, 'doc_type', doc_type, 'judge_result', judge_result))"
      )
    ),
    // court session announcements
    args_case_v2(
      tableName = "company_court_open_announcement",
      cols_map = Map[String, String](
        "flag" -> "1",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court",
        "case_reason" -> "case_reason",
        "yg_name" -> "plaintiff_info",
        "bg_name" -> "defendant_info",
        "date" -> "start_date",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "null",
        "data" -> "map('date',start_date)",
        "all_name" -> "litigant_info",
        "detail_info" -> "to_json(named_struct('flag', '1', 'date',start_date, 'detail_id', rowkey, 'court', court, 'court_room',court_room))"
      )
    ),
    // court announcements
    args_case_v2(
      tableName = "company_court_announcement",
      cols_map = Map[String, String](
        "flag" -> "2",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court_name",
        "case_reason" -> "null",
        "yg_name" -> "plaintiff_info",
        "bg_name" -> "litigant_info",
        "date" -> "concat_ws(' ',publish_date,'00:00:00')",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "null",
        "data" -> "map('date',concat_ws(' ',publish_date,'00:00:00'))",
        "all_name" -> "null",
        "detail_info" -> "to_json(named_struct('flag', '2', 'date',concat_ws(' ',publish_date,'00:00:00'), 'detail_id', rowkey, 'announcement_type', announcement_type, 'court_name', court_name))"
      )
    ),
    // dishonest debtors
    args_case_v2(
      tableName = "company_dishonest_info",
      cols_map = Map[String, String](
        "flag" -> "3",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court",
        "case_reason" -> "null",
        "yg_name" -> "null",
        "bg_name" -> " to_json(array(named_struct('litigant_id',COALESCE(keyno,''),'name',name)))",
        "date" -> "pub_date",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "null",
        "data" -> "map('date',reg_time)",
        "all_name" -> "null",
        "detail_info" -> "to_json(named_struct('flag', '3', 'date', pub_date, 'detail_id', rowkey, 'name', array(named_struct('litigant_id',COALESCE(keyno,''),'name',name)), 'performance', performance, 'action_content', action_content ))"
      )
    ),
    // service (delivery) announcements
    args_case_v2(
      tableName = "company_send_announcement",
      cols_map = Map[String, String](
        "flag" -> "4",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court",
        "case_reason" -> "case_reason",
        "yg_name" -> "plaintiff_info",
        "bg_name" -> "defendant_info",
        "date" -> "start_date",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "null",
        "data" -> "map('date',start_date)",
        "all_name" -> "litigant_info",
        "detail_info" -> "to_json(named_struct('flag', '4', 'date',start_date, 'detail_id', rowkey, 'defendant_info', json_array(defendant_info), 'plaintiff_info', json_array(plaintiff_info)))"
      )
    ),
    // consumption restrictions
    args_case_v2(
      tableName = "company_zxr_restrict",
      cols_map = Map[String, String](
        "flag" -> "5",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court_name",
        "case_reason" -> "null",
        "yg_name" -> "null",
        "bg_name" -> "to_json(array(named_struct('litigant_id',COALESCE(company_id,'') ,'name',COALESCE(company_name,'')) ,named_struct('litigant_id',COALESCE(pid,''),'name',COALESCE(person_name,'')) ))",
        "date" -> "case_create_time",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "null",
        "data" -> "map('date',case_create_time)",
        "all_name" -> "null",
        "detail_info" -> "to_json(named_struct('flag', '5', 'date', case_create_time, 'detail_id', rowkey, 'person', array(named_struct('litigant_id',COALESCE(pid,''),'person_name',person_name)), 'company', array(named_struct('litigant_id', company_id, 'company_name',company_name)) ))"
      )
    ),
    // terminated enforcement cases
    args_case_v2(
      tableName = "company_zxr_final_case",
      cols_map = Map[String, String](
        "flag" -> "6",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court_name",
        "case_reason" -> "null",
        "yg_name" -> "null",
        "bg_name" -> "to_json(array(named_struct('litigant_id',COALESCE(keyno,''),'name',name)))",
        "date" -> "case_create_time",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "null",
        "data" -> "map('date',case_create_time)",
        "all_name" -> "null",
        "detail_info" -> "to_json(named_struct('flag', '6', 'date', case_create_time, 'detail_id', rowkey, 'name', array(named_struct('litigant_id',COALESCE(keyno,''), 'name',name)), 'exec_amount', amt_div(exec_amount, 10000), 'no_exec_amount', amt_div(no_exec_amount, 10000) ))"
      )
    ),
    // persons subject to enforcement
    args_case_v2(
      tableName = "company_zxr",
      cols_map = Map[String, String](
        "flag" -> "7",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court",
        "case_reason" -> "null",
        "yg_name" -> "null",
        "bg_name" -> "to_json(array(named_struct('litigant_id',COALESCE(keyno,''),'name',name)))",
        "date" -> "case_create_time",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "amt_div(exec_money,10000)",
        "data" -> "map('date', case_create_time, 'exec_info', to_json(array(named_struct('litigant_id',COALESCE(keyno,''),'name',name,'exec_money',amt_div(exec_money,10000),'date',case_create_time ))) )",
        "all_name" -> "null",
        "detail_info" -> "to_json(named_struct('flag', '7', 'date', case_create_time, 'detail_id', rowkey, 'name', array(named_struct('litigant_id',COALESCE(keyno,''),'name',name)), 'exec_money', amt_div(exec_money,10000) ))"
      )
    ),
    // case filing information
    args_case_v2(
      tableName = "company_court_register",
      cols_map = Map[String, String](
        "flag" -> "8",
        "title" -> "null",
        "case_type" -> "case_type(case_no)",
        "case_stage" -> "case_stage(case_no)",
        "court_name" -> "court",
        "case_reason" -> "case_reason",
        "yg_name" -> "plaintiff_info",
        "bg_name" -> "defendant_info",
        "date" -> "filing_date",
        "detail_id" -> "rowkey",
        "case_amt" -> "null",
        "judge_amt" -> "null",
        "exec_amt" -> "null",
        "data" -> "map('date',filing_date)",
        "all_name" -> "litigant_info",
        "detail_info" -> "to_json(named_struct('flag', '8', 'date',filing_date, 'detail_id', rowkey, 'court', court, 'judge', judge))"
      )
    )
  )

  /**
   * Looks up the configuration of a source table.
   *
   * @param tn source table name, compared with `tableName` of every entry in [[tab_args]]
   * @return the first matching entry
   * @throws NullPointerException when no entry matches (exception type kept for existing
   *                              callers); the message names the table that was requested
   */
  def get_job_args(tn: String): args_case_v2 = {
    // `==` is null-safe, so a null `tn` ends in the descriptive exception below
    // instead of a message-less NullPointerException from `tn.equals`.
    tab_args
      .find(_.tableName == tn)
      .getOrElse(throw new NullPointerException(s"tn is not found: $tn"))
  }

  /** Returns an entry with all default values (empty table name, no column mapping). */
  def get_job_args(): args_case_v2 = {
    args_case_v2()
  }
}
|
|
|
+
|
|
|
+
|
|
|
/** Command-line entry point of the judicial-case aggregation job. */
object JudicialCaseRelationAggsV2 {

  /**
   * Parses the arguments, starts a Spark session and runs the requested step.
   *
   * Accepted argument lists:
   *  - `project tn c` (three arguments)
   *  - `project c`    (two arguments; `tn` then falls back to `wenshu_detail_v2`)
   *
   * `tn` may be `all`, in which case the step is run once for every table listed in
   * `args_case_v2.tab_args`. Any other argument count prints a hint and exits with -1.
   *
   * @param args raw command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val (project, requestedTn, c) = args match {
      case Array(p1, p2, p3) => (p1, p2, p3)
      case Array(p1, p2) => (p1, "", p2)
      case _ =>
        println("please check project tn c!")
        sys.exit(-1)
    }
    println(
      s"""
         |project: $project
         |tn: $requestedTn
         |c: $c
         |""".stripMargin)

    val config = mutable.Map(
      "spark.hadoop.odps.project.name" -> s"$project",
      "spark.hadoop.odps.spark.local.partition.amt" -> "10000"
    )
    val spark: SparkSession = SparkUtils.InitEnv(this.getClass.getSimpleName, config)

    // A blank table name selects the default source table.
    val tn = if (StringUtils.isBlank(requestedTn)) "wenshu_detail_v2" else requestedTn

    // Stop the session even when a step throws, so it is not leaked.
    try {
      if ("all".equals(tn)) {
        args_case_v2.tab_args.map(_.tableName).foreach(t => run(project, t, c, spark))
      } else {
        run(project, tn, c, spark)
      }
    } finally {
      spark.stop()
    }
  }

  /**
   * Builds the job for one table and dispatches to the step named by `c`
   * (`pre_calc`, `calc_mapping` or `calc`). Any other value prints a message and
   * exits the JVM with -1.
   */
  private def run(project: String, tn: String, c: String, spark: SparkSession): Unit = {
    val job = JudicialCaseRelationAggsV2(spark, project, args_case_v2.get_job_args(tn))
    c match {
      case "pre_calc" => job.pre_calc()
      case "calc_mapping" => job.calc_mapping()
      case "calc" => job.calc()
      case _ =>
        println("not fun to run !")
        sys.exit(-1)
    }
  }
}
|
|
|
+
|
|
|
/**
 * Judicial-case aggregation job for one source table.
 *
 * Constructing an instance already talks to the cluster: it reads the column list of the
 * pre-processing table, looks up partitions through `BaseUtil`, and calls
 * `case_no_trim_udf_v2()` and `prepareFunctions(spark)` (inherited helpers; presumably they
 * register the SQL functions used below -- confirm in the mixed-in traits).
 *
 * @param s            Spark session used for every statement
 * @param project      project name used to qualify all table names
 * @param args_case_v2 source-table configuration (table name, rowkey column, column mapping)
 */
case class JudicialCaseRelationAggsV2(s: SparkSession, project: String, args_case_v2: args_case_v2
                                     ) extends LoggingUtils with CompanyMapping with BaseFunc with CourtRank {
  override protected val spark: SparkSession = s

  // NOTE: the table names below start with a space, exactly as in the original strings.

  // pre-processing table
  val ads_judicial_case_relation_pre = s" $project.ads_judicial_case_relation_pre_v2"
  // id-replacement table
  val ads_judicial_case_relation_id = s" $project.ads_judicial_case_relation_id_v2"
  // id mapping table
  val ads_judicial_case_id_mapping = s" $project.ads_judicial_case_id_mapping"
  // id mapping table (raw table)
  val ods_judicial_case_id_mapping = s" $project.ods_judicial_case_id_mapping"
  // main table
  val ads_judicial_case_relation_r1 = s" $project.ads_judicial_case_relation_r1_v2"
  //  // detail table
  //  val ads_judicial_case_relation_r2 = s" $project.ads_judicial_case_relation_r2"
  // detail table (enhanced)
  val ads_judicial_case_relation_r3 = s" $project.ads_judicial_case_relation_r3_v2"
  // case removal table (main table)
  val ads_judicial_case_id_mapping_r1_deleted = s" $project.ads_judicial_case_id_mapping_r1_deleted"
  // case removal table (detail table)
  val ads_judicial_case_id_mapping_r3_deleted = s" $project.ads_judicial_case_id_mapping_r3_deleted"
  // case relation table
  val bds_judicial_case_relation = s" $project.bds_judicial_case_relation"
  val ads_judicial_case_node_kafka = s" $project.ads_judicial_case_node_kafka"
  val ads_judicial_case_relation_kafka = s" $project.ads_judicial_case_relation_kafka"

  val ads_judicial_case_node = s" $project.ads_judicial_case_node"
  val ads_judicial_case_relation = s" $project.ads_judicial_case_relation"

  // blacklist table
  val ads_case_id_big = "winhc_ng.ads_case_id_big"

  val update = "update"
  val incr = "incr"

  private val cols_map: Map[String, String] = args_case_v2.cols_map
  private val rowkey: String = args_case_v2.rowkey
  private val tableName: String = args_case_v2.tableName

  // full and incremental source tables of the configured table
  val ads_table = s" $project.ads_$tableName"
  val inc_ads_table = s" $project.inc_ads_$tableName"

  // target columns of the pre-processing table, without its partition columns
  val pre_cols = getColumns(ads_judicial_case_relation_pre).diff(Seq("ds", "tn"))
  // Kept as a public `var` so the class interface stays unchanged.
  var last_ds = BaseUtil.getPartion(ads_judicial_case_relation_pre, tableName, spark)
  // partition written by this run
  val calc_ds = BaseUtil.getYesterday()

  // When the partition for this run already exists (re-run), compare against the one before it.
  if (calc_ds.equals(last_ds)) {
    last_ds = BaseUtil.getSecondPartion(ads_judicial_case_relation_pre, tableName, spark)
  }
  // No earlier partition: do a full load instead of an incremental one.
  val is_incr = !StringUtils.isBlank(last_ds)

  // select list: mapped expression aliased to the target column, or the column itself
  val cols = pre_cols.map(c => cols_map.get(c).map(expr => s"$expr as $c").getOrElse(c))

  case_no_trim_udf_v2()
  prepareFunctions(spark)

  val sort = get_partition_order_by()

  /**
   * Writes partition (`calc_ds`, `tableName`) of the pre-processing table.
   *
   * Reads the incremental source table (partitions after `last_ds` when running
   * incrementally, otherwise all partitions with `ds > 0`), additionally unions the full
   * source table on a full load, keeps the first row per `rowkey` in `sort` order and
   * projects it with `cols`.
   */
  def pre_calc(): Unit = {
    // On a full load the full source table is unioned in front of the incremental one.
    val all_sql =
      if (is_incr) ""
      else
        s"""
           |SELECT *
           |FROM $ads_table
           |WHERE ds > 0
           |UNION ALL
           |""".stripMargin

    // judgment documents (and every other configured source table)
    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $ads_judicial_case_relation_pre PARTITION(ds='$calc_ds',tn='$tableName')
         |SELECT
         |${pre_cols.mkString(",")}
         |from (
         | SELECT ${cols.mkString(",")}
         | ,ROW_NUMBER() OVER(PARTITION BY $rowkey ORDER BY $sort) AS num
         | from (
         | $all_sql
         | SELECT *
         | FROM $inc_ads_table
         | WHERE ${if (is_incr) s"ds > $last_ds" else "ds > 0"}
         | )
         | )
         |WHERE num = 1
         |${if (isWindows) "LIMIT 1000" else ""}
         |""".stripMargin).show(100, false)

    // if the partition does not exist, insert an empty partition
    addEmptyPartitionOrSkipPlus(ads_judicial_case_relation_pre, calc_ds, tableName)
  }

  /**
   * Writes partition `calc_ds` of the id mapping table and of the two "deleted" tables.
   *
   *  1. Copies the raw mapping of `calc_ds` into the ads mapping table, keeping one row per
   *     (`rowkey`, `flag_tn(tn)`).
   *  2. Main-table deletions: rows whose id in `calc_ds` differs from the latest id recorded
   *     in an earlier partition for the same (`rowkey`, `tn`); one row per old id.
   *  3. Detail-table deletions: latest earlier detail rows whose `judicase_id` equals such an
   *     old id; one row per detail id.
   */
  def calc_mapping(): Unit = {
    // convert ods to ads
    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $ads_judicial_case_id_mapping PARTITION(ds='$calc_ds')
         |SELECT id,rowkey,tn
         |FROM (
         | SELECT component_id id
         | ,rowkey
         | ,flag_tn(tn) tn
         | ,ROW_NUMBER() OVER (PARTITION BY rowkey,flag_tn(tn) ORDER BY ds DESC, update_time DESC) num
         | FROM $ods_judicial_case_id_mapping
         | WHERE ds = '$calc_ds'
         |)
         |WHERE num = 1
         |""".stripMargin)

    // may contain duplicates TODO
    // main-table deletions
    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $ads_judicial_case_id_mapping_r1_deleted PARTITION(ds='$calc_ds')
         |SELECT new_id, old_id, rowkey, tn, deleted
         |FROM (
         | SELECT
         | a.id new_id
         | ,b.id old_id
         | ,a.rowkey
         | ,a.tn
         | ,1 AS deleted
         | ,ROW_NUMBER() OVER (PARTITION BY b.id ORDER BY ds DESC) num2
         | FROM (
         | SELECT *
         | ,md5(concat_ws('',rowkey,tn)) row_id
         | FROM $ads_judicial_case_id_mapping
         | WHERE ds = '$calc_ds'
         | ) a
         | JOIN (
         | SELECT id, row_id, num
         | FROM (
         | SELECT id
         | ,md5(concat_ws('',rowkey,tn)) row_id
         | ,ROW_NUMBER() OVER (PARTITION BY rowkey,tn ORDER BY ds DESC) num
         | FROM $ads_judicial_case_id_mapping
         | WHERE ds < '$calc_ds'
         | )
         | WHERE num = 1
         | ) b
         | ON a.row_id = b.row_id
         | WHERE a.id <> b.id
         |)
         |WHERE num2 = 1
         |""".stripMargin)

    // detail-table deletions
    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $ads_judicial_case_id_mapping_r3_deleted PARTITION(ds='$calc_ds')
         |SELECT id, rowkey, tn, deleted
         |FROM (
         | SELECT b.id,a.rowkey,a.tn, 1 as deleted
         | ,ROW_NUMBER() OVER (PARTITION BY b.id ORDER BY tn DESC) num2
         | FROM (
         | SELECT rowkey, tn, old_id
         | FROM $ads_judicial_case_id_mapping_r1_deleted
         | WHERE ds = '$calc_ds'
         | ) a
         | JOIN (
         | SELECT id, judicase_id
         | FROM (
         | SELECT id, judicase_id
         | ,ROW_NUMBER() OVER (PARTITION BY id ORDER BY ds DESC) num
         | FROM $ads_judicial_case_relation_r3
         | WHERE ds < '$calc_ds'
         | )
         | WHERE num = 1
         | ) b
         | ON a.old_id = b.judicase_id
         |)
         |WHERE num2 = 1
         |""".stripMargin)

    // make sure the partitions exist even when nothing was written
    addEmptyPartitionOrSkip(ads_judicial_case_id_mapping_r1_deleted, calc_ds)
    addEmptyPartitionOrSkip(ads_judicial_case_id_mapping_r3_deleted, calc_ds)

  }

  /**
   * Writes partition `calc_ds` of the detail table (r3) and of the main table (r1).
   *
   * The detail table is aggregated per (`judicase_id`, `case_no`) from partition `calc_ds`
   * of the id-replacement table; the main table is aggregated per `judicase_id` from the
   * detail partition just written.
   *
   * NOTE(review): the statement that used to fill the id-replacement table is commented out
   * below, so that partition is presumably produced by another job -- confirm.
   */
  def calc(): Unit = {
    prepareFunctions(spark)
    case_no_trim_udf_v2()
    registerCourtRank()
    spark.udf.register("name_aggs", new NameAggsPlusV2(1000))
    spark.udf.register("case_reason", new CaseReasonAggs(1000))
    spark.udf.register("all_name_plus_v2", new AllNamePlusV2(1000))
    spark.udf.register("case_amt_plus_v2", new CaseAmtAggsPlusV2(1000))

    // detail: judgment document id
    // replace the judicial case id
//    sql(
//      s"""
//         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $ads_judicial_case_relation_id PARTITION(ds='$calc_ds')
//         |SELECT a.id
//         | ,b.flag,b.title,b.case_type,b.case_reason,b.case_no,b.court_name,b.case_stage,b.lable,b.detail
//         | ,b.yg_name,b.bg_name,b.all_name,b.date,b.detail_id,b.case_amt,b.case_id,b.tn,b.data
//         |FROM (
//         | SELECT id, concat_ws('',rowkey,tn) row_id
//         | FROM $ads_judicial_case_id_mapping
//         | WHERE ds = '$calc_ds'
//         | ) a
//         |JOIN (
//         | SELECT flag
//         | ,title
//         | ,case_type(case_no) case_type
//         | ,adjust_reason(case_reason) case_reason
//         | ,case_no_trim(case_no) as case_no
//         | ,court_name
//         | ,case_stage(case_no) case_stage
//         | ,case_label(flag) lable
//         | ,to_json(named_struct('flag', flag, 'date',date, 'detail_id', detail_id, 'name', json_array(bg_name)) ) detail
//         | ,yg_name
//         | ,bg_name
//         | ,merge_json(yg_name, bg_name, all_name) all_name
//         | ,date
//         | ,detail_id
//         | ,case_amt
//         | ,case_id
//         | ,tn
//         | ,data
//         | ,row_id
//         | FROM (
//         | SELECT *, concat_ws('',detail_id,tn) row_id
//         | ,ROW_NUMBER() OVER (PARTITION BY detail_id,tn ORDER BY ds DESC) num
//         | FROM $ads_judicial_case_relation_pre
//         | WHERE ds > 0 AND case_no_trim(case_no) is not null AND date is not null
//         | )
//         | WHERE num = 1
//         | ) b
//         |ON a.row_id = b.row_id
//         |""".stripMargin).show(20, false)

    // detail table
    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $ads_judicial_case_relation_r3 PARTITION(ds='$calc_ds')
         |SELECT
         | id,
         | judicase_id,
         | title ,
         | case_type ,
         | case_reason ,
         | case_no ,
         | court_name ,
         | case_stage ,
         | lable ,
         | detail ,
         | name_aggs['yg_name'] yg_name,
         | name_aggs['bg_name'] bg_name,
         | last_date ,
         | 0 deleted ,
         | all_name ,
         | court_level,
         | case_amt,
         | judge_amt,
         | exec_info
         |FROM
         |(
         |SELECT md5(concat_ws('',concat_ws('',judicase_id),CLEANUP(case_no))) id
         | ,judicase_id
         | ,max(title) title
         | ,case_type(max(case_no)) as case_type
         | ,case_reason(case_reason,date,flag) case_reason
         | ,case_no
         | ,concat_ws(',',collect_set(court_name)) court_name
         | ,case_stage(max(case_no)) as case_stage
         | ,trim_black(concat_ws(',',max(case_type),collect_set(lable))) lable
         | ,concat('[',concat_ws(',',collect_set(detail)),']') detail
         | ,max(date) last_date
         | ,name_aggs(yg_name,bg_name,flag,data['date']) name_aggs
         | ,all_name_plus_v2(all_name) all_name
         | ,trim_black(concat_ws(',',collect_set(court_level))) court_level
         | ,max(case_amt) as case_amt
         | ,max(judge_amt) as judge_amt
         | ,case_amt_plus_v2(data['exec_info']) as exec_info
         |FROM (
         | SELECT a.*,court_level(court_name) court_level
         | FROM (
         | SELECT judicase_id
         | ,flag
         | ,title
         | ,case_type(case_no) case_type
         | ,adjust_reason(case_reason) case_reason
         | ,case_no_trim(case_no) as case_no
         | ,court_name
         | ,case_stage(case_no) case_stage
         | ,case_label(flag) lable
         | ,detail
         | ,yg_name
         | ,bg_name
         | ,all_name
         | ,date
         | ,detail_id
         | ,case_amt
         | ,judge_amt
         | ,tn
         | ,data
         | FROM $ads_judicial_case_relation_id
         | WHERE ds = '$calc_ds' AND length(case_label(flag)) > 0 AND case_no_trim(case_no) is not null AND date is not null
         | )a
         |)
         |GROUP BY judicase_id
         | ,case_no
         |) x
         |""".stripMargin).show(10, false)

    // judicial case main table
    sql(
      s"""
         |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $ads_judicial_case_relation_r1 PARTITION(ds='$calc_ds')
         |SELECT
         | judicase_id,
         | title ,
         | case_type ,
         | case_reason ,
         | case_no ,
         | court_name ,
         | case_stage ,
         | lable ,
         | name_aggs['yg_name'] yg_name,
         | name_aggs['bg_name'] bg_name,
         | all_name,
         | case_info ,
         | judge_info ,
         | exec_info ,
         | date ,
         | court_level ,
         | 0 deleted
         |FROM
         |(
         |SELECT judicase_id
         | ,max(title) title
         | ,concat_ws(',',collect_set(case_type)) case_type
         | ,case_reason(case_reason,date,'0') case_reason
         | ,concat_ws(',',collect_set(case_no)) case_no
         | ,trim_black(concat_ws(',',collect_set(court_name))) court_name
         | ,max(last_stage) case_stage
         | ,trim_black(concat_ws(',', collect_set(lable)) ) lable
         | -- ,max(first_case_amt) case_amt
         | ,max(date) AS date
         | ,trim_black(concat_ws(',',collect_set(court_level))) court_level
         | ,name_aggs(yg_name,bg_name,'0',date) name_aggs
         | ,all_name_plus_v2(all_name) all_name
         | ,amt_merge(concat_ws('&',collect_set(case_info))) case_info
         | ,amt_merge(concat_ws('&',collect_set(judge_info))) judge_info
         | ,case_amt_plus_v2(exec_info) as exec_info
         |FROM (
         | SELECT a.*
         | FROM (
         | SELECT judicase_id,title,case_type,case_reason,case_no,court_name,case_stage,lable,yg_name,bg_name,all_name,date,case_amt,judge_amt,exec_info
         | ,court_level(court_name) court_level
         | ,concat_ws('|',case_stage,coalesce(case_amt,0)) as case_info
         | ,concat_ws('|',case_stage,coalesce(judge_amt,0)) as judge_info
         | ,first_value(case_stage) OVER (PARTITION BY judicase_id ORDER BY date DESC ) AS last_stage
         | FROM $ads_judicial_case_relation_r3
         | WHERE ds = '$calc_ds'
         | ) a
         | )
         |GROUP BY judicase_id
         |)x
         |""".stripMargin).show(20, false)

    // if the partition does not exist, insert an empty partition
    addEmptyPartitionOrSkip(ads_judicial_case_relation_r1, calc_ds)
    addEmptyPartitionOrSkip(ads_judicial_case_relation_r3, calc_ds)
  }

  /**
   * Builds the ORDER BY list used by `pre_calc` to pick one row per `rowkey`.
   *
   * Newest partition first; ties are broken by `update_time` when the pre-processing table
   * has that column, otherwise by `update_date` when it has that one. The previous version
   * ordered by `update_time` even when only `update_date` was present.
   */
  private def get_partition_order_by(): String = {
    if (pre_cols.contains("update_time")) {
      " ds DESC,update_time DESC "
    } else if (pre_cols.contains("update_date")) {
      " ds DESC,update_date DESC "
    } else {
      " ds DESC "
    }
  }

  /**
   * Returns the last partition of `tabName`, or the one before it when the last partition
   * equals yesterday's date (`BaseUtil.getYesterday()`).
   *
   * @param tabName table to inspect
   * @param default value passed to the partition helpers as their fallback
   */
  def calc_last_ds(tabName: String, default: String = "0"): String = {
    val latest = getLastPartitionsOrElse(tabName, default)
    if (latest.equals(BaseUtil.getYesterday())) {
      getSecondLastPartitionOrElse(tabName, default)
    } else {
      latest
    }
  }
}
|