Pārlūkot izejas kodu

司法案件优化

xufei 4 gadi atpakaļ
vecāks
revīzija
a6a65cf3e1

+ 2 - 0
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyForCid.scala

@@ -35,6 +35,8 @@ object CompanyForCid {
 //  winhc_eci_dev ods_company_dishonest_info new_cid,case_no
 
 //  winhc_eci_dev wenshu_detail_combine new_cid,case_no,cname,name_type
+
+//  winhc_eci_dev company_tax new_cid,year
   def main(args: Array[String]): Unit = {
     val Array(space, sourceTable, cols) = args
 

+ 1 - 0
src/main/scala/com/winhc/bigdata/spark/jobs/CompanyIncrForCid.scala

@@ -24,6 +24,7 @@ object CompanyIncrForCid {
   // winhc_eci_dev company_illegal_info new_cid,put_reason,put_date,put_department
   //  winhc_eci_dev company_finance new_cid,round,money
   // winhc_eci_dev company_dishonest_info new_cid,case_no
+  //winhc_eci_dev company_tax new_cid,year
   def main(args: Array[String]): Unit = {
     val Array(project, tableName, dupliCols) = args
     println(

+ 7 - 7
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPre10.scala

@@ -94,7 +94,7 @@ case class JudicialCaseRelationPre10(s: SparkSession, project: String
          |                WHERE   length(case_no) >0 AND ds> '0'
          |                UNION ALL
          |                SELECT
-         |                        "7" AS flag
+         |                        "-1" AS flag
          |                        ,concat_ws('',cname,'被执行人') AS title
          |                        ,concat_ws('',case_type(case_no)) AS case_type
          |                        ,NULL AS case_reason
@@ -110,7 +110,7 @@ case class JudicialCaseRelationPre10(s: SparkSession, project: String
          |                WHERE   length(gist_id) >0 AND ds> '0'
          |                UNION ALL
          |                SELECT
-         |                        "7" AS flag
+         |                        "-1" AS flag
          |                        ,concat_ws('',cname,'被执行人') AS title
          |                        ,concat_ws('',case_type(case_no)) AS case_type
          |                        ,NULL AS case_reason
@@ -164,7 +164,7 @@ case class JudicialCaseRelationPre10(s: SparkSession, project: String
          |from (
          |      select
          |      COALESCE(C.judicase_id,md5(cleanup(A.case_no))) as judicase_id
-         |      ,"10" as flag
+         |      ,flag
          |      ,concat_ws('',A.cname,'被执行人') AS title
          |      ,concat_ws('',case_type(A.case_no)) as case_type
          |      ,NULL AS case_reason
@@ -178,19 +178,19 @@ case class JudicialCaseRelationPre10(s: SparkSession, project: String
          |      ,exec_money as case_amt
          |      ,row_number() over(partition by A.rowkey,A.case_no order by update_time desc) num
          |      from (
-         |        select case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money
+         |        select case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money,"10" as flag
          |        from $project.ads_company_zxr_person
          |        where length(case_no) > 0 and ds>'0'
          |        union all
-         |        select case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money
+         |        select case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money,"10" as flag
          |        from $project.inc_ads_company_zxr_person
          |        where length(case_no) > 0 and ds>'0'
          |        union all
-         |        select gist_id as case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money
+         |        select gist_id as case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money,"-1" as flag
          |        from $project.ads_company_zxr_person
          |        where length(gist_id) > 0 and ds>'0'
          |        union all
-         |        select gist_id as case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money
+         |        select gist_id as case_no,court,cname,case_create_time,rowkey,update_time,card,exec_money,"-1" as flag
          |        from $project.inc_ads_company_zxr_person
          |        where length(gist_id) > 0 and ds>'0'
          |      ) A

+ 1 - 1
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPre39.scala

@@ -116,7 +116,7 @@ case class JudicialCaseRelationPre39(s: SparkSession,
         s"""
            |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $project.ads_judicial_case_relation_pre PARTITION(ds='$inc_last_ds',tn='$table_name')
            |SELECT  judicase_id
-           |        ,flag
+           |        ,"-1" as flag
            |        ,title
            |        ,case_type
            |        ,case_reason

+ 79 - 60
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPreNew.scala

@@ -303,8 +303,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
     spark.udf.register("name_aggs", new NameAggs(1000))
     spark.udf.register("case_reason", new CaseReasonAggs(1000))
     //预处理数据
-    val cols = Seq("flag", "date", "detail_id")
-
+    val cols = Seq("flag", "date", "detail_id","name")
     val t1 = s"$project.inc_ads_company_court_announcement"
     val t2 = s"ads_judicial_case_relation_pre"
     var t2_ds = ds
@@ -325,52 +324,42 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
     sql(
       s"""
          |INSERT ${if (isWindows) "INTO" else "OVERWRITE"} TABLE $project.$t3 partition (ds = '$t1_ds')
-         |SELECT  COALESCE(b.judicase_id,a.new_judicase_id) judicase_id
-         |        ,a.flag
-         |        ,a.title
-         |        ,a.case_type
-         |        ,a.case_reason
-         |        ,case_no_trim(a.case_no) as case_no
-         |        ,a.court_name
-         |        ,a.case_stage
-         |        ,case_label(a.flag) lable
-         |        ,map_2_json(${getStrToMap(cols)}) as detail
-         |        ,a.yg_name
-         |        ,a.bg_name
-         |        ,a.date
-         |        ,a.detail_id
-         |        ,a.case_amt
-         |FROM    (
-         |  select
-         |     judicase_id
-         |     ,flag
-         |     ,title
-         |     ,case_type
-         |     ,case_reason
-         |     ,case_no_trim(case_no) as case_no
-         |     ,court_name
-         |     ,case_stage
-         |     ,replace_char(yg_name) as yg_name
-         |     ,replace_char(bg_name) as bg_name
-         |     ,date
-         |     ,detail_id
-         |     ,case_amt
-         |     ,md5(CLEANUP(case_no_trim(case_no))) as new_judicase_id
-         |  from $project.$t2
-         |  where ds= '$t2_ds' and tn not in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
-         |        and case_no_trim(case_no) is not null
-         |        and date is not null and length(date) = 19
-         |) a
-         |LEFT JOIN (
-         |  select case_no_trim(case_no) as case_no,max(judicase_id) judicase_id
-         |  from $project.$t2
-         |  where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
-         |  and case_no_trim(case_no) is not null
-         |  group by case_no
-         |) b
-         |ON  CLEANUP(a.case_no) = CLEANUP(b.case_no)
-         |union all
-         |SELECT   judicase_id
+         |SELECT
+         |   judicase_id
+         |   ,flag
+         |   ,title
+         |   ,case_type
+         |   ,case_reason
+         |   ,case_no
+         |   ,court_name
+         |   ,case_stage
+         |   ,lable
+         |   ,map_2_json(${getStrToMap(cols)}) as detail
+         |   ,yg_name
+         |   ,bg_name
+         |   ,date
+         |   ,detail_id
+         |   ,case_amt
+         |FROM
+         |(
+         |   SELECT  COALESCE(b.judicase_id,a.new_judicase_id) judicase_id
+         |           ,a.flag
+         |           ,a.title
+         |           ,a.case_type
+         |           ,a.case_reason
+         |           ,case_no_trim(a.case_no) as case_no
+         |           ,a.court_name
+         |           ,a.case_stage
+         |           ,case_label(a.flag) lable
+         |           ,a.yg_name
+         |           ,a.bg_name
+         |           ,a.date
+         |           ,a.detail_id
+         |           ,a.case_amt
+         |           ,a.bg_name as name
+         |   FROM    (
+         |     SELECT
+         |        judicase_id
          |        ,flag
          |        ,title
          |        ,case_type
@@ -378,17 +367,46 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
          |        ,case_no_trim(case_no) as case_no
          |        ,court_name
          |        ,case_stage
-         |        ,case_label(flag) lable
-         |        ,map_2_json(${getStrToMap(cols)}) as detail
          |        ,replace_char(yg_name) as yg_name
          |        ,replace_char(bg_name) as bg_name
          |        ,date
          |        ,detail_id
          |        ,case_amt
-         |from $project.$t2
-         |where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
-         |      and case_no_trim(case_no) is not null
-         |      and date is not null and length(date) = 19
+         |        ,md5(CLEANUP(case_no_trim(case_no))) as new_judicase_id
+         |     FROM $project.$t2
+         |     WHERE ds= '$t2_ds' and tn not in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
+         |           and case_no_trim(case_no) is not null
+         |           and date is not null and length(date) = 19
+         |   ) a
+         |   LEFT JOIN (
+         |     SELECT case_no_trim(case_no) as case_no,max(judicase_id) judicase_id
+         |     FROM $project.$t2
+         |     WHERE ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
+         |     and case_no_trim(case_no) is not null
+         |     GROUP BY case_no
+         |   ) b
+         |   ON  CLEANUP(a.case_no) = CLEANUP(b.case_no)
+         |   UNION ALL
+         |   SELECT   judicase_id
+         |           ,flag
+         |           ,title
+         |           ,case_type
+         |           ,case_reason
+         |           ,case_no_trim(case_no) as case_no
+         |           ,court_name
+         |           ,case_stage
+         |           ,case_label(flag) lable
+         |           ,replace_char(yg_name) as yg_name
+         |           ,replace_char(bg_name) as bg_name
+         |           ,date
+         |           ,detail_id
+         |           ,case_amt
+         |           ,replace_char(bg_name) as name
+         |   FROM $project.$t2
+         |   WHERE ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
+         |         and case_no_trim(case_no) is not null
+         |         and date is not null and length(date) = 19
+         |)
          |""".stripMargin).show(10, false)
 
     //name 替换 cid
@@ -509,13 +527,14 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
          |(
          |SELECT  judicase_id
          |        ,max(first_title) title
-         |        ,max(case_type) case_type
+         |        ,concat_ws(',',collect_set(case_type)) case_type
          |        ,case_reason(case_reason,date,flag) case_reason
          |        ,concat_ws(',',collect_set(case_no)) case_no
          |        ,concat_ws(',',collect_set(court_name)) court_name
          |        ,last_stage(concat_ws(' ',collect_set(case_stage))) case_stage
-         |        ,concat_ws(',',max(case_type),collect_set(lable)) lable
-         |        ,concat('[',concat_ws(',',collect_set(detail)),']') detail
+         |        ,trim_black(concat_ws(',',max(case_type),collect_set(lable))) lable
+         |        -- ,concat('[',concat_ws(',',collect_set(detail)),']') detail
+         |        ,null as detail
          |        ,max(case_amt) AS case_amt
          |        ,max(date) AS date
          |        ,trim_black(concat_ws(',',collect_set(court_level))) court_level
@@ -564,12 +583,12 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
          |SELECT  md5(concat_ws('',judicase_id,CLEANUP(case_no))) id
          |        ,judicase_id
          |        ,max(first_title) title
-         |        ,max(case_type) case_type
+         |        ,case_type(max(case_no)) as case_type
          |        ,case_reason(case_reason,date,flag) case_reason
          |        ,case_no
          |        ,max(court_name) court_name
          |        ,case_stage(max(case_no)) as case_stage
-         |        ,concat_ws(',',max(case_type),collect_set(lable)) lable
+         |        ,trim_black(concat_ws(',',max(case_type),collect_set(lable))) lable
          |        ,concat('[',concat_ws(',',collect_set(detail)),']') detail
          |        ,max(last_date) last_date
          |        ,max(deleted) deleted
@@ -582,7 +601,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
          |        FROM    (
          |                   SELECT  *
          |                   FROM    $project.$t6
-         |                   WHERE   ds >= '$second_ds'
+         |                   WHERE   ds >= '$second_ds' AND length(lable) > 0
          |                )a JOIN
          |                (
          |                   select *

+ 10 - 8
src/main/scala/com/winhc/bigdata/spark/utils/BaseUtil.scala

@@ -184,6 +184,7 @@ object BaseUtil {
     }
     return hash
   }
+
   def MD5hash(content: String): String = {
     val md5 = MessageDigest.getInstance("MD5")
     val encoded = md5.digest((content).getBytes)
@@ -222,21 +223,22 @@ object BaseUtil {
 
   def caseStage(caseNo: String): String = {
     if (StringUtils.isNotBlank(caseNo)) {
-      if (StrUtil.containsAny(caseNo, "破申", "商申")) {
-        return "其它"
+      val casePre = caseType(caseNo).replaceAll("案件", "")
+      if (StrUtil.containsAny(caseNo, "破申", "商申") || casePre.contains("其它")) {
+        return "其它阶段"
       }
       if (StrUtil.containsAny(caseNo, "监", "抗", "再", "申", "提", "再")) {
-        return "再审"
+        return s"${casePre}再审"
       }
       if (StrUtil.containsAny(caseNo, "初")) {
-        return "一审"
+        return s"${casePre}一审"
       } else if (StrUtil.containsAny(caseNo, "终")) {
-        return "二审"
+        return s"${casePre}二审"
       } else if (StrUtil.containsAny(caseNo, "执")) {
-        return "执行"
+        return "首次执行"
       }
     }
-    "其它"
+    "其它阶段"
   }
 
   def lastStage(s: String): String = {
@@ -398,7 +400,7 @@ object BaseUtil {
     //    println(case_no_trim("(2015)怀执字第03601号号"))
     //    val seq = Seq("1", "3", "2", "7").mkString("\001")
     //    println(sortString(seq))
-    println(case_no_trim("(2019)鄂执7号"))
+    println(caseStage("(2019)鄂初7号"))
   }
 
 }