Quellcode durchsuchen

fix: 司法案件关联优化

许家凯 vor 3 Jahren
Ursprung
Commit
4691967374

+ 4 - 3
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationDebtorRelation.scala

@@ -5,7 +5,8 @@ import com.winhc.bigdata.spark.utils.BaseUtil.isWindows
 import com.winhc.bigdata.spark.utils.{BaseUtil, LoggingUtils, SparkUtils}
 import org.apache.commons.lang3.StringUtils
 import org.apache.spark.broadcast.Broadcast
-import org.apache.spark.sql.{Row, SparkSession}
+import org.apache.spark.sql.SparkSession
+
 import scala.collection.mutable
 import scala.collection.mutable.ListBuffer
 
@@ -214,8 +215,8 @@ case class JudicialCaseRelationDebtorRelation(s: SparkSession, project: String,
          |,bg_name_x as bg_name
          |,deleted
          |FROM winhc_eci_dev.ads_judicial_case_relation_r1
-         |LATERAL VIEW explode(split(yg_name,',')) a AS yg_name_x
-         |LATERAL VIEW explode(split(bg_name,',')) b AS bg_name_x
+         |LATERAL VIEW OUTER explode(split(yg_name,',')) a AS yg_name_x
+         |LATERAL VIEW OUTER explode(split(bg_name,',')) b AS bg_name_x
          |WHERE compare_name(yg_name,bg_name)
          |AND (lable like '%被执行人%' or lable like '%限制高消费%'  or lable like '%失信人%')
          |AND  LENGTH(cleanup(yg_name_x)) > 4

+ 1 - 1
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPreNew.scala

@@ -462,7 +462,7 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
              |                                FROM    $project.$t3
              |                                WHERE   ds = '$t1_ds' AND length(cleanup(bg_name)) >4
              |                                ) a
-             |                        LATERAL VIEW explode(split(names,',')) t AS name
+             |                        LATERAL VIEW OUTER explode(split(names,',')) t AS name
              |                        ) e
              |                JOIN (
              |                        SELECT

+ 53 - 10
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelation_CaseAgg.scala

@@ -330,7 +330,7 @@ case class JudicialCaseRelation_CaseAgg(s: SparkSession,
     sql(
       s"""
          |SELECT  *
-         |FROM    $tmp_tab lateral view explode(split(connect_case_no,'\\n')) t as single_connect_case_no
+         |FROM    $tmp_tab lateral view OUTER explode(split(connect_case_no,'\\n')) t as single_connect_case_no
          |""".stripMargin)
       //      .cache()
       .createTempView(s"explode_$tmp_tab")
@@ -392,7 +392,7 @@ case class JudicialCaseRelation_CaseAgg(s: SparkSession,
          |                    ,case_attribute
          |                    ,party
          |            FROM    dwd_judicial_case_tmp
-         |            LATERAL VIEW explode(split(concat_ws('\\n',case_attribute['yg_name'],case_attribute['bg_name']) ,'\\n')) t AS party
+         |            LATERAL VIEW OUTER explode(split(concat_ws('\\n',case_attribute['yg_name'],case_attribute['bg_name']) ,'\\n')) t AS party
          |            WHERE   main_case_no = 1
          |        ) AS t1
          |WHERE   length(t1.party) > 4
@@ -407,7 +407,7 @@ case class JudicialCaseRelation_CaseAgg(s: SparkSession,
          |                    ,case_attribute
          |                    ,party
          |            FROM    dwd_judicial_case_tmp
-         |            LATERAL VIEW explode(split(concat_ws('\\n',case_attribute['yg_name'],case_attribute['bg_name']) ,'\\n')) t AS party
+         |            LATERAL VIEW OUTER explode(split(concat_ws('\\n',case_attribute['yg_name'],case_attribute['bg_name']) ,'\\n')) t AS party
          |            WHERE   tn in ('company_dishonest_info','company_dishonest_info_person','company_zxr','company_zxr_person')
          |        ) AS t1
          |GROUP BY case_no
@@ -440,21 +440,64 @@ case class JudicialCaseRelation_CaseAgg(s: SparkSession,
          |        ,t2.case_no AS case_no_2
          |        ,t1.tn AS tn_1
          |        ,t2.tn AS tn_2
-         |        ,1 as connect_type
-         |        ,str_sort(concat_ws('',t1.id,t1.tn),concat_ws('',t2.id,t2.tn)) as xjk_sorted
+         |        ,1 AS connect_type
+         |        ,str_sort(
+         |            concat_ws('',t1.id,t1.tn)
+         |            ,concat_ws('',t2.id,t2.tn)
+         |        ) AS xjk_sorted
          |FROM    (
-         |select * from dwd_judicial_case_tmp where main_case_no = 1 and tn = 'wenshu_detail'
-         |) AS t1
+         |            SELECT  *
+         |            FROM    dwd_judicial_case_tmp
+         |            WHERE   main_case_no = 1
+         |            AND     tn = 'wenshu_detail'
+         |        ) AS t1
          |FULL JOIN (
+         |              SELECT  *
+         |              FROM    dwd_judicial_case_tmp
+         |              WHERE   main_case_no = 0
+         |              UNION ALL
+         |              SELECT  *
+         |              FROM    dwd_judicial_case_tmp
+         |              WHERE   main_case_no = 1
+         |              AND     tn <> 'wenshu_detail'
+         |          ) AS t2
+         |ON      t1.case_no = t2.case_no
+         |AND     t1.id <> t2.id
+         |AND     case_equ(t1.case_attribute , t2.case_attribute,t1.tn,t2.tn)
          |
-         |select * from dwd_judicial_case_tmp where main_case_no = 0
          |UNION ALL
-         |select * from dwd_judicial_case_tmp where main_case_no = 1 and tn <> 'wenshu_detail'
          |
-         |) AS t2
+         |SELECT  t1.id AS id_1
+         |        ,t2.id AS id_2
+         |        ,t1.case_no AS case_no_1
+         |        ,t2.case_no AS case_no_2
+         |        ,t1.tn AS tn_1
+         |        ,t2.tn AS tn_2
+         |        ,1 AS connect_type
+         |        ,str_sort(
+         |            concat_ws('',t1.id,t1.tn)
+         |            ,concat_ws('',t2.id,t2.tn)
+         |        ) AS xjk_sorted
+         |FROM    (
+         |            SELECT  *
+         |            FROM    dwd_judicial_case_tmp
+         |            WHERE   main_case_no = 0
+         |            AND     tn = 'wenshu_detail'
+         |        ) AS t1
+         |FULL JOIN (
+         |              SELECT  *
+         |              FROM    dwd_judicial_case_tmp
+         |              WHERE   main_case_no = 1
+         |              UNION ALL
+         |              SELECT  *
+         |              FROM    dwd_judicial_case_tmp
+         |              WHERE   main_case_no = 0
+         |              AND     tn <> 'wenshu_detail'
+         |          ) AS t2
          |ON      t1.case_no = t2.case_no
          |AND     t1.id <> t2.id
          |AND     case_equ(t1.case_attribute , t2.case_attribute,t1.tn,t2.tn)
+         |
          |""".stripMargin)
       .createTempView("connect_tmp_2")