Kaynağa Gözat

增加去重逻辑

xufei 4 yıl önce
ebeveyn
işleme
b4d9fcac2c

+ 6 - 3
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPre10.scala

@@ -126,9 +126,10 @@ case class JudicialCaseRelationPre10(s: SparkSession, project: String
          |                WHERE   length(gist_id) >0 AND ds> '0'
          |                ) A
          |        LEFT JOIN (
-         |                SELECT  *
+         |                SELECT  id,max(judicase_id) judicase_id
          |                FROM    $project.ads_judicial_case_relation_graph
          |                WHERE   flag = 'company_zxr'
+         |                GROUP BY id
          |                  ) C
          |        ON      A.detail_id = C.id
          |        )
@@ -200,8 +201,10 @@ case class JudicialCaseRelationPre10(s: SparkSession, project: String
          |      ) B
          |      on A.cname=B.name AND A.case_no=B.case_no
          |      left join(
-         |        SELECT  *
-         |            FROM $project.ads_judicial_case_relation_graph WHERE flag = 'company_zxr_person'
+         |        SELECT  id,max(judicase_id) judicase_id
+         |        FROM $project.ads_judicial_case_relation_graph
+         |        WHERE flag = 'company_zxr_person'
+         |        GROUP BY id
          |      ) C
          |      on A.rowkey=C.id
          |      where is_id_card(A.card) OR is_id_card(B.identity_num)

+ 2 - 1
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPre39.scala

@@ -39,9 +39,10 @@ case class JudicialCaseRelationPre39(s: SparkSession,
 
       sql(
         s"""
-           |SELECT  *
+           |SELECT  id,max(judicase_id) judicase_id
            |FROM    winhc_eci_dev.ads_judicial_case_relation_graph
            |WHERE   flag = '$table_name'
+           |group by id
            |""".stripMargin)
         .createOrReplaceTempView("all_judicial_mapping_tmp")
 

+ 8 - 4
src/main/scala/com/winhc/bigdata/spark/jobs/judicial/JudicialCaseRelationPreNew.scala

@@ -152,8 +152,9 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
          |        ,case_id as detail_id
          |        ,case_amt
          |FROM    (
-         |            SELECT  *
+         |            SELECT  id,max(judicase_id) judicase_id
          |            FROM $project.ads_judicial_case_relation_graph WHERE flag = 'wenshu_detail'
+         |            GROUP BY id
          |        ) a
          |RIGHT JOIN (
          |            SELECT *,md5(cleanup(case_no_trim(case_no))) as new_judicase_id
@@ -356,13 +357,15 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
          |     ,case_amt
          |     ,md5(CLEANUP(case_no_trim(case_no))) as new_judicase_id
          |  from $project.$t2
-         |  where ds= '$t2_ds' and tn not in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person') and case_no_trim(case_no) is not null
+         |  where ds= '$t2_ds' and tn not in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
+         |        and case_no_trim(case_no) is not null
          |        and date is not null and length(date) = 19
          |) a
          |LEFT JOIN (
          |  select case_no_trim(case_no) as case_no,max(judicase_id) judicase_id
          |  from $project.$t2
-         |  where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person') and case_no_trim(case_no) is not null
+         |  where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
+         |  and case_no_trim(case_no) is not null
          |  group by case_no
          |) b
          |ON  CLEANUP(a.case_no) = CLEANUP(b.case_no)
@@ -383,7 +386,8 @@ case class JudicialCaseRelationPreNew(s: SparkSession, project: String, ds: Stri
          |        ,detail_id
          |        ,case_amt
          |from $project.$t2
-         |where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person') and case_no_trim(case_no) is not null
+         |where ds = '$t2_ds' and tn in ('wenshu','zxr','zxr_person','company_dishonest_info','company_dishonest_info_person')
+         |      and case_no_trim(case_no) is not null
          |      and date is not null and length(date) = 19
          |""".stripMargin).show(10, false)