Browse Source

fix: 查老赖下游统一拉取历史数据

许家凯 4 years ago
parent
commit
626dbc632d

+ 59 - 19
src/main/scala/com/winhc/bigdata/spark/jobs/deadbeat/deadbeat_info.scala

@@ -214,6 +214,29 @@ case class deadbeat_info(s: SparkSession,
 
     sql(
       s"""
+         |SELECT  *
+         |FROM    winhc_eci_dev.ads_deadbeat_person
+         |WHERE   ds > '$target_last_ds'
+         |AND     card_num IS NOT NULL
+         |UNION ALL
+         |SELECT  t2.*
+         |FROM    (
+         |            SELECT  DISTINCT CONCAT(name,card_num) AS KEY
+         |            FROM    winhc_eci_dev.ads_deadbeat_person
+         |            WHERE   ds > '$target_last_ds'
+         |            AND     card_num IS NOT NULL
+         |        ) AS t1
+         |JOIN    (
+         |            SELECT  *
+         |            FROM    winhc_eci_dev.ads_deadbeat_person
+         |            WHERE   ds <= '$target_last_ds'
+         |        ) AS t2
+         |ON      t1.KEY = CONCAT(t2.name,t2.card_num)
+         |""".stripMargin)
+      .createOrReplaceTempView("tmp_ads_deadbeat_person_all_tmp")
+
+    sql(
+      s"""
          |INSERT OVERWRITE TABLE $target_tab PARTITION(ds='$org_last_ds')
          |SELECT  id
          |        ,name
@@ -237,24 +260,22 @@ case class deadbeat_info(s: SparkSession,
          |                    ,get_city_name(SUBSTRING(card_num,0,6)) AS city
          |                    ,get_county_name(SUBSTRING(card_num,0,6)) AS district
          |                    ,agg_label(rowkey,tn,deleted,publish_date) AS labels
-         |            FROM    winhc_eci_dev.ads_deadbeat_person
-         |            WHERE   ds > $target_last_ds
-         |            AND     card_num IS NOT NULL
+         |            FROM    tmp_ads_deadbeat_person_all_tmp
          |            GROUP BY name
          |                     ,card_num
-         |            UNION ALL
-         |            SELECT  md5(cleanup(CONCAT_WS('',rowkey,name))) AS id
-         |                    ,name
-         |                    ,card_num
-         |                    ,NULL AS birth_year
-         |                    ,NULL AS gender
-         |                    ,NULL AS province
-         |                    ,NULL AS city
-         |                    ,NULL AS district
-         |                    ,get_empty_map(rowkey,tn,deleted,publish_date) AS labels
-         |            FROM    winhc_eci_dev.ads_deadbeat_person
-         |            WHERE   ds > $target_last_ds
-         |            AND     card_num IS NULL
+         |---            UNION ALL
+         |---            SELECT  md5(cleanup(CONCAT_WS('',rowkey,name))) AS id
+         |---                    ,name
+         |---                    ,card_num
+         |---                    ,NULL AS birth_year
+         |---                    ,NULL AS gender
+         |---                    ,NULL AS province
+         |---                    ,NULL AS city
+         |---                    ,NULL AS district
+         |---                    ,get_empty_map(rowkey,tn,deleted,publish_date) AS labels
+         |---            FROM    winhc_eci_dev.ads_deadbeat_person
+         |---            WHERE   ds > $target_last_ds
+         |---            AND     card_num IS NULL
          |        )
          |""".stripMargin)
     //      .show(10000)
@@ -293,6 +314,27 @@ case class deadbeat_info(s: SparkSession,
 
     sql(
       s"""
+         |SELECT  *
+         |FROM    winhc_eci_dev.ads_deadbeat_company
+         |WHERE   ds > '$target_last_ds'
+         |UNION ALL
+         |SELECT  t2.*
+         |FROM    (
+         |            SELECT  DISTINCT cid
+         |            FROM    winhc_eci_dev.ads_deadbeat_company
+         |            WHERE   ds > '$target_last_ds'
+         |            AND     cid is not null
+         |        ) AS t1
+         |JOIN    (
+         |            SELECT  *
+         |            FROM    winhc_eci_dev.ads_deadbeat_company
+         |            WHERE   ds <= '$target_last_ds'
+         |        ) AS t2
+         |ON      t1.cid = t2.cid
+         |""".stripMargin).createOrReplaceTempView("all_deadbeat_tmp_company_tmp")
+
+    sql(
+      s"""
          |SELECT  t2.cid as id
          |        ,t2.cid
          |        ,t3.name
@@ -321,9 +363,7 @@ case class deadbeat_info(s: SparkSession,
          |            FROM    (
          |                        SELECT  cid
          |                                ,agg_label(rowkey,tn,deleted,publish_date) AS labels
-         |                        FROM    winhc_eci_dev.ads_deadbeat_company
-         |                        WHERE   ds > '$target_last_ds'
-         |                        AND     cid IS NOT NULL
+         |                        FROM    all_deadbeat_tmp_company_tmp
          |                        GROUP BY cid
          |                    ) AS t1
          |        ) AS t2