|
@@ -214,6 +214,29 @@ case class deadbeat_info(s: SparkSession,
|
|
|
|
|
|
// Stage every ads_deadbeat_person row needed to rebuild the affected people:
//   1. rows from partitions newer than $target_last_ds that have a card_num, and
//   2. rows from partitions up to and including $target_last_ds whose
//      (name, card_num) pair also appears in (1).
// The pair is matched column by column instead of through CONCAT(name, card_num):
// a concatenated key cannot tell ('ab', 'c') from ('a', 'bc'), so it could pull
// the history of an unrelated person into the result.
sql(
  s"""
     |SELECT *
     |FROM winhc_eci_dev.ads_deadbeat_person
     |WHERE ds > '$target_last_ds'
     |AND card_num IS NOT NULL
     |UNION ALL
     |SELECT t2.*
     |FROM (
     |    SELECT DISTINCT name
     |           ,card_num
     |    FROM winhc_eci_dev.ads_deadbeat_person
     |    WHERE ds > '$target_last_ds'
     |    AND card_num IS NOT NULL
     |) AS t1
     |JOIN (
     |    SELECT *
     |    FROM winhc_eci_dev.ads_deadbeat_person
     |    WHERE ds <= '$target_last_ds'
     |) AS t2
     |ON t1.name = t2.name
     |AND t1.card_num = t2.card_num
     |""".stripMargin)
  .createOrReplaceTempView("tmp_ads_deadbeat_person_all_tmp")
|
|
|
+
|
|
|
+ sql(
|
|
|
+ s"""
|
|
|
|INSERT OVERWRITE TABLE $target_tab PARTITION(ds='$org_last_ds')
|
|
|
|SELECT id
|
|
|
| ,name
|
|
@@ -237,24 +260,22 @@ case class deadbeat_info(s: SparkSession,
|
|
|
| ,get_city_name(SUBSTRING(card_num,0,6)) AS city
|
|
|
| ,get_county_name(SUBSTRING(card_num,0,6)) AS district
|
|
|
| ,agg_label(rowkey,tn,deleted,publish_date) AS labels
|
|
|
- | FROM winhc_eci_dev.ads_deadbeat_person
|
|
|
- | WHERE ds > $target_last_ds
|
|
|
- | AND card_num IS NOT NULL
|
|
|
+ | FROM tmp_ads_deadbeat_person_all_tmp
|
|
|
| GROUP BY name
|
|
|
| ,card_num
|
|
|
- | UNION ALL
|
|
|
- | SELECT md5(cleanup(CONCAT_WS('',rowkey,name))) AS id
|
|
|
- | ,name
|
|
|
- | ,card_num
|
|
|
- | ,NULL AS birth_year
|
|
|
- | ,NULL AS gender
|
|
|
- | ,NULL AS province
|
|
|
- | ,NULL AS city
|
|
|
- | ,NULL AS district
|
|
|
- | ,get_empty_map(rowkey,tn,deleted,publish_date) AS labels
|
|
|
- | FROM winhc_eci_dev.ads_deadbeat_person
|
|
|
- | WHERE ds > $target_last_ds
|
|
|
- | AND card_num IS NULL
|
|
|
+ |--- UNION ALL
|
|
|
+ |--- SELECT md5(cleanup(CONCAT_WS('',rowkey,name))) AS id
|
|
|
+ |--- ,name
|
|
|
+ |--- ,card_num
|
|
|
+ |--- ,NULL AS birth_year
|
|
|
+ |--- ,NULL AS gender
|
|
|
+ |--- ,NULL AS province
|
|
|
+ |--- ,NULL AS city
|
|
|
+ |--- ,NULL AS district
|
|
|
+ |--- ,get_empty_map(rowkey,tn,deleted,publish_date) AS labels
|
|
|
+ |--- FROM winhc_eci_dev.ads_deadbeat_person
|
|
|
+ |--- WHERE ds > $target_last_ds
|
|
|
+ |--- AND card_num IS NULL
|
|
|
| )
|
|
|
|""".stripMargin)
|
|
|
// .show(10000)
|
|
@@ -293,6 +314,27 @@ case class deadbeat_info(s: SparkSession,
|
|
|
|
|
|
// Stage every ads_deadbeat_company row needed to rebuild the affected companies:
//   1. rows from partitions newer than $target_last_ds that have a cid, and
//   2. rows from partitions up to and including $target_last_ds whose cid also
//      appears in (1).
// The cid IS NOT NULL filter is applied to branch (1) as well: the query that
// reads this view groups by cid and no longer filters NULLs itself, so without
// the filter all NULL-cid rows would be aggregated into a single NULL group.
sql(
  s"""
     |SELECT *
     |FROM winhc_eci_dev.ads_deadbeat_company
     |WHERE ds > '$target_last_ds'
     |AND cid IS NOT NULL
     |UNION ALL
     |SELECT t2.*
     |FROM (
     |    SELECT DISTINCT cid
     |    FROM winhc_eci_dev.ads_deadbeat_company
     |    WHERE ds > '$target_last_ds'
     |    AND cid IS NOT NULL
     |) AS t1
     |JOIN (
     |    SELECT *
     |    FROM winhc_eci_dev.ads_deadbeat_company
     |    WHERE ds <= '$target_last_ds'
     |) AS t2
     |ON t1.cid = t2.cid
     |""".stripMargin).createOrReplaceTempView("all_deadbeat_tmp_company_tmp")
|
|
|
+
|
|
|
+ sql(
|
|
|
+ s"""
|
|
|
|SELECT t2.cid as id
|
|
|
| ,t2.cid
|
|
|
| ,t3.name
|
|
@@ -321,9 +363,7 @@ case class deadbeat_info(s: SparkSession,
|
|
|
| FROM (
|
|
|
| SELECT cid
|
|
|
| ,agg_label(rowkey,tn,deleted,publish_date) AS labels
|
|
|
- | FROM winhc_eci_dev.ads_deadbeat_company
|
|
|
- | WHERE ds > '$target_last_ds'
|
|
|
- | AND cid IS NOT NULL
|
|
|
+ | FROM all_deadbeat_tmp_company_tmp
|
|
|
| GROUP BY cid
|
|
|
| ) AS t1
|
|
|
| ) AS t2
|