|
@@ -46,6 +46,11 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
!s.equals("ds")
|
|
!s.equals("ds")
|
|
}).seq
|
|
}).seq
|
|
|
|
|
|
|
|
+
|
|
|
|
+ val subRes = spark.table(inc_ads_company_tb).columns.filter(s => {
|
|
|
|
+ !s.equals("ds")
|
|
|
|
+ }).seq
|
|
|
|
+
|
|
//存量表ads最新分区
|
|
//存量表ads最新分区
|
|
val remainDs = BaseUtil.getPartion(ads_company_tb, spark)
|
|
val remainDs = BaseUtil.getPartion(ads_company_tb, spark)
|
|
|
|
|
|
@@ -87,9 +92,15 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
|
|
|
|
//table字段
|
|
//table字段
|
|
val columns: Seq[String] = spark.table(ads_company_tb).schema.map(_.name).filter(s => {
|
|
val columns: Seq[String] = spark.table(ads_company_tb).schema.map(_.name).filter(s => {
|
|
- !s.equals("ds") && !s.equals("cid") && !s.equals("new_cid") && !s.equals("rowkey") && !s.equals("cids") && !s.equals("new_cids")
|
|
|
|
|
|
+ !s.equals("ds") && !s.equals("cid") && !s.equals("new_cid") && !s.equals("rowkey") && !s.equals("cids") && !s.equals("new_cids") && !s.equals("new_litigant_cids") && !s.equals("litigant_cids")
|
|
})
|
|
})
|
|
|
|
|
|
|
|
+ //判断字段是否有重复字段
|
|
|
|
+ var f= "flag"
|
|
|
|
+ if(sublistTableFieldName.contains(f)){
|
|
|
|
+ f = "update_flag"
|
|
|
|
+ }
|
|
|
|
+
|
|
//mapping 映射关系
|
|
//mapping 映射关系
|
|
sql(
|
|
sql(
|
|
s"""
|
|
s"""
|
|
@@ -114,20 +125,20 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
|
|
|
|
|
|
|
|
//替换cid,去重,复制老数据
|
|
//替换cid,去重,复制老数据
|
|
- val df1 = sql(
|
|
|
|
|
|
+ sql(
|
|
s"""
|
|
s"""
|
|
|INSERT OVERWRITE TABLE $inc_ads_company_tb_list PARTITION(ds='$lastDsIncOds')
|
|
|INSERT OVERWRITE TABLE $inc_ads_company_tb_list PARTITION(ds='$lastDsIncOds')
|
|
|SELECT
|
|
|SELECT
|
|
| ${sublistRes.mkString(",")}
|
|
| ${sublistRes.mkString(",")}
|
|
|FROM (
|
|
|FROM (
|
|
| SELECT CONCAT_WS( '_',new_cid,md5(cleanup(CONCAT_WS('',${cols_md5.mkString(",")})))) AS rowkey
|
|
| SELECT CONCAT_WS( '_',new_cid,md5(cleanup(CONCAT_WS('',${cols_md5.mkString(",")})))) AS rowkey
|
|
- | ,flag
|
|
|
|
|
|
+ | ,$f
|
|
| ,new_cid
|
|
| ,new_cid
|
|
| ,cid
|
|
| ,cid
|
|
| ,${sublistTableFieldName.mkString(",")}
|
|
| ,${sublistTableFieldName.mkString(",")}
|
|
| ,ROW_NUMBER() OVER (PARTITION BY cleanup(CONCAT_WS('',${dupliCols.mkString(",")})) ORDER BY update_time DESC ) num
|
|
| ,ROW_NUMBER() OVER (PARTITION BY cleanup(CONCAT_WS('',${dupliCols.mkString(",")})) ORDER BY update_time DESC ) num
|
|
| FROM (
|
|
| FROM (
|
|
- | SELECT "0" AS flag
|
|
|
|
|
|
+ | SELECT "0" AS $f
|
|
| ,CAST(new_cid AS STRING) AS new_cid
|
|
| ,CAST(new_cid AS STRING) AS new_cid
|
|
| ,CAST(cid AS STRING) AS cid
|
|
| ,CAST(cid AS STRING) AS cid
|
|
| ,${sublistTableFieldName.mkString(",")}
|
|
| ,${sublistTableFieldName.mkString(",")}
|
|
@@ -142,7 +153,7 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
| ) e
|
|
| ) e
|
|
| ) f
|
|
| ) f
|
|
| UNION ALL
|
|
| UNION ALL
|
|
- | SELECT "1" AS flag
|
|
|
|
|
|
+ | SELECT "1" AS $f
|
|
| ,CAST(new_cid AS STRING) AS new_cid
|
|
| ,CAST(new_cid AS STRING) AS new_cid
|
|
| ,CAST(cid AS STRING) AS cid
|
|
| ,CAST(cid AS STRING) AS cid
|
|
| ,${sublistTableFieldName.mkString(",")}
|
|
| ,${sublistTableFieldName.mkString(",")}
|
|
@@ -175,15 +186,15 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
sql(
|
|
sql(
|
|
s"""
|
|
s"""
|
|
|INSERT OVERWRITE TABLE $inc_ads_company_tb PARTITION(ds='$lastDsIncOds')
|
|
|INSERT OVERWRITE TABLE $inc_ads_company_tb PARTITION(ds='$lastDsIncOds')
|
|
- |SELECT md5(cleanup(CONCAT_WS('',${cols_md5.mkString(",")}))) AS rowkey,
|
|
|
|
- | cids,${columns.mkString(",")}
|
|
|
|
|
|
+ |SELECT ${subRes.mkString(",")}
|
|
|FROM (
|
|
|FROM (
|
|
- | SELECT cids,${columns.mkString(",")}
|
|
|
|
|
|
+ | SELECT $split_cols,${sublistTableFieldName.mkString(",")}
|
|
| ,ROW_NUMBER() OVER (PARTITION BY id ORDER BY update_time DESC ) num
|
|
| ,ROW_NUMBER() OVER (PARTITION BY id ORDER BY update_time DESC ) num
|
|
|
|
+ | ,md5(cleanup(CONCAT_WS('',${cols_md5.mkString(",")}))) AS rowkey
|
|
| FROM ${inc_ods_company_tb}
|
|
| FROM ${inc_ods_company_tb}
|
|
| WHERE ds >= ${runDs}
|
|
| WHERE ds >= ${runDs}
|
|
- | AND cids IS NOT NULL
|
|
|
|
- | AND trim(cids) <> ''
|
|
|
|
|
|
+ | AND $split_cols IS NOT NULL
|
|
|
|
+ | AND trim($split_cols) <> ''
|
|
| ) a
|
|
| ) a
|
|
|WHERE num = 1
|
|
|WHERE num = 1
|
|
|""".stripMargin)
|
|
|""".stripMargin)
|
|
@@ -200,7 +211,7 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
).syn()
|
|
).syn()
|
|
|
|
|
|
//同步增量主表数据
|
|
//同步增量主表数据
|
|
- val cols = columns ++ Seq("cids")
|
|
|
|
|
|
+ val cols = columns ++ Seq(s"$split_cols")
|
|
MaxComputer2Phoenix(
|
|
MaxComputer2Phoenix(
|
|
spark,
|
|
spark,
|
|
cols,
|
|
cols,
|