|
@@ -69,29 +69,19 @@ object ChangeExtract {
|
|
// Last partition of the incremental ads (inc_ads) table
|
|
// Last partition of the incremental ads (inc_ads) table
|
|
val lastDsIncAds = BaseUtil.getPartion(s"$project.inc_ads_$tableName", spark)
|
|
val lastDsIncAds = BaseUtil.getPartion(s"$project.inc_ads_$tableName", spark)
|
|
|
|
|
|
- val list = sql(s"show partitions $inc_ods_company_tb").collect.toList.map(_.getString(0).split("=")(1))
|
|
|
|
- //增量ods第一个分区
|
|
|
|
- val firstDsIncOds = list.head
|
|
|
|
- //增量ods最后一个分区//落表分区
|
|
|
|
- val lastDsIncOds = list.last
|
|
|
|
- //执行分区
|
|
|
|
- var runDs = ""
|
|
|
|
- //第一次run
|
|
|
|
- if (StringUtils.isBlank(lastDsIncAds)) {
|
|
|
|
- runDs = firstDsIncOds
|
|
|
|
- } else { //非第一次分区时间 + 1天
|
|
|
|
- runDs = BaseUtil.atDaysAfter(1, lastDsIncAds)
|
|
|
|
- }
|
|
|
|
sql(
|
|
sql(
|
|
s"""
|
|
s"""
|
|
|SELECT cid,current_cid as new_cid
|
|
|SELECT cid,current_cid as new_cid
|
|
|FROM ${inc_ods_company}
|
|
|FROM ${inc_ods_company}
|
|
- |WHERE ds >= ${runDs}
|
|
|
|
|
|
+ |WHERE ds > $lastDs_ads_all and ds < $ds
|
|
|AND cid IS NOT NULL
|
|
|AND cid IS NOT NULL
|
|
|AND current_cid IS NOT NULL
|
|
|AND current_cid IS NOT NULL
|
|
|GROUP BY cid,current_cid
|
|
|GROUP BY cid,current_cid
|
|
|""".stripMargin).createOrReplaceTempView("mapping")
|
|
|""".stripMargin).createOrReplaceTempView("mapping")
|
|
|
|
|
|
|
|
+
|
|
|
|
+ val cid = getColumns(s"$project.ads_$tableName").filter(f => f.equals("cid") || f.equals("new_cid")).max
|
|
|
|
+
|
|
val rdd = sql(
|
|
val rdd = sql(
|
|
s"""
|
|
s"""
|
|
|SELECT $primaryKey,${otherAllCols.mkString(",")},'0' as change_flag
|
|
|SELECT $primaryKey,${otherAllCols.mkString(",")},'0' as change_flag
|
|
@@ -105,10 +95,14 @@ object ChangeExtract {
|
|
| WHERE ds = $ds
|
|
| WHERE ds = $ds
|
|
| ) AS t1
|
|
| ) AS t1
|
|
|JOIN (
|
|
|JOIN (
|
|
- | SELECT concat_ws('_',coalesce(mm.new_cid,tmp.cid),split(rowkey, '_')[1]) AS rowkey
|
|
|
|
- | ,${intersectCols.filter(s => {!s.equals("rowkey") && !s.equals("cid") && !s.equals("new_cid")}).mkString(",")}
|
|
|
|
- | ,coalesce(mm.new_cid,tmp.cid) AS new_cid
|
|
|
|
- | ,tmp.cid
|
|
|
|
|
|
+ | SELECT concat_ws('_',coalesce(mm.new_cid,tmp.$cid),split(rowkey, '_')[1]) AS rowkey
|
|
|
|
+ | ,${
|
|
|
|
+ intersectCols.filter(s => {
|
|
|
|
+ !s.equals("rowkey") && !s.equals("cid") && !s.equals("new_cid")
|
|
|
|
+ }).mkString(",")
|
|
|
|
+ }
|
|
|
|
+ | ,coalesce(mm.new_cid,tmp.$cid) AS new_cid
|
|
|
|
+ | ,tmp.$cid as cid
|
|
| ,c
|
|
| ,c
|
|
| FROM (
|
|
| FROM (
|
|
| SELECT a.*
|
|
| SELECT a.*
|
|
@@ -124,7 +118,7 @@ object ChangeExtract {
|
|
| ) AS a
|
|
| ) AS a
|
|
| ) AS tmp
|
|
| ) AS tmp
|
|
| LEFT JOIN mapping mm
|
|
| LEFT JOIN mapping mm
|
|
- | ON tmp.cid = mm.cid
|
|
|
|
|
|
+ | ON tmp.$cid = mm.cid
|
|
| WHERE tmp.c = 1
|
|
| WHERE tmp.c = 1
|
|
| ) AS t2
|
|
| ) AS t2
|
|
|ON t1.${primaryKey} = t2.${primaryKey}
|
|
|ON t1.${primaryKey} = t2.${primaryKey}
|
|
@@ -142,7 +136,7 @@ object ChangeExtract {
|
|
Row(res._1, res._2, tableName, res._3, res._4, res._5, res._6, res._7, res._8, update_time, res._9)
|
|
Row(res._1, res._2, tableName, res._3, res._4, res._5, res._6, res._7, res._8, update_time, res._9)
|
|
} else {
|
|
} else {
|
|
if (map_list.size > 2) {
|
|
if (map_list.size > 2) {
|
|
- logger.error("list.size greater than 2! rowkey:" + rowkey)
|
|
|
|
|
|
+ logInfo("list.size greater than 2! rowkey:" + rowkey)
|
|
}
|
|
}
|
|
val m = getDoubleDataMap(map_list)
|
|
val m = getDoubleDataMap(map_list)
|
|
|
|
|
|
@@ -241,6 +235,7 @@ object ChangeExtract {
|
|
|winhc_eci_dev company_land_transfer rowkey 20200717 num,location
|
|
|winhc_eci_dev company_land_transfer rowkey 20200717 num,location
|
|
|winhc_eci_dev company_employment rowkey 20200717 source
|
|
|winhc_eci_dev company_employment rowkey 20200717 source
|
|
|winhc_eci_dev company_env_punishment rowkey 20200717 punish_number
|
|
|winhc_eci_dev company_env_punishment rowkey 20200717 punish_number
|
|
|
|
+ |winhc_eci_dev company_icp rowkey 20200717 domain
|
|
|""".stripMargin.replace("20200717", ds)
|
|
|""".stripMargin.replace("20200717", ds)
|
|
for (r <- rows.split("\r\n")) {
|
|
for (r <- rows.split("\r\n")) {
|
|
if (StringUtils.isNotEmpty(r)) {
|
|
if (StringUtils.isNotEmpty(r)) {
|