|
@@ -12,15 +12,27 @@ import scala.annotation.meta.getter
|
|
|
*/
|
|
|
|
|
|
case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
|
- inc_ods_company: String,//每日公司基本信息增量
|
|
|
- ads_company_tb: String,//存量维度数据
|
|
|
- inc_ods_company_tb: String,//增量维度ods数据
|
|
|
- target_inc_ads_company_tb: String,//维度主表信息
|
|
|
- target_inc_ads_company_tb_list: String,//维度列表信息
|
|
|
- cols: Seq[String]) extends LoggingUtils {
|
|
|
+ project: String, //表所在工程名
|
|
|
+ mainTableName: String, //主表名(不加前辍)
|
|
|
+ sublistTableName: String, //子表(不加前辍)
|
|
|
+ dupliCols: Seq[String] // 去重列
|
|
|
+ ) extends LoggingUtils {
|
|
|
@(transient@getter) val spark: SparkSession = s
|
|
|
|
|
|
def calc(): Unit = {
|
|
|
+ val inc_ods_company = s"${project}.inc_ods_company" //每日公司基本信息增量
|
|
|
+ val ads_company_tb = s"${project}.ads_$mainTableName" //存量ads主表数据
|
|
|
+ val ads_company_tb_list = s"${project}.ads_$sublistTableName" //存量子表数据 用于读取表字段
|
|
|
+ val inc_ods_company_tb = s"${project}.inc_ods_$mainTableName" //增量数据ods 主表
|
|
|
+ val target_inc_ads_company_tb = s"${project}.inc_ads_$mainTableName" //增量数据ads 主表
|
|
|
+ val target_inc_ads_company_tb_list = s"${project}.inc_ads_$sublistTableName" //增量数据ads 子表
|
|
|
+
|
|
|
+
|
|
|
+ val sublistTableFieldName = spark.table(ads_company_tb_list).columns.filter(s => {
|
|
|
+ !s.equals("ds") && !s.equals("new_cid") && !s.equals("rowkey")
|
|
|
+ }).seq
|
|
|
+
|
|
|
+
|
|
|
println(s"${this.getClass.getSimpleName} calc start! " + new Date().toString)
|
|
|
|
|
|
val firstDs = BaseUtil.getFirstPartion("winhc_eci_dev.inc_ods_company", spark)
|
|
@@ -60,11 +72,11 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
|
|SELECT CONCAT_WS('_',new_cid,id) AS rowkey
|
|
|
| ,"0" as flag
|
|
|
| ,CAST(new_cid as string) AS new_cid
|
|
|
- | ,${columns.mkString(",")}
|
|
|
+ | ,${sublistTableFieldName.mkString(",")}
|
|
|
|FROM (
|
|
|
| SELECT
|
|
|
| *
|
|
|
- | ,ROW_NUMBER() OVER (PARTITION BY ${cols.mkString(",")} ORDER BY update_time DESC ) num
|
|
|
+ | ,ROW_NUMBER() OVER (PARTITION BY ${dupliCols.mkString(",")} ORDER BY update_time DESC ) num
|
|
|
| FROM (
|
|
|
| SELECT
|
|
|
| c.*
|
|
@@ -79,11 +91,11 @@ case class CompanyIncrForCidsUtils(s: SparkSession,
|
|
|
|SELECT CONCAT_WS('_',new_cid,id) AS rowkey
|
|
|
| ,"1" as flag
|
|
|
| ,CAST(new_cid as string) AS new_cid
|
|
|
- | ,${columns.mkString(",")}
|
|
|
+ | ,${sublistTableFieldName.mkString(",")}
|
|
|
|FROM (
|
|
|
| SELECT a.new_cid
|
|
|
| ,${columns.mkString(",")}
|
|
|
- | ,ROW_NUMBER() OVER (PARTITION BY ${cols.mkString(",")} ORDER BY update_time DESC ) num
|
|
|
+ | ,ROW_NUMBER() OVER (PARTITION BY ${dupliCols.mkString(",")} ORDER BY update_time DESC ) num
|
|
|
| FROM mapping a
|
|
|
| JOIN (
|
|
|
| SELECT new_cid AS cid
|