|
@@ -16,7 +16,7 @@ case class CompanyIncrForCidUtils(s: SparkSession,
|
|
project: String, //表所在工程名
|
|
project: String, //表所在工程名
|
|
tableName: String, //表名(不加前后缀)
|
|
tableName: String, //表名(不加前后缀)
|
|
dupliCols: Seq[String] // 去重列
|
|
dupliCols: Seq[String] // 去重列
|
|
- ) extends LoggingUtils with CompanyMapping{
|
|
|
|
|
|
+ ) extends LoggingUtils with CompanyMapping {
|
|
@(transient@getter) val spark: SparkSession = s
|
|
@(transient@getter) val spark: SparkSession = s
|
|
//主键字段
|
|
//主键字段
|
|
val rowKeyMapping =
|
|
val rowKeyMapping =
|
|
@@ -33,6 +33,44 @@ case class CompanyIncrForCidUtils(s: SparkSession,
|
|
val inc_ods_company_tb = s"${project}.inc_ods_$tableName" //增量ods表
|
|
val inc_ods_company_tb = s"${project}.inc_ods_$tableName" //增量ods表
|
|
val inc_ads_company_tb = s"${project}.inc_ads_$tableName" //增量ads表
|
|
val inc_ads_company_tb = s"${project}.inc_ads_$tableName" //增量ads表
|
|
|
|
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ //table字段
|
|
|
|
+ val columns: Seq[String] = spark.table(ads_company_tb).schema.map(_.name).filter(s => {
|
|
|
|
+ !s.equals("ds") && !s.equals("cid") && !s.equals("new_cid") && !s.equals("rowkey")
|
|
|
|
+ })
|
|
|
|
+
|
|
|
|
+ val colsSet = columns.toSet
|
|
|
|
+
|
|
|
|
+ val ddl = spark.table(inc_ods_company_tb).schema.filter(s => colsSet.contains(s.name)).map(s => {
|
|
|
|
+ val name = s.name
|
|
|
|
+ val dataType = s.dataType
|
|
|
|
+ s"$name ${DataTypeUtils.getDataType(dataType)} COMMENT '${s.getComment().getOrElse("")}'\n"
|
|
|
|
+ }).mkString(",")
|
|
|
|
+
|
|
|
|
+ println(
|
|
|
|
+ s"""
|
|
|
|
+ |CREATE TABLE IF NOT EXISTS ${inc_ads_company_tb}
|
|
|
|
+ |(
|
|
|
|
+ | rowkey STRING COMMENT 'FIELD'
|
|
|
|
+ | ,flag STRING COMMENT 'FIELD'
|
|
|
|
+ | ,new_cid STRING COMMENT 'FIELD'
|
|
|
|
+ | ,cid STRING COMMENT 'FIELD'
|
|
|
|
+ | ,$ddl
|
|
|
|
+ |)
|
|
|
|
+ |COMMENT 'TABLE COMMENT'
|
|
|
|
+ |PARTITIONED BY (ds STRING COMMENT '分区')
|
|
|
|
+ |""".stripMargin)
|
|
|
|
+
|
|
|
|
+ if (!spark.catalog.tableExists(inc_ads_company_tb)) {
|
|
|
|
+ return
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
//存量表ads最新分区
|
|
//存量表ads最新分区
|
|
val remainDs = BaseUtil.getPartion(ads_company_tb, spark)
|
|
val remainDs = BaseUtil.getPartion(ads_company_tb, spark)
|
|
|
|
|
|
@@ -72,10 +110,8 @@ case class CompanyIncrForCidUtils(s: SparkSession,
|
|
|firstDsIncOds:$firstDsIncOds
|
|
|firstDsIncOds:$firstDsIncOds
|
|
|""".stripMargin)
|
|
|""".stripMargin)
|
|
|
|
|
|
- //table字段
|
|
|
|
- val columns: Seq[String] = spark.table(ads_company_tb).schema.map(_.name).filter(s => {
|
|
|
|
- !s.equals("ds") && !s.equals("cid") && !s.equals("new_cid") && !s.equals("rowkey")
|
|
|
|
- })
|
|
|
|
|
|
+
|
|
|
|
+
|
|
|
|
|
|
//rowkey前缀匹配
|
|
//rowkey前缀匹配
|
|
val rowKeyPre = rowKeyMapping.getOrElse(tableName,"new_cid")
|
|
val rowKeyPre = rowKeyMapping.getOrElse(tableName,"new_cid")
|