Browse Source

feat: 存量cid通用spark程序加入自动建表

许家凯 4 years ago
parent
commit
cfcd864234

+ 20 - 0
src/main/scala/com/winhc/bigdata/spark/utils/CompanyForCidUtils.scala

@@ -28,6 +28,26 @@ case class CompanyForCidUtils(s: SparkSession, space: String, sourceTable: Strin
 
     val cols_md5 = disCol.filter(!_.equals("new_cid"))
 
+   val ddl =  spark.table(odsTable).schema.filter(s=>{!"ds".equals(s.name)}).map(s=>{
+
+      val name = s.name
+      val dataType = s.dataType
+      s"$name ${DataTypeUtils.getDataType(dataType)} COMMENT '${s.getComment().getOrElse("")}'\n"
+    }).mkString(",")
+
+
+    sql(
+      s"""
+         |CREATE TABLE IF NOT EXISTS ${adsTable}
+         |(
+         |    rowkey  STRING COMMENT 'FIELD'
+         |    ,new_cid STRING COMMENT 'FIELD'
+         |    ,$ddl
+         |)
+         |COMMENT 'TABLE COMMENT'
+         |PARTITIONED BY (ds STRING COMMENT '分区')
+         |""".stripMargin)
+
     //替换字段
     sql(
       s"""

+ 32 - 0
src/main/scala/com/winhc/bigdata/spark/utils/DataTypeUtils.scala

@@ -0,0 +1,32 @@
+package com.winhc.bigdata.spark.utils
+
+import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StringType, TimestampType}
+
+/**
+ * Maps Spark SQL data types to the column type names used in generated DDL.
+ *
+ * @Author: XuJiakai
+ * @Date: 2020/8/3 11:38
+ */
+object DataTypeUtils {
+
+  /**
+   * Returns the DDL type name for a Spark SQL data type.
+   *
+   * @param dataType Spark SQL type of a column
+   * @return one of STRING, BIGINT, DATETIME or DOUBLE
+   * @throws RuntimeException if the type has no mapping; the message names the type
+   */
+  def getDataType(dataType: DataType): String =
+    dataType match {
+      case StringType    => "STRING"
+      case LongType      => "BIGINT"
+      case TimestampType => "DATETIME"
+      case DoubleType    => "DOUBLE"
+      case _ =>
+        // Put the type name in the exception too, so the stack trace alone identifies it.
+        println(s"other type:${dataType.typeName}")
+        throw new RuntimeException(s"unsupported data type: ${dataType.typeName}")
+    }
+
+}